From 8f7399de58d3b197b5fd0c77149459dc6da4cb93 Mon Sep 17 00:00:00 2001 From: Dierk Date: Wed, 13 May 2026 11:31:47 +0200 Subject: [PATCH] Initial implementation of generic Excel-to-DB import tool Supports .xls and .xlsx, Oracle and PostgreSQL via SQLAlchemy. Includes CLI (run/inspect/generate-config), YAML config, auto schema detection, and append/replace/upsert modes. Co-Authored-By: Claude Sonnet 4.6 --- .venv/bin/python | 1 + .venv/bin/python3 | 1 + .venv/bin/python3.12 | 1 + .venv/lib64 | 1 + .venv/pyvenv.cfg | 5 + examples/import_config.yaml | 40 ++++++ excel_import/__init__.py | 4 + .../__pycache__/__init__.cpython-312.pyc | Bin 0 -> 292 bytes .../__pycache__/config.cpython-312.pyc | Bin 0 -> 2711 bytes .../__pycache__/importer.cpython-312.pyc | Bin 0 -> 8643 bytes .../__pycache__/reader.cpython-312.pyc | Bin 0 -> 3201 bytes .../__pycache__/schema.cpython-312.pyc | Bin 0 -> 4658 bytes excel_import/cli.py | 87 ++++++++++++ excel_import/config.py | 48 +++++++ excel_import/importer.py | 127 ++++++++++++++++++ excel_import/reader.py | 49 +++++++ excel_import/schema.py | 72 ++++++++++ pyproject.toml | 24 ++++ tests/__init__.py | 0 tests/__pycache__/__init__.cpython-312.pyc | Bin 0 -> 161 bytes .../test_config.cpython-312-pytest-9.0.3.pyc | Bin 0 -> 6348 bytes ...test_importer.cpython-312-pytest-9.0.3.pyc | Bin 0 -> 6857 bytes .../test_reader.cpython-312-pytest-9.0.3.pyc | Bin 0 -> 9088 bytes tests/test_config.py | 39 ++++++ tests/test_importer.py | 80 +++++++++++ tests/test_reader.py | 84 ++++++++++++ 26 files changed, 663 insertions(+) create mode 120000 .venv/bin/python create mode 120000 .venv/bin/python3 create mode 120000 .venv/bin/python3.12 create mode 120000 .venv/lib64 create mode 100644 .venv/pyvenv.cfg create mode 100644 examples/import_config.yaml create mode 100644 excel_import/__init__.py create mode 100644 excel_import/__pycache__/__init__.cpython-312.pyc create mode 100644 excel_import/__pycache__/config.cpython-312.pyc create mode 100644 excel_import/__pycache__/importer.cpython-312.pyc create mode 100644 excel_import/__pycache__/reader.cpython-312.pyc create mode 100644 excel_import/__pycache__/schema.cpython-312.pyc create mode 100644 excel_import/cli.py create mode 100644 excel_import/config.py create mode 100644 excel_import/importer.py create mode 100644 excel_import/reader.py create mode 100644 excel_import/schema.py create mode 100644 pyproject.toml create mode 100644 tests/__init__.py create mode 100644 tests/__pycache__/__init__.cpython-312.pyc create mode 100644 tests/__pycache__/test_config.cpython-312-pytest-9.0.3.pyc create mode 100644 tests/__pycache__/test_importer.cpython-312-pytest-9.0.3.pyc create mode 100644 tests/__pycache__/test_reader.cpython-312-pytest-9.0.3.pyc create mode 100644 tests/test_config.py create mode 100644 tests/test_importer.py create mode 100644 tests/test_reader.py diff --git a/.venv/bin/python b/.venv/bin/python new file mode 120000 index 0000000..b8a0adb --- /dev/null +++ b/.venv/bin/python @@ -0,0 +1 @@ +python3 \ No newline at end of file diff --git a/.venv/bin/python3 b/.venv/bin/python3 new file mode 120000 index 0000000..ae65fda --- /dev/null +++ b/.venv/bin/python3 @@ -0,0 +1 @@ +/usr/bin/python3 \ No newline at end of file diff --git a/.venv/bin/python3.12 b/.venv/bin/python3.12 new file mode 120000 index 0000000..b8a0adb --- /dev/null +++ b/.venv/bin/python3.12 @@ -0,0 +1 @@ +python3 \ No newline at end of file diff --git a/.venv/lib64 b/.venv/lib64 new file mode 120000 index 0000000..7951405 --- /dev/null +++ b/.venv/lib64 @@ -0,0 +1 @@ +lib \ No newline at end of file diff --git a/.venv/pyvenv.cfg b/.venv/pyvenv.cfg new file mode 100644 index 0000000..c57c6f3 --- /dev/null +++ b/.venv/pyvenv.cfg @@ -0,0 +1,5 @@ +home = /usr/bin +include-system-site-packages = false +version = 3.12.3 +executable = /usr/bin/python3.12 +command = /usr/bin/python3 -m venv /home/dierk/Programmierung/claude/excel-import/.venv diff --git a/examples/import_config.yaml b/examples/import_config.yaml new file mode 100644 index 0000000..7c61e6b --- /dev/null +++ b/examples/import_config.yaml @@ -0,0 +1,40 @@ +# SQLAlchemy DSN — Beispiele: +# PostgreSQL: postgresql+psycopg2://user:pass@localhost/mydb +# Oracle: oracle+oracledb://user:pass@localhost:1521/?service_name=MYDB +dsn: "postgresql+psycopg2://user:pass@localhost/mydb" + +default_varchar_length: 255 + +sheets: + - sheet: "Artikel" # Sheet-Name oder Index (0, 1, ...) + header_row: 0 # 0-basierter Zeilenindex der Kopfzeile + skip_rows: 0 # Zeilen vor der Kopfzeile überspringen + target_table: "artikel" + mode: "replace" # append | replace | upsert + upsert_keys: [] + columns: + - source: "Artikelnummer" + target: "artikelnummer" + dtype: "VARCHAR(50)" + - source: "Bezeichnung" + target: "bezeichnung" + - source: "Preis" + target: "preis" + dtype: "NUMERIC(12,2)" + - source: "Interne Notiz" + target: "interne_notiz" + skip: true # Spalte nicht importieren + + - sheet: "Kunden" + header_row: 0 + target_table: "kunden" + mode: "upsert" + upsert_keys: ["kundennummer"] + columns: + - source: "Kundennummer" + target: "kundennummer" + dtype: "VARCHAR(20)" + - source: "Name" + target: "name" + - source: "E-Mail" + target: "email" diff --git a/excel_import/__init__.py b/excel_import/__init__.py new file mode 100644 index 0000000..1e35936 --- /dev/null +++ b/excel_import/__init__.py @@ -0,0 +1,4 @@ +from .reader import ExcelReader +from .importer import Importer + +__all__ = ["ExcelReader", "Importer"] diff --git a/excel_import/__pycache__/__init__.cpython-312.pyc b/excel_import/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6be3ca2dc58fce48237c646ae55a8669a397c8e7 GIT binary patch literal 292 zcmX@j%ge<81kapUveJO`V-N=hn4pZ$DnQ0`h7^Vr#vF!R#wbQc5SuB7DVI5l8OUZ% zVM%9-VyR@+WP8a7RI15%i`%s#IW;FJH8CZ%2*~B|%q_?-DoHIWVg||=u>c7_P1aj% zMPLPB!CM@eFf}0QTkP@ii8(p(@hcfV1Gx;p-1Rf^b5r$GGE(=lmP&S{7v%! literal 0 HcmV?d00001 diff --git a/excel_import/__pycache__/config.cpython-312.pyc b/excel_import/__pycache__/config.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..340f9e09e2726e1acbf93242f41566aed72fbbe1 GIT binary patch literal 2711 zcmZ`*U2Gf25#GI99(hO75+#|G8M{?2%ktH&)WkOI*nx{$c5FAX5Wr4>&=3x1-b$i; zO7$|(ZW1p?H8{op<|k7=I@^q~r<7WB481GF!BldS><(iZK^ z9Vy8_U4q}u?Cj0W&fo4I(rJUh_|F+O_pcZse@CV}fX6{Me-5}x9O6)ylxcxdq!m{w zs|B^p3M}MRH&)gPT1YcDUQQGeAsut|g3eWioF|U<9&zG#lu#|X6Fo@5=tvVF)t`|j zJ5mFr$!Dagg_xQ80yUXbChbba3+$leReV#Cx?=}+(Y1YF#->Z$bzlj5!wzQQ8GpSL zaACWX#f}x>LyZ`^c^ZgS!byQTq@cVBJLjsSyaKZ!rG^yj+=;=w7IN`I%t-*JBe%dz zL#8J@w^puPva8inWk#xgAf)DdHBsbJ3v4mNgULk$W{2uSlU{|(m=i2i`Kyv$^E_8F z|9YuvYBFJ26}!wWOB$9{_MDoFbjq^cuGwx!(qmaW_HJ1s4MvC_fRD*0Ao=p#_^emv z<4%c->*H?-Z${YVGBCBu%sBX{=J0X;PLaFcES0OC2qMPnFh#F2U78uIE(jgrxvoL|sFuIutRxST~UR|rpV{jPC`fAb zKC@pYFVAuwOoW>jn5~O?6s1xnkTD;HQbpRB<+j6xCA@j5W4Ms=B@tLuG8M(x3hZkx z|Cq`ihfmuzH?XGdV&I8|#eTQs2UnlSk|`N4hFRlF4f2LpoW+BkRJ}SHO^kkHXjJg3 zYZp0^wW`lWFghg}pEZXC(X3z1*1d;p$}mXLO;R_2zwE}f`D}| z8R`*VLw*?H0Kx!57|$$HgIz$){uKa7hTI^ez{@Q**v{sf-(68#EZ5$hZC+gRTP)ii z%r{?O8E&!s{ozBO46F~IZk}ID54G6o_K{P~vunn;TI|&Qef#enUEg=2`P#~!b#@~9 zAb#ul354H$GJ(3a3V~)kc30g>2}~ZflS*14?!Lz zpkC-abn|DBx~nAUBrqX$+O_-`X($1#iN+l3QrpkE(p}q7p*zE`+bb!VAr1BebuPZW zx=T59W^3J9GIf=ZMywv2!vcS5ILgkI?`uWii^>%AnT8f5yNeyYMC#f{^e6O&vfRT+ zBi_*dk3Mxg;afPRE~@$uN15_;g;ORV)<7Gb8SG_-kQ8k52V%~3&MEttwS zEe@gRFv8bSr0$pq-@>{v6OwsQ(2_0KWf$6oJ*WedNxnBAGiV_WlB-2NW{n&cnp z%*ufu7d8mh58Qn5uYK9ou^VHzGOfPhrHRKXOm1kTZ(vDlX9iYtH*z-zZ@tvY9DGDn z{d;ujJMAM!e{<~D$3EHf>6zA%^MB4>xNZNm@mbEi^>!;~u4OMQod;;=U--qy2P3!A zw?!-e(h?jPeeYlT(WMU$f0n;;o37=r+&ljJWGjE=PpLOI;;8qbL3;b!z5OfaZW!(1 zgTFfd%j0W@CqBL08ot=>-}BEn>oqnA(~ZadXdhbs??(p7U8FwtxaI7blj?7G_f2Tp z?@q=h)cB;y#1Rm5Ph7kRNKNpdCMt`&Mt8bs)HI%MmSLa7#wbP*!l+`vWiq^AlzA}g zIpPG$o@$b#IER2;);|gWO;S(2d*S}j$USF$=v4F7wbVXnd+q%rYb<|rqQyo&kM%xM zcEyZMm1voVNJUK$)tG6#2Ta$%Zny^sOk$bby6JFVU~`CicpMYQEnO2DF4Ev~0rwDr zZ9t46VAe!UJT&r2!@o?t5?wYLq{O z^UQc`LY-UPY6twz`~C5J{@!Q)+3B=V@cdgJJM|McMg0y7`on4&dh;J4vqbR}Plu@p zJxZ)v_h*iFn7oYTg>&5vd%nq$!3Hn5hnZQ^mKi^CMa-jaM_&a};lT zo#O2uG5NY0zKnOg&W_jeF5U_6x>?3o{(Efn(TY6~jmD&a6pBT~VJNfD2c*kBqv8w_ zAs`8UAvzg~3JNO;*CoYrN{|Bm0V$vu#{!qaf?^Iu#c3fZ!9Wva1tR?(cj99d$BD>v zj7WX4=tO8zv5#IBgnR)P9k?D8!XrX}7YG!cmu5m?-XDyGXChHCsBK$I^Jkzpk3xBg z5~y*Sr-0fPo`#bccq`Aqo8@i10p3R5&a?0~@ebYyZ!_=YP4KqNntf%8Mca}b-KftLeE(s>#&biOhNVR#0dQq120Au zlSWLS4l4@b3F3hk5%#<4@Eru|N3Q~KBkC*MDr zt=yMn|H{4zjnqT*!R@Q`NkL|blJb&9Q8KNcKEZ^jgh8gJw$c?Rf~OSK`l$;|6qR6M zgjwsgHI*5e)nzwfl#Q}MW~VG$duVx-h?^}eB^yS8;_>fkr{x0SFsD&|h6p?-qTq=g z+=w8}kf_N0KtjRwM*|Vz2uEVq#5@fJKGESbk}{Z7<%?p{Mk>yH2ftcY98!Qx3X)%) z#%CgxFhH@Xtnp8S>L@1A8xU*7OayTzEQyL$9q12EOe!ocD4Jjhr#_?#M;n02@w~nO z523UJs>M-A=BTeL>Qdv`iaklwy4#yHuUFT$bM~K{s#-v3I*Jc%HfwGT>`#N z6?hCS`A237+&0LDQk*8cq=JT&;ww+fOesFtkWuENJWXM4W(vh(+rzV0_JDBx z;1MhD+}0h6Q#IeMLyh2#>P#-)RQi-Zrru?uWxV;(tqBw8RQ-2nmmbsWWRp%`{jJh$ zm)>4{Yo{+s?L0*i=4j6&Xkt-M;su&C6=&3Q57QN9sfT`l{(z)e|LhUe zD}9bFa%CQ+ny4o!k-o;z)Jqf%e>V?ZH@rk$qkXpFc-<%(B(8;UA(0az)6(o-PP`JD zo(@GPI|v%k_-=HJTnn$7CvJkoBAOM$LG9g~YIz`!7G^pe2nt}Tu7*RB5O?$s3=ND8 zaK}bYpW@=Kv5~XGeZ5c~>;2x)0H>H{rqM&k8}ojlxJD={#c=jLkr*#L(<1tOZm?Cb z3&p5zQ89&MlapxFt_4UGcB&XPYsiM86EVdQ7NP{DiJ)>Q#!CWV1~8mLR0JEWnM#Gl zxfL@X3WNbLC@flF#VlMGf-{mp&;}_E|D~Dm6>wj;O~vfj_Up5#MwXyxDh4#83_qc; zVB@1?zdB3;lNm&?0(2maP$iW^vs{X zaX#hAI_s0ny4^Y7^_N|D9Paty8^f!$hi`TM>hLcPuQ>+RJ>IV^W%KqM_EqoEUj{#I z{QJpI#b3{UHoH1*O{lmvw+n_pdw3bB>0Lqah`w zN0vQnj`q7Q_x$x6*HedYyPDT)8s2MryDeAKo~dci)$GsI>|Zd0@!t7Zde7~u)(rzy z)s*hcHFaj1I#(N?%6Jdlt!-WI%(Xq8X?uFL_2_E#GaF{AqJG0hRqR-DEIQs9OYhCr z?^`Q>V!f(1SG6ZowP)G8OjbH`oqd_kzHC+hBD-GUUD~<0Gxg%_iWca#BrnS8{^iEy zV76-iB764{;}+O&AHyTvYox3$jTl|Gnm;wcatk7^c*#fL^1$!SpyDuEEm(Y4%JK0xtC_y(if?$;hUk(myx-kV=?k zV=4DjgjqBtdG-0G5IJmnz`>S2c^;O5gM~jIGaN7P1Lc+g9qG2j=O9P`#dWUvun2YcP0YaCK+va$j!m(aheXUpSuGuv6tv zXwYh_>|q`xl-~Ro$UG#)MK}dgEYh=d@v2djN{|TDgKZC7Q4Evx&zPb!gMMg0w>`1} zp&qz;da5AZ#V)dGsszN+*DgYEkh&u1Vs$M7$ZZ%_-PMP1o4FK<@(LS^hG%_drOeMy z_@$Vi2*DWPMS?PfzG_0X`zDRO!_>Q7|-{MKs0q39{gu@N2&$^qF=6`inf(^gps(K?1qGEB(J8n2q&FQK1 z%Sp$YW&e7)cgenJPo2z^Kb|z^+ZWC*y}bDHn-|mP)-3IJT<(QKZ^)~b+V8R*cc-%b zm!NupsHrKa*#4Bl(4`Dlf-M@XEhrptpei;6*6OSShL>b5icKjKwoQ~wrA%7sk*au^ zWtKOnDDHzT=FY4ON=cJu)LRo4**sM+YDEh5Q}fm!+$w|2g=ay8t+KTgqJ~|SK+Uhv zB7L*&5hE<6Fm>s^c)B;4o4qWRu)z*2rSNj;9muwy6>qQ>P`?7IE}l@f=rjNYYtifF z0%b-ipsR-STV*5MnBgvvuuBE>3*0oz_R{MY(df-O_7Ss7P5PQ}i(v7_pzr53Ita)$ z$Rw=SKrb~VCj-!1u}#NBX%g;s9%DoxtNv#0ZzifJZo=M@5V(j3b?}1UZh0>UB#BB%^*X}oK9FNAN~8}E4PQfITCrktlW<7r(!l=XB%0HJ5# z*UrzJU)Dr#J-K>jBzNY8%$XNfF9vcKBbkel+h-!FC*M2p_JMTs2kq~-uh5?uKQ?9> z4`yl)uGU18gOI2(iS~@A{nqHO{J-$8j$T~#v}Zkjm>OK^(2b##cg@joube9PVW3-4=|Gd6_pql;s>D`bH`xq6$5Nz~ez=fB=sU{c*I5JWJ9hJsE`kDm7^HFerV9s~35d2CQ63ilV z^I6EVm`~87uop0TYRfe(pKk#p2uKH29fRC45&~lf(Ookj926172siJDs4&xjfI?z4 zke39A>cy@p^c97EMMM->RC(3JVKM%e~82lNE1`ES+0Cmpb#-_>HmjnT&P!7nYuT25p$p zbpN-Op8IN1#8A}I&faFr-!%6&lz-+iLH4r-Odq;%6&U#sz$o~}Q8Ee$(RPTp_ES1* zbOHlLDKHQsBn7s>()s5*BLKeuV@*Lu6$3yqh+AZX>b)(p3D{!?&udPYctfdpB*ghC zoZ&n3S{~8g2>s0^`ddr)FM8qxd|N2F1JfgRV28*XT}mK-Oz{@k0;v@}E^m{qklJM% zqz>5*sqBKEsI}!$g4L z;QyJ9CrAXc`7ao%U3p+VIw?ej>(it=&OPK|JC259!9ZB-?kMz#;b8Re|4IGN9Cf>) zd!-ldWxeUnv;ZRBx%Sw;OhfnmP1K&(++I4)?EMp+$7gszABnSE?LZ_~#E7FGaQ_FN(eCJ}OuKPZbpu?YrALJ@brLuus z!=?ZKJI1|iA$?uX%B~bi?@D{X*Y1LdRDFA+YT#yyNmL99tS(9DMUs*3~$7e7&~mOV@#gK>F$m`_sf< z)2prnbH^dM>#15gym%ONWaY)Ry6&u}du|BeqgL0ma(%6?C+q2fIzYm{MPKT8*4;8U zm@fhQmoKl?9?H58%?)m_w9C8htzJ60cyi(B21RpK>s3t)QOLD8*PT0)ryy6edwuu* z^hL-$_4xYZ`_qw?Be$;Hx&Y-t+PGmi*)4N@8)cNy`WHigKJ=D%A^2u>>ev@-)BSJi zsq%v&n&H2*_SLYTQ7oi+Z%@7t)he8e+mMgv5gH(9RC$I`F_m!&i}fn|;#RdEpuh{h zdW3WnGk`k*(h<}##f$-W478i^qhdI8iJ%!&Ow)lV9}o$~6BHXTDG&}`79z9iM`ksM zu3etg714_*4aH6!`FoexQM|`#A(#>9S%k$Jv@O70_`HB?1hhDy6+V6rsUHK#B^=d? z38F&n>ZS?@I*20Hd|)}M!PRc^Luf!4C?cE!Bu~>{QH{T)%D$p@{*LneJJtUM)&CXM z{U22OSJY!)QM>M1Ym!wtYfZ*llj_P^o99g5nnr25?!PHaH^YpPHr*?)pqtgz{uk9S BO{xF@ literal 0 HcmV?d00001 diff --git a/excel_import/__pycache__/reader.cpython-312.pyc b/excel_import/__pycache__/reader.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3bdd9ab99a7ac00250ccfe76d6b74d1958cc111b GIT binary patch literal 3201 zcmbUjTWlN0aqr2eM3E9L*^(m4aUETQNJ)d*i0!CNtfv#GiS4*B9pP}{9VJr7J9h7A zivkq{g;7w207gjwN(~A*UyRY&+uNO)otd4Po!Kv2TD%B=JIy9V9-)7dMGaVILhZc>lOmFlOsObEMJbBRX_Zbf zQO2G#DjVfwR_0Vb<%+rh=aKoVY}8GoaU{F0BH4X|cBDl;Sw`@FM-~EQ`6Nx#P038? zngL7pCCQwf`i`m+&&?`|d0N-viJ2I|)qsd%5Tf=*U{XXml&n$e{z>3ygd0_aOwA1- zglU;Rhh*j)64T{#c29e$xxX9x$O8 z8EQ~-|A8k- zT-E?dUjqV`HN7%@nZk$7mKul5G}qls?!)$EN%|}tuss-wCsZW@DIJOH8BIPB872H^+$V zQH&v3$O*BZFnKRXYDTfKkhQF#sBwF@K^z*1;8w!q7R7{?Fhy~pqb}luH7wC)m@vG7 z?sRmmcD#Ki&n|n)f%ZanDZ3l!+Y0pU1p4oU9xWX`wR!&5#g8s-zECG7#8-_)H@PpR+Nhvwh1f6eY5AKyAYUOF~W>YdyU zohkXxR2by{4Tal4@k96gLGt|pC=VrH(=@52$RcGvNVp^vwMJ+j=p6L{RVz}9bdGKo zc&HTg6qEqoxvm0dUsfkbbXvANwo^%zET2J&m#9f8#Sj>L2#mnJFj(|_+#sbXQjH0Y zz+$1ZJ{)yhJAh~m!T=Eo_N<;?zqodBJNQ_hzta*Zyt?%2>XCI}P1qRQX?Z$Nmjl7V z>r1bf+`W!3>#q{YHv1xf0ob45OZxxwWi#*PsHEL&z>+!uEXm&nz+scrK^vNL)S|6H zb<5EZyK2lAnH-Z0*7olIWm;xFU~3Vk5J(N}Q)@H)d2{>-DzGtn7RBi4ZiE)u9GfJe zX#j&-ZUf4J1~e?+qL+JFw8+6uCVBA3^c?4Cm?0Y49Sy+Q#KOx6w1jf(3cVa=fqRjK zb8-LcT#obvkv{xi?0>Sk^?)S_+yHaOA*VZk%8s|`5GT#|kM9Q0R$kEhZM9sTK%1Zc#DIZ;N)e8pIC5EvBTjLHZ?B z4+2hAXNi#uZS^IDU`U!7ozR<`)sE?<>9W7AFu62Y?mSe?u4K#Y2a3Zh!{sNQygB`s z>33dPeR}=bwP$z2!Y6FG{owaLzSWzjD{aW*D|nVXZz|W$=RG^_pI3O~4-`)R@nm`M zshe}x=f3J5*$8gFeCxu;7gnEJpLlno)IE}a5r(qAwJ^RkzTy9>XX4sZo7CpnThWiA ze-HfW<=Z_IrL!0E<2(M}R@~@N?|S!I_rEyyQ0q4x;c{nJ@$y@jul2t_@ZP|NbaVFl z?CrzHDjW)iitZKnYXADc+Q3%Z;R*x%`7;#+zls+@afFktCO!&f7Z{E@zit7XoQ^-XuG^GcnuRr zauM=CC67X^m>H}scvNW2mw+@au=tlC4wHU^Pr|U^Ps@WNk|~YBn@|yW(tGU?ik4Rt zQ@Wf{6;ZVOqWHUvq&gG~wFTJ?0iS@`b!69ZJII)n0(&6U(jcp=XpiQ&7pT2fhI{0D zZxN7^zuiBh LLU$1iwum19)r+1| literal 0 HcmV?d00001 diff --git a/excel_import/__pycache__/schema.cpython-312.pyc b/excel_import/__pycache__/schema.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f14dc233b9b9ed1eb574bfbf901eac211f7b7ace GIT binary patch literal 4658 zcmb_gTTCO@8J_XjGvhnP7ua?QX0rsmH+HkxbaUC=8Za!8fh@4u#0{}#J+olrvE4Ic z7O0()C{@_9(t=nCqAC)RBE_pjD!n|UPt8N6Y?X)DF)4KFN|D-^eOqNyCEb_yKVy#v z5NV~RNAkJ+w{y;a=KSA({E6LeA)vJM7!%#l`x_o;#Zc{#4<@0rO7H|vCJB*@kR(D0)Of_$(J^(v7g5-NFjl{d(BorG~W504Vq%AS%i?;*f2XLuj42nw>u)@qL+ z-mi5=iT`01yJkgicg^C|=-+7;mu5w8cg^C~=-+7;uVzJWcg@nI(e+t!2Dw?IPIZD` zKZg_izDChoO(W5w)3hE>YNwK?8WB_1W@LmKrkfhdxAiMaXpbz+8kiMUP%Ohfm0 zU?Xe`;ZOe_4rzgA#l*|A89^~*c!lCJam9p(l46fb(W?*|qAD9^|H7<*}boy#Sh(T21Q96_26yv18!Td=frC8;3 zT~RTIW@17H2RDU+nH8Fo)YVdwj1)@bW)K7BkPWJUNuebOV6b+_G>2l79EyUy33(DV zs#8jwm`Osw11-#u@Y?;jJPdVBrbVHPj|*t3>v@!(K%6MTAe)-#iY2)$FLVj8j^y#U zm`NkKI*8T=QfyKXxf7XL#T(6VDV~FU<26QMAJUJL916fZ*nLtEBG^2!ZEL(0z7d`e zRT%S|)2~gxnR_j_#k7@}w)OT+rnBPoEzTC%=3U&wCFbz@k%!MHBtxbL8E}K0W@&l!qf64LE624Zt&cv zpnWLe!~8?TBmE@tF=_6%Qy<$%sADyB0471;lU0%F8~EB)WZKD9MRe>))FV@k0gZO6 z@??%oU~7Ad9{12W-jE}w33N=xU!qz-XCDOP>j=hE2S6@it%>hwc%yd5wHD%o2{5#d zReDq(g~-6uFtbJ82dRL^sJuG{=v-2wdmv z^egXBeZCKZ!Ff1U9$uVUcIh>!2yBt-_`8eiQZ?)NtMhbRZJfO~GR zC!LxWknHFDSqP2CrEDgXj0?OU$0t98P)O~;qCwhx#!SWs6RBKP|#86 zq058ap%HW%L?O$(JQx}o=uw#7P|v_%F#M1_reYEpF=vnvi%U4zo1_?*6u^|JFFYIs z%Ojy+SfP7|FL#GSAj(Q~`;Pa5s0uh)@_waBy6dY2FxHH#$IW?6^ zUr+hfup7sYC6ccy^=g8O>8O0*W9FnE$ali~eUad(2e*x+sm9JwB!Z88i z13^|?VZ=ii*bu(>Rj{pktJm##Ai%f_2?;-f))L16yosiES;jZS%oxhQ*k&G_c7$vVG?4 zhrwUHc-y^nY4ytTm9^28m)F~tqg&piCGXKfPuY8N({<{j(-8Qb-lc1cuNK)BP%qjJ zmY9PTr+aaF!LE)wOFA-X_AhQSE$goL4!(1+5Z`Qrd43=MUHI;S&1Xlqt&XL(8)vsY z-qrr){!Ei~uV)LF-kEv#++A`3Ed(p>@2y`dT-|h^D%wtID5x&2qr`O7fKMH_mzZ`9 z>wvJ(_xewE?xeNEv}*bli>KJswP7ygi{5i(%lV@5`~z?<5!(p~IJCeR-gn?~028v8 zxJAVbaUy0I4-!P4%2E40d?fN_18{JKI4DZRw_Fszf-pG`Sh3v_uV$K;FDLd~-vL4*sP;t}>PyoL?@HN{aCjmFZcYw-yQ zwF65bV~VYQyF@O7MlqEFQH6ynn&RMvY{vOpRLzI+y;iTLem(71v+*PkX7PJlLN9~F zIrvF;pn^2-nSnpK`3>&wrBAPXas@J^t?^iSJhl{Ey|{ewGf(F_`5yZYTWI@b$DNLn z=TySfDCy-;%U)c7fftx|eb#>!StR#*3eIp06|q z3dc5PN=JTJZVuiU_7{B!p7eLEPu z?;vdM`JsP)=_Nb@DdF7VXRhup>Qd+{aR6M(Cc8NUn^x_b1;fu#btQa>1cVs{8M zgb6s{K4hO2(|k55oJTPj;+#=B0Tm>kB>7k3&|e7m*9HS=_?m#~A8N%C+&mu!4+ PvYEW_)st4zt=juHsc+$E literal 0 HcmV?d00001 diff --git a/excel_import/cli.py b/excel_import/cli.py new file mode 100644 index 0000000..67afa75 --- /dev/null +++ b/excel_import/cli.py @@ -0,0 +1,87 @@ +from __future__ import annotations +import logging +import sys +from pathlib import Path + +import click + +from .config import ImportConfig, SheetConfig +from .importer import Importer +from .reader import ExcelReader + + +def _setup_logging(verbose: bool): + level = logging.DEBUG if verbose else logging.INFO + logging.basicConfig(format="%(levelname)s %(message)s", level=level) + + +@click.group() +def main(): + """Generic Excel-to-database import tool (Oracle & PostgreSQL).""" + + +@main.command() +@click.argument("excel_file", type=click.Path(exists=True)) +@click.argument("config_file", type=click.Path(exists=True)) +@click.option("-v", "--verbose", is_flag=True) +def run(excel_file: str, config_file: str, verbose: bool): + """Import EXCEL_FILE using CONFIG_FILE (YAML).""" + _setup_logging(verbose) + cfg = ImportConfig.from_yaml(config_file) + importer = Importer(cfg) + try: + results = importer.run(excel_file) + except Exception as exc: + click.echo(f"ERROR: {exc}", err=True) + sys.exit(1) + + for table, rows in results.items(): + click.echo(f" {table}: {rows} rows imported") + + +@main.command() +@click.argument("excel_file", type=click.Path(exists=True)) +def inspect(excel_file: str): + """Show sheet names and column preview of EXCEL_FILE.""" + reader = ExcelReader(excel_file) + names = reader.sheet_names() + click.echo(f"Sheets in {Path(excel_file).name}:") + for i, name in enumerate(names): + click.echo(f" [{i}] {name}") + # read first few rows for preview + from .config import SheetConfig as SC + df = reader.read(SC(sheet=i)) + click.echo(f" Columns ({len(df.columns)}): {', '.join(str(c) for c in df.columns[:8])}") + if len(df.columns) > 8: + click.echo(f" ... and {len(df.columns) - 8} more") + click.echo(f" Rows: {len(df)}") + + +@main.command("generate-config") +@click.argument("excel_file", type=click.Path(exists=True)) +@click.option("--dsn", default="postgresql+psycopg2://user:pass@localhost/dbname", show_default=True) +@click.option("--output", "-o", default="import_config.yaml", show_default=True) +def generate_config(excel_file: str, dsn: str, output: str): + """Generate a starter YAML config from EXCEL_FILE's structure.""" + import yaml + reader = ExcelReader(excel_file) + names = reader.sheet_names() + + sheets = [] + for i, name in enumerate(names): + from .config import SheetConfig as SC + df = reader.read(SC(sheet=i)) + table_name = name.lower().replace(" ", "_") + columns = [{"source": str(c), "target": str(c).lower().replace(" ", "_")} for c in df.columns] + sheets.append({ + "sheet": name, + "header_row": 0, + "target_table": table_name, + "mode": "append", + "columns": columns, + }) + + config = {"dsn": dsn, "sheets": sheets} + with open(output, "w") as f: + yaml.dump(config, f, allow_unicode=True, sort_keys=False) + click.echo(f"Config written to {output}") diff --git a/excel_import/config.py b/excel_import/config.py new file mode 100644 index 0000000..3433cc5 --- /dev/null +++ b/excel_import/config.py @@ -0,0 +1,48 @@ +from __future__ import annotations +from dataclasses import dataclass, field +from pathlib import Path +from typing import Literal +import yaml + + +@dataclass +class ColumnMapping: + source: str + target: str + dtype: str | None = None # override detected type, e.g. "VARCHAR(100)", "NUMBER" + skip: bool = False + + +@dataclass +class SheetConfig: + sheet: str | int = 0 # sheet name or index + header_row: int = 0 # 0-based row index of the header + skip_rows: int = 0 # rows to skip before header + target_table: str = "" + columns: list[ColumnMapping] = field(default_factory=list) + mode: Literal["append", "replace", "upsert"] = "append" + upsert_keys: list[str] = field(default_factory=list) # column names for upsert PK + + +@dataclass +class ImportConfig: + dsn: str # SQLAlchemy DSN + sheets: list[SheetConfig] = field(default_factory=list) + default_varchar_length: int = 255 + + @classmethod + def from_yaml(cls, path: str | Path) -> "ImportConfig": + with open(path) as f: + raw = yaml.safe_load(f) + + sheets = [] + for s in raw.get("sheets", []): + columns = [ColumnMapping(**c) for c in s.pop("columns", [])] + upsert_keys = s.pop("upsert_keys", []) + sheets.append(SheetConfig(**s, columns=columns, upsert_keys=upsert_keys)) + + return cls( + dsn=raw["dsn"], + default_varchar_length=raw.get("default_varchar_length", 255), + sheets=sheets, + ) diff --git a/excel_import/importer.py b/excel_import/importer.py new file mode 100644 index 0000000..478e9c8 --- /dev/null +++ b/excel_import/importer.py @@ -0,0 +1,127 @@ +from __future__ import annotations +import logging +from pathlib import Path + +import pandas as pd +from sqlalchemy import create_engine, text, MetaData, Table, inspect +from sqlalchemy.dialects.postgresql import insert as pg_insert + +from .config import ImportConfig, SheetConfig +from .reader import ExcelReader +from .schema import build_columns + +logger = logging.getLogger(__name__) + + +class Importer: + def __init__(self, config: ImportConfig): + self.config = config + self.engine = create_engine(config.dsn) + + def run(self, excel_path: str | Path) -> dict[str, int]: + """Import all configured sheets. Returns {table_name: rows_imported}.""" + reader = ExcelReader(excel_path) + results = {} + for sheet_cfg in self.config.sheets: + rows = self._import_sheet(reader, sheet_cfg) + results[sheet_cfg.target_table] = rows + return results + + def _import_sheet(self, reader: ExcelReader, cfg: SheetConfig) -> int: + df = reader.read(cfg) + if df.empty: + logger.warning("Sheet %r is empty, skipping.", cfg.sheet) + return 0 + + logger.info("Read %d rows from sheet %r -> table %r", len(df), cfg.sheet, cfg.target_table) + + with self.engine.begin() as conn: + self._ensure_table(conn, df, cfg) + + if cfg.mode == "replace": + dialect = self.engine.dialect.name + truncate_sql = ( + f"DELETE FROM {cfg.target_table}" + if dialect == "sqlite" + else f"TRUNCATE TABLE {cfg.target_table}" + ) + conn.execute(text(truncate_sql)) + rows = self._bulk_insert(conn, df, cfg.target_table) + elif cfg.mode == "upsert": + rows = self._upsert(conn, df, cfg) + else: # append + rows = self._bulk_insert(conn, df, cfg.target_table) + + logger.info("Imported %d rows into %r (mode=%s)", rows, cfg.target_table, cfg.mode) + return rows + + def _ensure_table(self, conn, df: pd.DataFrame, cfg: SheetConfig): + insp = inspect(conn) + if not insp.has_table(cfg.target_table): + meta = MetaData() + cols = build_columns(df, cfg.columns, self.config.default_varchar_length) + table = Table(cfg.target_table, meta, *cols) + meta.create_all(conn) + logger.info("Created table %r", cfg.target_table) + + def _bulk_insert(self, conn, df: pd.DataFrame, table_name: str) -> int: + records = _df_to_records(df) + if not records: + return 0 + meta = MetaData() + meta.reflect(bind=conn, only=[table_name]) + table = meta.tables[table_name] + conn.execute(table.insert(), records) + return len(records) + + def _upsert(self, conn, df: pd.DataFrame, cfg: SheetConfig) -> int: + dialect = self.engine.dialect.name + records = _df_to_records(df) + if not records: + return 0 + + meta = MetaData() + meta.reflect(bind=conn, only=[cfg.target_table]) + table = meta.tables[cfg.target_table] + + if dialect == "postgresql": + stmt = pg_insert(table).values(records) + update_cols = {c.key: stmt.excluded[c.key] for c in table.columns if c.key not in cfg.upsert_keys} + stmt = stmt.on_conflict_do_update(index_elements=cfg.upsert_keys, set_=update_cols) + conn.execute(stmt) + elif dialect == "oracle": + # Oracle MERGE via raw SQL + for record in records: + _oracle_merge(conn, table, record, cfg.upsert_keys) + else: + raise NotImplementedError(f"Upsert not implemented for dialect: {dialect}") + + return len(records) + + +def _df_to_records(df: pd.DataFrame) -> list[dict]: + # Replace pandas NA/NaT with None so SQLAlchemy handles nulls correctly + return [ + {k: (None if pd.isna(v) else v) for k, v in row.items()} + for row in df.to_dict(orient="records") + ] + + +def _oracle_merge(conn, table: Table, record: dict, keys: list[str]): + key_clauses = " AND ".join(f"t.{k} = s.{k}" for k in keys) + all_cols = list(record.keys()) + non_keys = [c for c in all_cols if c not in keys] + + select_parts = ", ".join(f":{c} AS {c}" for c in all_cols) + update_parts = ", ".join(f"t.{c} = s.{c}" for c in non_keys) + insert_cols = ", ".join(all_cols) + insert_vals = ", ".join(f"s.{c}" for c in all_cols) + + sql = f""" + MERGE INTO {table.name} t + USING (SELECT {select_parts} FROM dual) s + ON ({key_clauses}) + WHEN MATCHED THEN UPDATE SET {update_parts} + WHEN NOT MATCHED THEN INSERT ({insert_cols}) VALUES ({insert_vals}) + """ + conn.execute(text(sql), record) diff --git a/excel_import/reader.py b/excel_import/reader.py new file mode 100644 index 0000000..05d7619 --- /dev/null +++ b/excel_import/reader.py @@ -0,0 +1,49 @@ +from __future__ import annotations +from pathlib import Path +import pandas as pd + +from .config import SheetConfig + + +def _engine_for(path: Path) -> str: + return "xlrd" if path.suffix.lower() == ".xls" else "openpyxl" + + +class ExcelReader: + def __init__(self, path: str | Path): + self.path = Path(path) + if not self.path.exists(): + raise FileNotFoundError(f"Excel file not found: {self.path}") + if self.path.suffix.lower() not in {".xls", ".xlsx", ".xlsm", ".xlsb"}: + raise ValueError(f"Unsupported file type: {self.path.suffix}") + + def sheet_names(self) -> list[str]: + engine = _engine_for(self.path) + xf = pd.ExcelFile(self.path, engine=engine) + return xf.sheet_names + + def read(self, cfg: SheetConfig) -> pd.DataFrame: + engine = _engine_for(self.path) + df = pd.read_excel( + self.path, + sheet_name=cfg.sheet, + header=cfg.header_row, + skiprows=range(cfg.skip_rows) if cfg.skip_rows else None, + engine=engine, + ) + # drop completely empty rows + df.dropna(how="all", inplace=True) + + # apply column mapping: rename and drop skipped columns + if cfg.columns: + skip_sources = {c.source for c in cfg.columns if c.skip} + df.drop(columns=[c for c in skip_sources if c in df.columns], inplace=True) + + rename_map = { + c.source: c.target + for c in cfg.columns + if not c.skip and c.source != c.target + } + df.rename(columns=rename_map, inplace=True) + + return df diff --git a/excel_import/schema.py b/excel_import/schema.py new file mode 100644 index 0000000..017de04 --- /dev/null +++ b/excel_import/schema.py @@ -0,0 +1,72 @@ +from __future__ import annotations +import pandas as pd +from sqlalchemy import ( + Column, Integer, Float, String, DateTime, Date, Boolean, Numeric, Text +) + +from .config import ColumnMapping + + +def _pandas_dtype_to_sqla(series: pd.Series, varchar_length: int): + dtype = series.dtype + if pd.api.types.is_bool_dtype(dtype): + return Boolean() + if pd.api.types.is_integer_dtype(dtype): + return Integer() + if pd.api.types.is_float_dtype(dtype): + return Float() + if pd.api.types.is_datetime64_any_dtype(dtype): + return DateTime() + # object columns: check if they look like dates + if dtype == object: + sample = series.dropna().head(100) + if len(sample) > 0: + try: + pd.to_datetime(sample) + return DateTime() + except Exception: + pass + max_len = int(series.dropna().astype(str).str.len().max()) if len(series.dropna()) > 0 else 1 + return String(max(max_len + 10, varchar_length)) + return Text() + + +def _override_to_sqla(dtype_str: str): + """Convert a user-supplied type string like 'VARCHAR(100)' to a SQLAlchemy type.""" + s = dtype_str.upper().strip() + if s.startswith("VARCHAR"): + length = int(s.split("(")[1].rstrip(")")) if "(" in s else 255 + return String(length) + if s in ("TEXT", "CLOB"): + return Text() + if s in ("INTEGER", "INT", "NUMBER"): + return Integer() + if s.startswith("NUMBER") or s.startswith("NUMERIC") or s.startswith("DECIMAL"): + if "(" in s: + parts = s.split("(")[1].rstrip(")").split(",") + p, sc = int(parts[0]), int(parts[1]) if len(parts) > 1 else 0 + return Numeric(precision=p, scale=sc) + return Numeric() + if s in ("FLOAT", "REAL", "DOUBLE"): + return Float() + if s in ("DATETIME", "TIMESTAMP"): + return DateTime() + if s == "DATE": + return Date() + if s in ("BOOLEAN", "BOOL"): + return Boolean() + raise ValueError(f"Unknown dtype override: {dtype_str!r}") + + +def build_columns(df: pd.DataFrame, column_configs: list[ColumnMapping], varchar_length: int) -> list[Column]: + override_map = {c.target or c.source: c.dtype for c in column_configs if c.dtype and not c.skip} + + columns = [] + for col in df.columns: + col_name = str(col) + if col_name in override_map and override_map[col_name]: + sqla_type = _override_to_sqla(override_map[col_name]) + else: + sqla_type = _pandas_dtype_to_sqla(df[col], varchar_length) + columns.append(Column(col_name, sqla_type)) + return columns diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..28cae8b --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,24 @@ +[build-system] +requires = ["setuptools>=68", "wheel"] +build-backend = "setuptools.backends.legacy:build" + +[project] +name = "excel-import" +version = "0.1.0" +requires-python = ">=3.10" +dependencies = [ + "pandas>=2.0", + "openpyxl>=3.1", + "xlrd>=2.0", + "sqlalchemy>=2.0", + "psycopg2-binary>=2.9", + "oracledb>=2.0", + "pyyaml>=6.0", + "click>=8.1", +] + +[project.scripts] +excel-import = "excel_import.cli:main" + +[project.optional-dependencies] +dev = ["pytest>=8.0", "pytest-mock>=3.0"] diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/__pycache__/__init__.cpython-312.pyc b/tests/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b5ddee538e1801ea84466f7a1ad96884cacf5c30 GIT binary patch literal 161 zcmX@j%ge<81QE_GSs?l`h(HIQS%4zb87dhx8U0o=6fpsLpFwJVIp}BP=cekXWTqBn z>jxC&rxzvW<^nmTdFlGeIfnw}S#wWTN*1@K_D zx~jS!-Ceak-Sy{eHqF69C#A9}aopdL@Sm86JY59jbB=I?H@GR@ZH0{EiKHh;Oi#>6Bz{kN=ygnGNdkBR zN#2vDa`U{BdV)PS72c6P%_YNm`YSDYdOCi~bGGmkPU*V2`+B`*TC-F)KQhkL&G}-jK07i#G}IWcziHHp znlT66LuA@XT&Gs)j&4u^tUC#fQBH#DbwexaPS)$MHYydJI&lzCt8`cX+4T=6-@X3f zsf*_oMRek3twBLLZfSH@x11QU=Igo>yG8Yq>BQ>TC<4uc584Gli8(QIP6x^KoURd_ zs#(Wq2G+YyOb1y87EF33eUFoW}2#ee~%Kc*#1S1tXn zdI{xclo9OKMB^HFCPe{eXNQg-SAZ>A&uJI5&naY~_9Bu?q)j#Gcceup?&!EWr2l14Q;#NaGinT}aT za+qYzjGZYT^0O^$VNb}v(4NqR`$#0$x}Q7OqW|;`xn^hl89sAQ2qYo5x}On=mv2wV zl#hgQ@G*!-&kMz)_q%vRGT=q_=uG*TABC`mc=WwcJYM-O9{+PZ3Sk_4>>IzC2VU@- z`RYsm&3xH+*8lHsW~SUB%e-%9*3Ons_?Z{B%>+5v%-IP$=bq3+TQnCVx!(DFCc^ow zA9ccSW<;mc86)d|2cIGLyz8Iy?U6&?X*W*}lOuNCJBy^B`z(_7E%~i?7Rkan#66ER zbMu^X^d`lnQ21S8VnX2;&b)T2K0l(E>0f$CVWNQ8dBq%r>tMQBm|%>Hib-)RhP$8$ z-3UeyTwKU$rm0iQRU1=GWR5+-TM?N55!wP(W*(5x{3+}j^}7J^x`x3syMv~iO6dPG zS~m2m5~HtyZS*pXV3fY$k(U?pk&z6;4B$pOKYZT3c~dMQz<|zq)9f@~*K6nzXN;dL ziZvEcz3?rn(Tnw73wS95<+IA`62}BRnv*Pg%i^`sG4)NhCy6Wb=Ig&Yld3YD!P+WO>IU; z3r_C3n{}mH^=(RP)ag}cYP6zRs(u#=D^<;c9*SpcdK!TnNWAXRR}tXViFN}}y4)MS zo7#?y!(llz93?J9iBU!{ZB2_+6+-B zE-hVQ+zqr9hzAG=gR^b$=;0@#vHqeGO~4ADof zFk^3^xj;TZKpLkvzC6Azomc^XCzi%nz}<;$3EwS7Ga`U3;DR?4*bLD}D9gATXe$s8 z5D;$X`|dZFo8Sim-`vD$o>=P!fYa;_{66*=fht45F~;6=?4yAYxLs-l-9C!rYBDYe zfS&+g>kd={1ZIp2GO;|d0>e4HE{&|qhqvY9E%`X$Yqwp-@wMA6d1PC{cMBze!Kh6p z@AhoCJ&X^yFTxlMK9)u(9%z)jll1pOdIA5&{VLl%2icwkJK7-C-9oIB3RqZ5c6oFJ zbrM*FHA>SIYm`o$8K8Knb$Z=;tIF!FgRIEPw(Z1B=`IxAS&5b6M)3#E9h1AA>?VHz zpQwhRU!^~WF8s8c?*iBnd7l3($KB($xPxDD*{?YHAKY1}$O_5Nj(u`$N9f_ZR=&5x dL9=$@pV&MVIibtt@0?EYqia9?7YB{&>ECEizlQ(- literal 0 HcmV?d00001 diff --git a/tests/__pycache__/test_importer.cpython-312-pytest-9.0.3.pyc b/tests/__pycache__/test_importer.cpython-312-pytest-9.0.3.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0c973a2445bf75dac84b10cda023819a8b12526c GIT binary patch literal 6857 zcmeHMU2GKB6`sGH+1=S6|A9^T$zl@1q_K^Qfj~om7#tG7kdR2lRkdCAj?FH!Ke;mw zHl0Y5j^Cf4}DBqDKG7VjZN)NQYBJTRlGSdQY1h1oIAfZn>y)p zukADE-Z}T&nK?7x`R?&w(&;1ztuZK-e;enxe`3dPF;7{Z2jwP5IKms;2+xi}MJx(D zSCK|yJf0B@xuT3HOy`aGNP<}-vymjPawDBY1=&SXAUBaT z$R{RwE%OjP4Qsq3oh?`wHNi<2sa~-3yj~qERdq+Q^a)E7ozyFpdW~9xwd!bT%u&x> z)OEj;9;_MTmFnq2yQ1$~CnQZe!zNg&v{PCR4pX0Rn z@RbB9ScTpR!<^8>hv?%Wnmokog4>QAHA+R@5nrlZaAJc6S~E0p%=LQyh^w@eJmnL5 z1ZD^&;z-p(MR(*vr8Zu*hVJlAtW+gBEZ_)r;v|m2ibE7l;#MuMPZV|VO7S&xB=)cb zGHT)l852enRiEY z+FPG=5_pIC(UPIlZQvOfGY^B9;=WYdZ(N#Hw@)4WJfU9Sa&3!~>2z8&r|p@q6{%BR z=A@)D_0m$3iz)A&eD~xH>DKY7lk-ydqMDsLxePA2RY8Ss$PMZ~P;PQzC<$-jJ+Vdv zA`(s1OO?bTZx?UxB zAV?;@jZVrc&@tW0TZIb-yttJb(eDV3NX)7uGYeA_D86%40ip5iL6{iw9Nmhp)9#x2 zB74c{k6>&g{F(SUPH|tUor|g5SEW=xu+v@Fl1|*E%__2V;A%7;2RXK!0HW!$V9P~63=>Z3%+Q# zMLebavG8ceeNX=JR{*pVd_#gi8SYppXF_XVE4TVuc=Um9FV{%eiE?{*#@DP3zD6Ef zwcP2S3y;3X27)>K_^&Dex6qK^N!l`tg=(wjZz6ZuJQga%B4NXvFcu_fVQ5(^KjCZP z(O>^<&fF03za_NrH9P4)JsP!a7U96M%ES8M8`R8J_Pr9-i*kyjNZL+$QBHsFD5rf- z{?UuF+K?x?9b7+W@|Pj4p67V@`S9Zj@jQ1K($p~RhFhdr5O??$;n1Nw{FSz+chx8R zH8XeUP!81r&BX5=#!w8bWD2IKQ;VJ3r<}K>w2%2r3tt^M+9hgl1J*3h{X8LS+t|Z&FEQ@|KYbHypMpO#1sFcXeV*;Sm7dG)oI0_X*?8T!X52b3mw9^X zx|Sr-3nqx-8$Vmt9CC)`1`;caC>vC=j!*--c_R#OukICUVrl1lM9L5 zY$7+8=w9OZ*hzkxEN0uU+t=*d$7Z%4p4)O{E_-w#dtx?wVlMmY`_i`}8hzQ`F(Y-n zKX9Y}+JObB1AZU4UAGg+SsZnHbD=sfb=>F=Yy#!J)bVW$y}K8(Vj{f$X}>m<6+g|! zhPsqh`3S#nmXAJMlaIb{f^G;QTG)dB9XB9Dl`+??X<-fn&b5w319B7*S(b|t1#(-= zLsrNqkmd06Vy(<5yWewXs${*nZdJkDHX(yXytuUvM$56MtIrQXf zewJydk7YTfp}eEo3d?dSI|W&8y;!6m78x7ngs~u53uzW>Wh9|%n!i5Fa-sj+8Z+qH zs{iyL%Y~NBA{QK<3R*mp+f$Pf)x7LTe4_FUwW* z5T1Gw1?~mt5fn#Jyo6#9#W57eQDCZ}LnvNGaRS9FARzH!G~Er1SR5UruVVKkic=^~ zqZmeU2E|zvuc5%qKwn334#gWFOuWAc{G;lofc9t#@Sn?hu@71!yu4^%1C;G(?^gAPoSSZ$dNuj|H|kB5rY-!F)r8`LYic z4Fyt!@;29q+i~-cRvK=FwPsrd*Z{==lwvp|1f|HFW^f%O2qIg-Mxxx|uM-~qD`Mak zUI*?U*R*hJ1>hB0Hd{T|gaLTT>(~YySE4(D11@bp03;YQ2!lc= z2^D~XQM{;ECh5;`6d};bu32dH3Xc=Y3 z&%4~-W!e3^O%?p3}O{mjzHnRr0%~1S$_WJ;a?w~ zQ+G@qTU3Fq_TG{fx}KZudJa^Ob6vflynkQ@j%x3Mguf5G0k=1YJy#8+>xLP_Y4FI@ zd8v1%>p54ykGkhCm1-rcp0Z5Cq;h7g4%_^}u;&wzP_gv3Qn+B8P(YVo~Z*6b^D?}EMA zt(+DqMv0V-lnA#%s+%7z79@@Gqsrg>?jM_7f_p>MMtw;9suM?v#IK$+v-e@M7)bJ~ z>ec$pnVB=^&Yj2k&YAIFfmFmLyna|18J-=YrWGFOE0D##pH;&n@ERB*`G*aVE)dpuB)Qt3Tjaa|Fw)!LMV zqA+UCgrjkhSKQY<*J}EBuE@W@MZALrpRQ_p_gG3Bi-?9a(AyiObUUSeQ09IqsU%e0 za6cW#bTA?&UTyi{=`Zd+b|}I0cO6PVzR>c)e-6%wMsR@W$#ZHdlTW8rV#tTpf|?vY zo5^PqhU*kjliKka-f$%|ii)oka*E+Q8rS2;Nj$B>h-G6c4roLqI6j6%hfh#OG#skV z&d$3>dd_CkYLAjs$+@0WB%6SSX~@6}dWKW+yrT9{4PD7}E=%+t{6LM;7@@P&of|iN z_{Fi2WJ)D#K`$<9?I4f|?(cGQ`P`hmapLH0pZwOkOY4lbs1c4DO`E^;NMZLPC;2@S zhZp>u%k%ol*G`tDsbdo-tJ0=B7j%NTpbu%o7c|ljb$mV93B{}7PkR^01a~{QW~!|cd~D+Qyxeq4?wFH1 zW~DBm6?p?N(hsq6tGHQaoo!+jz@1cC}7VY)R;W_W`oDZChEVZQFd7ZSyO#!?v}Wqvv=duKZ=seQjIKu7Pi{ zW?2)+mP8$wDSe7opGz+AQ3CJrC2!GH^is6)6@6u1^E>)b2YF!i_+a$}v6Ta_LJvJx z&b3f)^&sHW=b=~6JuP7FLjN8g&H?zv z;RN4)XJ(I6te-@&A$25b*c{blD5^m-CJde%C_Lkr#LmRE%NL_|HR?ja%hsg(LXDN%W)u+{pI+j{kNnr{BE)=%UE*%l^BZdKvort$@4nZ$umTkW zcHVajNfqge?K<2)7cH&1EdYjsO6Z|`)auY*r$_c^ffKW0Pi#p9!UxFO@UpT_w>b^~FDyXw zQh4#!;KgS<8ICuuv&Y`#iat}5nbzRN1<>Ltxhb?18q{=7ACHmj1;oDR41U1i4<9Fph!l@VKXSW)+(CGv_(S$1hqf03GcBLe$x~Qz7zrBiNCUyu zLJ%Pdjy*Np zs-+TK51~}B`K{BJPG4!i8vVGh+IFxKJT!3}jrI9A_RUJ|QyYOy?wi^;hXxCOH(8!# zEIKE(qqRnXip^TpH=xmZtJ-9%sEU<>z|=-d++slDUMq-0g|5ZopMk+kjp*{x{vL%gNG8l6%K!- zdPg_R&YkB=H9=0}S5V}UkfT6^ALWQ0Xt1y>fUshR z89$2_P>&%Fk0cldAsRW||6T?|h)T{7VHfFxlDe1_wlRrCCveJ$XP_hlEwD@4HX06m zE=6zqTjxU!^Q~*X2((Ra_;^h{xDn(>{;+gP5!SvUfDfy0&ZtF-Oi!= zZ|7CB(&nj6zuWp7*kPL|`=?-sZN4Sp?x!Spc$mYtk4C|0bXnBIlM{ka+R^4vK07#HO<{sP>t zzYMLgWK4XSyTC_;!9sgaCYw>m;BZOJ=skFq+JkqVEZ~t0H@p0Sm?fUnRLy8Q4wZvh z{dhK?QRuY|4O3<~`hhHFXr_~gT4Df0aH#{LE;_6s`~&E8o<+b#DF$fE9>qF! zfT63ICypDnhSnChF9Q_ux?~O|9da~`^sz5BA8E*4g9Wbwm)YIJ+WCSfzU_=Zx^255 z|0JX3b9fo8D&RD@;1+E7Y{X^wFOa0J#&mT|CmOt#;6DNw{-4HEdDR~Mh`?^b`4J7{ zP4l>n#cp|9GhD2}ENgJBMt%u$r{GV+_!D9`Z)17yoVR0QU|w>){@iQNmDg6Kb+t)) zr*O5S>g&hZ2*WIu2i^?f9EEX?!nE`*J+J#HFyIkC@wf&WJ&`uUYhDV{6Go#g##T}{ zJEnv_bQ6_+o#C^J3{S0$`WWClWepE5$yD+T!3l%q7|$qijYY~hAj@1AcIC+;=Ok29 z|Hbf(B*)-?E7(Ooz0#v6hctktP6I*1LYF8V7=l~UUeI^F`_ow^pHiP9lTd*xODh9e z5P6>e8^>Mbe(n35Tk{#${23Sg7x(1n+>;A#PVm0I{s1;4~U!%yk(-vf{?ggpGZt1XKhBnvGezH1sPAX(TX^N&xzxX3~B U)jEOi!)D>DEnh>cFzxYw0egkUvH$=8 literal 0 HcmV?d00001 diff --git a/tests/test_config.py b/tests/test_config.py new file mode 100644 index 0000000..7556060 --- /dev/null +++ b/tests/test_config.py @@ -0,0 +1,39 @@ +from pathlib import Path +import pytest +import yaml + +from excel_import.config import ImportConfig + + +@pytest.fixture +def config_file(tmp_path: Path) -> Path: + cfg = { + "dsn": "postgresql+psycopg2://u:p@localhost/db", + "sheets": [ + { + "sheet": "Artikel", + "header_row": 0, + "target_table": "artikel", + "mode": "replace", + "columns": [ + {"source": "Artikelnummer", "target": "art_nr", "dtype": "VARCHAR(50)"}, + {"source": "Preis", "target": "preis"}, + ], + } + ], + } + path = tmp_path / "config.yaml" + path.write_text(yaml.dump(cfg)) + return path + + +def test_load_from_yaml(config_file: Path): + cfg = ImportConfig.from_yaml(config_file) + assert cfg.dsn == "postgresql+psycopg2://u:p@localhost/db" + assert len(cfg.sheets) == 1 + sheet = cfg.sheets[0] + assert sheet.sheet == "Artikel" + assert sheet.target_table == "artikel" + assert sheet.mode == "replace" + assert len(sheet.columns) == 2 + assert sheet.columns[0].dtype == "VARCHAR(50)" diff --git a/tests/test_importer.py b/tests/test_importer.py new file mode 100644 index 0000000..1762b0a --- /dev/null +++ b/tests/test_importer.py @@ -0,0 +1,80 @@ +from pathlib import Path +import pandas as pd +import pytest +from sqlalchemy import create_engine, text + +from excel_import.config import ImportConfig, SheetConfig, ColumnMapping +from excel_import.importer import Importer + + +@pytest.fixture +def xlsx_file(tmp_path: Path) -> Path: + path = tmp_path / "data.xlsx" + df = pd.DataFrame({ + "id": [1, 2, 3], + "name": ["Alice", "Bob", "Carol"], + "amount": [100.0, 200.5, 300.0], + }) + df.to_excel(path, index=False) + return path + + +@pytest.fixture +def sqlite_config(xlsx_file): + return ImportConfig( + dsn="sqlite:///:memory:", + sheets=[ + SheetConfig( + sheet=0, + target_table="persons", + mode="append", + ) + ], + ) + + +def test_import_append(xlsx_file, sqlite_config): + importer = Importer(sqlite_config) + results = importer.run(xlsx_file) + assert results["persons"] == 3 + + with importer.engine.connect() as conn: + rows = conn.execute(text("SELECT COUNT(*) FROM persons")).scalar() + assert rows == 3 + + +def test_import_replace(xlsx_file, tmp_path): + cfg = ImportConfig( + dsn="sqlite:///:memory:", + sheets=[SheetConfig(sheet=0, target_table="persons", mode="replace")], + ) + importer = Importer(cfg) + importer.run(xlsx_file) + results = importer.run(xlsx_file) # second run should truncate+insert + assert results["persons"] == 3 + + with importer.engine.connect() as conn: + rows = conn.execute(text("SELECT COUNT(*) FROM persons")).scalar() + assert rows == 3 + + +def test_import_creates_table(xlsx_file, sqlite_config): + importer = Importer(sqlite_config) + importer.run(xlsx_file) + + from sqlalchemy import inspect + insp = inspect(importer.engine) + assert "persons" in insp.get_table_names() + + +def test_import_empty_sheet(tmp_path): + path = tmp_path / "empty.xlsx" + pd.DataFrame({"a": [], "b": []}).to_excel(path, index=False) + + cfg = ImportConfig( + dsn="sqlite:///:memory:", + sheets=[SheetConfig(sheet=0, target_table="empty_table", mode="append")], + ) + importer = Importer(cfg) + results = importer.run(path) + assert results["empty_table"] == 0 diff --git a/tests/test_reader.py b/tests/test_reader.py new file mode 100644 index 0000000..416698a --- /dev/null +++ b/tests/test_reader.py @@ -0,0 +1,84 @@ +import io +from pathlib import Path +import pandas as pd +import pytest + +from excel_import.reader import ExcelReader +from excel_import.config import SheetConfig + + +@pytest.fixture +def xlsx_file(tmp_path: Path) -> Path: + path = tmp_path / "test.xlsx" + df = pd.DataFrame({ + "Artikelnummer": ["A001", "A002", "A003"], + "Bezeichnung": ["Widget", "Gadget", None], + "Preis": [9.99, 14.50, 0.99], + }) + df.to_excel(path, index=False) + return path + + +def test_sheet_names(xlsx_file: Path): + reader = ExcelReader(xlsx_file) + assert reader.sheet_names() == ["Sheet1"] + + +def test_read_basic(xlsx_file: Path): + reader = ExcelReader(xlsx_file) + df = reader.read(SheetConfig(sheet=0, target_table="t")) + assert len(df) == 3 + assert list(df.columns) == ["Artikelnummer", "Bezeichnung", "Preis"] + + +def test_read_drops_empty_rows(tmp_path: Path): + path = tmp_path / "empty_rows.xlsx" + df = pd.DataFrame({"A": ["x", None, "y"], "B": [1, None, 3]}) + df.to_excel(path, index=False) + + reader = ExcelReader(path) + result = reader.read(SheetConfig(sheet=0, target_table="t")) + assert len(result) == 2 + + +def test_read_column_rename(xlsx_file: Path): + from excel_import.config import ColumnMapping + cfg = SheetConfig( + sheet=0, + target_table="t", + columns=[ + ColumnMapping(source="Artikelnummer", target="art_nr"), + ColumnMapping(source="Bezeichnung", target="bez"), + ColumnMapping(source="Preis", target="preis"), + ], + ) + reader = ExcelReader(xlsx_file) + df = reader.read(cfg) + assert "art_nr" in df.columns + assert "Artikelnummer" not in df.columns + + +def test_read_column_skip(xlsx_file: Path): + from excel_import.config import ColumnMapping + cfg = SheetConfig( + sheet=0, + target_table="t", + columns=[ + ColumnMapping(source="Preis", target="Preis", skip=True), + ], + ) + reader = ExcelReader(xlsx_file) + df = reader.read(cfg) + assert "Preis" not in df.columns + + +def test_file_not_found(): + with pytest.raises(FileNotFoundError): + ExcelReader("/nonexistent/path/file.xlsx") + + +def test_unsupported_extension(tmp_path: Path): + f = tmp_path / "data.csv" + f.write_text("a,b\n1,2") + with pytest.raises(ValueError, match="Unsupported"): + ExcelReader(f)