Initial implementation of generic Excel-to-DB import tool

Supports .xls and .xlsx input, and Oracle and PostgreSQL via SQLAlchemy. Includes a CLI (run/inspect/generate-config), YAML config, auto schema detection, and append/replace/upsert modes.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-13 11:31:47 +02:00
commit 8f7399de58
26 changed files with 663 additions and 0 deletions
@@ -0,0 +1,39 @@
+from pathlib import Path
+import pytest
+import yaml
+
+from excel_import.config import ImportConfig
+
+
+@pytest.fixture
+def config_file(tmp_path: Path) -> Path:
+    """Write a one-sheet import configuration to a temporary YAML file and return its path."""
+    column_entries = [
+        {"source": "Artikelnummer", "target": "art_nr", "dtype": "VARCHAR(50)"},
+        {"source": "Preis", "target": "preis"},  # this entry carries no "dtype" key
+    ]
+    sheet_entry = {
+        "sheet": "Artikel",
+        "header_row": 0,
+        "target_table": "artikel",
+        "mode": "replace",
+        "columns": column_entries,
+    }
+    document = {
+        "dsn": "postgresql+psycopg2://u:p@localhost/db",
+        "sheets": [sheet_entry],
+    }
+
+    yaml_path = tmp_path / "config.yaml"
+    yaml_path.write_text(yaml.dump(document))
+    return yaml_path
+
+
+def test_load_from_yaml(config_file: Path):
+    """Load the YAML fixture and check the DSN plus the fields of its single sheet."""
+    loaded = ImportConfig.from_yaml(config_file)
+
+    assert loaded.dsn == "postgresql+psycopg2://u:p@localhost/db"
+    assert len(loaded.sheets) == 1
+
+    first = loaded.sheets[0]
+    assert (first.sheet, first.target_table, first.mode) == ("Artikel", "artikel", "replace")
+
+    mapped = first.columns
+    assert len(mapped) == 2
+    assert mapped[0].dtype == "VARCHAR(50)"
@@ -0,0 +1,80 @@
+from pathlib import Path
+import pandas as pd
+import pytest
+from sqlalchemy import create_engine, text
+
+from excel_import.config import ImportConfig, SheetConfig, ColumnMapping
+from excel_import.importer import Importer
+
+
+@pytest.fixture
+def xlsx_file(tmp_path: Path) -> Path:
+    """Create a three-row workbook with id, name and amount columns and return its path."""
+    columns = {
+        "id": [1, 2, 3],
+        "name": ["Alice", "Bob", "Carol"],
+        "amount": [100.0, 200.5, 300.0],
+    }
+    workbook_path = tmp_path / "data.xlsx"
+    # index=False keeps the DataFrame index out of the written sheet.
+    pd.DataFrame(columns).to_excel(workbook_path, index=False)
+    return workbook_path
+
+
+@pytest.fixture
+def sqlite_config(xlsx_file):
+    """Return an ImportConfig for in-memory SQLite: sheet 0 into "persons" with mode "append"."""
+    # xlsx_file is requested but not read here; the parameter is kept so the
+    # fixture's signature stays the same.
+    persons_sheet = SheetConfig(sheet=0, target_table="persons", mode="append")
+    return ImportConfig(dsn="sqlite:///:memory:", sheets=[persons_sheet])
+
+
+def test_import_append(xlsx_file, sqlite_config):
+    """Run one import and expect three rows in both the reported result and the table."""
+    importer = Importer(sqlite_config)
+    row_counts = importer.run(xlsx_file)
+    assert row_counts["persons"] == 3
+
+    count_query = text("SELECT COUNT(*) FROM persons")
+    with importer.engine.connect() as conn:
+        stored = conn.execute(count_query).scalar()
+    assert stored == 3
+
+
+def test_import_replace(xlsx_file, tmp_path):
+    """Import the same workbook twice in replace mode; the table must still hold three rows."""
+    persons_sheet = SheetConfig(sheet=0, target_table="persons", mode="replace")
+    importer = Importer(ImportConfig(dsn="sqlite:///:memory:", sheets=[persons_sheet]))
+
+    importer.run(xlsx_file)  # initial load
+    second_run = importer.run(xlsx_file)  # second run should truncate+insert
+    assert second_run["persons"] == 3
+
+    count_query = text("SELECT COUNT(*) FROM persons")
+    with importer.engine.connect() as conn:
+        stored = conn.execute(count_query).scalar()
+    assert stored == 3
+
+
+def test_import_creates_table(xlsx_file, sqlite_config):
+    """After a run, the "persons" table must be listed by the engine's inspector."""
+    from sqlalchemy import inspect
+
+    importer = Importer(sqlite_config)
+    importer.run(xlsx_file)
+
+    table_names = inspect(importer.engine).get_table_names()
+    assert "persons" in table_names
+
+
+def test_import_empty_sheet(tmp_path):
+    """A workbook with column headers but no data rows must be reported as zero rows."""
+    empty_path = tmp_path / "empty.xlsx"
+    header_only = pd.DataFrame({"a": [], "b": []})
+    header_only.to_excel(empty_path, index=False)
+
+    sheet_cfg = SheetConfig(sheet=0, target_table="empty_table", mode="append")
+    importer = Importer(ImportConfig(dsn="sqlite:///:memory:", sheets=[sheet_cfg]))
+
+    outcome = importer.run(empty_path)
+    assert outcome["empty_table"] == 0
@@ -0,0 +1,84 @@
+import io
+from pathlib import Path
+import pandas as pd
+import pytest
+
+from excel_import.reader import ExcelReader
+from excel_import.config import SheetConfig
+
+
+@pytest.fixture
+def xlsx_file(tmp_path: Path) -> Path:
+    """Create a three-row article workbook and return its path."""
+    articles = pd.DataFrame(
+        {
+            "Artikelnummer": ["A001", "A002", "A003"],
+            "Bezeichnung": ["Widget", "Gadget", None],  # last description is None
+            "Preis": [9.99, 14.50, 0.99],
+        }
+    )
+    target = tmp_path / "test.xlsx"
+    articles.to_excel(target, index=False)
+    return target
+
+
+def test_sheet_names(xlsx_file: Path):
+    """The workbook written by the fixture must report exactly one sheet, "Sheet1"."""
+    names = ExcelReader(xlsx_file).sheet_names()
+    assert names == ["Sheet1"]
+
+
+def test_read_basic(xlsx_file: Path):
+    """Reading sheet 0 without column mappings yields three rows under the source headers."""
+    workbook = ExcelReader(xlsx_file)
+    frame = workbook.read(SheetConfig(sheet=0, target_table="t"))
+
+    expected_headers = ["Artikelnummer", "Bezeichnung", "Preis"]
+    assert len(frame) == 3
+    assert list(frame.columns) == expected_headers
+
+
+def test_read_drops_empty_rows(tmp_path: Path):
+    """Of three written rows, one with None in every column, read() must return two."""
+    workbook_path = tmp_path / "empty_rows.xlsx"
+    # The middle row is None in both columns A and B.
+    source = pd.DataFrame({"A": ["x", None, "y"], "B": [1, None, 3]})
+    source.to_excel(workbook_path, index=False)
+
+    workbook = ExcelReader(workbook_path)
+    frame = workbook.read(SheetConfig(sheet=0, target_table="t"))
+    assert len(frame) == 2
+
+
+def test_read_column_rename(xlsx_file: Path):
+    """A mapped column must appear under its target name and not under its source header."""
+    from excel_import.config import ColumnMapping
+
+    # Source header -> target name, in the order the mappings are passed to SheetConfig.
+    renames = {"Artikelnummer": "art_nr", "Bezeichnung": "bez", "Preis": "preis"}
+    mappings = [ColumnMapping(source=src, target=dst) for src, dst in renames.items()]
+    cfg = SheetConfig(sheet=0, target_table="t", columns=mappings)
+
+    frame = ExcelReader(xlsx_file).read(cfg)
+    assert "art_nr" in frame.columns
+    assert "Artikelnummer" not in frame.columns
+
+
+def test_read_column_skip(xlsx_file: Path):
+    """A column whose mapping has skip=True must be absent from the frame read() returns."""
+    from excel_import.config import ColumnMapping
+
+    skip_price = ColumnMapping(source="Preis", target="Preis", skip=True)
+    cfg = SheetConfig(sheet=0, target_table="t", columns=[skip_price])
+
+    frame = ExcelReader(xlsx_file).read(cfg)
+    assert "Preis" not in frame.columns
+
+
+def test_file_not_found():
+    """Constructing a reader for a path that does not exist must raise FileNotFoundError."""
+    missing = "/nonexistent/path/file.xlsx"
+    with pytest.raises(FileNotFoundError):
+        ExcelReader(missing)
+
+
+def test_unsupported_extension(tmp_path: Path):
+    """An existing .csv file must be rejected with a ValueError matching "Unsupported"."""
+    csv_path = tmp_path / "data.csv"
+    csv_path.write_text("a,b\n1,2")
+
+    with pytest.raises(ValueError, match="Unsupported"):
+        ExcelReader(csv_path)