Initial implementation of generic Excel-to-DB import tool

Supports .xls and .xlsx, Oracle and PostgreSQL via SQLAlchemy.
Includes CLI (run/inspect/generate-config), YAML config, auto schema
detection, and append/replace/upsert modes.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-13 11:31:47 +02:00
commit 8f7399de58
26 changed files with 663 additions and 0 deletions
+80
View File
@@ -0,0 +1,80 @@
from pathlib import Path
import pandas as pd
import pytest
from sqlalchemy import create_engine, text
from excel_import.config import ImportConfig, SheetConfig, ColumnMapping
from excel_import.importer import Importer
@pytest.fixture
def xlsx_file(tmp_path: Path) -> Path:
path = tmp_path / "data.xlsx"
df = pd.DataFrame({
"id": [1, 2, 3],
"name": ["Alice", "Bob", "Carol"],
"amount": [100.0, 200.5, 300.0],
})
df.to_excel(path, index=False)
return path
@pytest.fixture
def sqlite_config(xlsx_file):
return ImportConfig(
dsn="sqlite:///:memory:",
sheets=[
SheetConfig(
sheet=0,
target_table="persons",
mode="append",
)
],
)
def test_import_append(xlsx_file, sqlite_config):
importer = Importer(sqlite_config)
results = importer.run(xlsx_file)
assert results["persons"] == 3
with importer.engine.connect() as conn:
rows = conn.execute(text("SELECT COUNT(*) FROM persons")).scalar()
assert rows == 3
def test_import_replace(xlsx_file, tmp_path):
cfg = ImportConfig(
dsn="sqlite:///:memory:",
sheets=[SheetConfig(sheet=0, target_table="persons", mode="replace")],
)
importer = Importer(cfg)
importer.run(xlsx_file)
results = importer.run(xlsx_file) # second run should truncate+insert
assert results["persons"] == 3
with importer.engine.connect() as conn:
rows = conn.execute(text("SELECT COUNT(*) FROM persons")).scalar()
assert rows == 3
def test_import_creates_table(xlsx_file, sqlite_config):
importer = Importer(sqlite_config)
importer.run(xlsx_file)
from sqlalchemy import inspect
insp = inspect(importer.engine)
assert "persons" in insp.get_table_names()
def test_import_empty_sheet(tmp_path):
path = tmp_path / "empty.xlsx"
pd.DataFrame({"a": [], "b": []}).to_excel(path, index=False)
cfg = ImportConfig(
dsn="sqlite:///:memory:",
sheets=[SheetConfig(sheet=0, target_table="empty_table", mode="append")],
)
importer = Importer(cfg)
results = importer.run(path)
assert results["empty_table"] == 0