Initial implementation of generic Excel-to-DB import tool
Supports .xls and .xlsx files and targets Oracle and PostgreSQL via SQLAlchemy. Includes a CLI (run/inspect/generate-config), YAML configuration, automatic schema detection, and append/replace/upsert modes.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,39 @@
|
||||
from pathlib import Path
|
||||
import pytest
|
||||
import yaml
|
||||
|
||||
from excel_import.config import ImportConfig
|
||||
|
||||
|
||||
@pytest.fixture
def config_file(tmp_path: Path) -> Path:
    """Write a one-sheet import configuration to ``config.yaml`` and return its path.

    The document holds a DSN plus a single sheet entry with two column
    mappings; only the first mapping carries an explicit ``dtype``.
    """
    column_entries = [
        {"source": "Artikelnummer", "target": "art_nr", "dtype": "VARCHAR(50)"},
        {"source": "Preis", "target": "preis"},
    ]
    sheet_entry = {
        "sheet": "Artikel",
        "header_row": 0,
        "target_table": "artikel",
        "mode": "replace",
        "columns": column_entries,
    }
    document = {
        "dsn": "postgresql+psycopg2://u:p@localhost/db",
        "sheets": [sheet_entry],
    }

    yaml_path = tmp_path / "config.yaml"
    yaml_path.write_text(yaml.dump(document))
    return yaml_path
|
||||
|
||||
|
||||
def test_load_from_yaml(config_file: Path):
    """Check that ``ImportConfig.from_yaml`` exposes the values written by the fixture."""
    loaded = ImportConfig.from_yaml(config_file)

    assert loaded.dsn == "postgresql+psycopg2://u:p@localhost/db"
    assert len(loaded.sheets) == 1

    first_sheet = loaded.sheets[0]
    assert first_sheet.sheet == "Artikel"
    assert first_sheet.target_table == "artikel"
    assert first_sheet.mode == "replace"

    mappings = first_sheet.columns
    assert len(mappings) == 2
    # Only the first mapping in the fixture declares an explicit dtype.
    assert mappings[0].dtype == "VARCHAR(50)"
|
||||
@@ -0,0 +1,80 @@
|
||||
from pathlib import Path
|
||||
import pandas as pd
|
||||
import pytest
|
||||
from sqlalchemy import create_engine, text
|
||||
|
||||
from excel_import.config import ImportConfig, SheetConfig, ColumnMapping
|
||||
from excel_import.importer import Importer
|
||||
|
||||
|
||||
@pytest.fixture
def xlsx_file(tmp_path: Path) -> Path:
    """Create ``data.xlsx`` with three rows of id/name/amount data and return its path."""
    records = {
        "id": [1, 2, 3],
        "name": ["Alice", "Bob", "Carol"],
        "amount": [100.0, 200.5, 300.0],
    }
    workbook = tmp_path / "data.xlsx"
    # index=False keeps the DataFrame index out of the sheet.
    pd.DataFrame(records).to_excel(workbook, index=False)
    return workbook
|
||||
|
||||
|
||||
@pytest.fixture
def sqlite_config(xlsx_file):
    """Return an ``ImportConfig`` for an in-memory SQLite DSN.

    The config maps the first sheet (index 0) to the ``persons`` table in
    ``append`` mode. ``xlsx_file`` is requested but not read here.
    """
    persons_sheet = SheetConfig(sheet=0, target_table="persons", mode="append")
    return ImportConfig(dsn="sqlite:///:memory:", sheets=[persons_sheet])
|
||||
|
||||
|
||||
def test_import_append(xlsx_file, sqlite_config):
    """An append-mode run reports three rows and the table then holds three rows."""
    loader = Importer(sqlite_config)

    imported = loader.run(xlsx_file)
    assert imported["persons"] == 3

    count_query = text("SELECT COUNT(*) FROM persons")
    with loader.engine.connect() as connection:
        stored = connection.execute(count_query).scalar()
    assert stored == 3
|
||||
|
||||
|
||||
def test_import_replace(xlsx_file, tmp_path):
    """Running a replace-mode import twice must not duplicate the rows."""
    replace_sheet = SheetConfig(sheet=0, target_table="persons", mode="replace")
    settings = ImportConfig(dsn="sqlite:///:memory:", sheets=[replace_sheet])
    loader = Importer(settings)

    loader.run(xlsx_file)
    # The second run is expected to truncate and re-insert, not append.
    second_pass = loader.run(xlsx_file)
    assert second_pass["persons"] == 3

    count_query = text("SELECT COUNT(*) FROM persons")
    with loader.engine.connect() as connection:
        stored = connection.execute(count_query).scalar()
    assert stored == 3
|
||||
|
||||
|
||||
def test_import_creates_table(xlsx_file, sqlite_config):
    """After a run, the ``persons`` table is visible through the importer's engine."""
    from sqlalchemy import inspect

    loader = Importer(sqlite_config)
    loader.run(xlsx_file)

    known_tables = inspect(loader.engine).get_table_names()
    assert "persons" in known_tables
|
||||
|
||||
|
||||
def test_import_empty_sheet(tmp_path):
    """A sheet with headers but no data rows reports zero imported rows."""
    workbook = tmp_path / "empty.xlsx"
    header_only = pd.DataFrame({"a": [], "b": []})
    header_only.to_excel(workbook, index=False)

    empty_sheet = SheetConfig(sheet=0, target_table="empty_table", mode="append")
    settings = ImportConfig(dsn="sqlite:///:memory:", sheets=[empty_sheet])

    outcome = Importer(settings).run(workbook)
    assert outcome["empty_table"] == 0
|
||||
@@ -0,0 +1,84 @@
|
||||
import io
|
||||
from pathlib import Path
|
||||
import pandas as pd
|
||||
import pytest
|
||||
|
||||
from excel_import.reader import ExcelReader
|
||||
from excel_import.config import SheetConfig
|
||||
|
||||
|
||||
@pytest.fixture
def xlsx_file(tmp_path: Path) -> Path:
    """Create ``test.xlsx`` with three article rows and return its path.

    The ``Bezeichnung`` column deliberately contains one missing value.
    """
    articles = {
        "Artikelnummer": ["A001", "A002", "A003"],
        "Bezeichnung": ["Widget", "Gadget", None],
        "Preis": [9.99, 14.50, 0.99],
    }
    workbook = tmp_path / "test.xlsx"
    pd.DataFrame(articles).to_excel(workbook, index=False)
    return workbook
|
||||
|
||||
|
||||
def test_sheet_names(xlsx_file: Path):
    """The fixture workbook exposes exactly one sheet, named ``Sheet1``."""
    # The fixture passes no sheet name to to_excel, so the writer's default applies.
    names = ExcelReader(xlsx_file).sheet_names()
    assert names == ["Sheet1"]
|
||||
|
||||
|
||||
def test_read_basic(xlsx_file: Path):
    """Reading without column mappings keeps all rows and the original headers."""
    sheet_cfg = SheetConfig(sheet=0, target_table="t")
    frame = ExcelReader(xlsx_file).read(sheet_cfg)

    assert len(frame) == 3
    assert list(frame.columns) == ["Artikelnummer", "Bezeichnung", "Preis"]
|
||||
|
||||
|
||||
def test_read_drops_empty_rows(tmp_path: Path):
    """A row whose cells are all empty is expected to be absent from the result."""
    workbook = tmp_path / "empty_rows.xlsx"
    # The middle row is empty in every column.
    source = pd.DataFrame({"A": ["x", None, "y"], "B": [1, None, 3]})
    source.to_excel(workbook, index=False)

    sheet_cfg = SheetConfig(sheet=0, target_table="t")
    loaded = ExcelReader(workbook).read(sheet_cfg)
    assert len(loaded) == 2
|
||||
|
||||
|
||||
def test_read_column_rename(xlsx_file: Path):
    """Mapped columns appear under their target names, not their source names."""
    from excel_import.config import ColumnMapping

    renames = [
        ("Artikelnummer", "art_nr"),
        ("Bezeichnung", "bez"),
        ("Preis", "preis"),
    ]
    sheet_cfg = SheetConfig(
        sheet=0,
        target_table="t",
        columns=[
            ColumnMapping(source=old_name, target=new_name)
            for old_name, new_name in renames
        ],
    )

    frame = ExcelReader(xlsx_file).read(sheet_cfg)
    assert "art_nr" in frame.columns
    assert "Artikelnummer" not in frame.columns
|
||||
|
||||
|
||||
def test_read_column_skip(xlsx_file: Path):
    """A column mapped with ``skip=True`` is left out of the resulting frame."""
    from excel_import.config import ColumnMapping

    skipped_price = ColumnMapping(source="Preis", target="Preis", skip=True)
    sheet_cfg = SheetConfig(sheet=0, target_table="t", columns=[skipped_price])

    frame = ExcelReader(xlsx_file).read(sheet_cfg)
    assert "Preis" not in frame.columns
|
||||
|
||||
|
||||
def test_file_not_found():
    """Constructing a reader for a missing path raises ``FileNotFoundError``."""
    missing_path = "/nonexistent/path/file.xlsx"
    with pytest.raises(FileNotFoundError):
        ExcelReader(missing_path)
|
||||
|
||||
|
||||
def test_unsupported_extension(tmp_path: Path):
    """An existing file with a ``.csv`` suffix is rejected with a ``ValueError``."""
    csv_path = tmp_path / "data.csv"
    # The file must exist so the failure comes from the suffix, not a missing path.
    csv_path.write_text("a,b\n1,2")

    with pytest.raises(ValueError, match="Unsupported"):
        ExcelReader(csv_path)
|
||||
Reference in New Issue
Block a user