Initial implementation of generic Excel-to-DB import tool
Supports .xls and .xlsx, Oracle and PostgreSQL via SQLAlchemy. Includes CLI (run/inspect/generate-config), YAML config, auto schema detection, and append/replace/upsert modes. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Symlink
+1
@@ -0,0 +1 @@
|
|||||||
|
python3
|
||||||
Symlink
+1
@@ -0,0 +1 @@
|
|||||||
|
/usr/bin/python3
|
||||||
Symlink
+1
@@ -0,0 +1 @@
|
|||||||
|
python3
|
||||||
Symlink
+1
@@ -0,0 +1 @@
|
|||||||
|
lib
|
||||||
@@ -0,0 +1,5 @@
|
|||||||
|
home = /usr/bin
|
||||||
|
include-system-site-packages = false
|
||||||
|
version = 3.12.3
|
||||||
|
executable = /usr/bin/python3.12
|
||||||
|
command = /usr/bin/python3 -m venv /home/dierk/Programmierung/claude/excel-import/.venv
|
||||||
@@ -0,0 +1,40 @@
|
|||||||
|
# SQLAlchemy DSN — Beispiele:
|
||||||
|
# PostgreSQL: postgresql+psycopg2://user:pass@localhost/mydb
|
||||||
|
# Oracle: oracle+oracledb://user:pass@localhost:1521/?service_name=MYDB
|
||||||
|
dsn: "postgresql+psycopg2://user:pass@localhost/mydb"
|
||||||
|
|
||||||
|
default_varchar_length: 255
|
||||||
|
|
||||||
|
sheets:
|
||||||
|
- sheet: "Artikel" # Sheet-Name oder Index (0, 1, ...)
|
||||||
|
header_row: 0 # 0-basierter Zeilenindex der Kopfzeile
|
||||||
|
skip_rows: 0 # Zeilen vor der Kopfzeile überspringen
|
||||||
|
target_table: "artikel"
|
||||||
|
mode: "replace" # append | replace | upsert
|
||||||
|
upsert_keys: []
|
||||||
|
columns:
|
||||||
|
- source: "Artikelnummer"
|
||||||
|
target: "artikelnummer"
|
||||||
|
dtype: "VARCHAR(50)"
|
||||||
|
- source: "Bezeichnung"
|
||||||
|
target: "bezeichnung"
|
||||||
|
- source: "Preis"
|
||||||
|
target: "preis"
|
||||||
|
dtype: "NUMERIC(12,2)"
|
||||||
|
- source: "Interne Notiz"
|
||||||
|
target: "interne_notiz"
|
||||||
|
skip: true # Spalte nicht importieren
|
||||||
|
|
||||||
|
- sheet: "Kunden"
|
||||||
|
header_row: 0
|
||||||
|
target_table: "kunden"
|
||||||
|
mode: "upsert"
|
||||||
|
upsert_keys: ["kundennummer"]
|
||||||
|
columns:
|
||||||
|
- source: "Kundennummer"
|
||||||
|
target: "kundennummer"
|
||||||
|
dtype: "VARCHAR(20)"
|
||||||
|
- source: "Name"
|
||||||
|
target: "name"
|
||||||
|
- source: "E-Mail"
|
||||||
|
target: "email"
|
||||||
@@ -0,0 +1,4 @@
|
|||||||
|
from .reader import ExcelReader
|
||||||
|
from .importer import Importer
|
||||||
|
|
||||||
|
__all__ = ["ExcelReader", "Importer"]
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,87 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
import logging
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import click
|
||||||
|
|
||||||
|
from .config import ImportConfig, SheetConfig
|
||||||
|
from .importer import Importer
|
||||||
|
from .reader import ExcelReader
|
||||||
|
|
||||||
|
|
||||||
|
def _setup_logging(verbose: bool):
|
||||||
|
level = logging.DEBUG if verbose else logging.INFO
|
||||||
|
logging.basicConfig(format="%(levelname)s %(message)s", level=level)
|
||||||
|
|
||||||
|
|
||||||
|
# Root click command group; the sub-commands of this CLI register on it
# through the @main.command() decorator. The docstring is the group's help text.
@click.group()
def main():
    """Generic Excel-to-database import tool (Oracle & PostgreSQL)."""
|
||||||
|
|
||||||
|
|
||||||
|
@main.command()
@click.argument("excel_file", type=click.Path(exists=True))
@click.argument("config_file", type=click.Path(exists=True))
@click.option("-v", "--verbose", is_flag=True)
def run(excel_file: str, config_file: str, verbose: bool):
    """Import EXCEL_FILE using CONFIG_FILE (YAML)."""
    _setup_logging(verbose)

    # Loading the config and creating the engine can fail just like the
    # import itself (bad YAML, unknown keys, malformed DSN, missing driver),
    # so all three share one error boundary: the user gets "ERROR: ..." and
    # exit code 1 instead of a raw traceback.
    try:
        cfg = ImportConfig.from_yaml(config_file)
        importer = Importer(cfg)
        results = importer.run(excel_file)
    except Exception as exc:
        # Keep the full traceback available when running with --verbose.
        logging.getLogger(__name__).debug("Import failed", exc_info=True)
        click.echo(f"ERROR: {exc}", err=True)
        sys.exit(1)

    for table, rows in results.items():
        click.echo(f" {table}: {rows} rows imported")
|
||||||
|
|
||||||
|
|
||||||
|
@main.command()
@click.argument("excel_file", type=click.Path(exists=True))
def inspect(excel_file: str):
    """Show sheet names and column preview of EXCEL_FILE."""
    reader = ExcelReader(excel_file)
    click.echo(f"Sheets in {Path(excel_file).name}:")

    preview_limit = 8  # number of column names shown per sheet
    for index, name in enumerate(reader.sheet_names()):
        click.echo(f" [{index}] {name}")
        # Read the sheet with default settings (row 0 is the header); the
        # module-level SheetConfig import is used instead of re-importing it
        # on every iteration.
        df = reader.read(SheetConfig(sheet=index))
        headers = [str(c) for c in df.columns]
        click.echo(f" Columns ({len(headers)}): {', '.join(headers[:preview_limit])}")
        hidden = len(headers) - preview_limit
        if hidden > 0:
            click.echo(f" ... and {hidden} more")
        click.echo(f" Rows: {len(df)}")
|
||||||
|
|
||||||
|
|
||||||
|
@main.command("generate-config")
@click.argument("excel_file", type=click.Path(exists=True))
@click.option("--dsn", default="postgresql+psycopg2://user:pass@localhost/dbname", show_default=True)
@click.option("--output", "-o", default="import_config.yaml", show_default=True)
def generate_config(excel_file: str, dsn: str, output: str):
    """Generate a starter YAML config from EXCEL_FILE's structure."""
    import yaml

    def _to_identifier(label) -> str:
        # Derive a target name: lower-case, spaces replaced by underscores.
        return str(label).lower().replace(" ", "_")

    reader = ExcelReader(excel_file)

    sheets = []
    for index, name in enumerate(reader.sheet_names()):
        # Read with default settings (row 0 is the header) to discover columns.
        df = reader.read(SheetConfig(sheet=index))
        columns = [{"source": str(c), "target": _to_identifier(c)} for c in df.columns]
        sheets.append({
            "sheet": name,
            "header_row": 0,
            "target_table": _to_identifier(name),
            "mode": "append",
            "columns": columns,
        })

    config = {"dsn": dsn, "sheets": sheets}
    # allow_unicode=True emits non-ASCII characters verbatim, so the file is
    # written as UTF-8 explicitly rather than in the platform default encoding.
    with open(output, "w", encoding="utf-8") as f:
        yaml.dump(config, f, allow_unicode=True, sort_keys=False)
    click.echo(f"Config written to {output}")
|
||||||
@@ -0,0 +1,48 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Literal
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ColumnMapping:
    """Describes how one column of a sheet is mapped onto a table column."""

    source: str  # column name as it appears in the sheet
    target: str  # column name it is renamed to
    dtype: str | None = None  # override detected type, e.g. "VARCHAR(100)", "NUMBER"
    skip: bool = False  # marks the column as not to be imported
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class SheetConfig:
    """Import settings for a single sheet.

    Raises:
        ValueError: if ``mode`` is not one of ``append``/``replace``/``upsert``,
            or if ``mode`` is ``upsert`` and no ``upsert_keys`` are given.
    """

    sheet: str | int = 0  # sheet name or index
    header_row: int = 0  # 0-based row index of the header
    skip_rows: int = 0  # rows to skip before header
    target_table: str = ""
    columns: list[ColumnMapping] = field(default_factory=list)
    mode: Literal["append", "replace", "upsert"] = "append"
    upsert_keys: list[str] = field(default_factory=list)  # column names for upsert PK

    def __post_init__(self):
        # Literal[...] is not enforced at runtime; without this check a typo
        # such as "relpace" would go unnoticed until import time.
        valid_modes = ("append", "replace", "upsert")
        if self.mode not in valid_modes:
            raise ValueError(
                f"Invalid mode {self.mode!r} for sheet {self.sheet!r}; "
                f"expected one of {', '.join(valid_modes)}"
            )
        # An upsert needs conflict columns; fail at configuration time rather
        # than with a database error halfway through the import.
        if self.mode == "upsert" and not self.upsert_keys:
            raise ValueError(f"Sheet {self.sheet!r}: mode 'upsert' requires upsert_keys")
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ImportConfig:
    """Top-level import configuration: connection string plus per-sheet settings."""

    dsn: str  # SQLAlchemy DSN
    sheets: list[SheetConfig] = field(default_factory=list)
    default_varchar_length: int = 255

    @classmethod
    def from_yaml(cls, path: str | Path) -> "ImportConfig":
        """Build an ImportConfig from the YAML file at *path*.

        Raises:
            ValueError: if the file does not contain a YAML mapping
                (for example when it is empty).
            KeyError: if the mandatory ``dsn`` entry is missing.
            TypeError: if a sheet or column entry contains unknown keys.
        """
        # Config files may carry non-ASCII sheet and column names; read them
        # as UTF-8 instead of the platform default encoding.
        with open(path, encoding="utf-8") as f:
            raw = yaml.safe_load(f)

        if not isinstance(raw, dict):
            raise ValueError(f"Config file {path} must contain a YAML mapping at top level")

        sheets = []
        # "or []" also covers keys that are present but null in the YAML.
        for entry in raw.get("sheets") or []:
            options = dict(entry)  # copy so the parsed document is not mutated
            columns = [ColumnMapping(**c) for c in options.pop("columns", None) or []]
            upsert_keys = list(options.pop("upsert_keys", None) or [])
            sheets.append(SheetConfig(**options, columns=columns, upsert_keys=upsert_keys))

        return cls(
            dsn=raw["dsn"],
            default_varchar_length=raw.get("default_varchar_length", 255),
            sheets=sheets,
        )
|
||||||
@@ -0,0 +1,127 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
from sqlalchemy import create_engine, text, MetaData, Table, inspect
|
||||||
|
from sqlalchemy.dialects.postgresql import insert as pg_insert
|
||||||
|
|
||||||
|
from .config import ImportConfig, SheetConfig
|
||||||
|
from .reader import ExcelReader
|
||||||
|
from .schema import build_columns
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class Importer:
    """Imports the sheets of an Excel workbook into database tables.

    The target database is given by ``config.dsn``; each configured sheet is
    written in its own transaction using the sheet's mode
    (append, replace or upsert).
    """

    def __init__(self, config: ImportConfig):
        self.config = config
        self.engine = create_engine(config.dsn)

    def run(self, excel_path: str | Path) -> dict[str, int]:
        """Import all configured sheets. Returns {table_name: rows_imported}."""
        reader = ExcelReader(excel_path)
        results: dict[str, int] = {}
        for sheet_cfg in self.config.sheets:
            rows = self._import_sheet(reader, sheet_cfg)
            # Several sheets may feed the same table: add the counts up
            # instead of letting the last sheet overwrite earlier ones.
            results[sheet_cfg.target_table] = results.get(sheet_cfg.target_table, 0) + rows
        return results

    def _import_sheet(self, reader: ExcelReader, cfg: SheetConfig) -> int:
        """Import one sheet in a single transaction and return the row count.

        Raises:
            ValueError: if ``cfg.mode`` is not append, replace or upsert.
        """
        # Reject unknown modes up front; previously they silently fell
        # through to the append branch.
        if cfg.mode not in ("append", "replace", "upsert"):
            raise ValueError(f"Unknown import mode {cfg.mode!r} for sheet {cfg.sheet!r}")

        df = reader.read(cfg)
        if df.empty:
            logger.warning("Sheet %r is empty, skipping.", cfg.sheet)
            return 0

        logger.info("Read %d rows from sheet %r -> table %r", len(df), cfg.sheet, cfg.target_table)

        with self.engine.begin() as conn:
            self._ensure_table(conn, df, cfg)

            if cfg.mode == "replace":
                self._clear_table(conn, cfg.target_table)
                rows = self._bulk_insert(conn, df, cfg.target_table)
            elif cfg.mode == "upsert":
                rows = self._upsert(conn, df, cfg)
            else:  # append
                rows = self._bulk_insert(conn, df, cfg.target_table)

        logger.info("Imported %d rows into %r (mode=%s)", rows, cfg.target_table, cfg.mode)
        return rows

    @staticmethod
    def _reflect(conn, table_name: str) -> Table:
        """Return the reflected Table object for an existing table."""
        meta = MetaData()
        meta.reflect(bind=conn, only=[table_name])
        return meta.tables[table_name]

    def _ensure_table(self, conn, df: pd.DataFrame, cfg: SheetConfig):
        """Create the target table from the DataFrame's columns if it is missing."""
        insp = inspect(conn)
        if not insp.has_table(cfg.target_table):
            meta = MetaData()
            cols = build_columns(df, cfg.columns, self.config.default_varchar_length)
            # Constructing the Table registers it on ``meta``; create_all
            # then emits the CREATE TABLE.
            Table(cfg.target_table, meta, *cols)
            meta.create_all(conn)
            logger.info("Created table %r", cfg.target_table)

    def _clear_table(self, conn, table_name: str):
        """Delete all rows of *table_name* as part of the current transaction."""
        if self.engine.dialect.name == "postgresql":
            # PostgreSQL's TRUNCATE is transactional, so a failed insert
            # afterwards still rolls back to the previous table content.
            conn.execute(text(f"TRUNCATE TABLE {table_name}"))
        else:
            # Oracle's TRUNCATE is DDL with an implicit commit and cannot be
            # rolled back; SQLite has no TRUNCATE at all. A plain DELETE
            # keeps "replace" atomic on these dialects.
            conn.execute(self._reflect(conn, table_name).delete())

    def _bulk_insert(self, conn, df: pd.DataFrame, table_name: str) -> int:
        """Insert all rows of *df* into *table_name*; returns the row count."""
        records = _df_to_records(df)
        if not records:
            return 0
        conn.execute(self._reflect(conn, table_name).insert(), records)
        return len(records)

    def _upsert(self, conn, df: pd.DataFrame, cfg: SheetConfig) -> int:
        """Insert rows, updating existing ones that match ``cfg.upsert_keys``.

        Raises:
            ValueError: if no upsert keys are configured.
            NotImplementedError: for dialects other than PostgreSQL and Oracle.
        """
        if not cfg.upsert_keys:
            raise ValueError(f"Upsert into {cfg.target_table!r} requires at least one upsert key")

        dialect = self.engine.dialect.name
        records = _df_to_records(df)
        if not records:
            return 0

        table = self._reflect(conn, cfg.target_table)

        if dialect == "postgresql":
            stmt = pg_insert(table).values(records)
            # Only overwrite columns the sheet actually supplies. For any
            # other table column "excluded" holds the column default/NULL,
            # which would wipe existing data on conflict.
            supplied = {str(k) for k in records[0]}
            update_cols = {
                c.key: stmt.excluded[c.key]
                for c in table.columns
                if c.key in supplied and c.key not in cfg.upsert_keys
            }
            if update_cols:
                stmt = stmt.on_conflict_do_update(index_elements=cfg.upsert_keys, set_=update_cols)
            else:
                # Every supplied column is a key: nothing to update.
                stmt = stmt.on_conflict_do_nothing(index_elements=cfg.upsert_keys)
            conn.execute(stmt)
        elif dialect == "oracle":
            # Oracle MERGE via raw SQL
            for record in records:
                _oracle_merge(conn, table, record, cfg.upsert_keys)
        else:
            raise NotImplementedError(f"Upsert not implemented for dialect: {dialect}")

        return len(records)
|
||||||
|
|
||||||
|
|
||||||
|
def _df_to_records(df: pd.DataFrame) -> list[dict]:
|
||||||
|
# Replace pandas NA/NaT with None so SQLAlchemy handles nulls correctly
|
||||||
|
return [
|
||||||
|
{k: (None if pd.isna(v) else v) for k, v in row.items()}
|
||||||
|
for row in df.to_dict(orient="records")
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def _oracle_merge(conn, table: Table, record: dict, keys: list[str]):
    """Upsert one row into *table* with an Oracle MERGE statement.

    Args:
        conn: open SQLAlchemy connection to an Oracle database.
        table: target table.
        record: column name -> value for the row.
        keys: column names used to match an existing row.

    Raises:
        ValueError: if *keys* is empty or names a column missing from *record*.
    """
    if not keys:
        raise ValueError("Oracle MERGE requires at least one upsert key")

    columns = [str(c) for c in record]
    missing = [k for k in keys if k not in columns]
    if missing:
        raise ValueError(f"Upsert key(s) not present in record: {missing}")

    # Quote identifiers the same way SQLAlchemy does when it creates the
    # table, so mixed-case or otherwise special column names keep working.
    quote = conn.dialect.identifier_preparer.quote
    quoted = {c: quote(c) for c in columns}
    # Bind names are generated (b0, b1, ...) because a column name is not
    # necessarily a valid bind-parameter name (e.g. "e-mail").
    bind_names = {c: f"b{i}" for i, c in enumerate(columns)}
    params = {bind_names[str(c)]: value for c, value in record.items()}

    key_clauses = " AND ".join(f"t.{quoted[k]} = s.{quoted[k]}" for k in keys)
    select_parts = ", ".join(f":{bind_names[c]} AS {quoted[c]}" for c in columns)
    insert_cols = ", ".join(quoted[c] for c in columns)
    insert_vals = ", ".join(f"s.{quoted[c]}" for c in columns)

    # When every column is a key there is nothing to update; an empty
    # "UPDATE SET" would be invalid SQL, so the clause is left out.
    non_keys = [c for c in columns if c not in keys]
    matched_clause = ""
    if non_keys:
        update_parts = ", ".join(f"t.{quoted[c]} = s.{quoted[c]}" for c in non_keys)
        matched_clause = f"WHEN MATCHED THEN UPDATE SET {update_parts}"

    sql = f"""
        MERGE INTO {quote(table.name)} t
        USING (SELECT {select_parts} FROM dual) s
        ON ({key_clauses})
        {matched_clause}
        WHEN NOT MATCHED THEN INSERT ({insert_cols}) VALUES ({insert_vals})
    """
    conn.execute(text(sql), params)
|
||||||
@@ -0,0 +1,49 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
from pathlib import Path
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
from .config import SheetConfig
|
||||||
|
|
||||||
|
|
||||||
|
def _engine_for(path: Path) -> str:
|
||||||
|
return "xlrd" if path.suffix.lower() == ".xls" else "openpyxl"
|
||||||
|
|
||||||
|
|
||||||
|
class ExcelReader:
    """Reads the sheets of one Excel workbook into pandas DataFrames."""

    def __init__(self, path: str | Path):
        """Remember *path* after checking that it exists and has an Excel suffix.

        Raises:
            FileNotFoundError: if *path* does not exist.
            ValueError: if the suffix is not .xls, .xlsx, .xlsm or .xlsb.
        """
        self.path = Path(path)
        if not self.path.exists():
            raise FileNotFoundError(f"Excel file not found: {self.path}")
        if self.path.suffix.lower() not in {".xls", ".xlsx", ".xlsm", ".xlsb"}:
            raise ValueError(f"Unsupported file type: {self.path.suffix}")

    def sheet_names(self) -> list[str]:
        """Return the names of all sheets in the workbook."""
        engine = _engine_for(self.path)
        # ExcelFile keeps the workbook open; the context manager closes it
        # again instead of leaking the file handle.
        with pd.ExcelFile(self.path, engine=engine) as xf:
            return list(xf.sheet_names)

    def read(self, cfg: SheetConfig) -> pd.DataFrame:
        """Read the sheet described by *cfg* and apply its column mapping.

        Completely empty rows are dropped, columns marked ``skip`` are
        removed, and the remaining mapped columns are renamed from
        ``source`` to ``target``.
        """
        engine = _engine_for(self.path)
        df = pd.read_excel(
            self.path,
            sheet_name=cfg.sheet,
            header=cfg.header_row,
            skiprows=range(cfg.skip_rows) if cfg.skip_rows else None,
            engine=engine,
        )
        # drop completely empty rows
        df.dropna(how="all", inplace=True)

        # apply column mapping: rename and drop skipped columns
        if cfg.columns:
            skip_sources = {c.source for c in cfg.columns if c.skip}
            df.drop(columns=[c for c in skip_sources if c in df.columns], inplace=True)

            rename_map = {
                c.source: c.target
                for c in cfg.columns
                if not c.skip and c.source != c.target
            }
            df.rename(columns=rename_map, inplace=True)

        return df
|
||||||
@@ -0,0 +1,72 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
import pandas as pd
|
||||||
|
from sqlalchemy import (
|
||||||
|
Column, Integer, Float, String, DateTime, Date, Boolean, Numeric, Text
|
||||||
|
)
|
||||||
|
|
||||||
|
from .config import ColumnMapping
|
||||||
|
|
||||||
|
|
||||||
|
def _pandas_dtype_to_sqla(series: pd.Series, varchar_length: int):
    """Infer a SQLAlchemy column type from the dtype and content of *series*.

    Args:
        series: one DataFrame column.
        varchar_length: minimum length used for string columns.

    Returns:
        Boolean, Integer, Float or DateTime for the matching pandas dtypes;
        for object/string columns DateTime when the sampled values are dates
        or date-like strings, otherwise String; Text for anything else.
    """
    from datetime import date  # datetime and pandas.Timestamp are subclasses of date

    dtype = series.dtype
    if pd.api.types.is_bool_dtype(dtype):
        return Boolean()
    if pd.api.types.is_integer_dtype(dtype):
        return Integer()
    if pd.api.types.is_float_dtype(dtype):
        return Float()
    if pd.api.types.is_datetime64_any_dtype(dtype):
        return DateTime()

    # object columns (and pandas string dtype): check if they look like dates
    if dtype == object or isinstance(dtype, pd.StringDtype):
        non_null = series.dropna()
        if non_null.empty:
            return String(max(1 + 10, varchar_length))

        sample = non_null.head(100)
        # pd.to_datetime also accepts plain numbers (as epoch offsets), so
        # only try it when the sample consists of strings or date objects.
        # Otherwise a mixed column that starts with numbers would be
        # misclassified as DateTime.
        if all(isinstance(value, (str, date)) for value in sample):
            try:
                pd.to_datetime(sample)
                return DateTime()
            except Exception:
                # Best-effort heuristic: any parsing failure simply means
                # "not a date column".
                pass

        max_len = int(non_null.astype(str).str.len().max())
        return String(max(max_len + 10, varchar_length))
    return Text()
|
||||||
|
|
||||||
|
|
||||||
|
def _override_to_sqla(dtype_str: str):
    """Convert a user-supplied type string like 'VARCHAR(100)' to a SQLAlchemy type.

    Raises:
        ValueError: if the type name is unknown or its arguments are malformed.
    """
    s = dtype_str.upper().strip()

    def _int_args() -> list[int]:
        # Parse the comma-separated integers between the parentheses. Only
        # the leading number of each argument is used, so Oracle length
        # semantics such as "50 CHAR" / "50 BYTE" are accepted.
        inner = s.partition("(")[2].rstrip(")")
        try:
            return [int(part.split()[0]) for part in inner.split(",")]
        except (ValueError, IndexError):
            raise ValueError(f"Malformed dtype override: {dtype_str!r}") from None

    if s.startswith("VARCHAR"):
        length = _int_args()[0] if "(" in s else 255
        return String(length)
    if s in ("TEXT", "CLOB"):
        return Text()
    # NOTE: a bare "NUMBER" is mapped to Integer; use NUMBER(p,s) for decimals.
    if s in ("INTEGER", "INT", "NUMBER"):
        return Integer()
    if s.startswith(("NUMBER", "NUMERIC", "DECIMAL")):
        if "(" in s:
            args = _int_args()
            precision = args[0]
            scale = args[1] if len(args) > 1 else 0
            return Numeric(precision=precision, scale=scale)
        return Numeric()
    if s in ("FLOAT", "REAL", "DOUBLE", "DOUBLE PRECISION"):
        return Float()
    if s in ("DATETIME", "TIMESTAMP"):
        return DateTime()
    if s == "DATE":
        return Date()
    if s in ("BOOLEAN", "BOOL"):
        return Boolean()
    raise ValueError(f"Unknown dtype override: {dtype_str!r}")
|
||||||
|
|
||||||
|
|
||||||
|
def build_columns(df: pd.DataFrame, column_configs: list[ColumnMapping], varchar_length: int) -> list[Column]:
    """Build one SQLAlchemy Column per DataFrame column.

    A column whose name matches a non-skipped mapping with a ``dtype`` gets
    that overridden type; every other column gets a type inferred from its data.
    """
    # name (target, falling back to source) -> user-supplied type string
    overrides = {}
    for mapping in column_configs:
        if mapping.skip or not mapping.dtype:
            continue
        overrides[mapping.target or mapping.source] = mapping.dtype

    result = []
    for label in df.columns:
        name = str(label)
        declared = overrides.get(name)
        if declared:
            col_type = _override_to_sqla(declared)
        else:
            col_type = _pandas_dtype_to_sqla(df[label], varchar_length)
        result.append(Column(name, col_type))
    return result
|
||||||
@@ -0,0 +1,24 @@
|
|||||||
|
[build-system]
requires = ["setuptools>=68", "wheel"]
# "setuptools.build_meta" is the PEP 517 backend provided by setuptools;
# there is no "setuptools.backends.legacy" module, so builds failed on import.
build-backend = "setuptools.build_meta"
|
||||||
|
|
||||||
|
[project]
|
||||||
|
name = "excel-import"
|
||||||
|
version = "0.1.0"
|
||||||
|
requires-python = ">=3.10"
|
||||||
|
dependencies = [
|
||||||
|
"pandas>=2.0",
|
||||||
|
"openpyxl>=3.1",
|
||||||
|
"xlrd>=2.0",
|
||||||
|
"sqlalchemy>=2.0",
|
||||||
|
"psycopg2-binary>=2.9",
|
||||||
|
"oracledb>=2.0",
|
||||||
|
"pyyaml>=6.0",
|
||||||
|
"click>=8.1",
|
||||||
|
]
|
||||||
|
|
||||||
|
[project.scripts]
|
||||||
|
excel-import = "excel_import.cli:main"
|
||||||
|
|
||||||
|
[project.optional-dependencies]
|
||||||
|
dev = ["pytest>=8.0", "pytest-mock>=3.0"]
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,39 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
import pytest
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
from excel_import.config import ImportConfig
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def config_file(tmp_path: Path) -> Path:
    """Write a one-sheet import config as YAML into tmp_path and return its path."""
    artikel_columns = [
        {"source": "Artikelnummer", "target": "art_nr", "dtype": "VARCHAR(50)"},
        {"source": "Preis", "target": "preis"},
    ]
    artikel_sheet = {
        "sheet": "Artikel",
        "header_row": 0,
        "target_table": "artikel",
        "mode": "replace",
        "columns": artikel_columns,
    }
    document = {
        "dsn": "postgresql+psycopg2://u:p@localhost/db",
        "sheets": [artikel_sheet],
    }
    target = tmp_path / "config.yaml"
    target.write_text(yaml.dump(document))
    return target
|
||||||
|
|
||||||
|
|
||||||
|
def test_load_from_yaml(config_file: Path):
    """from_yaml must restore DSN, sheet settings and column mappings."""
    loaded = ImportConfig.from_yaml(config_file)
    assert loaded.dsn == "postgresql+psycopg2://u:p@localhost/db"
    assert len(loaded.sheets) == 1

    (only_sheet,) = loaded.sheets
    assert only_sheet.sheet == "Artikel"
    assert only_sheet.target_table == "artikel"
    assert only_sheet.mode == "replace"
    assert len(only_sheet.columns) == 2
    first_column = only_sheet.columns[0]
    assert first_column.dtype == "VARCHAR(50)"
|
||||||
@@ -0,0 +1,80 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
import pandas as pd
|
||||||
|
import pytest
|
||||||
|
from sqlalchemy import create_engine, text
|
||||||
|
|
||||||
|
from excel_import.config import ImportConfig, SheetConfig, ColumnMapping
|
||||||
|
from excel_import.importer import Importer
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def xlsx_file(tmp_path: Path) -> Path:
    """Create a three-row workbook (id, name, amount) and return its path."""
    content = {
        "id": [1, 2, 3],
        "name": ["Alice", "Bob", "Carol"],
        "amount": [100.0, 200.5, 300.0],
    }
    workbook = tmp_path / "data.xlsx"
    pd.DataFrame(content).to_excel(workbook, index=False)
    return workbook
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def sqlite_config(xlsx_file):
    """Config that appends the first sheet to table "persons" in in-memory SQLite."""
    persons_sheet = SheetConfig(
        sheet=0,
        target_table="persons",
        mode="append",
    )
    return ImportConfig(dsn="sqlite:///:memory:", sheets=[persons_sheet])
|
||||||
|
|
||||||
|
|
||||||
|
def test_import_append(xlsx_file, sqlite_config):
    """Append mode reports and stores all three rows."""
    importer = Importer(sqlite_config)
    imported = importer.run(xlsx_file)
    assert imported["persons"] == 3

    with importer.engine.connect() as conn:
        count_result = conn.execute(text("SELECT COUNT(*) FROM persons"))
        stored = count_result.scalar()
    assert stored == 3
|
||||||
|
|
||||||
|
|
||||||
|
def test_import_replace(xlsx_file):
    """Running a replace import twice must leave exactly one copy of the data."""
    cfg = ImportConfig(
        dsn="sqlite:///:memory:",
        sheets=[SheetConfig(sheet=0, target_table="persons", mode="replace")],
    )
    importer = Importer(cfg)

    first = importer.run(xlsx_file)
    assert first["persons"] == 3

    # The second run has to clear the table before inserting again.
    second = importer.run(xlsx_file)
    assert second["persons"] == 3

    with importer.engine.connect() as conn:
        rows = conn.execute(text("SELECT COUNT(*) FROM persons")).scalar()
    assert rows == 3
|
||||||
|
|
||||||
|
|
||||||
|
def test_import_creates_table(xlsx_file, sqlite_config):
    """The importer creates the target table when it does not exist yet."""
    from sqlalchemy import inspect

    importer = Importer(sqlite_config)
    importer.run(xlsx_file)

    existing_tables = inspect(importer.engine).get_table_names()
    assert "persons" in existing_tables
|
||||||
|
|
||||||
|
|
||||||
|
def test_import_empty_sheet(tmp_path):
    """A sheet with a header but no rows is reported as 0 imported rows."""
    workbook = tmp_path / "empty.xlsx"
    header_only = pd.DataFrame({"a": [], "b": []})
    header_only.to_excel(workbook, index=False)

    sheet = SheetConfig(sheet=0, target_table="empty_table", mode="append")
    importer = Importer(ImportConfig(dsn="sqlite:///:memory:", sheets=[sheet]))

    outcome = importer.run(workbook)
    assert outcome["empty_table"] == 0
|
||||||
@@ -0,0 +1,84 @@
|
|||||||
|
import io
|
||||||
|
from pathlib import Path
|
||||||
|
import pandas as pd
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from excel_import.reader import ExcelReader
|
||||||
|
from excel_import.config import SheetConfig
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def xlsx_file(tmp_path: Path) -> Path:
    """Create a three-row article workbook (one missing description) and return its path."""
    articles = {
        "Artikelnummer": ["A001", "A002", "A003"],
        "Bezeichnung": ["Widget", "Gadget", None],
        "Preis": [9.99, 14.50, 0.99],
    }
    workbook = tmp_path / "test.xlsx"
    pd.DataFrame(articles).to_excel(workbook, index=False)
    return workbook
|
||||||
|
|
||||||
|
|
||||||
|
def test_sheet_names(xlsx_file: Path):
    """The fixture workbook is expected to expose a single sheet named "Sheet1"."""
    reader = ExcelReader(xlsx_file)
    assert reader.sheet_names() == ["Sheet1"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_basic(xlsx_file: Path):
    """Without a column mapping all rows and original headers are returned."""
    sheet = SheetConfig(sheet=0, target_table="t")
    frame = ExcelReader(xlsx_file).read(sheet)
    assert len(frame) == 3
    assert list(frame.columns) == ["Artikelnummer", "Bezeichnung", "Preis"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_drops_empty_rows(tmp_path: Path):
    """Rows in which every cell is empty are removed by the reader."""
    workbook = tmp_path / "empty_rows.xlsx"
    with_gap = pd.DataFrame({"A": ["x", None, "y"], "B": [1, None, 3]})
    with_gap.to_excel(workbook, index=False)

    sheet = SheetConfig(sheet=0, target_table="t")
    loaded = ExcelReader(workbook).read(sheet)
    assert len(loaded) == 2
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_column_rename(xlsx_file: Path):
    """Mapped columns appear under their target name, not the source name."""
    from excel_import.config import ColumnMapping

    renames = [
        ("Artikelnummer", "art_nr"),
        ("Bezeichnung", "bez"),
        ("Preis", "preis"),
    ]
    mappings = [ColumnMapping(source=src, target=dst) for src, dst in renames]
    cfg = SheetConfig(sheet=0, target_table="t", columns=mappings)

    frame = ExcelReader(xlsx_file).read(cfg)
    assert "art_nr" in frame.columns
    assert "Artikelnummer" not in frame.columns
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_column_skip(xlsx_file: Path):
    """A column mapped with skip=True is absent from the result."""
    from excel_import.config import ColumnMapping

    skipped_price = ColumnMapping(source="Preis", target="Preis", skip=True)
    cfg = SheetConfig(sheet=0, target_table="t", columns=[skipped_price])

    frame = ExcelReader(xlsx_file).read(cfg)
    assert "Preis" not in frame.columns
|
||||||
|
|
||||||
|
|
||||||
|
def test_file_not_found():
    """Constructing a reader for a missing path must raise FileNotFoundError."""
    with pytest.raises(FileNotFoundError):
        ExcelReader("/nonexistent/path/file.xlsx")
|
||||||
|
|
||||||
|
|
||||||
|
def test_unsupported_extension(tmp_path: Path):
    """An existing file with a non-Excel suffix is rejected with ValueError."""
    csv_path = tmp_path / "data.csv"
    csv_path.write_text("a,b\n1,2")
    with pytest.raises(ValueError, match="Unsupported"):
        ExcelReader(csv_path)
|
||||||
Reference in New Issue
Block a user