Initial implementation of generic Excel-to-DB import tool

Supports .xls and .xlsx, Oracle and PostgreSQL via SQLAlchemy.
Includes CLI (run/inspect/generate-config), YAML config, auto schema
detection, and append/replace/upsert modes.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-13 11:31:47 +02:00
commit 8f7399de58
26 changed files with 663 additions and 0 deletions
+87
View File
@@ -0,0 +1,87 @@
from __future__ import annotations
import logging
import sys
from pathlib import Path
import click
from .config import ImportConfig, SheetConfig
from .importer import Importer
from .reader import ExcelReader
def _setup_logging(verbose: bool) -> None:
    """Configure root logging at DEBUG when *verbose* is true, INFO otherwise.

    Uses ``logging.basicConfig``, which does nothing if the root logger
    already has handlers attached.
    """
    logging.basicConfig(
        format="%(levelname)s %(message)s",
        level=logging.DEBUG if verbose else logging.INFO,
    )
# Root command group; the subcommands below attach to it via @main.command().
# The docstring doubles as the text click shows for the top-level --help.
@click.group()
def main() -> None:
    """Generic Excel-to-database import tool (Oracle & PostgreSQL)."""
@main.command()
@click.argument("excel_file", type=click.Path(exists=True))
@click.argument("config_file", type=click.Path(exists=True))
@click.option("-v", "--verbose", is_flag=True)
def run(excel_file: str, config_file: str, verbose: bool) -> None:
    """Import EXCEL_FILE using CONFIG_FILE (YAML)."""
    # The docstring above is shown verbatim by `run --help`; keep it short.
    #
    # excel_file / config_file: paths that click has already checked exist.
    # verbose: switches logging from INFO to DEBUG.
    # Exits with status 1 after printing "ERROR: ..." to stderr on any failure.
    _setup_logging(verbose)
    try:
        # Loading the config and building the importer live inside the handler
        # as well, so a malformed config is reported the same way as a failed
        # import instead of surfacing as a raw traceback.
        cfg = ImportConfig.from_yaml(config_file)
        results = Importer(cfg).run(excel_file)
    except Exception as exc:  # top-level CLI boundary: report and exit
        # Full traceback is only emitted at DEBUG level, i.e. with --verbose.
        logging.getLogger(__name__).debug("Import failed", exc_info=True)
        click.echo(f"ERROR: {exc}", err=True)
        sys.exit(1)
    # `results` is iterated as a mapping of table name -> imported row count.
    for table, rows in results.items():
        click.echo(f" {table}: {rows} rows imported")
@main.command()
@click.argument("excel_file", type=click.Path(exists=True))
def inspect(excel_file: str) -> None:
    """Show sheet names and column preview of EXCEL_FILE."""
    # The docstring above is shown verbatim by `inspect --help`.
    #
    # For every sheet this prints its index and name, up to `preview_limit`
    # column labels, how many further columns were left out, and the number
    # of rows in the frame returned by the reader.
    preview_limit = 8
    reader = ExcelReader(excel_file)
    names = reader.sheet_names()
    click.echo(f"Sheets in {Path(excel_file).name}:")
    for i, name in enumerate(names):
        click.echo(f" [{i}] {name}")
        # Sheets are addressed by position; every other SheetConfig field is
        # left at its default. The frame is used for the row count as well,
        # so this is presumably a full read rather than a sample -- confirm
        # against ExcelReader.read.
        df = reader.read(SheetConfig(sheet=i))
        columns = df.columns
        preview = ", ".join(str(c) for c in columns[:preview_limit])
        click.echo(f" Columns ({len(columns)}): {preview}")
        hidden = len(columns) - preview_limit
        if hidden > 0:
            click.echo(f" ... and {hidden} more")
        click.echo(f" Rows: {len(df)}")
@main.command("generate-config")
@click.argument("excel_file", type=click.Path(exists=True))
@click.option("--dsn", default="postgresql+psycopg2://user:pass@localhost/dbname", show_default=True)
@click.option("--output", "-o", default="import_config.yaml", show_default=True)
def generate_config(excel_file: str, dsn: str, output: str) -> None:
    """Generate a starter YAML config from EXCEL_FILE's structure."""
    # The docstring above is shown verbatim by `generate-config --help`.
    #
    # excel_file: workbook whose sheets and column labels seed the config.
    # dsn: connection string written unchanged under the "dsn" key.
    # output: path of the YAML file to create (overwritten if it exists).
    import yaml  # imported lazily so only this subcommand needs it at call time

    def to_identifier(label: object) -> str:
        # Lower-case the label and replace spaces with underscores; no other
        # characters are altered.
        return str(label).lower().replace(" ", "_")

    reader = ExcelReader(excel_file)
    sheets = []
    for i, name in enumerate(reader.sheet_names()):
        # Read by position, but record the sheet *name* in the config.
        df = reader.read(SheetConfig(sheet=i))
        sheets.append({
            "sheet": name,
            "header_row": 0,
            "target_table": to_identifier(name),
            "mode": "append",
            "columns": [
                {"source": str(col), "target": to_identifier(col)}
                for col in df.columns
            ],
        })

    config = {"dsn": dsn, "sheets": sheets}
    # allow_unicode=True emits non-ASCII characters as-is, so the file must be
    # opened as UTF-8 explicitly; the locale default (e.g. cp1252) could raise
    # UnicodeEncodeError or yield a file that is not valid UTF-8 YAML.
    with open(output, "w", encoding="utf-8") as f:
        yaml.dump(config, f, allow_unicode=True, sort_keys=False)
    click.echo(f"Config written to {output}")