Initial implementation of generic Excel-to-DB import tool
Supports .xls and .xlsx, Oracle and PostgreSQL via SQLAlchemy. Includes CLI (run/inspect/generate-config), YAML config, auto schema detection, and append/replace/upsert modes. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,87 @@
|
||||
from __future__ import annotations
|
||||
import logging
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import click
|
||||
|
||||
from .config import ImportConfig, SheetConfig
|
||||
from .importer import Importer
|
||||
from .reader import ExcelReader
|
||||
|
||||
|
||||
def _setup_logging(verbose: bool):
|
||||
level = logging.DEBUG if verbose else logging.INFO
|
||||
logging.basicConfig(format="%(levelname)s %(message)s", level=level)
|
||||
|
||||
|
||||
@click.group()
def main() -> None:
    """Generic Excel-to-database import tool (Oracle & PostgreSQL)."""
    # Root command group. The docstring above is what click prints as the
    # top-level help text, so it is kept short and user-facing; subcommands
    # in this module register themselves with @main.command().
|
||||
|
||||
|
||||
@main.command()
@click.argument("excel_file", type=click.Path(exists=True))
@click.argument("config_file", type=click.Path(exists=True))
@click.option("-v", "--verbose", is_flag=True)
def run(excel_file: str, config_file: str, verbose: bool) -> None:
    """Import EXCEL_FILE using CONFIG_FILE (YAML)."""
    # excel_file:  path of the workbook handed to Importer.run().
    # config_file: path of the YAML file parsed by ImportConfig.from_yaml().
    # verbose:     switches logging from INFO to DEBUG.
    # Exits with status 1 after printing "ERROR: ..." to stderr on failure.
    _setup_logging(verbose)

    try:
        # Config parsing and importer construction live inside the try so a
        # malformed config is reported the same way as a failed import,
        # rather than escaping as an unhandled traceback.
        cfg = ImportConfig.from_yaml(config_file)
        results = Importer(cfg).run(excel_file)
    except Exception as exc:  # top-level CLI boundary: report and exit
        # The one-line message drops the traceback; keep it available at
        # DEBUG level so --verbose runs can still be diagnosed.
        logging.getLogger(__name__).debug("Import failed", exc_info=True)
        click.echo(f"ERROR: {exc}", err=True)
        sys.exit(1)

    # One summary line per entry of the mapping returned by the importer.
    for table, rows in results.items():
        click.echo(f" {table}: {rows} rows imported")
|
||||
|
||||
|
||||
@main.command()
@click.argument("excel_file", type=click.Path(exists=True))
def inspect(excel_file: str) -> None:
    """Show sheet names and column preview of EXCEL_FILE."""
    max_shown = 8  # column names listed per sheet before eliding the rest

    workbook = ExcelReader(excel_file)
    sheet_names = workbook.sheet_names()
    click.echo(f"Sheets in {Path(excel_file).name}:")

    for index, sheet_name in enumerate(sheet_names):
        click.echo(f" [{index}] {sheet_name}")
        # Each sheet is read by position with an otherwise default
        # SheetConfig; the resulting frame supplies both the column list
        # and the row count printed below.
        frame = workbook.read(SheetConfig(sheet=index))
        total = len(frame.columns)
        shown = ", ".join(str(col) for col in frame.columns[:max_shown])
        click.echo(f" Columns ({total}): {shown}")
        hidden = total - max_shown
        if hidden > 0:
            click.echo(f" ... and {hidden} more")
        click.echo(f" Rows: {len(frame)}")
|
||||
|
||||
|
||||
@main.command("generate-config")
@click.argument("excel_file", type=click.Path(exists=True))
@click.option(
    "--dsn",
    default="postgresql+psycopg2://user:pass@localhost/dbname",
    show_default=True,
)
@click.option("--output", "-o", default="import_config.yaml", show_default=True)
def generate_config(excel_file: str, dsn: str, output: str) -> None:
    """Generate a starter YAML config from EXCEL_FILE's structure."""
    # excel_file: workbook whose sheets and columns seed the config.
    # dsn:        connection string written verbatim under the "dsn" key.
    # output:     path of the YAML file to create (overwritten if present).
    import yaml

    reader = ExcelReader(excel_file)

    sheets = []
    for index, name in enumerate(reader.sheet_names()):
        # Sheets are read by position; the frame is only used for its
        # column labels.
        df = reader.read(SheetConfig(sheet=index))
        # Target identifiers are the source labels lower-cased with spaces
        # turned into underscores; no other characters are rewritten.
        columns = [
            {"source": str(col), "target": str(col).lower().replace(" ", "_")}
            for col in df.columns
        ]
        sheets.append({
            "sheet": name,
            "header_row": 0,
            "target_table": name.lower().replace(" ", "_"),
            "mode": "append",
            "columns": columns,
        })

    config = {"dsn": dsn, "sheets": sheets}
    # allow_unicode=True makes PyYAML emit non-ASCII characters unescaped,
    # so the file must be opened as UTF-8 explicitly; the platform default
    # encoding may be unable to represent sheet or column names.
    with open(output, "w", encoding="utf-8") as f:
        yaml.dump(config, f, allow_unicode=True, sort_keys=False)

    click.echo(f"Config written to {output}")
|
||||
Reference in New Issue
Block a user