8f7399de58
Supports .xls and .xlsx, Oracle and PostgreSQL via SQLAlchemy. Includes CLI (run/inspect/generate-config), YAML config, auto schema detection, and append/replace/upsert modes. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
49 lines
1.5 KiB
Python
49 lines
1.5 KiB
Python
from __future__ import annotations
|
|
from dataclasses import dataclass, field
|
|
from pathlib import Path
|
|
from typing import Literal
|
|
import yaml
|
|
|
|
|
|
@dataclass
class ColumnMapping:
    """Describe how one source column maps onto one target column.

    Attributes:
        source: Name of the column on the source side.
            NOTE(review): presumably the spreadsheet header text -- confirm
            against the reader code.
        target: Name of the column on the target side.
        dtype: Optional explicit type that overrides the detected one,
            for example ``"VARCHAR(100)"`` or ``"NUMBER"``. ``None`` keeps
            the detected type.
        skip: Flag marking the column as skipped; defaults to ``False``.
            NOTE(review): presumably a skipped column is left out of the
            import -- confirm against the importer.
    """

    source: str
    target: str
    dtype: str | None = None
    skip: bool = False
|
|
|
|
|
|
@dataclass
class SheetConfig:
    """Import settings for a single worksheet.

    Attributes:
        sheet: Worksheet to read, given either by name or by index
            (default: ``0``).
        header_row: Zero-based index of the row that holds the header.
        skip_rows: Number of rows to skip before the header.
        target_table: Name of the destination table (empty by default).
        columns: Explicit per-column mappings; empty by default.
        mode: Write mode, one of ``"append"``, ``"replace"`` or
            ``"upsert"``.
        upsert_keys: Column names that form the primary key for upserts.
    """

    sheet: str | int = 0
    header_row: int = 0
    skip_rows: int = 0
    target_table: str = ""
    # Mutable defaults go through default_factory so instances never share
    # the same list object.
    columns: list[ColumnMapping] = field(default_factory=list)
    mode: Literal["append", "replace", "upsert"] = "append"
    upsert_keys: list[str] = field(default_factory=list)
|
|
|
|
|
|
@dataclass
class ImportConfig:
    """Top-level import configuration: a database DSN plus per-sheet settings.

    Attributes:
        dsn: SQLAlchemy DSN of the target database.
        sheets: Settings for each sheet to import; empty by default.
        default_varchar_length: Defaults to ``255``.
            NOTE(review): presumably the length used for string columns
            that have no explicit ``dtype`` -- confirm against the schema
            detection code.
    """

    dsn: str
    sheets: list[SheetConfig] = field(default_factory=list)
    default_varchar_length: int = 255

    @classmethod
    def from_yaml(cls, path: str | Path) -> "ImportConfig":
        """Build an ``ImportConfig`` from a YAML file.

        The document must be a mapping with a required ``dsn`` key and the
        optional keys ``default_varchar_length`` and ``sheets``. Each entry
        of ``sheets`` is a mapping of ``SheetConfig`` fields; its optional
        ``columns`` list holds mappings of ``ColumnMapping`` fields. Keys
        that are present but empty (YAML null) are treated as empty lists.

        Args:
            path: Location of the YAML file.

        Returns:
            The populated configuration.

        Raises:
            OSError: If the file cannot be opened.
            ValueError: If the top level of the document is not a mapping.
            KeyError: If ``dsn`` is missing (including an empty file).
            TypeError: If a sheet or column entry contains a key that is
                not a field of the corresponding dataclass.
        """
        # Decode explicitly as UTF-8 instead of relying on the locale
        # default, which would garble non-ASCII names on some platforms.
        with open(path, encoding="utf-8") as f:
            raw = yaml.safe_load(f)

        # safe_load yields None for an empty document; treat it as an empty
        # mapping so the missing "dsn" is reported like in any other file.
        if raw is None:
            raw = {}
        if not isinstance(raw, dict):
            raise ValueError(
                f"{path}: expected a YAML mapping at the top level, "
                f"got {type(raw).__name__}"
            )

        sheets = []
        for entry in raw.get("sheets") or []:
            # Work on a copy so the parsed document is left untouched.
            options = dict(entry)
            columns = [
                ColumnMapping(**c) for c in options.pop("columns", None) or []
            ]
            upsert_keys = options.pop("upsert_keys", None) or []
            sheets.append(
                SheetConfig(**options, columns=columns, upsert_keys=upsert_keys)
            )

        return cls(
            dsn=raw["dsn"],
            default_varchar_length=raw.get("default_varchar_length", 255),
            sheets=sheets,
        )
|