From 2d9bce014f88e6c58cc57af5661808e83328d083 Mon Sep 17 00:00:00 2001
From: Dierk
Date: Wed, 13 May 2026 11:48:03 +0200
Subject: [PATCH] Add ODF (.ods) support via odfpy

Co-Authored-By: Claude Sonnet 4.6
---
 README.md              | 10 +++++++++-
 excel_import/reader.py | 13 +++++++++++--
 pyproject.toml         |  1 +
 tests/test_reader.py   | 12 ++++++++++++
 4 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 6dedb6f..aae8c1f 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,14 @@
 # excel-import
 
-Generisches Kommandozeilen-Tool zum Import von Excel-Dateien (`.xls` und `.xlsx`) in Oracle- und PostgreSQL-Datenbanken.
+Generisches Kommandozeilen-Tool zum Import von Tabellendateien in Oracle- und PostgreSQL-Datenbanken.
+
+Unterstützte Formate:
+
+| Format | Endung | Paket |
+|--------|--------|-------|
+| Excel 97–2003 | `.xls` | `xlrd` |
+| Excel 2007+ | `.xlsx`, `.xlsm`, `.xlsb` | `openpyxl` |
+| OpenDocument (LibreOffice) | `.ods` | `odfpy` |
 
 ## Voraussetzungen
 
diff --git a/excel_import/reader.py b/excel_import/reader.py
index 05d7619..8ecd690 100644
--- a/excel_import/reader.py
+++ b/excel_import/reader.py
@@ -5,8 +5,17 @@ import pandas as pd
 from .config import SheetConfig
 
 
+_ENGINES = {
+    ".xls": "xlrd",
+    ".xlsx": "openpyxl",
+    ".xlsm": "openpyxl",
+    ".xlsb": "openpyxl",
+    ".ods": "odf",
+}
+
+
 def _engine_for(path: Path) -> str:
-    return "xlrd" if path.suffix.lower() == ".xls" else "openpyxl"
+    return _ENGINES[path.suffix.lower()]
 
 
 class ExcelReader:
@@ -14,7 +23,7 @@ class ExcelReader:
         self.path = Path(path)
         if not self.path.exists():
             raise FileNotFoundError(f"Excel file not found: {self.path}")
-        if self.path.suffix.lower() not in {".xls", ".xlsx", ".xlsm", ".xlsb"}:
+        if self.path.suffix.lower() not in _ENGINES:
             raise ValueError(f"Unsupported file type: {self.path.suffix}")
 
     def sheet_names(self) -> list[str]:
diff --git a/pyproject.toml b/pyproject.toml
index 28cae8b..ce72029 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,6 +10,7 @@ dependencies = [
     "pandas>=2.0",
     "openpyxl>=3.1",
     "xlrd>=2.0",
+    "odfpy>=1.4",
     "sqlalchemy>=2.0",
     "psycopg2-binary>=2.9",
     "oracledb>=2.0",
diff --git a/tests/test_reader.py b/tests/test_reader.py
index 416698a..4c644db 100644
--- a/tests/test_reader.py
+++ b/tests/test_reader.py
@@ -72,6 +72,18 @@ def test_read_column_skip(xlsx_file: Path):
     assert "Preis" not in df.columns
 
 
+def test_read_ods(tmp_path: Path):
+    pytest.importorskip("odf")
+    path = tmp_path / "test.ods"
+    df = pd.DataFrame({"Name": ["Alice", "Bob"], "Wert": [1, 2]})
+    df.to_excel(path, index=False, engine="odf")
+
+    reader = ExcelReader(path)
+    result = reader.read(SheetConfig(sheet=0, target_table="t"))
+    assert len(result) == 2
+    assert list(result.columns) == ["Name", "Wert"]
+
+
 def test_file_not_found():
     with pytest.raises(FileNotFoundError):
         ExcelReader("/nonexistent/path/file.xlsx")