"""Index JPEG photos from PHOTOS_DIR into VECTOR.FOTO_VEKTOR with embeddings."""

import os
import time

from dotenv import load_dotenv

from db_oracle import get_connection_indb

load_dotenv()

PHOTOS_DIR = os.getenv("PHOTOS_DIR")


def _list_photos(directory):
    """Return the names of the .jpg/.jpeg entries (case-insensitive) in *directory*."""
    return [
        name
        for name in os.listdir(directory)
        if name.lower().endswith((".jpg", ".jpeg"))
    ]


def _index_photo(conn, cur, filename, filepath):
    """Store one photo and its embedding, committing only if both steps succeed.

    Args:
        conn: Open database connection; used for commit/rollback.
        cur: Cursor created from *conn*.
        filename: Value stored in the ``filename`` column.
        filepath: Path of the image file whose bytes are stored in ``foto``.

    Raises:
        Exception: Any file or database error is re-raised after the pending
            transaction has been rolled back.
    """
    with open(filepath, "rb") as f:
        blob_data = f.read()

    try:
        # ORA-24816: Oracle cannot bind the same BLOB as both column value and
        # VECTOR_EMBEDDING() input in one statement. Insert the BLOB first, then
        # let Oracle compute the embedding from the stored data in a second step.
        cur.execute(
            "INSERT INTO VECTOR.FOTO_VEKTOR (filename, foto) VALUES (:1, :2)",
            (filename, blob_data),
        )
        cur.execute(
            """UPDATE VECTOR.FOTO_VEKTOR
               SET foto_vek = VECTOR_EMBEDDING(CLIP_IMG USING foto AS data)
               WHERE filename = :1""",
            (filename,),
        )
        conn.commit()
    except Exception:
        # Never leave a row without its embedding pending in the transaction.
        conn.rollback()
        raise


def main():
    """Index every not-yet-indexed JPEG in PHOTOS_DIR, one commit per photo.

    Prints the row count before the run, per-file progress, and the total
    elapsed time. Files whose name already exists in VECTOR.FOTO_VEKTOR are
    skipped, so the script can be re-run after an interruption.

    Raises:
        RuntimeError: If the PHOTOS_DIR environment variable is not set.
    """
    # os.listdir(None) would silently list the current working directory, so
    # reject a missing setting before touching the database.
    if not PHOTOS_DIR:
        raise RuntimeError("PHOTOS_DIR environment variable is not set")

    conn = get_connection_indb()
    try:
        cur = conn.cursor()
        try:
            cur.execute("SELECT COUNT(*) FROM VECTOR.FOTO_VEKTOR")
            print(f"Rows before: {cur.fetchone()[0]}")

            files = _list_photos(PHOTOS_DIR)
            total = len(files)
            print(f"Found {total} photos in {PHOTOS_DIR}")

            start = time.time()

            for i, filename in enumerate(files, 1):
                cur.execute(
                    "SELECT 1 FROM VECTOR.FOTO_VEKTOR WHERE filename = :1",
                    (filename,),
                )
                if cur.fetchone():
                    print(f"[{i}/{total}] Skipping {filename} (already indexed)")
                    continue

                filepath = os.path.join(PHOTOS_DIR, filename)
                _index_photo(conn, cur, filename, filepath)
                print(f"[{i}/{total}] Indexed {filename}")

            elapsed = time.time() - start
            print(f"Done in {elapsed:.1f} seconds.")
        finally:
            cur.close()
    finally:
        # Release the connection on both the success and the error path.
        conn.close()


if __name__ == "__main__":
    main()