"""Index the JPEG photos found in ``PHOTOS_DIR`` into the Oracle ``images`` table.

On each run the script:

1. creates the ``images`` table and its HNSW vector index if the table is
   missing,
2. lists the ``.jpg`` / ``.jpeg`` files directly inside ``PHOTOS_DIR``,
3. embeds every file that is not yet stored (matched by filename) and inserts
   it, committing after each row so an interrupted run can be resumed.

``PHOTOS_DIR`` is read from the environment after ``load_dotenv()`` has run.
"""

import array
import os
import time

from dotenv import load_dotenv

from db_oracle import get_connection
from embedder import embed_image

load_dotenv()

PHOTOS_DIR = os.getenv("PHOTOS_DIR")

# File-name suffixes (compared case-insensitively) that count as photos.
_JPEG_SUFFIXES = (".jpg", ".jpeg")

CREATE_TABLE = """
CREATE TABLE images (
    id NUMBER GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
    filename VARCHAR2(255) NOT NULL UNIQUE,
    filepath VARCHAR2(1000) NOT NULL,
    embedding VECTOR(512, FLOAT32)
)
"""

CREATE_INDEX = """
CREATE VECTOR INDEX images_embedding_idx ON images(embedding)
ORGANIZATION INMEMORY NEIGHBOR GRAPH
WITH DISTANCE COSINE
WITH TARGET ACCURACY 95
PARAMETERS (type HNSW, neighbors 32, efconstruction 200)
"""

INSERT = "INSERT INTO images (filename, filepath, embedding) VALUES (:1, :2, :3)"


def table_exists(cur):
    """Return True if the current schema already has an ``IMAGES`` table.

    Args:
        cur: An open database cursor; it is used to query ``user_tables``.
    """
    cur.execute("SELECT COUNT(*) FROM user_tables WHERE table_name = 'IMAGES'")
    return cur.fetchone()[0] > 0


def _ensure_schema(conn, cur):
    """Create the ``images`` table and its vector index unless the table exists."""
    if table_exists(cur):
        print("Table already exists, skipping creation.")
        return
    # NOTE(review): only the table is checked. If CREATE_INDEX fails after
    # CREATE_TABLE succeeded, later runs will skip index creation — confirm
    # whether that case needs handling.
    cur.execute(CREATE_TABLE)
    cur.execute(CREATE_INDEX)
    conn.commit()
    print("Table and index created.")


def _list_photos(photos_dir):
    """Return the sorted names of the JPEG files directly inside *photos_dir*."""
    # Sorted so the processing order (and the progress output) is reproducible;
    # os.listdir returns entries in arbitrary order.
    return sorted(
        name
        for name in os.listdir(photos_dir)
        if name.lower().endswith(_JPEG_SUFFIXES)
    )


def _index_photos(conn, cur, photos_dir, files):
    """Embed and insert every file in *files* that is not already stored."""
    total = len(files)
    for i, filename in enumerate(files, 1):
        filepath = os.path.join(photos_dir, filename)

        cur.execute("SELECT 1 FROM images WHERE filename = :1", (filename,))
        if cur.fetchone():
            print(f"[{i}/{total}] Skipping {filename} (already indexed)")
            continue

        # oracledb requires array.array("f") for VECTOR(512, FLOAT32) — plain list is rejected.
        embedding = array.array("f", embed_image(filepath))
        cur.execute(INSERT, (filename, filepath, embedding))
        # Commit per row so progress survives an interruption mid-run.
        conn.commit()
        print(f"[{i}/{total}] Indexed {filename}")


def main():
    """Run the indexing job end to end.

    Raises:
        SystemExit: If ``PHOTOS_DIR`` is not set. Without this check
            ``os.listdir(None)`` would silently scan the current working
            directory instead.
    """
    if not PHOTOS_DIR:
        raise SystemExit(
            "PHOTOS_DIR is not set; define it in the environment or in .env."
        )

    conn = get_connection()
    try:
        cur = conn.cursor()
        try:
            _ensure_schema(conn, cur)

            files = _list_photos(PHOTOS_DIR)
            print(f"Found {len(files)} photos in {PHOTOS_DIR}")

            # perf_counter is monotonic, so the elapsed time cannot be skewed
            # by wall-clock adjustments during a long run.
            start = time.perf_counter()
            _index_photos(conn, cur, PHOTOS_DIR, files)
        finally:
            # Release the cursor even when embedding or an insert fails.
            cur.close()
    finally:
        conn.close()

    print(f"Done in {time.perf_counter() - start:.1f} seconds.")


if __name__ == "__main__":
    main()