"""Index JPEG photos from ``PHOTOS_DIR`` into the ``images`` table.

For every ``.jpg`` / ``.jpeg`` file in the directory named by the
``PHOTOS_DIR`` environment variable, compute an embedding with
``embed_image`` and store it next to the file name and path. Files whose
name is already present in the table are skipped, so the script can be
re-run safely.
"""

import os

from dotenv import load_dotenv

from db import get_connection
from embedder import embed_image

load_dotenv()

# Directory scanned for photos; None when the variable is not set.
PHOTOS_DIR = os.getenv("PHOTOS_DIR")

CREATE_TABLE = """
CREATE TABLE IF NOT EXISTS images (
    id SERIAL PRIMARY KEY,
    filename TEXT NOT NULL UNIQUE,
    filepath TEXT NOT NULL,
    embedding vector(512)
);
"""

CREATE_INDEX = """
CREATE INDEX IF NOT EXISTS images_embedding_idx
ON images USING hnsw (embedding vector_cosine_ops);
"""

INSERT = """
INSERT INTO images (filename, filepath, embedding)
VALUES (%s, %s, %s)
ON CONFLICT (filename) DO NOTHING;
"""


def main() -> None:
    """Create the schema if needed, then embed and insert every new photo.

    Raises:
        SystemExit: if ``PHOTOS_DIR`` is unset or empty.
    """
    # os.listdir(None) would silently scan the current working directory,
    # so refuse to run without an explicit photo directory.
    if not PHOTOS_DIR:
        raise SystemExit("PHOTOS_DIR is not set; nothing to index.")

    conn = get_connection()
    try:
        cur = conn.cursor()
        try:
            cur.execute(CREATE_TABLE)
            cur.execute(CREATE_INDEX)
            conn.commit()

            # Sorted so progress output and processing order are reproducible;
            # os.listdir returns entries in arbitrary order.
            files = sorted(
                f
                for f in os.listdir(PHOTOS_DIR)
                if f.lower().endswith((".jpg", ".jpeg"))
            )
            total = len(files)
            print(f"Found {total} photos in {PHOTOS_DIR}")

            for i, filename in enumerate(files, 1):
                filepath = os.path.join(PHOTOS_DIR, filename)

                # Check first so the embedding is not computed for files
                # that are already stored.
                cur.execute(
                    "SELECT 1 FROM images WHERE filename = %s", (filename,)
                )
                if cur.fetchone():
                    print(f"[{i}/{total}] Skipping {filename} (already indexed)")
                    continue

                # NOTE(review): assumes embed_image returns a value the DB
                # driver can adapt to the vector(512) column - confirm.
                embedding = embed_image(filepath)
                cur.execute(INSERT, (filename, filepath, embedding))
                # Commit per file so an interrupted run keeps earlier work.
                conn.commit()
                print(f"[{i}/{total}] Indexed {filename}")
        finally:
            cur.close()
    finally:
        # Always release the connection, even when embedding or SQL fails.
        conn.close()

    print("Done.")


if __name__ == "__main__":
    main()