Add in-DB indexing script, benchmark results, schema names in presentation

- index_images_indb.py: new script indexing via VECTOR_EMBEDDING(CLIP_IMG), using a two-step INSERT+UPDATE to work around ORA-24816
- index_images_oracle.py / index_images.py: add timing output
- Presentation: schema names VECTORS_USER/VECTOR in diagram and comparison, ONNX expansion, HNSW index note on slide 11, indexing times updated from 3-run benchmark (avg: PG 12.1s, Ora 12.1s, InDB 13.6s)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-20 10:42:13 +02:00
parent e70d422c69
commit 3ef43019be
5 changed files with 324 additions and 50 deletions
@@ -1,4 +1,5 @@
 import os
+import time
 from dotenv import load_dotenv
 from db import get_connection
 from embedder import embed_image
@@ -37,6 +38,7 @@ def main():
    files = [f for f in os.listdir(PHOTOS_DIR) if f.lower().endswith((".jpg", ".jpeg"))]
    print(f"Found {len(files)} photos in {PHOTOS_DIR}")

+    start = time.time()
    for i, filename in enumerate(files, 1):
        filepath = os.path.join(PHOTOS_DIR, filename)
        cur.execute("SELECT 1 FROM images WHERE filename = %s", (filename,))
@@ -50,7 +52,7 @@ def main():

    cur.close()
    conn.close()
-    print("Done.")
+    print(f"Done in {time.time() - start:.1f} seconds.")

 if __name__ == "__main__":
    main()