From 1c5e00d8e4a0d32f85442b93f62683f9eb70350c Mon Sep 17 00:00:00 2001 From: Dierk Date: Tue, 19 May 2026 14:39:40 +0200 Subject: [PATCH] Add targeted comments explaining non-obvious behaviour MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - embedder.py: lazy model load rationale, RGB conversion, shared vector space - main.py: why vec appears twice, ::vector cast, 1-distance score formula - main_oracle.py: why array.array("f") is required instead of plain list - main_oracle_indb.py: no embedder import — embedding done inside Oracle SQL - index_images_oracle.py: same array.array requirement on indexing path Co-Authored-By: Claude Sonnet 4.6 --- oravector-demo/backend/embedder.py | 5 +++++ oravector-demo/backend/index_images_oracle.py | 1 + oravector-demo/backend/main_oracle.py | 2 ++ oravector-demo/backend/main_oracle_indb.py | 2 ++ pgvector-demo/backend/embedder.py | 5 +++++ pgvector-demo/backend/main.py | 3 +++ 6 files changed, 18 insertions(+) diff --git a/oravector-demo/backend/embedder.py b/oravector-demo/backend/embedder.py index 4e1f34b..2b642ef 100644 --- a/oravector-demo/backend/embedder.py +++ b/oravector-demo/backend/embedder.py @@ -4,14 +4,19 @@ from PIL import Image _model = None def _get_model(): + # Lazy load: the CLIP model is ~600 MB and takes several seconds to initialise. + # Loading on first call avoids the cost at import time and during indexing warmup. global _model if _model is None: _model = SentenceTransformer("clip-ViT-B-32") return _model def embed_image(path: str) -> list[float]: + # CLIP requires RGB — some JPEGs are stored as CMYK or grayscale. img = Image.open(path).convert("RGB") return _get_model().encode(img).tolist() def embed_text(text: str) -> list[float]: + # Text and images share the same 512-dimensional vector space in CLIP, + # so the returned vector is directly comparable to image embeddings. 
return _get_model().encode(text).tolist() diff --git a/oravector-demo/backend/index_images_oracle.py b/oravector-demo/backend/index_images_oracle.py index bebdf27..785e94f 100644 --- a/oravector-demo/backend/index_images_oracle.py +++ b/oravector-demo/backend/index_images_oracle.py @@ -53,6 +53,7 @@ def main(): if cur.fetchone(): print(f"[{i}/{len(files)}] Skipping {filename} (already indexed)") continue + # oracledb requires array.array("f") for VECTOR(512, FLOAT32) — plain list is rejected. embedding = array.array("f", embed_image(filepath)) cur.execute(INSERT, (filename, filepath, embedding)) conn.commit() diff --git a/oravector-demo/backend/main_oracle.py b/oravector-demo/backend/main_oracle.py index d761e04..4069350 100644 --- a/oravector-demo/backend/main_oracle.py +++ b/oravector-demo/backend/main_oracle.py @@ -20,6 +20,8 @@ app.mount("/ui", StaticFiles(directory=os.path.abspath(FRONTEND_DIR), html=True) @app.get("/search") def search(q: str = Query(...), limit: int = Query(12)): + # oracledb rejects a plain Python list for a VECTOR column. + # array.array("f") produces a typed 32-bit float buffer that matches VECTOR(512, FLOAT32). vec = array.array("f", embed_text(q)) conn = get_connection() cur = conn.cursor() diff --git a/oravector-demo/backend/main_oracle_indb.py b/oravector-demo/backend/main_oracle_indb.py index 6a68ade..4f841ac 100644 --- a/oravector-demo/backend/main_oracle_indb.py +++ b/oravector-demo/backend/main_oracle_indb.py @@ -1,3 +1,5 @@ +# No embedder import — text embedding happens inside Oracle via VECTOR_EMBEDDING(CLIP_TXT). +# The only value Python passes to the database is the raw query string (:q). 
import os from fastapi import FastAPI, Query from fastapi.middleware.cors import CORSMiddleware diff --git a/pgvector-demo/backend/embedder.py b/pgvector-demo/backend/embedder.py index 4e1f34b..2b642ef 100644 --- a/pgvector-demo/backend/embedder.py +++ b/pgvector-demo/backend/embedder.py @@ -4,14 +4,19 @@ from PIL import Image _model = None def _get_model(): + # Lazy load: the CLIP model is ~600 MB and takes several seconds to initialise. + # Loading on first call avoids the cost at import time and during indexing warmup. global _model if _model is None: _model = SentenceTransformer("clip-ViT-B-32") return _model def embed_image(path: str) -> list[float]: + # CLIP requires RGB — some JPEGs are stored as CMYK or grayscale. img = Image.open(path).convert("RGB") return _get_model().encode(img).tolist() def embed_text(text: str) -> list[float]: + # Text and images share the same 512-dimensional vector space in CLIP, + # so the returned vector is directly comparable to image embeddings. return _get_model().encode(text).tolist() diff --git a/pgvector-demo/backend/main.py b/pgvector-demo/backend/main.py index f30df08..29363cc 100644 --- a/pgvector-demo/backend/main.py +++ b/pgvector-demo/backend/main.py @@ -29,6 +29,9 @@ def search(q: str = Query(...), limit: int = Query(12)): ORDER BY embedding <=> %s::vector LIMIT %s """, + # vec appears twice: once for ORDER BY (uses HNSW index), once for the score column. + # ::vector cast is required — psycopg2 adapts a Python list to a SQL ARRAY, and pgvector has no <=> operator between vector and an array. + # 1 - distance converts cosine distance (0=identical) to similarity (1=identical). (vec, vec, limit), ) rows = cur.fetchall()