Add targeted comments explaining non-obvious behaviour
- embedder.py: lazy model load rationale, RGB conversion, shared vector space
- main.py: why vec appears twice, ::vector cast, the 1 - distance score formula
- main_oracle.py: why array.array("f") is required instead of plain list
- main_oracle_indb.py: no embedder import — embedding done inside Oracle SQL
- index_images_oracle.py: same array.array requirement on indexing path
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -4,14 +4,19 @@ from PIL import Image
|
|||||||
_model = None
|
_model = None
|
||||||
|
|
||||||
def _get_model():
|
def _get_model():
|
||||||
|
# Lazy load: the CLIP model is ~600 MB and takes several seconds to initialise.
|
||||||
|
# Loading on first call avoids the cost at import time and during indexing warmup.
|
||||||
global _model
|
global _model
|
||||||
if _model is None:
|
if _model is None:
|
||||||
_model = SentenceTransformer("clip-ViT-B-32")
|
_model = SentenceTransformer("clip-ViT-B-32")
|
||||||
return _model
|
return _model
|
||||||
|
|
||||||
def embed_image(path: str) -> list[float]:
|
def embed_image(path: str) -> list[float]:
|
||||||
|
# CLIP requires RGB — some JPEGs are stored as CMYK or grayscale.
|
||||||
img = Image.open(path).convert("RGB")
|
img = Image.open(path).convert("RGB")
|
||||||
return _get_model().encode(img).tolist()
|
return _get_model().encode(img).tolist()
|
||||||
|
|
||||||
def embed_text(text: str) -> list[float]:
|
def embed_text(text: str) -> list[float]:
|
||||||
|
# Text and images share the same 512-dimensional vector space in CLIP,
|
||||||
|
# so the returned vector is directly comparable to image embeddings.
|
||||||
return _get_model().encode(text).tolist()
|
return _get_model().encode(text).tolist()
|
||||||
|
|||||||
@@ -53,6 +53,7 @@ def main():
|
|||||||
if cur.fetchone():
|
if cur.fetchone():
|
||||||
print(f"[{i}/{len(files)}] Skipping {filename} (already indexed)")
|
print(f"[{i}/{len(files)}] Skipping {filename} (already indexed)")
|
||||||
continue
|
continue
|
||||||
|
# oracledb requires array.array("f") for VECTOR(512, FLOAT32) — plain list is rejected.
|
||||||
embedding = array.array("f", embed_image(filepath))
|
embedding = array.array("f", embed_image(filepath))
|
||||||
cur.execute(INSERT, (filename, filepath, embedding))
|
cur.execute(INSERT, (filename, filepath, embedding))
|
||||||
conn.commit()
|
conn.commit()
|
||||||
|
|||||||
@@ -20,6 +20,8 @@ app.mount("/ui", StaticFiles(directory=os.path.abspath(FRONTEND_DIR), html=True)
|
|||||||
|
|
||||||
@app.get("/search")
|
@app.get("/search")
|
||||||
def search(q: str = Query(...), limit: int = Query(12)):
|
def search(q: str = Query(...), limit: int = Query(12)):
|
||||||
|
# oracledb rejects a plain Python list for a VECTOR column.
|
||||||
|
# array.array("f") produces a typed 32-bit float buffer that matches VECTOR(512, FLOAT32).
|
||||||
vec = array.array("f", embed_text(q))
|
vec = array.array("f", embed_text(q))
|
||||||
conn = get_connection()
|
conn = get_connection()
|
||||||
cur = conn.cursor()
|
cur = conn.cursor()
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
|
# No embedder import — text embedding happens inside Oracle via VECTOR_EMBEDDING(CLIP_TXT).
|
||||||
|
# The only value Python passes to the database is the raw query string (:q).
|
||||||
import os
|
import os
|
||||||
from fastapi import FastAPI, Query
|
from fastapi import FastAPI, Query
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
|
|||||||
@@ -4,14 +4,19 @@ from PIL import Image
|
|||||||
_model = None
|
_model = None
|
||||||
|
|
||||||
def _get_model():
|
def _get_model():
|
||||||
|
# Lazy load: the CLIP model is ~600 MB and takes several seconds to initialise.
|
||||||
|
# Loading on first call avoids the cost at import time and during indexing warmup.
|
||||||
global _model
|
global _model
|
||||||
if _model is None:
|
if _model is None:
|
||||||
_model = SentenceTransformer("clip-ViT-B-32")
|
_model = SentenceTransformer("clip-ViT-B-32")
|
||||||
return _model
|
return _model
|
||||||
|
|
||||||
def embed_image(path: str) -> list[float]:
|
def embed_image(path: str) -> list[float]:
|
||||||
|
# CLIP requires RGB — some JPEGs are stored as CMYK or grayscale.
|
||||||
img = Image.open(path).convert("RGB")
|
img = Image.open(path).convert("RGB")
|
||||||
return _get_model().encode(img).tolist()
|
return _get_model().encode(img).tolist()
|
||||||
|
|
||||||
def embed_text(text: str) -> list[float]:
|
def embed_text(text: str) -> list[float]:
|
||||||
|
# Text and images share the same 512-dimensional vector space in CLIP,
|
||||||
|
# so the returned vector is directly comparable to image embeddings.
|
||||||
return _get_model().encode(text).tolist()
|
return _get_model().encode(text).tolist()
|
||||||
|
|||||||
@@ -29,6 +29,9 @@ def search(q: str = Query(...), limit: int = Query(12)):
|
|||||||
ORDER BY embedding <=> %s::vector
|
ORDER BY embedding <=> %s::vector
|
||||||
LIMIT %s
|
LIMIT %s
|
||||||
""",
|
""",
|
||||||
|
# vec appears twice: once for ORDER BY (uses HNSW index), once for the score column.
|
||||||
|
# ::vector cast is required — psycopg2 adapts a Python list to a PostgreSQL ARRAY, not a pgvector value, so it must be cast explicitly.
|
||||||
|
# 1 - distance converts cosine distance (0=identical) to similarity (1=identical).
|
||||||
(vec, vec, limit),
|
(vec, vec, limit),
|
||||||
)
|
)
|
||||||
rows = cur.fetchall()
|
rows = cur.fetchall()
|
||||||
|
|||||||
Reference in New Issue
Block a user