Initial implementation of pgvector and Oracle 26ai vector search demo

Three FastAPI backends comparing PostgreSQL/pgvector and Oracle 26ai for
semantic image search using CLIP embeddings: Python-side embedding for both
databases, plus Oracle in-database embedding via VECTOR_EMBEDDING(CLIP_TXT).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-19 11:33:16 +02:00
commit 66f7db40b0
15 changed files with 1347 additions and 0 deletions
+14
View File
@@ -0,0 +1,14 @@
import os
import psycopg2
from dotenv import load_dotenv
# Pull settings from a .env file (if one is found) into the environment.
load_dotenv()


def get_connection():
    """Open and return a new psycopg2 connection.

    The connection settings are read from the DB_HOST, DB_PORT, DB_NAME,
    DB_USER and DB_PASSWORD environment variables at call time.
    """
    # psycopg2 keyword -> environment variable that supplies its value.
    env_by_keyword = {
        "host": "DB_HOST",
        "port": "DB_PORT",
        "dbname": "DB_NAME",
        "user": "DB_USER",
        "password": "DB_PASSWORD",
    }
    settings = {
        keyword: os.getenv(env_name)
        for keyword, env_name in env_by_keyword.items()
    }
    return psycopg2.connect(**settings)
+17
View File
@@ -0,0 +1,17 @@
from sentence_transformers import SentenceTransformer
from PIL import Image
# Module-level cache for the CLIP model; stays None until first requested.
_model = None


def _get_model():
    """Return the shared "clip-ViT-B-32" SentenceTransformer instance.

    The model is constructed on the first call and the same object is
    returned on every later call.
    """
    global _model
    if _model is not None:
        return _model
    _model = SentenceTransformer("clip-ViT-B-32")
    return _model
def embed_image(path: str) -> list[float]:
    """Embed the image file at *path* and return the vector as a list.

    The file is opened with Pillow, converted to RGB, and encoded by the
    shared model returned from ``_get_model``.
    """
    rgb_image = Image.open(path).convert("RGB")
    vector = _get_model().encode(rgb_image)
    return vector.tolist()
def embed_text(text: str) -> list[float]:
    """Embed *text* with the shared model and return the vector as a list."""
    model = _get_model()
    vector = model.encode(text)
    return vector.tolist()
+56
View File
@@ -0,0 +1,56 @@
import os
from dotenv import load_dotenv
from db import get_connection
from embedder import embed_image
# Load settings from a .env file (if one is found) before reading them below.
load_dotenv()
# Directory scanned for photos to index; None when PHOTOS_DIR is not set.
PHOTOS_DIR = os.getenv("PHOTOS_DIR")
# Schema: one row per photo, keyed by a unique filename, with a
# 512-dimensional embedding column.
CREATE_TABLE = """
CREATE TABLE IF NOT EXISTS images (
id SERIAL PRIMARY KEY,
filename TEXT NOT NULL UNIQUE,
filepath TEXT NOT NULL,
embedding vector(512)
);
"""
# HNSW index over the embedding column using the cosine operator class.
CREATE_INDEX = """
CREATE INDEX IF NOT EXISTS images_embedding_idx
ON images USING hnsw (embedding vector_cosine_ops);
"""
# Parameterized insert; a row whose filename already exists is left untouched.
INSERT = """
INSERT INTO images (filename, filepath, embedding)
VALUES (%s, %s, %s)
ON CONFLICT (filename) DO NOTHING;
"""
def main():
    """Create the schema if needed and index every JPEG found in PHOTOS_DIR.

    For each ``.jpg``/``.jpeg`` file (case-insensitive) directly inside
    PHOTOS_DIR, an embedding is computed with ``embed_image`` and stored in
    the ``images`` table. Files whose filename is already present are
    skipped. Each insert is committed individually, so an interrupted run
    keeps the rows indexed so far.

    Raises:
        SystemExit: if PHOTOS_DIR is not set.
    """
    # os.listdir(None) would silently scan the current working directory,
    # so refuse to run without an explicit photo directory.
    if not PHOTOS_DIR:
        raise SystemExit("PHOTOS_DIR is not set; cannot locate photos to index")

    conn = get_connection()
    try:
        with conn.cursor() as cur:
            cur.execute(CREATE_TABLE)
            cur.execute(CREATE_INDEX)
            conn.commit()

            files = [
                f for f in os.listdir(PHOTOS_DIR)
                if f.lower().endswith((".jpg", ".jpeg"))
            ]
            total = len(files)
            print(f"Found {total} photos in {PHOTOS_DIR}")

            for i, filename in enumerate(files, 1):
                filepath = os.path.join(PHOTOS_DIR, filename)

                # Check first so the embedding is not computed for files
                # that are already stored.
                cur.execute("SELECT 1 FROM images WHERE filename = %s", (filename,))
                if cur.fetchone():
                    print(f"[{i}/{total}] Skipping {filename} (already indexed)")
                    continue

                embedding = embed_image(filepath)
                cur.execute(INSERT, (filename, filepath, embedding))
                conn.commit()
                print(f"[{i}/{total}] Indexed {filename}")
    finally:
        # Always release the connection, even when indexing fails midway.
        conn.close()

    print("Done.")


if __name__ == "__main__":
    main()
+48
View File
@@ -0,0 +1,48 @@
import os
from fastapi import FastAPI, Query
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from dotenv import load_dotenv
from db import get_connection
from embedder import embed_text
# Read settings from a .env file (if one is found) before they are used below.
load_dotenv()

# Directory the photo files are served from; None when PHOTOS_DIR is not set.
PHOTOS_DIR = os.getenv("PHOTOS_DIR")

app = FastAPI()

# Accept cross-origin requests from any origin, with any method and headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.get("/search")
def search(q: str = Query(...), limit: int = Query(12, ge=0)):
    """Return the stored images closest to the text query *q*.

    The query text is embedded with ``embed_text`` and compared against the
    ``embedding`` column using the ``<=>`` operator; rows are returned in
    ascending order of that distance.

    Args:
        q: Search text (required).
        limit: Maximum number of rows to return. Must be non-negative;
            defaults to 12.

    Returns:
        A list of ``{"filename": ..., "score": ...}`` dicts, where
        ``score`` is ``1 - distance`` rounded to four decimal places.
    """
    vec = embed_text(q)
    conn = get_connection()
    try:
        with conn.cursor() as cur:
            cur.execute(
                """
SELECT filename, 1 - (embedding <=> %s::vector) AS score
FROM images
ORDER BY embedding <=> %s::vector
LIMIT %s
""",
                (vec, vec, limit),
            )
            rows = cur.fetchall()
    finally:
        # Release the connection even if the query fails.
        conn.close()
    return [{"filename": name, "score": round(score, 4)} for name, score in rows]
@app.get("/stats")
def stats():
    """Return the number of rows in the ``images`` table.

    Returns:
        A dict of the form ``{"count": <row count>}``.
    """
    conn = get_connection()
    try:
        with conn.cursor() as cur:
            cur.execute("SELECT COUNT(*) FROM images")
            count = cur.fetchone()[0]
    finally:
        # Release the connection even if the query fails.
        conn.close()
    return {"count": count}
@app.get("/photos/{filename}")
def get_photo(filename: str):
    """Serve the photo named *filename* from PHOTOS_DIR as ``image/jpeg``.

    Args:
        filename: Plain file name of the photo, taken from the URL path.

    Returns:
        A ``FileResponse`` for the file inside PHOTOS_DIR.

    Raises:
        HTTPException: 404 when *filename* contains a directory component
            or does not name an existing regular file in PHOTOS_DIR.
    """
    # Local import: only this handler needs HTTPException.
    from fastapi import HTTPException

    # The name comes straight from the request, so accept only a bare file
    # name; anything basename() would change could point outside PHOTOS_DIR.
    if os.path.basename(filename) != filename:
        raise HTTPException(status_code=404, detail="Photo not found")

    path = os.path.join(PHOTOS_DIR, filename)
    # Answer with a clean 404 instead of failing while sending the file.
    if not os.path.isfile(path):
        raise HTTPException(status_code=404, detail="Photo not found")

    return FileResponse(path, media_type="image/jpeg")