Initial implementation of pgvector and Oracle 26ai vector search demo

Three FastAPI backends comparing PostgreSQL/pgvector and Oracle 26ai for
semantic image search using CLIP embeddings: Python-side embedding for both
databases, plus Oracle in-database embedding via VECTOR_EMBEDDING(CLIP_TXT).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-19 11:33:16 +02:00
commit 66f7db40b0
15 changed files with 1347 additions and 0 deletions
+14
View File
@@ -0,0 +1,14 @@
import os
import psycopg2
from dotenv import load_dotenv
load_dotenv()
def get_connection():
    """Open a new PostgreSQL connection configured from the environment.

    The host, port, database name, user and password are read from the
    DB_HOST, DB_PORT, DB_NAME, DB_USER and DB_PASSWORD environment
    variables. A variable that is not set is passed through as ``None``.

    Returns:
        The connection object returned by ``psycopg2.connect``. Nothing here
        closes it, so the caller is responsible for doing so.
    """
    settings = {
        "host": os.getenv("DB_HOST"),
        "port": os.getenv("DB_PORT"),
        "dbname": os.getenv("DB_NAME"),
        "user": os.getenv("DB_USER"),
        "password": os.getenv("DB_PASSWORD"),
    }
    return psycopg2.connect(**settings)
+17
View File
@@ -0,0 +1,17 @@
from sentence_transformers import SentenceTransformer
from PIL import Image
_model = None
def _get_model():
    """Return the shared "clip-ViT-B-32" SentenceTransformer instance.

    The model is constructed on the first call and cached in the module-level
    ``_model`` variable; later calls return the cached instance.
    """
    global _model
    if _model is not None:
        return _model
    _model = SentenceTransformer("clip-ViT-B-32")
    return _model
def embed_image(path: str) -> list[float]:
    """Compute the embedding of the image stored at *path*.

    Args:
        path: Filesystem path of the image to embed.

    Returns:
        The model's encoding of the image, converted to a plain list.

    Raises:
        Whatever ``PIL.Image.open`` / ``convert`` raise for a missing or
        unreadable image file.
    """
    # Open inside a context manager so the underlying file handle is released
    # deterministically, including when decoding or conversion fails.
    with Image.open(path) as source:
        # convert() produces a separate image, so it stays usable after the
        # source image is closed.
        rgb = source.convert("RGB")
    return _get_model().encode(rgb).tolist()
def embed_text(text: str) -> list[float]:
    """Compute the embedding of a text string.

    Args:
        text: The text to encode.

    Returns:
        The model's encoding of *text*, converted to a plain list.
    """
    model = _get_model()
    vector = model.encode(text)
    return vector.tolist()
+56
View File
@@ -0,0 +1,56 @@
import os
from dotenv import load_dotenv
from db import get_connection
from embedder import embed_image
load_dotenv()
# Directory that is scanned for photos; taken from the PHOTOS_DIR environment
# variable (None when the variable is not set).
PHOTOS_DIR = os.getenv("PHOTOS_DIR")

# Schema of the image index: one row per file, unique on filename, with a
# 512-dimensional vector column holding the embedding.
# NOTE(review): 512 presumably matches the output size of the embedding model
# used by embed_image — confirm if the model is ever changed.
# NOTE(review): the `vector` type requires the pgvector extension to already
# be installed in the database; this script does not create it.
CREATE_TABLE = """
CREATE TABLE IF NOT EXISTS images (
id SERIAL PRIMARY KEY,
filename TEXT NOT NULL UNIQUE,
filepath TEXT NOT NULL,
embedding vector(512)
);
"""

# HNSW index over the embedding column using the cosine operator class.
CREATE_INDEX = """
CREATE INDEX IF NOT EXISTS images_embedding_idx
ON images USING hnsw (embedding vector_cosine_ops);
"""

# Insert one image row; a filename that already exists is left untouched
# (ON CONFLICT ... DO NOTHING).
INSERT = """
INSERT INTO images (filename, filepath, embedding)
VALUES (%s, %s, %s)
ON CONFLICT (filename) DO NOTHING;
"""
def main():
    """Index every JPEG in PHOTOS_DIR into the ``images`` table.

    Creates the table and its index if they do not exist yet, then walks the
    ``.jpg`` / ``.jpeg`` files directly inside PHOTOS_DIR. Files whose name is
    already present in the table are skipped before the (expensive) embedding
    step; every other file is embedded and inserted, with a commit after each
    insert so progress survives an interrupted run.

    Raises:
        SystemExit: If PHOTOS_DIR is not set (or is empty).
    """
    # Fail fast with a clear message instead of letting os.listdir /
    # os.path.join operate on a missing directory setting.
    if not PHOTOS_DIR:
        raise SystemExit("PHOTOS_DIR is not set; nothing to index.")

    conn = get_connection()
    try:
        cur = conn.cursor()
        try:
            cur.execute(CREATE_TABLE)
            cur.execute(CREATE_INDEX)
            conn.commit()

            files = [
                f for f in os.listdir(PHOTOS_DIR)
                if f.lower().endswith((".jpg", ".jpeg"))
            ]
            total = len(files)
            print(f"Found {total} photos in {PHOTOS_DIR}")

            for i, filename in enumerate(files, 1):
                filepath = os.path.join(PHOTOS_DIR, filename)

                # Cheap existence check so already-indexed files are not
                # embedded again.
                cur.execute(
                    "SELECT 1 FROM images WHERE filename = %s", (filename,)
                )
                if cur.fetchone():
                    print(f"[{i}/{total}] Skipping {filename} (already indexed)")
                    continue

                embedding = embed_image(filepath)
                cur.execute(INSERT, (filename, filepath, embedding))
                conn.commit()
                print(f"[{i}/{total}] Indexed {filename}")
        finally:
            # Release the cursor even when embedding or a query fails.
            cur.close()
    finally:
        # Always return the connection, regardless of how indexing ended.
        conn.close()

    print("Done.")


if __name__ == "__main__":
    main()
+48
View File
@@ -0,0 +1,48 @@
import os
from fastapi import FastAPI, Query
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from dotenv import load_dotenv
from db import get_connection
from embedder import embed_text
# Load settings from a .env file (python-dotenv) before reading them below.
load_dotenv()
# Directory the /photos endpoint serves files from; None when the PHOTOS_DIR
# environment variable is not set.
PHOTOS_DIR = os.getenv("PHOTOS_DIR")
# The FastAPI application object that the route decorators below attach to.
app = FastAPI()
# CORS is configured to accept every origin, method and header.
# NOTE(review): presumably so the static HTML page can call this API from a
# different origin — tighten allow_origins outside of a local demo.
app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
@app.get("/search")
def search(q: str = Query(...), limit: int = Query(12, ge=0)):
    """Return the images whose embeddings are closest to the text query.

    The query text is embedded, then rows of ``images`` are ordered by the
    ``<=>`` distance between their embedding and the query vector.

    Args:
        q: Free-text search query (required).
        limit: Maximum number of results. Must be non-negative; a negative
            value is rejected by request validation instead of reaching the
            SQL ``LIMIT`` clause and failing there.

    Returns:
        A list of ``{"filename": ..., "score": ...}`` dicts, where ``score``
        is ``1 - distance`` rounded to four decimal places, ordered from the
        closest match to the farthest.
    """
    vec = embed_text(q)
    conn = get_connection()
    try:
        cur = conn.cursor()
        try:
            cur.execute(
                """
                SELECT filename, 1 - (embedding <=> %s::vector) AS score
                FROM images
                ORDER BY embedding <=> %s::vector
                LIMIT %s
                """,
                (vec, vec, limit),
            )
            rows = cur.fetchall()
        finally:
            # Release the cursor even if the query raised.
            cur.close()
    finally:
        # A new connection is opened per request, so it must always be
        # closed, otherwise failed requests leak connections.
        conn.close()
    return [{"filename": r[0], "score": round(r[1], 4)} for r in rows]
@app.get("/stats")
def stats():
    """Report how many images are stored in the ``images`` table.

    Returns:
        A dict of the form ``{"count": <number of rows>}``.
    """
    conn = get_connection()
    try:
        cur = conn.cursor()
        try:
            cur.execute("SELECT COUNT(*) FROM images")
            count = cur.fetchone()[0]
        finally:
            # Release the cursor even if the query raised.
            cur.close()
    finally:
        # A new connection is opened per request, so it must always be
        # closed, otherwise failed requests leak connections.
        conn.close()
    return {"count": count}
@app.get("/photos/{filename}")
def get_photo(filename: str):
    """Serve a single photo from PHOTOS_DIR as a JPEG.

    Args:
        filename: Bare file name of the photo, taken from the URL path.

    Returns:
        A ``FileResponse`` for the file with media type ``image/jpeg``.

    Raises:
        HTTPException: 404 when *filename* is not a plain file name (it
            contains a directory component or is ``.`` / ``..``) or when no
            such file exists in PHOTOS_DIR.
    """
    # Imported locally so this block does not depend on the module's import
    # list being changed.
    from fastapi import HTTPException

    # The name comes straight from the request, so refuse anything that could
    # resolve outside PHOTOS_DIR. os.path.basename applies the platform's own
    # separator rules.
    if filename in (".", "..") or os.path.basename(filename) != filename:
        raise HTTPException(status_code=404, detail="Photo not found")

    path = os.path.join(PHOTOS_DIR, filename)
    # Answer with a clean 404 rather than failing while building the response.
    if not os.path.isfile(path):
        raise HTTPException(status_code=404, detail="Photo not found")
    return FileResponse(path, media_type="image/jpeg")
+179
View File
@@ -0,0 +1,179 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Vector Image Search — pgvector</title>
<style>
* { box-sizing: border-box; margin: 0; padding: 0; }
body { font-family: system-ui, sans-serif; background: #f5f5f5; color: #222; }
header {
background: #1a56db;
color: white;
padding: 1.2rem 2rem;
display: flex;
align-items: center;
gap: 1rem;
}
header h1 { font-size: 1.4rem; font-weight: 600; }
.badge {
background: white;
color: #1a56db;
font-size: 0.75rem;
font-weight: 700;
padding: 0.2rem 0.6rem;
border-radius: 999px;
}
.search-area {
max-width: 700px;
margin: 2rem auto 1rem;
padding: 0 1rem;
}
.search-row {
display: flex;
gap: 0.5rem;
}
input[type="text"] {
flex: 1;
padding: 0.7rem 1rem;
font-size: 1rem;
border: 1px solid #ccc;
border-radius: 6px;
}
button.search-btn {
padding: 0.7rem 1.4rem;
background: #1a56db;
color: white;
border: none;
border-radius: 6px;
font-size: 1rem;
cursor: pointer;
}
button.search-btn:hover { background: #1648c0; }
.chips {
display: flex;
flex-wrap: wrap;
gap: 0.4rem;
margin-top: 0.8rem;
}
.chip {
padding: 0.3rem 0.8rem;
background: white;
border: 1px solid #ccc;
border-radius: 999px;
font-size: 0.85rem;
cursor: pointer;
}
.chip:hover { background: #e8eef9; border-color: #1a56db; }
.stats { text-align: center; color: #666; font-size: 0.85rem; margin-bottom: 1rem; }
.grid {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(180px, 1fr));
gap: 1rem;
max-width: 1200px;
margin: 0 auto;
padding: 0 1rem 2rem;
}
.card {
background: white;
border-radius: 8px;
overflow: hidden;
box-shadow: 0 1px 4px rgba(0,0,0,0.1);
}
.card img {
width: 100%;
height: 140px;
object-fit: cover;
display: block;
}
.card-info {
padding: 0.5rem 0.7rem;
font-size: 0.8rem;
}
.card-info .score {
font-weight: 700;
color: #1a56db;
}
.card-info .name {
color: #555;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}
.empty { text-align: center; color: #999; margin-top: 3rem; font-size: 1rem; }
</style>
</head>
<body>
<header>
<h1>Vector Image Search</h1>
<span class="badge">pgvector</span>
</header>
<div class="search-area">
<div class="search-row">
<input id="query" type="text" placeholder="Search photos, e.g. trees, water, night…" />
<button class="search-btn" onclick="doSearch()">Search</button>
</div>
<div class="chips">
<span class="chip" onclick="setQuery('trees')">trees</span>
<span class="chip" onclick="setQuery('water')">water</span>
<span class="chip" onclick="setQuery('people')">people</span>
<span class="chip" onclick="setQuery('buildings')">buildings</span>
<span class="chip" onclick="setQuery('sky')">sky</span>
<span class="chip" onclick="setQuery('street')">street</span>
<span class="chip" onclick="setQuery('night')">night</span>
<span class="chip" onclick="setQuery('cars')">cars</span>
</div>
</div>
<p class="stats" id="stats"></p>
<div class="grid" id="grid"><p class="empty">Enter a search term above.</p></div>
<script>
// Base URL of the search backend that this page talks to.
const API = "http://localhost:8000";

// Show how many photos are indexed. Non-OK responses and network failures
// are reported instead of being left as an unhandled promise rejection.
fetch(`${API}/stats`)
  .then(r => {
    if (!r.ok) throw new Error(`HTTP ${r.status}`);
    return r.json();
  })
  .then(d => {
    document.getElementById("stats").textContent = `${d.count} photos indexed`;
  })
  .catch(err => {
    console.error("Failed to load stats:", err);
    document.getElementById("stats").textContent = "Backend unavailable";
  });

// Pressing Enter in the search box runs the search.
document.getElementById("query").addEventListener("keydown", e => {
  if (e.key === "Enter") doSearch();
});
// Put a preset term into the search box and run the search right away.
function setQuery(text) {
  const input = document.getElementById("query");
  input.value = text;
  doSearch();
}
// Run a search for the current contents of the search box and render the
// results. Blank input is ignored. HTTP errors and network failures are
// shown in the results grid instead of failing silently.
function doSearch() {
  const q = document.getElementById("query").value.trim();
  if (!q) return;
  fetch(`${API}/search?q=${encodeURIComponent(q)}&limit=12`)
    .then(r => {
      // fetch() only rejects on network errors, so check the status too.
      if (!r.ok) throw new Error(`HTTP ${r.status}`);
      return r.json();
    })
    .then(renderResults)
    .catch(err => {
      console.error("Search failed:", err);
      document.getElementById("grid").innerHTML =
        '<p class="empty">Search failed. Is the backend running?</p>';
    });
}
// Render search results as a grid of cards (thumbnail, match score, file
// name). `results` is expected to be an array of { filename, score }
// objects; anything else, or an empty array, shows the "no results" message.
function renderResults(results) {
  const grid = document.getElementById("grid");
  if (!Array.isArray(results) || !results.length) {
    grid.innerHTML = '<p class="empty">No results found.</p>';
    return;
  }

  // File names come from the server's photo directory and may contain
  // characters that are significant in HTML, so escape them before they are
  // interpolated into markup assigned to innerHTML.
  const HTML_ENTITIES = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  };
  const escapeHtml = value =>
    String(value).replace(/[&<>"']/g, ch => HTML_ENTITIES[ch]);

  grid.innerHTML = results.map(r => {
    const safeName = escapeHtml(r.filename);
    const src = `${API}/photos/${encodeURIComponent(r.filename)}`;
    return `
      <div class="card">
        <img src="${src}" alt="${safeName}" loading="lazy" />
        <div class="card-info">
          <div class="score">${(r.score * 100).toFixed(1)}% match</div>
          <div class="name">${safeName}</div>
        </div>
      </div>
    `;
  }).join("");
}
</script>
</body>
</html>