Initial implementation of pgvector and Oracle 26ai vector search demo
Three FastAPI backends comparing PostgreSQL/pgvector and Oracle 26ai for semantic image search using CLIP embeddings: Python-side embedding for both databases, plus Oracle in-database embedding via VECTOR_EMBEDDING(CLIP_TXT). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,19 @@
|
||||
import os
|
||||
import oracledb
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
def get_connection():
    """Open a new Oracle connection for the account named by ORA_USER.

    The user, password, host, port and service name are read from the
    ORA_USER, ORA_PASSWORD, ORA_HOST, ORA_PORT and ORA_SERVICE environment
    variables. The DSN is built as ``host:port/service``.

    Returns:
        Whatever ``oracledb.connect`` returns; the caller is responsible for
        closing it.
    """
    host = os.getenv("ORA_HOST")
    port = os.getenv("ORA_PORT")
    service = os.getenv("ORA_SERVICE")
    credentials = {
        "user": os.getenv("ORA_USER"),
        "password": os.getenv("ORA_PASSWORD"),
    }
    return oracledb.connect(dsn=f"{host}:{port}/{service}", **credentials)
|
||||
|
||||
def get_connection_indb():
    """Open a new Oracle connection for the in-database-embedding account.

    Uses the ORA_USER_INDB / ORA_PASSWORD_INDB credentials, and the same
    ORA_HOST, ORA_PORT and ORA_SERVICE values as the regular connection to
    build a ``host:port/service`` DSN.

    Returns:
        Whatever ``oracledb.connect`` returns; the caller is responsible for
        closing it.
    """
    dsn_parts = [os.getenv(key) for key in ("ORA_HOST", "ORA_PORT", "ORA_SERVICE")]
    dsn = "{}:{}/{}".format(*dsn_parts)
    return oracledb.connect(
        user=os.getenv("ORA_USER_INDB"),
        password=os.getenv("ORA_PASSWORD_INDB"),
        dsn=dsn,
    )
|
||||
@@ -0,0 +1,17 @@
|
||||
from sentence_transformers import SentenceTransformer
|
||||
from PIL import Image
|
||||
|
||||
_model = None
|
||||
|
||||
def _get_model():
    """Return the module-wide SentenceTransformer, creating it on first call.

    The instance is cached in the module-level ``_model`` variable so the
    "clip-ViT-B-32" model is only constructed once per process.
    """
    global _model
    if _model is not None:
        return _model
    _model = SentenceTransformer("clip-ViT-B-32")
    return _model
|
||||
|
||||
def embed_image(path: str) -> list[float]:
    """Encode the image file at *path* with the shared model.

    Args:
        path: Location of an image file that PIL can open.

    Returns:
        The model's encoding of the RGB-converted image, as a plain list.
    """
    # Close the file handle deterministically: convert() returns an
    # independent image, so nothing below needs the file to stay open.
    with Image.open(path) as source:
        img = source.convert("RGB")
    return _get_model().encode(img).tolist()
|
||||
|
||||
def embed_text(text: str) -> list[float]:
    """Encode *text* with the shared model and return the result as a list."""
    model = _get_model()
    encoded = model.encode(text)
    return encoded.tolist()
|
||||
@@ -0,0 +1,66 @@
|
||||
import os
|
||||
import array
|
||||
from dotenv import load_dotenv
|
||||
from db_oracle import get_connection
|
||||
from embedder import embed_image
|
||||
|
||||
load_dotenv()
|
||||
|
||||
PHOTOS_DIR = os.getenv("PHOTOS_DIR")
|
||||
|
||||
# DDL for the table holding one row per indexed photo: an identity primary
# key, a unique file name, the file path, and a 512-dimension FLOAT32 vector.
# NOTE(review): 512 presumably matches the output size of the embedding model
# used by the indexer — confirm against the embedder module.
CREATE_TABLE = """
CREATE TABLE images (
id NUMBER GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
filename VARCHAR2(255) NOT NULL UNIQUE,
filepath VARCHAR2(1000) NOT NULL,
embedding VECTOR(512, FLOAT32)
)
"""

# DDL for the vector index on images.embedding: an in-memory neighbor graph
# (HNSW) using cosine distance, with the target accuracy and build parameters
# spelled out in the statement itself.
CREATE_INDEX = """
CREATE VECTOR INDEX images_embedding_idx
ON images(embedding)
ORGANIZATION INMEMORY NEIGHBOR GRAPH
WITH DISTANCE COSINE
WITH TARGET ACCURACY 95
PARAMETERS (type HNSW, neighbors 32, efconstruction 200)
"""

# Positional binds, in order: filename, filepath, embedding.
INSERT = "INSERT INTO images (filename, filepath, embedding) VALUES (:1, :2, :3)"
|
||||
|
||||
def table_exists(cur):
    """Tell whether the current schema already has an IMAGES table.

    Args:
        cur: An open cursor; it is used to query ``user_tables``.

    Returns:
        True when at least one matching table is reported, otherwise False.
    """
    query = "SELECT COUNT(*) FROM user_tables WHERE table_name = 'IMAGES'"
    cur.execute(query)
    row = cur.fetchone()
    matches = row[0]
    return matches > 0
|
||||
|
||||
def main():
    """Create the ``images`` table on first run, then index the JPEGs in PHOTOS_DIR.

    Files whose name is already stored in ``images.filename`` are skipped and
    every new row is committed on its own, so an interrupted run can simply be
    started again.

    Raises:
        SystemExit: If PHOTOS_DIR is not configured.
    """
    # Fail fast with a clear message: with PHOTOS_DIR unset the directory
    # listing below would not be looking at the intended photo folder.
    if not PHOTOS_DIR:
        raise SystemExit("PHOTOS_DIR is not set; cannot locate photos to index.")

    # The context managers release the cursor and the connection even when
    # embedding a file or executing a statement raises part-way through.
    with get_connection() as conn, conn.cursor() as cur:
        if table_exists(cur):
            print("Table already exists, skipping creation.")
        else:
            cur.execute(CREATE_TABLE)
            cur.execute(CREATE_INDEX)
            conn.commit()
            print("Table and index created.")

        files = [f for f in os.listdir(PHOTOS_DIR) if f.lower().endswith((".jpg", ".jpeg"))]
        total = len(files)
        print(f"Found {total} photos in {PHOTOS_DIR}")

        for i, filename in enumerate(files, 1):
            filepath = os.path.join(PHOTOS_DIR, filename)
            cur.execute("SELECT 1 FROM images WHERE filename = :1", (filename,))
            if cur.fetchone():
                print(f"[{i}/{total}] Skipping {filename} (already indexed)")
                continue
            # The embedding is bound as a float32 array ("f" typecode).
            embedding = array.array("f", embed_image(filepath))
            cur.execute(INSERT, (filename, filepath, embedding))
            # Commit per file so completed work survives a later failure.
            conn.commit()
            print(f"[{i}/{total}] Indexed {filename}")

    print("Done.")


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,49 @@
|
||||
import os
|
||||
import array
|
||||
from fastapi import FastAPI, Query
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import FileResponse
|
||||
from dotenv import load_dotenv
|
||||
from db_oracle import get_connection
|
||||
from embedder import embed_text
|
||||
|
||||
load_dotenv()
|
||||
|
||||
PHOTOS_DIR = os.getenv("PHOTOS_DIR")
|
||||
|
||||
app = FastAPI()
|
||||
app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
|
||||
|
||||
@app.get("/search")
def search(q: str = Query(...), limit: int = Query(12)):
    """Return the images whose stored embeddings are closest to the query text.

    Args:
        q: Free-text search query; it is embedded in Python via ``embed_text``.
        limit: Maximum number of rows to return.

    Returns:
        A list of ``{"filename", "score"}`` dicts ordered by ascending cosine
        distance, where ``score`` is ``1 - distance`` rounded to 4 decimals.
    """
    # Bound as a float32 array ("f" typecode) for the :vec placeholder.
    vec = array.array("f", embed_text(q))
    sql = """
        SELECT filename, 1 - VECTOR_DISTANCE(embedding, :vec, COSINE) AS score
        FROM images
        ORDER BY VECTOR_DISTANCE(embedding, :vec, COSINE)
        FETCH FIRST :lim ROWS ONLY
    """
    # Context managers close the cursor and connection even if the query
    # raises, so a failing request does not leak a database session.
    with get_connection() as conn, conn.cursor() as cur:
        cur.execute(sql, {"vec": vec, "lim": limit})
        rows = cur.fetchall()
    return [{"filename": r[0], "score": round(r[1], 4)} for r in rows]
|
||||
|
||||
@app.get("/stats")
def stats():
    """Report how many rows the ``images`` table currently holds.

    Returns:
        ``{"count": <row count>}``.
    """
    # Context managers close the cursor and connection even if the query
    # raises, so a failing request does not leak a database session.
    with get_connection() as conn, conn.cursor() as cur:
        cur.execute("SELECT COUNT(*) FROM images")
        count = cur.fetchone()[0]
    return {"count": count}
|
||||
|
||||
@app.get("/photos/{filename}")
def get_photo(filename: str):
    """Serve one photo from PHOTOS_DIR as ``image/jpeg``.

    Args:
        filename: Bare file name of the photo (no directory components).

    Returns:
        A ``FileResponse`` for the requested file.

    Raises:
        HTTPException: 404 when the name is not a plain file name or no such
            file exists in PHOTOS_DIR.
    """
    from fastapi import HTTPException  # local import keeps this block self-contained

    # The name comes straight from the URL: accept plain file names only, so
    # "..", drive prefixes or backslash separators cannot escape PHOTOS_DIR.
    is_plain_name = (
        filename not in ("", ".", "..")
        and "\\" not in filename
        and os.path.basename(filename) == filename
    )
    if not is_plain_name:
        raise HTTPException(status_code=404, detail="Photo not found")

    path = os.path.join(PHOTOS_DIR, filename)
    # Answer with a clean 404 instead of failing while streaming the file.
    if not os.path.isfile(path):
        raise HTTPException(status_code=404, detail="Photo not found")
    return FileResponse(path, media_type="image/jpeg")
|
||||
@@ -0,0 +1,55 @@
|
||||
import os
|
||||
from fastapi import FastAPI, Query
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import FileResponse
|
||||
from dotenv import load_dotenv
|
||||
from db_oracle import get_connection_indb
|
||||
|
||||
load_dotenv()
|
||||
|
||||
PHOTOS_DIR = os.getenv("PHOTOS_DIR")
|
||||
|
||||
app = FastAPI()
|
||||
app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
|
||||
|
||||
@app.get("/search")
def search(q: str = Query(...), limit: int = Query(12)):
    """Return the photos closest to the query text, embedding the text in-database.

    The query string is bound as ``:q`` and turned into a vector by the SQL
    expression ``VECTOR_EMBEDDING(CLIP_TXT USING :q AS data)``; no embedding
    is computed in Python.

    Args:
        q: Free-text search query.
        limit: Maximum number of rows to return.

    Returns:
        A list of ``{"filename", "score"}`` dicts ordered by ascending cosine
        distance, where ``score`` is ``1 - distance`` rounded to 4 decimals.
    """
    sql = """
        SELECT filename,
               1 - VECTOR_DISTANCE(
                   foto_vek,
                   VECTOR_EMBEDDING(CLIP_TXT USING :q AS data),
                   COSINE
               ) AS score
        FROM VECTOR.FOTO_VEKTOR
        ORDER BY VECTOR_DISTANCE(
                   foto_vek,
                   VECTOR_EMBEDDING(CLIP_TXT USING :q AS data),
                   COSINE
               )
        FETCH FIRST :lim ROWS ONLY
    """
    # Context managers close the cursor and connection even if the query
    # raises, so a failing request does not leak a database session.
    with get_connection_indb() as conn, conn.cursor() as cur:
        cur.execute(sql, {"q": q, "lim": limit})
        rows = cur.fetchall()
    return [{"filename": r[0], "score": round(r[1], 4)} for r in rows]
|
||||
|
||||
@app.get("/stats")
def stats():
    """Report how many rows ``VECTOR.FOTO_VEKTOR`` currently holds.

    Returns:
        ``{"count": <row count>}``.
    """
    # Context managers close the cursor and connection even if the query
    # raises, so a failing request does not leak a database session.
    with get_connection_indb() as conn, conn.cursor() as cur:
        cur.execute("SELECT COUNT(*) FROM VECTOR.FOTO_VEKTOR")
        count = cur.fetchone()[0]
    return {"count": count}
|
||||
|
||||
@app.get("/photos/{filename}")
def get_photo(filename: str):
    """Serve one photo from PHOTOS_DIR as ``image/jpeg``.

    Args:
        filename: Bare file name of the photo (no directory components).

    Returns:
        A ``FileResponse`` for the requested file.

    Raises:
        HTTPException: 404 when the name is not a plain file name or no such
            file exists in PHOTOS_DIR.
    """
    from fastapi import HTTPException  # local import keeps this block self-contained

    # The name comes straight from the URL: accept plain file names only, so
    # "..", drive prefixes or backslash separators cannot escape PHOTOS_DIR.
    is_plain_name = (
        filename not in ("", ".", "..")
        and "\\" not in filename
        and os.path.basename(filename) == filename
    )
    if not is_plain_name:
        raise HTTPException(status_code=404, detail="Photo not found")

    path = os.path.join(PHOTOS_DIR, filename)
    # Answer with a clean 404 instead of failing while streaming the file.
    if not os.path.isfile(path):
        raise HTTPException(status_code=404, detail="Photo not found")
    return FileResponse(path, media_type="image/jpeg")
|
||||
@@ -0,0 +1,179 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Vector Image Search — Oracle 26ai</title>
|
||||
<style>
|
||||
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||
body { font-family: system-ui, sans-serif; background: #f5f5f5; color: #222; }
|
||||
|
||||
header {
|
||||
background: #c74634;
|
||||
color: white;
|
||||
padding: 1.2rem 2rem;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 1rem;
|
||||
}
|
||||
header h1 { font-size: 1.4rem; font-weight: 600; }
|
||||
.badge {
|
||||
background: white;
|
||||
color: #c74634;
|
||||
font-size: 0.75rem;
|
||||
font-weight: 700;
|
||||
padding: 0.2rem 0.6rem;
|
||||
border-radius: 999px;
|
||||
}
|
||||
|
||||
.search-area {
|
||||
max-width: 700px;
|
||||
margin: 2rem auto 1rem;
|
||||
padding: 0 1rem;
|
||||
}
|
||||
.search-row {
|
||||
display: flex;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
input[type="text"] {
|
||||
flex: 1;
|
||||
padding: 0.7rem 1rem;
|
||||
font-size: 1rem;
|
||||
border: 1px solid #ccc;
|
||||
border-radius: 6px;
|
||||
}
|
||||
button.search-btn {
|
||||
padding: 0.7rem 1.4rem;
|
||||
background: #c74634;
|
||||
color: white;
|
||||
border: none;
|
||||
border-radius: 6px;
|
||||
font-size: 1rem;
|
||||
cursor: pointer;
|
||||
}
|
||||
button.search-btn:hover { background: #a83929; }
|
||||
|
||||
.chips {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 0.4rem;
|
||||
margin-top: 0.8rem;
|
||||
}
|
||||
.chip {
|
||||
padding: 0.3rem 0.8rem;
|
||||
background: white;
|
||||
border: 1px solid #ccc;
|
||||
border-radius: 999px;
|
||||
font-size: 0.85rem;
|
||||
cursor: pointer;
|
||||
}
|
||||
.chip:hover { background: #fcecea; border-color: #c74634; }
|
||||
|
||||
.stats { text-align: center; color: #666; font-size: 0.85rem; margin-bottom: 1rem; }
|
||||
|
||||
.grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fill, minmax(180px, 1fr));
|
||||
gap: 1rem;
|
||||
max-width: 1200px;
|
||||
margin: 0 auto;
|
||||
padding: 0 1rem 2rem;
|
||||
}
|
||||
.card {
|
||||
background: white;
|
||||
border-radius: 8px;
|
||||
overflow: hidden;
|
||||
box-shadow: 0 1px 4px rgba(0,0,0,0.1);
|
||||
}
|
||||
.card img {
|
||||
width: 100%;
|
||||
height: 140px;
|
||||
object-fit: cover;
|
||||
display: block;
|
||||
}
|
||||
.card-info {
|
||||
padding: 0.5rem 0.7rem;
|
||||
font-size: 0.8rem;
|
||||
}
|
||||
.card-info .score {
|
||||
font-weight: 700;
|
||||
color: #c74634;
|
||||
}
|
||||
.card-info .name {
|
||||
color: #555;
|
||||
white-space: nowrap;
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
}
|
||||
.empty { text-align: center; color: #999; margin-top: 3rem; font-size: 1rem; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<h1>Vector Image Search</h1>
|
||||
<span class="badge">Oracle 26ai</span>
|
||||
</header>
|
||||
|
||||
<div class="search-area">
|
||||
<div class="search-row">
|
||||
<input id="query" type="text" placeholder="Search photos, e.g. trees, water, night…" />
|
||||
<button class="search-btn" onclick="doSearch()">Search</button>
|
||||
</div>
|
||||
<div class="chips">
|
||||
<span class="chip" onclick="setQuery('trees')">trees</span>
|
||||
<span class="chip" onclick="setQuery('water')">water</span>
|
||||
<span class="chip" onclick="setQuery('people')">people</span>
|
||||
<span class="chip" onclick="setQuery('buildings')">buildings</span>
|
||||
<span class="chip" onclick="setQuery('sky')">sky</span>
|
||||
<span class="chip" onclick="setQuery('street')">street</span>
|
||||
<span class="chip" onclick="setQuery('night')">night</span>
|
||||
<span class="chip" onclick="setQuery('cars')">cars</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<p class="stats" id="stats"></p>
|
||||
<div class="grid" id="grid"><p class="empty">Enter a search term above.</p></div>
|
||||
|
||||
<script>
|
||||
const API = "http://localhost:8001";
|
||||
|
||||
// On load, ask the backend how many photos are indexed and show the count.
fetch(`${API}/stats`)
  .then(response => response.json())
  .then(({ count }) => {
    document.getElementById("stats").textContent = `${count} photos indexed`;
  });

// Pressing Enter inside the query box runs the search.
document.getElementById("query").addEventListener("keydown", event => {
  if (event.key !== "Enter") return;
  doSearch();
});
|
||||
|
||||
// Fill the query box with `text` (used by the suggestion chips) and search.
function setQuery(text) {
  const input = document.getElementById("query");
  input.value = text;
  doSearch();
}
|
||||
|
||||
// Send the trimmed query to the /search endpoint and render the response.
// Blank queries are ignored. HTTP errors and network failures are reported
// in the grid instead of leaving stale results and an unhandled rejection.
function doSearch() {
  const q = document.getElementById("query").value.trim();
  if (!q) return;
  fetch(`${API}/search?q=${encodeURIComponent(q)}&limit=12`)
    .then(r => {
      // fetch() only rejects on network errors; surface HTTP errors too.
      if (!r.ok) throw new Error(`Search failed with HTTP ${r.status}`);
      return r.json();
    })
    .then(renderResults)
    .catch(err => {
      console.error(err);
      document.getElementById("grid").innerHTML =
        '<p class="empty">Search failed. Please try again.</p>';
    });
}
|
||||
|
||||
// Replace the grid contents with one card per search result.
// `results` is the array returned by /search: objects with `filename` and
// `score`. An empty array shows the "No results found." placeholder.
function renderResults(results) {
  // File names come from the database; escape them before building markup
  // so a name containing HTML metacharacters cannot inject elements or
  // break out of the alt attribute.
  const escapeHtml = value => String(value).replace(/[&<>"']/g, ch => ({
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  }[ch]));

  const grid = document.getElementById("grid");
  if (!results.length) {
    grid.innerHTML = '<p class="empty">No results found.</p>';
    return;
  }
  grid.innerHTML = results.map(r => {
    const safeName = escapeHtml(r.filename);
    return `
      <div class="card">
        <img src="${API}/photos/${encodeURIComponent(r.filename)}" alt="${safeName}" loading="lazy" />
        <div class="card-info">
          <div class="score">${(r.score * 100).toFixed(1)}% match</div>
          <div class="name">${safeName}</div>
        </div>
      </div>
    `;
  }).join("");
}
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
@@ -0,0 +1,179 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Vector Image Search — Oracle In-DB</title>
|
||||
<style>
|
||||
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||
body { font-family: system-ui, sans-serif; background: #f5f5f5; color: #222; }
|
||||
|
||||
header {
|
||||
background: #7b5ea7;
|
||||
color: white;
|
||||
padding: 1.2rem 2rem;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 1rem;
|
||||
}
|
||||
header h1 { font-size: 1.4rem; font-weight: 600; }
|
||||
.badge {
|
||||
background: white;
|
||||
color: #7b5ea7;
|
||||
font-size: 0.75rem;
|
||||
font-weight: 700;
|
||||
padding: 0.2rem 0.6rem;
|
||||
border-radius: 999px;
|
||||
}
|
||||
|
||||
.search-area {
|
||||
max-width: 700px;
|
||||
margin: 2rem auto 1rem;
|
||||
padding: 0 1rem;
|
||||
}
|
||||
.search-row {
|
||||
display: flex;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
input[type="text"] {
|
||||
flex: 1;
|
||||
padding: 0.7rem 1rem;
|
||||
font-size: 1rem;
|
||||
border: 1px solid #ccc;
|
||||
border-radius: 6px;
|
||||
}
|
||||
button.search-btn {
|
||||
padding: 0.7rem 1.4rem;
|
||||
background: #7b5ea7;
|
||||
color: white;
|
||||
border: none;
|
||||
border-radius: 6px;
|
||||
font-size: 1rem;
|
||||
cursor: pointer;
|
||||
}
|
||||
button.search-btn:hover { background: #664e8d; }
|
||||
|
||||
.chips {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 0.4rem;
|
||||
margin-top: 0.8rem;
|
||||
}
|
||||
.chip {
|
||||
padding: 0.3rem 0.8rem;
|
||||
background: white;
|
||||
border: 1px solid #ccc;
|
||||
border-radius: 999px;
|
||||
font-size: 0.85rem;
|
||||
cursor: pointer;
|
||||
}
|
||||
.chip:hover { background: #f3f0f8; border-color: #7b5ea7; }
|
||||
|
||||
.stats { text-align: center; color: #666; font-size: 0.85rem; margin-bottom: 1rem; }
|
||||
|
||||
.grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fill, minmax(180px, 1fr));
|
||||
gap: 1rem;
|
||||
max-width: 1200px;
|
||||
margin: 0 auto;
|
||||
padding: 0 1rem 2rem;
|
||||
}
|
||||
.card {
|
||||
background: white;
|
||||
border-radius: 8px;
|
||||
overflow: hidden;
|
||||
box-shadow: 0 1px 4px rgba(0,0,0,0.1);
|
||||
}
|
||||
.card img {
|
||||
width: 100%;
|
||||
height: 140px;
|
||||
object-fit: cover;
|
||||
display: block;
|
||||
}
|
||||
.card-info {
|
||||
padding: 0.5rem 0.7rem;
|
||||
font-size: 0.8rem;
|
||||
}
|
||||
.card-info .score {
|
||||
font-weight: 700;
|
||||
color: #7b5ea7;
|
||||
}
|
||||
.card-info .name {
|
||||
color: #555;
|
||||
white-space: nowrap;
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
}
|
||||
.empty { text-align: center; color: #999; margin-top: 3rem; font-size: 1rem; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<h1>Vector Image Search</h1>
|
||||
<span class="badge">Oracle In-DB</span>
|
||||
</header>
|
||||
|
||||
<div class="search-area">
|
||||
<div class="search-row">
|
||||
<input id="query" type="text" placeholder="Search photos, e.g. trees, water, night…" />
|
||||
<button class="search-btn" onclick="doSearch()">Search</button>
|
||||
</div>
|
||||
<div class="chips">
|
||||
<span class="chip" onclick="setQuery('trees')">trees</span>
|
||||
<span class="chip" onclick="setQuery('water')">water</span>
|
||||
<span class="chip" onclick="setQuery('people')">people</span>
|
||||
<span class="chip" onclick="setQuery('buildings')">buildings</span>
|
||||
<span class="chip" onclick="setQuery('sky')">sky</span>
|
||||
<span class="chip" onclick="setQuery('street')">street</span>
|
||||
<span class="chip" onclick="setQuery('night')">night</span>
|
||||
<span class="chip" onclick="setQuery('cars')">cars</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<p class="stats" id="stats"></p>
|
||||
<div class="grid" id="grid"><p class="empty">Enter a search term above.</p></div>
|
||||
|
||||
<script>
|
||||
const API = "http://localhost:8002";
|
||||
|
||||
// On load, ask the backend how many photos are indexed and show the count.
fetch(`${API}/stats`)
  .then(response => response.json())
  .then(({ count }) => {
    document.getElementById("stats").textContent = `${count} photos indexed`;
  });

// Pressing Enter inside the query box runs the search.
document.getElementById("query").addEventListener("keydown", event => {
  if (event.key !== "Enter") return;
  doSearch();
});
|
||||
|
||||
// Fill the query box with `text` (used by the suggestion chips) and search.
function setQuery(text) {
  const input = document.getElementById("query");
  input.value = text;
  doSearch();
}
|
||||
|
||||
// Send the trimmed query to the /search endpoint and render the response.
// Blank queries are ignored. HTTP errors and network failures are reported
// in the grid instead of leaving stale results and an unhandled rejection.
function doSearch() {
  const q = document.getElementById("query").value.trim();
  if (!q) return;
  fetch(`${API}/search?q=${encodeURIComponent(q)}&limit=12`)
    .then(r => {
      // fetch() only rejects on network errors; surface HTTP errors too.
      if (!r.ok) throw new Error(`Search failed with HTTP ${r.status}`);
      return r.json();
    })
    .then(renderResults)
    .catch(err => {
      console.error(err);
      document.getElementById("grid").innerHTML =
        '<p class="empty">Search failed. Please try again.</p>';
    });
}
|
||||
|
||||
// Replace the grid contents with one card per search result.
// `results` is the array returned by /search: objects with `filename` and
// `score`. An empty array shows the "No results found." placeholder.
function renderResults(results) {
  // File names come from the database; escape them before building markup
  // so a name containing HTML metacharacters cannot inject elements or
  // break out of the alt attribute.
  const escapeHtml = value => String(value).replace(/[&<>"']/g, ch => ({
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  }[ch]));

  const grid = document.getElementById("grid");
  if (!results.length) {
    grid.innerHTML = '<p class="empty">No results found.</p>';
    return;
  }
  grid.innerHTML = results.map(r => {
    const safeName = escapeHtml(r.filename);
    return `
      <div class="card">
        <img src="${API}/photos/${encodeURIComponent(r.filename)}" alt="${safeName}" loading="lazy" />
        <div class="card-info">
          <div class="score">${(r.score * 100).toFixed(1)}% match</div>
          <div class="name">${safeName}</div>
        </div>
      </div>
    `;
  }).join("");
}
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
Reference in New Issue
Block a user