Initial implementation of pgvector and Oracle 26ai vector search demo
Three FastAPI backends comparing PostgreSQL/pgvector and Oracle 26ai for semantic image search using CLIP embeddings: Python-side embedding for both databases, plus Oracle in-database embedding via VECTOR_EMBEDDING(CLIP_TXT).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,56 @@
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from db import get_connection
|
||||
from embedder import embed_image
|
||||
|
||||
# Load variables from a .env file (python-dotenv) before any setting is read.
load_dotenv()

# Directory that is scanned for photos; None when PHOTOS_DIR is not set.
PHOTOS_DIR = os.environ.get("PHOTOS_DIR")
|
||||
|
||||
# DDL for the image catalogue: one row per photo, keyed by a unique filename,
# with a 512-dimensional pgvector column holding the image embedding.
# IF NOT EXISTS lets the script run this on every start.
CREATE_TABLE = """
CREATE TABLE IF NOT EXISTS images (
    id SERIAL PRIMARY KEY,
    filename TEXT NOT NULL UNIQUE,
    filepath TEXT NOT NULL,
    embedding vector(512)
);
"""
|
||||
|
||||
# HNSW index on the embedding column using the cosine operator class
# (vector_cosine_ops). IF NOT EXISTS lets the script run this on every start.
CREATE_INDEX = """
CREATE INDEX IF NOT EXISTS images_embedding_idx
    ON images USING hnsw (embedding vector_cosine_ops);
"""
|
||||
|
||||
# Parameterized insert of one image row (filename, filepath, embedding).
# ON CONFLICT ... DO NOTHING leaves an existing row with the same filename
# untouched instead of raising a unique-violation error.
INSERT = """
INSERT INTO images (filename, filepath, embedding)
VALUES (%s, %s, %s)
ON CONFLICT (filename) DO NOTHING;
"""
|
||||
|
||||
def main():
    """Index every JPEG found directly inside PHOTOS_DIR into the ``images`` table.

    Runs CREATE_TABLE and CREATE_INDEX, then walks the ``.jpg``/``.jpeg``
    files (extension matched case-insensitively) in PHOTOS_DIR. For each file
    not already present in the table (looked up by filename) it computes an
    embedding with ``embed_image`` and inserts a row. Every insert is
    committed on its own, so rows written before a failure are kept and the
    script can simply be re-run.

    Raises:
        RuntimeError: if PHOTOS_DIR is unset or empty.
    """
    # Fail fast: os.listdir(None) would silently scan the current working
    # directory, and os.path.join(None, ...) would then fail with an
    # unhelpful TypeError.
    if not PHOTOS_DIR:
        raise RuntimeError(
            "PHOTOS_DIR is not set; set it to the directory containing the photos"
        )

    conn = get_connection()
    try:
        cur = conn.cursor()
        try:
            cur.execute(CREATE_TABLE)
            cur.execute(CREATE_INDEX)
            conn.commit()

            files = [
                f for f in os.listdir(PHOTOS_DIR)
                if f.lower().endswith((".jpg", ".jpeg"))
            ]
            total = len(files)
            print(f"Found {total} photos in {PHOTOS_DIR}")

            for i, filename in enumerate(files, 1):
                filepath = os.path.join(PHOTOS_DIR, filename)

                # Check before embedding so already-indexed files skip the
                # embedding step entirely.
                cur.execute("SELECT 1 FROM images WHERE filename = %s", (filename,))
                if cur.fetchone():
                    print(f"[{i}/{total}] Skipping {filename} (already indexed)")
                    continue

                embedding = embed_image(filepath)
                cur.execute(INSERT, (filename, filepath, embedding))
                # Commit per file so progress survives a later failure.
                conn.commit()
                print(f"[{i}/{total}] Indexed {filename}")
        finally:
            # Release the cursor even when a statement or embedding raised.
            cur.close()
    finally:
        # Always close the connection, including on error paths.
        conn.close()

    print("Done.")
|
||||
|
||||
# Run the indexer only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user