From d360ff1a7832af150f250398998fc9b3830651c6 Mon Sep 17 00:00:00 2001
From: Dierk
Date: Tue, 19 May 2026 12:00:58 +0200
Subject: [PATCH] Add SQL setup scripts for all three backends

- pgvector-demo/sql/setup.sql: enable pgvector extension, create images table and HNSW index
- oravector-demo/sql/setup_vectors_user.sql: create vectors_user, images table and HNSW vector index
- oravector-demo/sql/setup_vector_schema.sql: create VECTOR user, load CLIP ONNX models, create FOTO_VEKTOR table

Co-Authored-By: Claude Sonnet 4.6
---
 oravector-demo/sql/setup_vector_schema.sql | 66 ++++++++++++++++++++++
 oravector-demo/sql/setup_vectors_user.sql  | 38 +++++++++++++
 pgvector-demo/sql/setup.sql                | 20 +++++++
 3 files changed, 124 insertions(+)
 create mode 100644 oravector-demo/sql/setup_vector_schema.sql
 create mode 100644 oravector-demo/sql/setup_vectors_user.sql
 create mode 100644 pgvector-demo/sql/setup.sql

diff --git a/oravector-demo/sql/setup_vector_schema.sql b/oravector-demo/sql/setup_vector_schema.sql
new file mode 100644
index 0000000..0db3dfd
--- /dev/null
+++ b/oravector-demo/sql/setup_vector_schema.sql
@@ -0,0 +1,66 @@
+-- Oracle 26ai setup for the in-database embedding backend (port 8002)
+-- Run as SYSDBA connected to FREEPDB1
+--
+-- Prerequisites:
+--   - CLIP_TXT and CLIP_IMG ONNX model files must be present in the Oracle
+--     VEC_DUMP directory before loading (see notes below)
+--   - vector_memory_size must be set in SGA (512M recommended)
+--   - vectors_user must already exist (see setup_vectors_user.sql) if the
+--     optional GRANT at the end of this script is kept
+--
+-- Usage:
+--   podman exec -it oracle.free sqlplus / as sysdba
+
+ALTER SESSION SET CONTAINER=FREEPDB1;
+
+-- Create VECTOR user
+CREATE USER vector IDENTIFIED BY Vektor;
+GRANT CONNECT, RESOURCE TO vector;
+GRANT UNLIMITED TABLESPACE TO vector;
+GRANT READ, WRITE ON DIRECTORY VEC_DUMP TO vector;
+
+-- Load ONNX models (run as VECTOR user or SYSDBA)
+-- The .onnx files must be placed in the VEC_DUMP directory on the Oracle host
+-- beforehand (typically /opt/oracle/dbs/vec_dump/ inside the container).
+--
+-- CLIP_TXT: text encoder using CLS-token pooling (position 0).
+-- Standard CLIP exports use EOS-token pooling (ArgMax), which Oracle rejects
+-- because it introduces an ArgMax node on input_ids. This model must be
+-- re-exported with CLS pooling to pass Oracle's ONNX validator.
+--
+-- Each call is wrapped in an anonymous PL/SQL block rather than EXEC: EXEC is
+-- a single-line SQL*Plus command, so a call split over two lines without a
+-- continuation character is cut off at the line break and fails.
+--
+-- NOTE(review): the model names carry no schema prefix, so the models are
+-- presumably created in the schema of the connected user. Confirm that this
+-- is the schema the backend expects.
+BEGIN
+    DBMS_VECTOR.LOAD_ONNX_MODEL(
+        'VEC_DUMP', 'clip_txt.onnx', 'CLIP_TXT',
+        JSON('{"function":"embedding","embeddingOutput":"output","input":{"input":["DATA"]}}')
+    );
+END;
+/
+
+BEGIN
+    DBMS_VECTOR.LOAD_ONNX_MODEL(
+        'VEC_DUMP', 'clip_img.onnx', 'CLIP_IMG',
+        JSON('{"function":"embedding","embeddingOutput":"output","input":{"input":["DATA"]}}')
+    );
+END;
+/
+
+-- Photo table (stores filename, image BLOB, and vector)
+CREATE TABLE vector.foto_vektor (
+    id        NUMBER GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
+    filename  VARCHAR2(100),
+    foto      BLOB,
+    foto_vek  VECTOR
+);
+
+-- Grant SELECT on FOTO_VEKTOR to vectors_user so both backends share one table
+-- (optional — remove if the schemas should remain isolated)
+GRANT SELECT ON vector.foto_vektor TO vectors_user;
+
+EXIT;
diff --git a/oravector-demo/sql/setup_vectors_user.sql b/oravector-demo/sql/setup_vectors_user.sql
new file mode 100644
index 0000000..84bc6a5
--- /dev/null
+++ b/oravector-demo/sql/setup_vectors_user.sql
@@ -0,0 +1,38 @@
+-- Oracle 26ai setup for the Python-embedding backend (port 8001)
+-- Run as SYSDBA connected to FREEPDB1
+--
+-- Usage (from host):
+--   podman exec oracle.free bash -c "sqlplus -s / as sysdba @/tmp/setup_vectors_user.sql"
+--
+-- Or interactively:
+--   podman exec -it oracle.free sqlplus / as sysdba
+--   ALTER SESSION SET CONTAINER=FREEPDB1;
+--   @setup_vectors_user.sql
+
+ALTER SESSION SET CONTAINER=FREEPDB1;
+
+-- Create user
+CREATE USER vectors_user IDENTIFIED BY Vektor;
+GRANT CONNECT, RESOURCE TO vectors_user;
+GRANT UNLIMITED TABLESPACE TO vectors_user;
+
+-- Schema objects: created below in the same SYSDBA session. The names are
+-- schema-qualified, so no reconnect as vectors_user is needed.
+
+-- Table
+CREATE TABLE vectors_user.images (
+    id         NUMBER GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
+    filename   VARCHAR2(255) NOT NULL UNIQUE,
+    filepath   VARCHAR2(1000) NOT NULL,
+    embedding  VECTOR(512, FLOAT32)
+);
+
+-- HNSW vector index (requires vector_memory_size > 0 in SGA)
+CREATE VECTOR INDEX vectors_user.images_embedding_idx
+    ON vectors_user.images(embedding)
+    ORGANIZATION INMEMORY NEIGHBOR GRAPH
+    WITH DISTANCE COSINE
+    WITH TARGET ACCURACY 95
+    PARAMETERS (type HNSW, neighbors 32, efconstruction 200);
+
+EXIT;
diff --git a/pgvector-demo/sql/setup.sql b/pgvector-demo/sql/setup.sql
new file mode 100644
index 0000000..edff597
--- /dev/null
+++ b/pgvector-demo/sql/setup.sql
@@ -0,0 +1,20 @@
+-- PostgreSQL setup for pgvector demo
+-- Run as superuser (dl) against the vectors_demo database
+--
+-- Usage:
+--   docker exec -i postgresql-database-1 psql -U dl -d vectors_demo -f - < pgvector-demo/sql/setup.sql
+
+-- Enable the pgvector extension
+CREATE EXTENSION IF NOT EXISTS vector;
+
+-- Table
+CREATE TABLE IF NOT EXISTS images (
+    id         SERIAL PRIMARY KEY,
+    filename   TEXT NOT NULL UNIQUE,
+    filepath   TEXT NOT NULL,
+    embedding  vector(512)
+);
+
+-- HNSW index on the embedding column (vector_cosine_ops operator class)
+CREATE INDEX IF NOT EXISTS images_embedding_idx
+    ON images USING hnsw (embedding vector_cosine_ops);