diff --git a/oravector-demo/sql/setup_vector_schema.sql b/oravector-demo/sql/setup_vector_schema.sql
new file mode 100644
index 0000000..0db3dfd
--- /dev/null
+++ b/oravector-demo/sql/setup_vector_schema.sql
@@ -0,0 +1,74 @@
+-- Oracle 26ai setup for the in-database embedding backend (port 8002)
+-- Run as SYSDBA connected to FREEPDB1
+--
+-- Prerequisites:
+--   - CLIP_TXT and CLIP_IMG ONNX model files must be present in the Oracle
+--     VEC_DUMP directory before loading (see notes below)
+--   - vector_memory_size must be set in SGA (512M recommended)
+--   - setup_vectors_user.sql must be run first if the optional
+--     GRANT SELECT ... TO vectors_user at the end of this script is kept
+--
+-- Usage:
+--   podman exec -it oracle.free sqlplus / as sysdba
+
+ALTER SESSION SET CONTAINER=FREEPDB1;
+
+-- Create VECTOR user
+-- NOTE: hard-coded demo password; change it for anything beyond a local sandbox.
+CREATE USER vector IDENTIFIED BY Vektor;
+GRANT CONNECT, RESOURCE TO vector;
+GRANT UNLIMITED TABLESPACE TO vector;
+GRANT READ, WRITE ON DIRECTORY VEC_DUMP TO vector;
+-- Required for the VECTOR user to load ONNX models into its own schema
+-- (the "run as VECTOR user" path mentioned below).
+GRANT CREATE MINING MODEL TO vector;
+
+-- Load ONNX models (run as VECTOR user or SYSDBA)
+-- The .onnx files must be placed in the VEC_DUMP directory on the Oracle host
+-- beforehand (typically /opt/oracle/dbs/vec_dump/ inside the container).
+--
+-- CLIP_TXT: text encoder using CLS-token pooling (position 0).
+-- Standard CLIP exports use EOS-token pooling (ArgMax), which Oracle rejects
+-- because it introduces an ArgMax node on input_ids. This model must be
+-- re-exported with CLS pooling to pass Oracle's ONNX validator.
+--
+-- Model names are schema-qualified so the models are created in the VECTOR
+-- schema even when this script runs as SYSDBA (an unqualified name would put
+-- them in the schema of the connected user).
+--
+-- Anonymous PL/SQL blocks are used instead of EXEC: SQL*Plus treats EXEC as a
+-- single-line command, so a call wrapped over two lines without the "-"
+-- continuation character fails to parse.
+BEGIN
+  DBMS_VECTOR.LOAD_ONNX_MODEL(
+    'VEC_DUMP',
+    'clip_txt.onnx',
+    'VECTOR.CLIP_TXT',
+    JSON('{"function":"embedding","embeddingOutput":"output","input":{"input":["DATA"]}}')
+  );
+END;
+/
+
+BEGIN
+  DBMS_VECTOR.LOAD_ONNX_MODEL(
+    'VEC_DUMP',
+    'clip_img.onnx',
+    'VECTOR.CLIP_IMG',
+    JSON('{"function":"embedding","embeddingOutput":"output","input":{"input":["DATA"]}}')
+  );
+END;
+/
+
+-- Photo table (stores filename, image BLOB, and vector)
+CREATE TABLE vector.foto_vektor (
+  id       NUMBER GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
+  filename VARCHAR2(100),
+  foto     BLOB,
+  foto_vek VECTOR
+);
+
+-- Grant SELECT on FOTO_VEKTOR to vectors_user so both backends share one table
+-- (optional — remove if the schemas should remain isolated)
+GRANT SELECT ON vector.foto_vektor TO vectors_user;
+
+EXIT;
diff --git a/oravector-demo/sql/setup_vectors_user.sql b/oravector-demo/sql/setup_vectors_user.sql
new file mode 100644
index 0000000..84bc6a5
--- /dev/null
+++ b/oravector-demo/sql/setup_vectors_user.sql
@@ -0,0 +1,40 @@
+-- Oracle 26ai setup for the Python-embedding backend (port 8001)
+-- Run as SYSDBA connected to FREEPDB1
+--
+-- Usage (from host):
+--   podman exec oracle.free bash -c "sqlplus -s / as sysdba @/tmp/setup_vectors_user.sql"
+--
+-- Or interactively:
+--   podman exec -it oracle.free sqlplus / as sysdba
+--   ALTER SESSION SET CONTAINER=FREEPDB1;
+--   @setup_vectors_user.sql
+
+ALTER SESSION SET CONTAINER=FREEPDB1;
+
+-- Create user
+-- NOTE: hard-coded demo password; change it for anything beyond a local sandbox.
+CREATE USER vectors_user IDENTIFIED BY Vektor;
+GRANT CONNECT, RESOURCE TO vectors_user;
+GRANT UNLIMITED TABLESPACE TO vectors_user;
+
+-- Create schema objects in the VECTORS_USER schema.
+-- Every object below is schema-qualified, so the statements run from this
+-- SYSDBA session; there is no need to reconnect as vectors_user.
+
+-- Table
+CREATE TABLE vectors_user.images (
+  id        NUMBER GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
+  filename  VARCHAR2(255) NOT NULL UNIQUE,
+  filepath  VARCHAR2(1000) NOT NULL,
+  embedding VECTOR(512, FLOAT32)
+);
+
+-- HNSW vector index (requires vector_memory_size > 0 in SGA)
+CREATE VECTOR INDEX vectors_user.images_embedding_idx
+  ON vectors_user.images(embedding)
+  ORGANIZATION INMEMORY NEIGHBOR GRAPH
+  WITH DISTANCE COSINE
+  WITH TARGET ACCURACY 95
+  PARAMETERS (type HNSW, neighbors 32, efconstruction 200);
+
+EXIT;
diff --git a/pgvector-demo/sql/setup.sql b/pgvector-demo/sql/setup.sql
new file mode 100644
index 0000000..edff597
--- /dev/null
+++ b/pgvector-demo/sql/setup.sql
@@ -0,0 +1,19 @@
+-- PostgreSQL setup for pgvector demo
+-- Run as superuser (dl) against the vectors_demo database
+--
+-- Usage:
+--   docker exec -i postgresql-database-1 psql -U dl -d vectors_demo -f - < pgvector-demo/sql/setup.sql
+
+CREATE EXTENSION IF NOT EXISTS vector;
+
+-- Image table: one row per file, with a 512-dimensional embedding column
+CREATE TABLE IF NOT EXISTS images (
+  id        SERIAL PRIMARY KEY,
+  filename  TEXT NOT NULL UNIQUE,
+  filepath  TEXT NOT NULL,
+  embedding vector(512)
+);
+
+-- HNSW index on the embedding column using the cosine operator class
+CREATE INDEX IF NOT EXISTS images_embedding_idx
+  ON images USING hnsw (embedding vector_cosine_ops);