Update README with all recent changes

- Project structure: add index_images_indb.py - Architecture: fix schema names (VECTORS_USER/VECTOR), HNSW for all three - Database schemas: separate sections for VECTORS_USER and VECTOR, photo storage differences - Indexing scripts: three-way comparison table, measured avg times (12.1s/12.1s/13.6s) - ORA-24816 workaround documented - Performance comparison: real benchmark numbers, HNSW for in-DB, photo storage row - Oracle in-DB section: HNSW index creation, index_images_indb.py for population - Re-index section: add index_images_indb.py Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Add in-DB indexing script, benchmark results, schema names in presentation
2026-05-20 11:17:27 +02:00 · 2026-05-20 10:42:13 +02:00 · 2026-05-19 15:19:37 +02:00
6 changed files with 413 additions and 89 deletions
@@ -3,3 +3,5 @@ __pycache__/
 photos/
 .~lock.*
 present.sh
+benchmark.sh
+diagrams/
@@ -40,8 +40,8 @@ ML library is loaded or called at search time.
  │  PostgreSQL 18       │    │  Oracle 26ai         │    │  Oracle 26ai          │
  │  + pgvector 0.8.2    │    │  (version 23.26.1)   │    │  (version 23.26.1)    │
  │  database:           │    │  PDB: FREEPDB1       │    │  PDB: FREEPDB1        │
-  │  vectors_demo        │    │  user: vectors_user  │    │  schema: VECTOR       │
-  │  HNSW index          │    │  HNSW index          │    │  HNSW not needed      │
+  │  vectors_demo        │    │  schema: VECTORS_USER│    │  schema: VECTOR       │
+  │  HNSW index          │    │  HNSW index          │    │  HNSW index           │
  └────────┬─────────────┘    └──────────┬───────────┘    └──────────┬────────────┘
           │                             │                            │
           ▼                             ▼                            │
@@ -88,7 +88,8 @@ vector-search-demo/
    │   ├── .env                     # Oracle credentials, photo path
    │   ├── db_oracle.py             # Oracle connection factory
    │   ├── embedder.py              # CLIP model wrapper (identical to pgvector)
-    │   ├── index_images_oracle.py   # One-time indexing script (Python embedding)
+    │   ├── index_images_oracle.py   # One-time indexing script (Python embedding, VECTORS_USER)
+    │   ├── index_images_indb.py     # One-time indexing script (in-DB embedding, VECTOR schema)
    │   ├── main_oracle.py           # FastAPI app — Python embedding (port 8001)
    │   └── main_oracle_indb.py      # FastAPI app — in-database embedding (port 8002)
    └── frontend/
@@ -130,7 +131,7 @@ The `pgvector/pgvector:pg18` image includes pgvector pre-installed. See the
 | Container name | `oracle.free` |
 | Host port | 37611 (mapped to 1521 inside container) |
 | Pluggable Database | FREEPDB1 |
-| Schema users | `vectors_user`, `VECTOR` |
+| Schema users | `VECTORS_USER`, `VECTOR` |

 **Oracle vector memory** — the HNSW index is held entirely in the SGA's Vector
 Memory Area. This is already configured:
@@ -215,10 +216,11 @@ CREATE INDEX images_embedding_idx
    ON images USING hnsw (embedding vector_cosine_ops);
 ```

-### Oracle 26ai
+### Oracle 26ai — schema VECTORS_USER (Python embedding backend)

 ```sql
-- PDB: FREEPDB1, user: vectors_user
+-- PDB: FREEPDB1, schema: VECTORS_USER
+-- Photos stored as file paths on the app server filesystem

 CREATE TABLE images (
    id        NUMBER GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
@@ -235,6 +237,36 @@ CREATE VECTOR INDEX images_embedding_idx
    PARAMETERS (type HNSW, neighbors 32, efconstruction 200);
 ```

+### Oracle 26ai — schema VECTOR (in-database embedding backend)
+
+```sql
+-- PDB: FREEPDB1, schema: VECTOR
+-- Photos stored as BLOBs inside Oracle — no filesystem access at query time
+
+CREATE TABLE foto_vektor (
+    id        NUMBER GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
+    filename  VARCHAR2(100),
+    foto      BLOB,                  -- full JPEG stored in Oracle
+    foto_vek  VECTOR                 -- embedding computed by CLIP_IMG ONNX model
+);
+
+CREATE VECTOR INDEX foto_vektor_idx
+    ON foto_vektor(foto_vek)
+    ORGANIZATION INMEMORY NEIGHBOR GRAPH
+    WITH DISTANCE COSINE
+    WITH TARGET ACCURACY 95
+    PARAMETERS (type HNSW, neighbors 32, efconstruction 200);
+```
+
+**Key difference between the two Oracle schemas:**
+
+| Aspect | VECTORS_USER | VECTOR |
+|---|---|---|
+| Photo storage | File path (filesystem) | BLOB (inside Oracle) |
+| Embedding at index time | Python CLIP | Oracle `VECTOR_EMBEDDING(CLIP_IMG)` |
+| Embedding at query time | Python CLIP | Oracle `VECTOR_EMBEDDING(CLIP_TXT)` |
+| Indexed by | `index_images_oracle.py` | `index_images_indb.py` |
+
 **Key schema differences:**

 | Aspect | PostgreSQL/pgvector | Oracle 26ai |
@@ -268,21 +300,29 @@ Runs in **thin mode** — no Oracle Instant Client installation is required on t

 ### Indexing scripts

-Both scripts are idempotent: they check for existing rows and skip already-indexed
+All three scripts are idempotent: they check for existing rows and skip already-indexed
 photos. Each photo is committed individually so a crash does not lose prior work.

-| | `index_images.py` | `index_images_oracle.py` |
-|---|---|---|
-| Run command | `python3 index_images.py` | `python3 index_images_oracle.py` |
-| Vector bind | Python `list` passed directly | `array.array("f", embedding)` required |
-| Bind style | `%s` placeholders (psycopg2) | `:1`, `:2`, `:3` positional (oracledb) |
-| Runtime (116 photos, CPU) | ~26 seconds | ~16 seconds |
+| | `index_images.py` | `index_images_oracle.py` | `index_images_indb.py` |
+|---|---|---|---|
+| Schema | PostgreSQL `vectors_demo` | Oracle `VECTORS_USER` | Oracle `VECTOR` |
+| Run command | `python3 index_images.py` | `python3 index_images_oracle.py` | `python3 index_images_indb.py` |
+| Photo data sent | File path | File path | Full JPEG as BLOB |
+| Embedding | Python CLIP | Python CLIP | Oracle `VECTOR_EMBEDDING(CLIP_IMG)` |
+| Vector bind | Python `list` | `array.array("f", ...)` | Computed inside Oracle |
+| Avg runtime (3 runs, CPU) | **12.1 s** | **12.1 s** | **13.6 s** |

-**Why `array.array` for Oracle?**
+**Why `array.array` for `index_images_oracle.py`?**
 The `python-oracledb` driver does not accept a plain Python list for a `VECTOR`
 column. The data must be a Python `array.array` with typecode `"f"` (32-bit float),
 matching the `FLOAT32` declaration in the Oracle column type.

+**Why two SQL statements in `index_images_indb.py`?**
+Oracle raises `ORA-24816` if a BLOB bind variable appears before another bind in the
+same `VALUES` clause. The script works around this by inserting the BLOB first, then
+updating the vector in a second statement — letting Oracle read the stored BLOB to
+compute the embedding internally.
+
 ---

 ### FastAPI applications
@@ -343,7 +383,8 @@ Three single-file HTML frontends, each served by its own backend at `/ui/`:

 Features: search box, Enter-key support, suggestion chips (trees, water, people,
 buildings, sky, street, night, cars), result grid with thumbnails and similarity
-scores in percent.
+scores in percent. Click any photo to view it full size in a lightbox overlay;
+close with a click anywhere or `Escape`.

 ---

@@ -469,16 +510,22 @@ podman cp oravector-demo/sql/setup_vector_schema.sql oracle.free:/tmp/
 podman exec oracle.free bash -c "sqlplus -s / as sysdba @/tmp/setup_vector_schema.sql"
 ```

-**Populate `FOTO_VEKTOR`** with images and their vectors (run as VECTOR user in SQL):
-```sql
-- Example: insert one photo with its CLIP_IMG embedding
-INSERT INTO vector.foto_vektor (filename, foto, foto_vek)
-VALUES (
-    'photo.jpg',
-    TO_BLOB(BFILENAME('VEC_DUMP', 'photo.jpg')),
-    VECTOR_EMBEDDING(CLIP_IMG USING TO_BLOB(BFILENAME('VEC_DUMP', 'photo.jpg')) AS data)
-);
-COMMIT;
+**Add HNSW index** (after the table is created):
+```bash
+podman exec oracle.free bash -c "sqlplus -s 'vector/Vektor@localhost:1521/FREEPDB1' <<'EOF'
+CREATE VECTOR INDEX foto_vektor_idx
+    ON VECTOR.FOTO_VEKTOR(foto_vek)
+    ORGANIZATION INMEMORY NEIGHBOR GRAPH
+    WITH DISTANCE COSINE WITH TARGET ACCURACY 95
+    PARAMETERS (type HNSW, neighbors 32, efconstruction 200);
+EXIT;
+EOF"
+```
+
+**Populate `FOTO_VEKTOR`** using the indexing script (reads JPEGs from `PHOTOS_DIR`,
+sends them as BLOBs to Oracle, which computes embeddings via `VECTOR_EMBEDDING(CLIP_IMG)`):
+```bash
+cd oravector-demo/backend && python3 index_images_indb.py
 ```

 ---
@@ -518,11 +565,11 @@ cd oravector-demo/backend && uvicorn main_oracle_indb:app --host 0.0.0.0 --port
 # PostgreSQL
 cd pgvector-demo/backend && python3 index_images.py

-# Oracle (Python embedding)
+# Oracle VECTORS_USER (Python embedding)
 cd oravector-demo/backend && python3 index_images_oracle.py

-# Oracle in-database: re-indexing is done in SQL directly
-# (the VECTOR schema's FOTO_VEKTOR table is managed by Oracle)
+# Oracle VECTOR (in-database embedding)
+cd oravector-demo/backend && python3 index_images_indb.py
 ```

 ---
@@ -536,14 +583,15 @@ installation. The setup involved:
 1. Creating a `VECTOR` database user
 2. Exporting CLIP (ViT-B/32) to ONNX format and loading the models via
   `DBMS_VECTOR.LOAD_ONNX_MODEL`
-3. Creating and populating the `FOTO_VEKTOR` table with images and their vectors
+3. Creating the `FOTO_VEKTOR` table and HNSW index
+4. Populating `FOTO_VEKTOR` using `index_images_indb.py`

 The resulting models and table are:

 | Object | Type | Input | Output | Purpose |
 |---|---|---|---|---|
-| `VECTOR.CLIP_TXT` | ONNX model | `VARCHAR2` text | `VECTOR(512)` | Embed text queries |
-| `VECTOR.CLIP_IMG` | ONNX model | `BLOB` image | `VECTOR(512)` | Embed image data |
+| `VECTOR.CLIP_TXT` | ONNX model | `VARCHAR2` text | `VECTOR(512)` | Embed text queries at search time |
+| `VECTOR.CLIP_IMG` | ONNX model | `BLOB` image | `VECTOR(512)` | Embed images at index time |
 | `VECTOR.FOTO_VEKTOR` | Table | — | — | Stores filenames, image BLOBs, and vectors |

 These are called with the `VECTOR_EMBEDDING()` SQL function. The table
@@ -590,18 +638,20 @@ Measured on this installation (CPU only, no GPU):

 | Metric | PostgreSQL + pgvector | Oracle 26ai (Python embed) | Oracle 26ai (in-DB embed) |
 |---|---|---|---|
-| Photos indexed | 116 | 116 | 116 (manually indexed) |
-| Indexing time | ~26 seconds | ~16 seconds | 0 (indexed separately by admin) |
-| Index type | HNSW (on disk) | HNSW (in-memory) | Full table scan (116 rows) |
+| Photos indexed | 116 | 116 | 116 |
+| Avg indexing time (3 runs, CPU) | **12.1 s** | **12.1 s** | **13.6 s** |
+| Index type | HNSW (on disk) | HNSW (in-memory) | HNSW (in-memory) |
 | Memory required | None | 512 MB SGA | 512 MB SGA |
+| Photo storage | File path (filesystem) | File path (filesystem) | BLOB (in Oracle) |
 | Python CLIP at query time | Yes | Yes | **No** |
-| Embedding location | Python process | Python process | Inside Oracle SQL |
+| Embedding at index time | Python CLIP | Python CLIP | Oracle `VECTOR_EMBEDDING(CLIP_IMG)` |
+| Embedding at query time | Python CLIP | Python CLIP | Oracle `VECTOR_EMBEDDING(CLIP_TXT)` |
 | `VECTOR_EMBEDDING()` used | No | No | **Yes** |
+| Oracle schema | — | `VECTORS_USER` | `VECTOR` |

-Note: indexing time for backends 1 and 2 is dominated by CLIP inference (CPU),
-not database write speed. The in-database backend uses the manually loaded CLIP
-models in the `VECTOR` schema; their indexing time is not measured here as it
-was performed separately by the administrator.
+Note: indexing time is dominated by CLIP inference for backends 1 and 2 (CPU, no GPU).
+Backend 3 is slightly slower because each photo is transferred as a full JPEG BLOB
+to Oracle over the network before Oracle computes the embedding internally.

 ---

@@ -10,6 +10,12 @@ from pptx.enum.text import PP_ALIGN
 from pptx.oxml.ns import qn
 from pptx.oxml import parse_xml
 from lxml import etree
+import os
+import numpy as np
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+import matplotlib.patches as mpatches

 _A_NS = "http://schemas.openxmlformats.org/drawingml/2006/main"

@@ -17,6 +23,242 @@ def OxmlElement(tag):
    local = tag.split(":")[1]
    return etree.fromstring(f'<a:{local} xmlns:a="{_A_NS}"/>')

+
+# ── Diagram generation (matplotlib → PNG → embedded in slide) ────────────────
+
+DIAG_BG   = "#1e1e2e"
+DIAG_GRID = "#313244"
+DIAG_AXIS = "#6c7086"
+
+def _fig(w, h):
+    fig, ax = plt.subplots(figsize=(w, h))
+    fig.patch.set_facecolor(DIAG_BG)
+    ax.set_facecolor(DIAG_BG)
+    return fig, ax
+
+def _save(fig, name):
+    path = os.path.join("diagrams", name)
+    fig.savefig(path, dpi=150, bbox_inches="tight", facecolor=DIAG_BG)
+    plt.close(fig)
+    return path
+
+
+def diagram_s3_vectors():
+    """Slide 3: 2-D vector space with Hund / Katze / Auto."""
+    fig, ax = _fig(5, 5)
+    ax.set_xlim(-1.3, 1.3)
+    ax.set_ylim(-1.3, 1.3)
+    ax.set_aspect("equal")
+    ax.grid(True, color=DIAG_GRID, linewidth=0.5, alpha=0.6)
+    ax.axhline(0, color=DIAG_AXIS, linewidth=1)
+    ax.axvline(0, color=DIAG_AXIS, linewidth=1)
+    ax.set_xticks([]); ax.set_yticks([])
+    for sp in ax.spines.values(): sp.set_visible(False)
+    ax.text(1.27,  0.05, "x₁", color=DIAG_AXIS, fontsize=12)
+    ax.text( 0.05, 1.27, "x₂", color=DIAG_AXIS, fontsize=12)
+
+    vecs = [
+        ((0.91,  0.12), "#89b4fa", "Hund"),
+        ((0.87,  0.18), "#74c7ec", "Katze"),
+        ((-0.30,  0.90), "#f38ba8", "Auto"),
+    ]
+    for (vx, vy), color, label in vecs:
+        ax.annotate("", xy=(vx, vy), xytext=(0, 0),
+                    arrowprops=dict(arrowstyle="->", color=color, lw=2.5))
+        ox, oy = 0.10, 0.07
+        ax.text(vx + ox * np.sign(vx or 1),
+                vy + oy * np.sign(vy or 1),
+                label, color=color, fontsize=13, fontweight="bold")
+
+    # Small arc: Hund ↔ Katze
+    a1 = np.degrees(np.arctan2(0.12, 0.91))
+    a2 = np.degrees(np.arctan2(0.18, 0.87))
+    ax.add_patch(mpatches.Arc((0, 0), 0.32, 0.32, angle=0,
+                               theta1=min(a1, a2), theta2=max(a1, a2),
+                               color="#a6e3a1", lw=2))
+    ax.text(0.22, -0.10, "klein", color="#a6e3a1", fontsize=10, ha="center")
+
+    # Large arc: Hund ↔ Auto
+    a3 = np.degrees(np.arctan2(0.90, -0.30))
+    ax.add_patch(mpatches.Arc((0, 0), 0.52, 0.52, angle=0,
+                               theta1=a1, theta2=a3,
+                               color="#fab387", lw=2))
+    ax.text(-0.10, 0.34, "groß", color="#fab387", fontsize=10)
+
+    plt.tight_layout(pad=0.3)
+    return _save(fig, "s3_vectors.png")
+
+
+def diagram_s4_flow():
+    """Slide 4: Semantic search pipeline as a flow diagram."""
+    fig, ax = _fig(12, 1.9)        # flat figure — matches slide aspect ratio
+    ax.set_xlim(0, 12); ax.set_ylim(0, 1.9)
+    ax.axis("off")
+
+    steps = [
+        (1.2,  'Text-Anfrage\n"Bäume"',  "#89b4fa"),
+        (3.6,  "CLIP-Modell",            "#cba6f7"),
+        (6.0,  "Vektor  512 floats",     "#74c7ec"),
+        (8.4,  "Datenbank k-NN",         "#f38ba8"),
+        (10.8, "Ergebnisse\nnach Score", "#a6e3a1"),
+    ]
+    for x, label, color in steps:
+        box = mpatches.FancyBboxPatch((x - 1.05, 0.22), 2.1, 1.4,
+                                       boxstyle="round,pad=0.1",
+                                       facecolor="#313244", edgecolor=color, linewidth=2)
+        ax.add_patch(box)
+        ax.text(x, 0.92, label, ha="center", va="center",
+                color=color, fontsize=13, fontweight="bold", multialignment="center",
+                fontfamily="sans-serif")
+
+    for i in range(len(steps) - 1):
+        x1 = steps[i][0]   + 1.05
+        x2 = steps[i+1][0] - 1.05
+        ax.annotate("", xy=(x2, 0.92), xytext=(x1, 0.92),
+                    arrowprops=dict(arrowstyle="->", color=DIAG_AXIS, lw=2.5))
+
+    plt.tight_layout(pad=0.15)
+    return _save(fig, "s4_flow.png")
+
+
+def diagram_s6_cosine():
+    """Slide 6: Two vectors with the cosine angle between them."""
+    fig, ax = _fig(5, 4.5)
+    ax.set_xlim(-0.2, 1.35); ax.set_ylim(-0.15, 1.35)
+    ax.set_aspect("equal")
+    ax.axis("off")
+
+    vA = np.array([1.1,  0.25])   # image vector
+    vB = np.array([0.55, 1.0 ])   # text vector
+
+    for v, color, label, lpos in [
+        (vA, "#89b4fa", "Bild-Vektor",         (1.12,  0.18)),
+        (vB, "#cba6f7", 'Text-Vektor\n"Bäume"', (0.56,  1.07)),
+    ]:
+        ax.annotate("", xy=v, xytext=(0, 0),
+                    arrowprops=dict(arrowstyle="->", color=color, lw=3))
+        ax.text(*lpos, label, color=color, fontsize=12,
+                fontweight="bold", ha="center", multialignment="center")
+
+    # Angle arc
+    a1 = np.degrees(np.arctan2(vA[1], vA[0]))
+    a2 = np.degrees(np.arctan2(vB[1], vB[0]))
+    ax.add_patch(mpatches.Arc((0, 0), 0.45, 0.45, angle=0,
+                               theta1=a1, theta2=a2,
+                               color="#a6e3a1", lw=2.5))
+    mid_angle = np.radians((a1 + a2) / 2)
+    ax.text(0.28 * np.cos(mid_angle), 0.28 * np.sin(mid_angle),
+            "θ", color="#a6e3a1", fontsize=16, fontweight="bold",
+            ha="center", va="center")
+
+    # Origin dot
+    ax.plot(0, 0, "o", color=DIAG_AXIS, markersize=6)
+
+    # Formula
+    ax.text(0.58, -0.12,
+            "Ähnlichkeit = 1 − cos(θ)",
+            color="#cdd6f4", fontsize=11, ha="center",
+            fontfamily="monospace")
+
+    plt.tight_layout(pad=0.3)
+    return _save(fig, "s6_cosine.png")
+
+
+def diagram_architecture():
+    """Architecture slide: 3 columns showing app server, database, and where CLIP runs."""
+    CLIP_CLR = "#a6e3a1"
+    # (x, db_name, color, port, clip_app, clip_db, db_tech, vec_embed_fn)
+    COLS = [
+        (2.3,  "PostgreSQL 18",              "#89b4fa", "Port 8000", True,  False, "pgvector 0.8.2\nHNSW (Disk)",  None),
+        (6.65, "Oracle 26ai\nVECTORS_USER",  "#f38ba8", "Port 8001", True,  False, "HNSW (SGA)",                   None),
+        (11.0, "Oracle 26ai\nVECTOR",        "#cba6f7", "Port 8002", False, True,  "HNSW (SGA)",                   "VECTOR_EMBEDDING()"),
+    ]
+
+    fig, ax = _fig(13.5, 6.5)
+    ax.set_xlim(0, 13.5); ax.set_ylim(-0.8, 6.0)
+    ax.axis("off")
+
+    for x, db_name, color, port, clip_app, clip_db, db_tech, vec_fn in COLS:
+        # ── Column title + port
+        ax.text(x, 5.78, port, ha="center", color=color, fontsize=13, fontweight="bold")
+
+        # ── App server box
+        ax.add_patch(mpatches.FancyBboxPatch(
+            (x-1.7, 3.7), 3.4, 1.85,
+            boxstyle="round,pad=0.1", facecolor="#28293d", edgecolor=color, lw=2))
+        ax.text(x, 5.38, "App-Server  (FastAPI)", ha="center",
+                color=color, fontsize=11, fontweight="bold")
+
+        if clip_app:
+            ax.add_patch(mpatches.FancyBboxPatch(
+                (x-1.2, 3.78), 2.4, 0.82,
+                boxstyle="round,pad=0.08", facecolor="#1e1e2e", edgecolor=CLIP_CLR, lw=2))
+            ax.text(x, 4.19, "CLIP-Modell\n(sentence-transformers)",
+                    ha="center", va="center", color=CLIP_CLR, fontsize=9.5, fontweight="bold",
+                    multialignment="center")
+        else:
+            ax.add_patch(mpatches.FancyBboxPatch(
+                (x-1.2, 3.78), 2.4, 0.82,
+                boxstyle="round,pad=0.08", facecolor="#1e1e2e", edgecolor=DIAG_AXIS, lw=1,
+                linestyle="dashed"))
+            ax.text(x, 4.19, "kein CLIP",
+                    ha="center", va="center", color=DIAG_AXIS, fontsize=10, style="italic")
+
+        # ── Arrow + what is sent
+        ax.annotate("", xy=(x, 3.05), xytext=(x, 3.65),
+                    arrowprops=dict(arrowstyle="->", color=DIAG_AXIS, lw=2))
+        arrow_lbl = "Vektor (512 floats)" if clip_app else "Text-String"
+        ax.text(x, 3.35, arrow_lbl, ha="center", va="center",
+                color=DIAG_AXIS, fontsize=9, style="italic")
+
+        # ── Database box
+        db_h = 2.8 if clip_db else 1.9
+        ax.add_patch(mpatches.FancyBboxPatch(
+            (x-1.7, 0.15), 3.4, db_h,
+            boxstyle="round,pad=0.1", facecolor="#28293d", edgecolor=color, lw=2))
+
+        if clip_db:
+            # CLIP ONNX box inside DB
+            ax.add_patch(mpatches.FancyBboxPatch(
+                (x-1.2, 0.25), 2.4, 0.82,
+                boxstyle="round,pad=0.08", facecolor="#1e1e2e", edgecolor=CLIP_CLR, lw=2))
+            ax.text(x, 0.66, "CLIP-Modell\n(ONNX, in Oracle)",
+                    ha="center", va="center", color=CLIP_CLR, fontsize=9.5, fontweight="bold",
+                    multialignment="center")
+            # VECTOR_EMBEDDING() label
+            ax.text(x, 1.22, vec_fn,
+                    ha="center", color="#fab387", fontsize=10, fontweight="bold",
+                    fontfamily="monospace")
+            # DB name
+            ax.text(x, 1.65, db_name, ha="center", color=color,
+                    fontsize=11, fontweight="bold")
+            ax.text(x, 2.35, db_tech, ha="center", color=DIAG_AXIS,
+                    fontsize=9, multialignment="center")
+        else:
+            ax.text(x, 1.5, db_name, ha="center", color=color,
+                    fontsize=11, fontweight="bold")
+            ax.text(x, 0.72, db_tech, ha="center", color=DIAG_AXIS,
+                    fontsize=9, multialignment="center")
+
+    # ── Vertical separators
+    for xsep in [4.5, 8.85]:
+        ax.plot([xsep, xsep], [0.05, 5.9], color=DIAG_GRID, lw=1, linestyle="--")
+
+    # ── Caption — separated from boxes, applies to all three columns
+    ax.plot([0.3, 13.2], [-0.18, -0.18], color=DIAG_GRID, lw=1)
+    ax.text(6.75, -0.5, "116 Street Fotos  ·  CLIP ViT-B/32  ·  512-dimensionale Vektoren",
+            ha="center", va="center", color="#cdd6f4", fontsize=13, style="italic")
+
+    plt.tight_layout(pad=0.2)
+    return _save(fig, "architecture.png")
+
+# Generate diagrams up front
+os.makedirs("diagrams", exist_ok=True)
+DIAG_S3   = diagram_s3_vectors()
+DIAG_S4   = diagram_s4_flow()
+DIAG_S6   = diagram_s6_cosine()
+DIAG_ARCH = diagram_architecture()
+
 import copy

 # ── Colour palette (dark theme) ──────────────────────────────────────────────
@@ -235,13 +477,13 @@ bullet_box(s, [
    "▸  Moderne KI-Modelle erzeugen Vektoren mit 512 bis 1536 Dimensionen",
    "▸  Ähnliche Inhalte → ähnliche Vektoren → kleiner Abstand im Raum",
    "▸  Texte, Bilder, Audio — alles lässt sich in denselben Vektorraum einbetten",
-], Inches(0.8), Inches(1.3), Inches(7.5), Inches(4), size=20)
+], Inches(0.8), Inches(1.3), Inches(7.2), Inches(4), size=20)

-code_box(s, '# 4-dimensionaler Beispielvektor\nvec_hund  = [0.91,  0.12, -0.44,  0.72]\nvec_katze = [0.87,  0.18, -0.39,  0.68]\n# ähnlich! Abstand ≈ 0.04\nvec_auto  = [-0.3, -0.82,  0.91, -0.11]\n# weit entfernt',
-    Inches(8.8), Inches(1.5), Inches(4.3), Inches(2.6), size=12)
+# 2-D vector diagram on the right
+s.shapes.add_picture(DIAG_S3, Inches(7.8), Inches(1.1), Inches(5.3), Inches(5.3))

 txb(s, "Vektoren machen Ähnlichkeit berechenbar.",
-    Inches(0.8), Inches(5.8), Inches(11), Inches(0.7),
+    Inches(0.8), Inches(5.8), Inches(6.8), Inches(0.7),
    size=22, bold=True, color=ACCENT_GRN)

 # ════════════════════════════════════════════════════════════════════════════
@@ -263,7 +505,10 @@ bullet_box(s, [
    "▸  Datenbankabfrage: finde die k nächsten Nachbarn (k-NN)",
    "▸  Ergebnis: Bilder nach semantischer Ähnlichkeit gerankt",
    "▸  Kein manuelles Tagging, keine Metadaten nötig",
-], Inches(0.8), Inches(3.9), Inches(11.5), Inches(2.8), size=20)
+], Inches(0.8), Inches(3.9), Inches(11.5), Inches(1.1), size=20)
+
+# Flow diagram
+s.shapes.add_picture(DIAG_S4, Inches(0.5), Inches(5.1), Inches(12.3), Inches(1.75))

 # ════════════════════════════════════════════════════════════════════════════
 # Slide 5 — CLIP-Modell
@@ -297,14 +542,17 @@ bullet_box(s, [
    "▸  Cosinus-Distanz = 0   →  identisch",
    "▸  Cosinus-Distanz = 1   →  völlig unähnlich",
    "▸  Ähnlichkeitswert = 1 − Distanz  →  1.0 = perfekte Übereinstimmung",
-], Inches(0.8), Inches(1.3), Inches(8.5), Inches(3.5), size=20)
+], Inches(0.8), Inches(1.3), Inches(7.5), Inches(3.5), size=20)
+
+# Cosine diagram on the right
+s.shapes.add_picture(DIAG_S6, Inches(8.0), Inches(1.1), Inches(5.1), Inches(3.7))

 code_box(s,
    "-- PostgreSQL\n1 - (embedding <=> query_vec)\n\n-- Oracle 26ai\n1 - VECTOR_DISTANCE(embedding, query_vec, COSINE)",
-    Inches(0.8), Inches(5.0), Inches(6.0), Inches(1.9), size=13)
+    Inches(0.8), Inches(5.0), Inches(6.0), Inches(1.85), size=13)

-txb(s, "In der Demo:\nScore 28 % = schwache Übereinstimmung\nScore 75 % = starke Übereinstimmung",
-    Inches(7.5), Inches(5.0), Inches(5.0), Inches(2.0),
+txb(s, "In der Demo:\nScore 28 % = schwach\nScore 75 % = stark",
+    Inches(7.0), Inches(5.0), Inches(5.0), Inches(1.85),
    size=18, color=ACCENT_GRN)

 # ════════════════════════════════════════════════════════════════════════════
@@ -442,10 +690,13 @@ section_header(s, "Oracle 26ai — Embedding in der Datenbank", ACCENT_IDB)

 bullet_box(s, [
    "▸  Oracle kann ONNX-Modelle direkt in die Datenbank laden",
+    "     (ONNX = Open Neural Network Exchange)",
    "▸  VECTOR_EMBEDDING() ruft das Modell innerhalb einer SQL-Abfrage auf",
    "▸  Kein Python, keine KI-Bibliothek auf dem Anwendungsserver zur Laufzeit",
    "▸  Der Text-String ist der einzige Parameter aus Python",
-], Inches(0.8), Inches(1.3), Inches(11.5), Inches(2.2), size=20)
+    "▸  Schema: VECTOR  —  Tabelle: FOTO_VEKTOR  —  Bilder als BLOB gespeichert",
+    "▸  HNSW-Index auf FOTO_VEKTOR (wie in Schema VECTORS_USER)",
+], Inches(0.8), Inches(1.3), Inches(11.5), Inches(3.0), size=19)

 code_box(s,
    "-- Gesamte Logik in einem SQL-Statement\nSELECT filename,\n       1 - VECTOR_DISTANCE(\n               foto_vek,\n               VECTOR_EMBEDDING(CLIP_TXT USING :q AS data),\n               COSINE\n           ) AS score\nFROM   VECTOR.FOTO_VEKTOR\nORDER  BY VECTOR_DISTANCE(\n             foto_vek,\n             VECTOR_EMBEDDING(CLIP_TXT USING :q AS data), COSINE)\nFETCH  FIRST 12 ROWS ONLY;",
@@ -485,45 +736,13 @@ code_box(s,
    Inches(0.8), Inches(5.2), Inches(11.5), Inches(1.6), size=13)

 # ════════════════════════════════════════════════════════════════════════════
-# Slide 13 — Architektur der Demo
+# Slide 13 — Architektur: Wo wird CLIP berechnet?
 # ════════════════════════════════════════════════════════════════════════════
 s = add_slide()
 section_header(s, "Architektur der Demo", ACCENT_GRN)
+s.shapes.add_picture(DIAG_ARCH, Inches(0.3), Inches(1.1), Inches(12.73), Inches(5.7))

-# Three columns
-for i, (label, port, color) in enumerate([
-    ("pgvector", "Port 8000", ACCENT_PG),
-    ("Oracle 26ai\n(Python)", "Port 8001", ACCENT_ORA),
-    ("Oracle 26ai\n(In-DB)", "Port 8002", ACCENT_IDB),
-]):
-    x = Inches(0.5 + i * 4.27)
-    # Box
-    box = s.shapes.add_shape(1, x, Inches(1.3), Inches(3.8), Inches(4.8))
-    box.fill.solid()
-    box.fill.fore_color.rgb = RGBColor(0x28, 0x29, 0x3d)
-    box.line.color.rgb = color
-
-    txb(s, label, x + Inches(0.1), Inches(1.4), Inches(3.6), Inches(0.8),
-        size=22, bold=True, color=color, align=PP_ALIGN.CENTER)
-    txb(s, port,  x + Inches(0.1), Inches(2.1), Inches(3.6), Inches(0.4),
-        size=16, color=DIM_CLR, align=PP_ALIGN.CENTER)
-
-    items = {
-        "pgvector":         ["Browser /ui/", "FastAPI", "CLIP (Python)", "PostgreSQL 18", "pgvector 0.8.2"],
-        "Oracle 26ai\n(Python)": ["Browser /ui/", "FastAPI", "CLIP (Python)", "Oracle 26ai", "HNSW (SGA)"],
-        "Oracle 26ai\n(In-DB)":  ["Browser /ui/", "FastAPI", "(kein CLIP)", "Oracle 26ai", "VECTOR_EMBEDDING()"],
-    }[label]
-
-    for j, item in enumerate(items):
-        txb(s, "▸ " + item, x + Inches(0.2), Inches(2.65 + j * 0.52), Inches(3.5), Inches(0.48),
-            size=16, color=BODY_CLR)
-
-txb(s, "116 Street Fotos  ·  CLIP ViT-B/32  ·  512-dimensionale Vektoren",
-    Inches(0.5), Inches(6.6), Inches(12.33), Inches(0.3),
-    size=16, color=DIM_CLR, align=PP_ALIGN.CENTER)
-
-# ════════════════════════════════════════════════════════════════════════════
-# Slide 14 — Demo-Hinweis
+# Slide 15 — Demo-Hinweis
 # ════════════════════════════════════════════════════════════════════════════
 s = add_slide()
 section_header(s, "Demo", ACCENT_GRN)
@@ -548,10 +767,10 @@ s = add_slide()
 section_header(s, "Vergleich", ACCENT_PG)

 rows = [
-    ("Merkmal",                "PostgreSQL + pgvector",       "Oracle 26ai (Python)",       "Oracle 26ai (In-DB)"),
+    ("Merkmal",                "PostgreSQL + pgvector",       "Oracle · VECTORS_USER",      "Oracle · VECTOR"),
    ("Fotos indiziert",        "116",                          "116",                         "116"),
-    ("Indizierungszeit",       "~26 Sek. (CPU)",               "~16 Sek. (CPU)",              "—  (separat)"),
-    ("Index-Typ",              "HNSW (auf Disk)",              "HNSW (im Speicher)",          "Full Table Scan"),
+    ("Indizierungszeit",       "Ø 12,1 Sek.  (3 Läufe)",      "Ø 12,1 Sek.  (3 Läufe)",     "Ø 13,6 Sek.  (3 Läufe)"),
+    ("Index-Typ",              "HNSW (auf Disk)",              "HNSW (im Speicher)",          "HNSW (im Speicher)"),
    ("RAM-Bedarf",             "Keiner",                       "512 MB SGA",                  "512 MB SGA"),
    ("CLIP zur Laufzeit",      "Ja (Python)",                  "Ja (Python)",                 "Nein"),
    ("Embedding-Ort",          "Python-Prozess",               "Python-Prozess",              "In der Datenbank"),
@@ -0,0 +1,49 @@
+import os
+import time
+from dotenv import load_dotenv
+from db_oracle import get_connection_indb
+
+load_dotenv()
+
+PHOTOS_DIR = os.getenv("PHOTOS_DIR")
+
+def main():
+    conn = get_connection_indb()
+    cur = conn.cursor()
+
+    cur.execute("SELECT COUNT(*) FROM VECTOR.FOTO_VEKTOR")
+    print(f"Rows before: {cur.fetchone()[0]}")
+
+    files = [f for f in os.listdir(PHOTOS_DIR) if f.lower().endswith((".jpg", ".jpeg"))]
+    print(f"Found {len(files)} photos in {PHOTOS_DIR}")
+
+    start = time.time()
+    for i, filename in enumerate(files, 1):
+        filepath = os.path.join(PHOTOS_DIR, filename)
+        cur.execute("SELECT 1 FROM VECTOR.FOTO_VEKTOR WHERE filename = :1", (filename,))
+        if cur.fetchone():
+            print(f"[{i}/{len(files)}] Skipping {filename} (already indexed)")
+            continue
+        with open(filepath, "rb") as f:
+            blob_data = f.read()
+        # ORA-24816: Oracle cannot bind the same BLOB as both column value and
+        # VECTOR_EMBEDDING() input in one statement. Insert the BLOB first, then
+        # let Oracle compute the embedding from the stored data in a second step.
+        cur.execute(
+            "INSERT INTO VECTOR.FOTO_VEKTOR (filename, foto) VALUES (:1, :2)",
+            (filename, blob_data),
+        )
+        cur.execute(
+            """UPDATE VECTOR.FOTO_VEKTOR
+               SET foto_vek = VECTOR_EMBEDDING(CLIP_IMG USING foto AS data)
+               WHERE filename = :1""",
+            (filename,),
+        )
+        conn.commit()
+        print(f"[{i}/{len(files)}] Indexed {filename}")
+
+    elapsed = time.time() - start
+    print(f"Done in {elapsed:.1f} seconds.")
+
+if __name__ == "__main__":
+    main()
@@ -1,5 +1,6 @@
 import os
 import array
+import time
 from dotenv import load_dotenv
 from db_oracle import get_connection
 from embedder import embed_image
@@ -47,6 +48,7 @@ def main():
    files = [f for f in os.listdir(PHOTOS_DIR) if f.lower().endswith((".jpg", ".jpeg"))]
    print(f"Found {len(files)} photos in {PHOTOS_DIR}")

+    start = time.time()
    for i, filename in enumerate(files, 1):
        filepath = os.path.join(PHOTOS_DIR, filename)
        cur.execute("SELECT 1 FROM images WHERE filename = :1", (filename,))
@@ -61,7 +63,7 @@ def main():

    cur.close()
    conn.close()
-    print("Done.")
+    print(f"Done in {time.time() - start:.1f} seconds.")

 if __name__ == "__main__":
    main()
@@ -1,4 +1,5 @@
 import os
+import time
 from dotenv import load_dotenv
 from db import get_connection
 from embedder import embed_image
@@ -37,6 +38,7 @@ def main():
    files = [f for f in os.listdir(PHOTOS_DIR) if f.lower().endswith((".jpg", ".jpeg"))]
    print(f"Found {len(files)} photos in {PHOTOS_DIR}")

+    start = time.time()
    for i, filename in enumerate(files, 1):
        filepath = os.path.join(PHOTOS_DIR, filename)
        cur.execute("SELECT 1 FROM images WHERE filename = %s", (filename,))
@@ -50,7 +52,7 @@ def main():

    cur.close()
    conn.close()
-    print("Done.")
+    print(f"Done in {time.time() - start:.1f} seconds.")

 if __name__ == "__main__":
    main()