Add in-DB indexing script, benchmark results, schema names in presentation
- index_images_indb.py: new script that indexes via VECTOR_EMBEDDING(CLIP_IMG), using a two-step INSERT + UPDATE to work around ORA-24816
- index_images_oracle.py / index_images.py: add timing output
- Presentation: schema names VECTORS_USER/VECTOR in diagram and comparison, ONNX expansion, HNSW index note on slide 11, indexing times updated from 3-run benchmark (avg: PG 12.1s, Ora 12.1s, InDB 13.6s)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -3,3 +3,5 @@ __pycache__/
|
|||||||
photos/
|
photos/
|
||||||
.~lock.*
|
.~lock.*
|
||||||
present.sh
|
present.sh
|
||||||
|
benchmark.sh
|
||||||
|
diagrams/
|
||||||
|
|||||||
+267
-48
@@ -10,6 +10,12 @@ from pptx.enum.text import PP_ALIGN
|
|||||||
from pptx.oxml.ns import qn
|
from pptx.oxml.ns import qn
|
||||||
from pptx.oxml import parse_xml
|
from pptx.oxml import parse_xml
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
import os
|
||||||
|
import numpy as np
|
||||||
|
import matplotlib
|
||||||
|
matplotlib.use("Agg")
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import matplotlib.patches as mpatches
|
||||||
|
|
||||||
_A_NS = "http://schemas.openxmlformats.org/drawingml/2006/main"
|
_A_NS = "http://schemas.openxmlformats.org/drawingml/2006/main"
|
||||||
|
|
||||||
@@ -17,6 +23,242 @@ def OxmlElement(tag):
|
|||||||
local = tag.split(":")[1]
|
local = tag.split(":")[1]
|
||||||
return etree.fromstring(f'<a:{local} xmlns:a="{_A_NS}"/>')
|
return etree.fromstring(f'<a:{local} xmlns:a="{_A_NS}"/>')
|
||||||
|
|
||||||
|
|
||||||
|
# ── Diagram generation (matplotlib → PNG → embedded in slide) ────────────────
|
||||||
|
|
||||||
|
DIAG_BG = "#1e1e2e"
|
||||||
|
DIAG_GRID = "#313244"
|
||||||
|
DIAG_AXIS = "#6c7086"
|
||||||
|
|
||||||
|
def _fig(w, h):
    """Create a single-axes figure of w x h inches painted in DIAG_BG.

    Both the figure patch and the axes get the dark background colour.
    Returns the (figure, axes) pair.
    """
    figure, axes = plt.subplots(figsize=(w, h))
    for surface in (figure.patch, axes):
        surface.set_facecolor(DIAG_BG)
    return figure, axes
|
||||||
|
|
||||||
|
def _save(fig, name):
    """Write fig as a PNG named `name` into the "diagrams" directory.

    The figure is saved at 150 dpi with a tight bounding box and the
    DIAG_BG face colour, then closed. Returns the path of the written file
    (relative, "diagrams/<name>").
    """
    out_dir = "diagrams"
    # Create the target directory here so the helper does not depend on some
    # other code having created it beforehand; a no-op when it already exists.
    os.makedirs(out_dir, exist_ok=True)
    path = os.path.join(out_dir, name)
    fig.savefig(path, dpi=150, bbox_inches="tight", facecolor=DIAG_BG)
    # Close the figure so pyplot does not keep it registered after saving.
    plt.close(fig)
    return path
|
||||||
|
|
||||||
|
|
||||||
|
def diagram_s3_vectors():
    """Render the slide-3 figure: a 2-D vector space with Hund / Katze / Auto.

    Draws three labelled arrows from the origin, a small arc between Hund and
    Katze and a large arc between Hund and Auto. The figure is written as
    "s3_vectors.png" via _save() and that path is returned.
    """
    hund, katze, auto = (0.91, 0.12), (0.87, 0.18), (-0.30, 0.90)

    def angle_deg(tip):
        # Angle of the vector against the positive x axis, in degrees.
        tip_x, tip_y = tip
        return np.degrees(np.arctan2(tip_y, tip_x))

    fig, ax = _fig(5, 5)
    ax.set_xlim(-1.3, 1.3)
    ax.set_ylim(-1.3, 1.3)
    ax.set_aspect("equal")
    ax.grid(True, color=DIAG_GRID, linewidth=0.5, alpha=0.6)
    ax.axhline(0, color=DIAG_AXIS, linewidth=1)
    ax.axvline(0, color=DIAG_AXIS, linewidth=1)
    ax.set_xticks([])
    ax.set_yticks([])
    for spine in ax.spines.values():
        spine.set_visible(False)
    ax.text(1.27, 0.05, "x₁", color=DIAG_AXIS, fontsize=12)
    ax.text(0.05, 1.27, "x₂", color=DIAG_AXIS, fontsize=12)

    # Arrows from the origin; each label is nudged away from the arrow tip in
    # the direction of the tip's sign on both axes.
    nudge_x, nudge_y = 0.10, 0.07
    arrows = (
        (hund, "#89b4fa", "Hund"),
        (katze, "#74c7ec", "Katze"),
        (auto, "#f38ba8", "Auto"),
    )
    for tip, shade, caption in arrows:
        tip_x, tip_y = tip
        ax.annotate(
            "",
            xy=tip,
            xytext=(0, 0),
            arrowprops={"arrowstyle": "->", "color": shade, "lw": 2.5},
        )
        ax.text(
            tip_x + nudge_x * np.sign(tip_x or 1),
            tip_y + nudge_y * np.sign(tip_y or 1),
            caption,
            color=shade,
            fontsize=13,
            fontweight="bold",
        )

    # Small arc: Hund ↔ Katze
    hund_deg = angle_deg(hund)
    katze_deg = angle_deg(katze)
    small_arc = mpatches.Arc(
        (0, 0), 0.32, 0.32, angle=0,
        theta1=min(hund_deg, katze_deg),
        theta2=max(hund_deg, katze_deg),
        color="#a6e3a1", lw=2,
    )
    ax.add_patch(small_arc)
    ax.text(0.22, -0.10, "klein", color="#a6e3a1", fontsize=10, ha="center")

    # Large arc: Hund ↔ Auto
    large_arc = mpatches.Arc(
        (0, 0), 0.52, 0.52, angle=0,
        theta1=hund_deg,
        theta2=angle_deg(auto),
        color="#fab387", lw=2,
    )
    ax.add_patch(large_arc)
    ax.text(-0.10, 0.34, "groß", color="#fab387", fontsize=10)

    plt.tight_layout(pad=0.3)
    return _save(fig, "s3_vectors.png")
|
||||||
|
|
||||||
|
|
||||||
|
def diagram_s4_flow():
    """Render the slide-4 figure: the semantic-search pipeline as a flow chart.

    Five rounded boxes are laid out left to right and joined by arrows. The
    figure is written as "s4_flow.png" via _save() and that path is returned.
    """
    half_width = 1.05   # half the box width; also where the arrows attach
    row_y = 0.92        # vertical centre shared by labels and arrows

    fig, ax = _fig(12, 1.9)  # flat figure — matches slide aspect ratio
    ax.set_xlim(0, 12)
    ax.set_ylim(0, 1.9)
    ax.axis("off")

    # (centre x, label, accent colour) for each pipeline stage
    stages = [
        (1.2, 'Text-Anfrage\n"Bäume"', "#89b4fa"),
        (3.6, "CLIP-Modell", "#cba6f7"),
        (6.0, "Vektor 512 floats", "#74c7ec"),
        (8.4, "Datenbank k-NN", "#f38ba8"),
        (10.8, "Ergebnisse\nnach Score", "#a6e3a1"),
    ]

    for centre, caption, accent in stages:
        ax.add_patch(
            mpatches.FancyBboxPatch(
                (centre - half_width, 0.22), 2.1, 1.4,
                boxstyle="round,pad=0.1",
                facecolor="#313244", edgecolor=accent, linewidth=2,
            )
        )
        ax.text(
            centre, row_y, caption,
            ha="center", va="center",
            color=accent, fontsize=13, fontweight="bold",
            multialignment="center", fontfamily="sans-serif",
        )

    # One arrow between each pair of neighbouring boxes, edge to edge.
    for (left_centre, _, _), (right_centre, _, _) in zip(stages, stages[1:]):
        tail_x = left_centre + half_width
        head_x = right_centre - half_width
        ax.annotate(
            "",
            xy=(head_x, row_y),
            xytext=(tail_x, row_y),
            arrowprops={"arrowstyle": "->", "color": DIAG_AXIS, "lw": 2.5},
        )

    plt.tight_layout(pad=0.15)
    return _save(fig, "s4_flow.png")
|
||||||
|
|
||||||
|
|
||||||
|
def diagram_s6_cosine():
    """Render the slide-6 figure: two vectors with the angle θ between them.

    Draws an image vector and a text vector from the origin, marks the angle
    between them with an arc labelled θ, and prints the similarity formula
    underneath. The figure is written as "s6_cosine.png" via _save() and that
    path is returned.
    """
    fig, ax = _fig(5, 4.5)
    ax.set_xlim(-0.2, 1.35)
    ax.set_ylim(-0.15, 1.35)
    ax.set_aspect("equal")
    ax.axis("off")

    vA = np.array([1.1, 0.25])   # image vector
    vB = np.array([0.55, 1.0])   # text vector

    # (vector, colour, label, label position)
    for v, color, label, lpos in [
        (vA, "#89b4fa", "Bild-Vektor", (1.12, 0.18)),
        (vB, "#cba6f7", 'Text-Vektor\n"Bäume"', (0.56, 1.07)),
    ]:
        ax.annotate("", xy=v, xytext=(0, 0),
                    arrowprops=dict(arrowstyle="->", color=color, lw=3))
        ax.text(*lpos, label, color=color, fontsize=12,
                fontweight="bold", ha="center", multialignment="center")

    # Angle arc from the image vector to the text vector
    a1 = np.degrees(np.arctan2(vA[1], vA[0]))
    a2 = np.degrees(np.arctan2(vB[1], vB[0]))
    ax.add_patch(mpatches.Arc((0, 0), 0.45, 0.45, angle=0,
                              theta1=a1, theta2=a2,
                              color="#a6e3a1", lw=2.5))
    # θ label sits on the bisector of the two vectors, just outside the arc.
    mid_angle = np.radians((a1 + a2) / 2)
    ax.text(0.28 * np.cos(mid_angle), 0.28 * np.sin(mid_angle),
            "θ", color="#a6e3a1", fontsize=16, fontweight="bold",
            ha="center", va="center")

    # Origin dot
    ax.plot(0, 0, "o", color=DIAG_AXIS, markersize=6)

    # Formula. Cosine similarity is cos(θ); 1 − cos(θ) is the cosine
    # *distance*. The previous caption equated similarity with 1 − cos(θ),
    # which contradicts the "similarity = 1 − distance" expressions used in
    # the SQL examples of the same deck.
    ax.text(0.58, -0.12,
            "Ähnlichkeit = cos(θ) = 1 − Distanz",
            color="#cdd6f4", fontsize=11, ha="center",
            fontfamily="monospace")

    plt.tight_layout(pad=0.3)
    return _save(fig, "s6_cosine.png")
|
||||||
|
|
||||||
|
|
||||||
|
def diagram_architecture():
    """Render the architecture figure: one column per demo variant.

    Each column shows, top to bottom: the port, the FastAPI app-server box
    (with a CLIP box or a dashed "kein CLIP" placeholder), an arrow labelled
    with what is sent to the database, and the database box (which for the
    in-database variant also holds the ONNX CLIP box and the SQL function
    label). The figure is written as "architecture.png" via _save() and that
    path is returned.
    """
    clip_edge = "#a6e3a1"
    # One tuple per column:
    # (centre x, database label, accent colour, port label,
    #  CLIP in app server?, CLIP in database?, index note, SQL function label)
    variants = (
        (2.3, "PostgreSQL 18", "#89b4fa", "Port 8000", True, False,
         "pgvector 0.8.2\nHNSW (Disk)", None),
        (6.65, "Oracle 26ai\nVECTORS_USER", "#f38ba8", "Port 8001", True, False,
         "HNSW (SGA)", None),
        (11.0, "Oracle 26ai\nVECTOR", "#cba6f7", "Port 8002", False, True,
         "HNSW (SGA)", "VECTOR_EMBEDDING()"),
    )

    fig, ax = _fig(13.5, 6.5)
    ax.set_xlim(0, 13.5)
    ax.set_ylim(-0.8, 6.0)
    ax.axis("off")

    def outer_box(cx, bottom, height, edge):
        # Large rounded panel (app server or database) centred on cx.
        ax.add_patch(mpatches.FancyBboxPatch(
            (cx - 1.7, bottom), 3.4, height,
            boxstyle="round,pad=0.1", facecolor="#28293d", edgecolor=edge, lw=2))

    def inner_box(cx, bottom, **outline):
        # Smaller rounded box nested inside a panel, centred on cx.
        ax.add_patch(mpatches.FancyBboxPatch(
            (cx - 1.2, bottom), 2.4, 0.82,
            boxstyle="round,pad=0.08", facecolor="#1e1e2e", **outline))

    for (cx, db_label, accent, port_label,
         app_has_clip, db_has_clip, index_note, sql_fn) in variants:
        # ── Port label above the column
        ax.text(cx, 5.78, port_label, ha="center", color=accent,
                fontsize=13, fontweight="bold")

        # ── App server box
        outer_box(cx, 3.7, 1.85, accent)
        ax.text(cx, 5.38, "App-Server (FastAPI)", ha="center",
                color=accent, fontsize=11, fontweight="bold")

        if not app_has_clip:
            inner_box(cx, 3.78, edgecolor=DIAG_AXIS, lw=1, linestyle="dashed")
            ax.text(cx, 4.19, "kein CLIP",
                    ha="center", va="center", color=DIAG_AXIS,
                    fontsize=10, style="italic")
            payload = "Text-String"
        else:
            inner_box(cx, 3.78, edgecolor=clip_edge, lw=2)
            ax.text(cx, 4.19, "CLIP-Modell\n(sentence-transformers)",
                    ha="center", va="center", color=clip_edge,
                    fontsize=9.5, fontweight="bold", multialignment="center")
            payload = "Vektor (512 floats)"

        # ── Arrow + what is sent
        ax.annotate("", xy=(cx, 3.05), xytext=(cx, 3.65),
                    arrowprops={"arrowstyle": "->", "color": DIAG_AXIS, "lw": 2})
        ax.text(cx, 3.35, payload, ha="center", va="center",
                color=DIAG_AXIS, fontsize=9, style="italic")

        # ── Database box (taller when it also hosts the CLIP model)
        if db_has_clip:
            outer_box(cx, 0.15, 2.8, accent)
            # CLIP ONNX box inside DB
            inner_box(cx, 0.25, edgecolor=clip_edge, lw=2)
            ax.text(cx, 0.66, "CLIP-Modell\n(ONNX, in Oracle)",
                    ha="center", va="center", color=clip_edge,
                    fontsize=9.5, fontweight="bold", multialignment="center")
            # SQL function label
            ax.text(cx, 1.22, sql_fn,
                    ha="center", color="#fab387", fontsize=10,
                    fontweight="bold", fontfamily="monospace")
            # DB name and index note
            ax.text(cx, 1.65, db_label, ha="center", color=accent,
                    fontsize=11, fontweight="bold")
            ax.text(cx, 2.35, index_note, ha="center", color=DIAG_AXIS,
                    fontsize=9, multialignment="center")
        else:
            outer_box(cx, 0.15, 1.9, accent)
            ax.text(cx, 1.5, db_label, ha="center", color=accent,
                    fontsize=11, fontweight="bold")
            ax.text(cx, 0.72, index_note, ha="center", color=DIAG_AXIS,
                    fontsize=9, multialignment="center")

    # ── Vertical separators between the columns
    for sep_x in (4.5, 8.85):
        ax.plot([sep_x, sep_x], [0.05, 5.9], color=DIAG_GRID, lw=1, linestyle="--")

    # ── Caption — separated from boxes, applies to all three columns
    ax.plot([0.3, 13.2], [-0.18, -0.18], color=DIAG_GRID, lw=1)
    ax.text(6.75, -0.5, "116 Street Fotos · CLIP ViT-B/32 · 512-dimensionale Vektoren",
            ha="center", va="center", color="#cdd6f4", fontsize=13, style="italic")

    plt.tight_layout(pad=0.2)
    return _save(fig, "architecture.png")
|
||||||
|
|
||||||
|
# Generate diagrams up front
|
||||||
|
os.makedirs("diagrams", exist_ok=True)
|
||||||
|
DIAG_S3 = diagram_s3_vectors()
|
||||||
|
DIAG_S4 = diagram_s4_flow()
|
||||||
|
DIAG_S6 = diagram_s6_cosine()
|
||||||
|
DIAG_ARCH = diagram_architecture()
|
||||||
|
|
||||||
import copy
|
import copy
|
||||||
|
|
||||||
# ── Colour palette (dark theme) ──────────────────────────────────────────────
|
# ── Colour palette (dark theme) ──────────────────────────────────────────────
|
||||||
@@ -235,13 +477,13 @@ bullet_box(s, [
|
|||||||
"▸ Moderne KI-Modelle erzeugen Vektoren mit 512 bis 1536 Dimensionen",
|
"▸ Moderne KI-Modelle erzeugen Vektoren mit 512 bis 1536 Dimensionen",
|
||||||
"▸ Ähnliche Inhalte → ähnliche Vektoren → kleiner Abstand im Raum",
|
"▸ Ähnliche Inhalte → ähnliche Vektoren → kleiner Abstand im Raum",
|
||||||
"▸ Texte, Bilder, Audio — alles lässt sich in denselben Vektorraum einbetten",
|
"▸ Texte, Bilder, Audio — alles lässt sich in denselben Vektorraum einbetten",
|
||||||
], Inches(0.8), Inches(1.3), Inches(7.5), Inches(4), size=20)
|
], Inches(0.8), Inches(1.3), Inches(7.2), Inches(4), size=20)
|
||||||
|
|
||||||
code_box(s, '# 4-dimensionaler Beispielvektor\nvec_hund = [0.91, 0.12, -0.44, 0.72]\nvec_katze = [0.87, 0.18, -0.39, 0.68]\n# ähnlich! Abstand ≈ 0.04\nvec_auto = [-0.3, -0.82, 0.91, -0.11]\n# weit entfernt',
|
# 2-D vector diagram on the right
|
||||||
Inches(8.8), Inches(1.5), Inches(4.3), Inches(2.6), size=12)
|
s.shapes.add_picture(DIAG_S3, Inches(7.8), Inches(1.1), Inches(5.3), Inches(5.3))
|
||||||
|
|
||||||
txb(s, "Vektoren machen Ähnlichkeit berechenbar.",
|
txb(s, "Vektoren machen Ähnlichkeit berechenbar.",
|
||||||
Inches(0.8), Inches(5.8), Inches(11), Inches(0.7),
|
Inches(0.8), Inches(5.8), Inches(6.8), Inches(0.7),
|
||||||
size=22, bold=True, color=ACCENT_GRN)
|
size=22, bold=True, color=ACCENT_GRN)
|
||||||
|
|
||||||
# ════════════════════════════════════════════════════════════════════════════
|
# ════════════════════════════════════════════════════════════════════════════
|
||||||
@@ -263,7 +505,10 @@ bullet_box(s, [
|
|||||||
"▸ Datenbankabfrage: finde die k nächsten Nachbarn (k-NN)",
|
"▸ Datenbankabfrage: finde die k nächsten Nachbarn (k-NN)",
|
||||||
"▸ Ergebnis: Bilder nach semantischer Ähnlichkeit gerankt",
|
"▸ Ergebnis: Bilder nach semantischer Ähnlichkeit gerankt",
|
||||||
"▸ Kein manuelles Tagging, keine Metadaten nötig",
|
"▸ Kein manuelles Tagging, keine Metadaten nötig",
|
||||||
], Inches(0.8), Inches(3.9), Inches(11.5), Inches(2.8), size=20)
|
], Inches(0.8), Inches(3.9), Inches(11.5), Inches(1.1), size=20)
|
||||||
|
|
||||||
|
# Flow diagram
|
||||||
|
s.shapes.add_picture(DIAG_S4, Inches(0.5), Inches(5.1), Inches(12.3), Inches(1.75))
|
||||||
|
|
||||||
# ════════════════════════════════════════════════════════════════════════════
|
# ════════════════════════════════════════════════════════════════════════════
|
||||||
# Slide 5 — CLIP-Modell
|
# Slide 5 — CLIP-Modell
|
||||||
@@ -297,14 +542,17 @@ bullet_box(s, [
|
|||||||
"▸ Cosinus-Distanz = 0 → identisch",
|
"▸ Cosinus-Distanz = 0 → identisch",
|
||||||
"▸ Cosinus-Distanz = 1 → völlig unähnlich",
|
"▸ Cosinus-Distanz = 1 → völlig unähnlich",
|
||||||
"▸ Ähnlichkeitswert = 1 − Distanz → 1.0 = perfekte Übereinstimmung",
|
"▸ Ähnlichkeitswert = 1 − Distanz → 1.0 = perfekte Übereinstimmung",
|
||||||
], Inches(0.8), Inches(1.3), Inches(8.5), Inches(3.5), size=20)
|
], Inches(0.8), Inches(1.3), Inches(7.5), Inches(3.5), size=20)
|
||||||
|
|
||||||
|
# Cosine diagram on the right
|
||||||
|
s.shapes.add_picture(DIAG_S6, Inches(8.0), Inches(1.1), Inches(5.1), Inches(3.7))
|
||||||
|
|
||||||
code_box(s,
|
code_box(s,
|
||||||
"-- PostgreSQL\n1 - (embedding <=> query_vec)\n\n-- Oracle 26ai\n1 - VECTOR_DISTANCE(embedding, query_vec, COSINE)",
|
"-- PostgreSQL\n1 - (embedding <=> query_vec)\n\n-- Oracle 26ai\n1 - VECTOR_DISTANCE(embedding, query_vec, COSINE)",
|
||||||
Inches(0.8), Inches(5.0), Inches(6.0), Inches(1.9), size=13)
|
Inches(0.8), Inches(5.0), Inches(6.0), Inches(1.85), size=13)
|
||||||
|
|
||||||
txb(s, "In der Demo:\nScore 28 % = schwache Übereinstimmung\nScore 75 % = starke Übereinstimmung",
|
txb(s, "In der Demo:\nScore 28 % = schwach\nScore 75 % = stark",
|
||||||
Inches(7.5), Inches(5.0), Inches(5.0), Inches(2.0),
|
Inches(7.0), Inches(5.0), Inches(5.0), Inches(1.85),
|
||||||
size=18, color=ACCENT_GRN)
|
size=18, color=ACCENT_GRN)
|
||||||
|
|
||||||
# ════════════════════════════════════════════════════════════════════════════
|
# ════════════════════════════════════════════════════════════════════════════
|
||||||
@@ -442,10 +690,13 @@ section_header(s, "Oracle 26ai — Embedding in der Datenbank", ACCENT_IDB)
|
|||||||
|
|
||||||
bullet_box(s, [
|
bullet_box(s, [
|
||||||
"▸ Oracle kann ONNX-Modelle direkt in die Datenbank laden",
|
"▸ Oracle kann ONNX-Modelle direkt in die Datenbank laden",
|
||||||
|
" (ONNX = Open Neural Network Exchange)",
|
||||||
"▸ VECTOR_EMBEDDING() ruft das Modell innerhalb einer SQL-Abfrage auf",
|
"▸ VECTOR_EMBEDDING() ruft das Modell innerhalb einer SQL-Abfrage auf",
|
||||||
"▸ Kein Python, keine KI-Bibliothek auf dem Anwendungsserver zur Laufzeit",
|
"▸ Kein Python, keine KI-Bibliothek auf dem Anwendungsserver zur Laufzeit",
|
||||||
"▸ Der Text-String ist der einzige Parameter aus Python",
|
"▸ Der Text-String ist der einzige Parameter aus Python",
|
||||||
], Inches(0.8), Inches(1.3), Inches(11.5), Inches(2.2), size=20)
|
"▸ Schema: VECTOR — Tabelle: FOTO_VEKTOR — Bilder als BLOB gespeichert",
|
||||||
|
"▸ HNSW-Index auf FOTO_VEKTOR (wie in Schema VECTORS_USER)",
|
||||||
|
], Inches(0.8), Inches(1.3), Inches(11.5), Inches(3.0), size=19)
|
||||||
|
|
||||||
code_box(s,
|
code_box(s,
|
||||||
"-- Gesamte Logik in einem SQL-Statement\nSELECT filename,\n 1 - VECTOR_DISTANCE(\n foto_vek,\n VECTOR_EMBEDDING(CLIP_TXT USING :q AS data),\n COSINE\n ) AS score\nFROM VECTOR.FOTO_VEKTOR\nORDER BY VECTOR_DISTANCE(\n foto_vek,\n VECTOR_EMBEDDING(CLIP_TXT USING :q AS data), COSINE)\nFETCH FIRST 12 ROWS ONLY;",
|
"-- Gesamte Logik in einem SQL-Statement\nSELECT filename,\n 1 - VECTOR_DISTANCE(\n foto_vek,\n VECTOR_EMBEDDING(CLIP_TXT USING :q AS data),\n COSINE\n ) AS score\nFROM VECTOR.FOTO_VEKTOR\nORDER BY VECTOR_DISTANCE(\n foto_vek,\n VECTOR_EMBEDDING(CLIP_TXT USING :q AS data), COSINE)\nFETCH FIRST 12 ROWS ONLY;",
|
||||||
@@ -485,45 +736,13 @@ code_box(s,
|
|||||||
Inches(0.8), Inches(5.2), Inches(11.5), Inches(1.6), size=13)
|
Inches(0.8), Inches(5.2), Inches(11.5), Inches(1.6), size=13)
|
||||||
|
|
||||||
# ════════════════════════════════════════════════════════════════════════════
|
# ════════════════════════════════════════════════════════════════════════════
|
||||||
# Slide 13 — Architektur der Demo
|
# Slide 13 — Architektur: Wo wird CLIP berechnet?
|
||||||
# ════════════════════════════════════════════════════════════════════════════
|
# ════════════════════════════════════════════════════════════════════════════
|
||||||
s = add_slide()
|
s = add_slide()
|
||||||
section_header(s, "Architektur der Demo", ACCENT_GRN)
|
section_header(s, "Architektur der Demo", ACCENT_GRN)
|
||||||
|
s.shapes.add_picture(DIAG_ARCH, Inches(0.3), Inches(1.1), Inches(12.73), Inches(5.7))
|
||||||
|
|
||||||
# Three columns
|
# Slide 15 — Demo-Hinweis
|
||||||
for i, (label, port, color) in enumerate([
|
|
||||||
("pgvector", "Port 8000", ACCENT_PG),
|
|
||||||
("Oracle 26ai\n(Python)", "Port 8001", ACCENT_ORA),
|
|
||||||
("Oracle 26ai\n(In-DB)", "Port 8002", ACCENT_IDB),
|
|
||||||
]):
|
|
||||||
x = Inches(0.5 + i * 4.27)
|
|
||||||
# Box
|
|
||||||
box = s.shapes.add_shape(1, x, Inches(1.3), Inches(3.8), Inches(4.8))
|
|
||||||
box.fill.solid()
|
|
||||||
box.fill.fore_color.rgb = RGBColor(0x28, 0x29, 0x3d)
|
|
||||||
box.line.color.rgb = color
|
|
||||||
|
|
||||||
txb(s, label, x + Inches(0.1), Inches(1.4), Inches(3.6), Inches(0.8),
|
|
||||||
size=22, bold=True, color=color, align=PP_ALIGN.CENTER)
|
|
||||||
txb(s, port, x + Inches(0.1), Inches(2.1), Inches(3.6), Inches(0.4),
|
|
||||||
size=16, color=DIM_CLR, align=PP_ALIGN.CENTER)
|
|
||||||
|
|
||||||
items = {
|
|
||||||
"pgvector": ["Browser /ui/", "FastAPI", "CLIP (Python)", "PostgreSQL 18", "pgvector 0.8.2"],
|
|
||||||
"Oracle 26ai\n(Python)": ["Browser /ui/", "FastAPI", "CLIP (Python)", "Oracle 26ai", "HNSW (SGA)"],
|
|
||||||
"Oracle 26ai\n(In-DB)": ["Browser /ui/", "FastAPI", "(kein CLIP)", "Oracle 26ai", "VECTOR_EMBEDDING()"],
|
|
||||||
}[label]
|
|
||||||
|
|
||||||
for j, item in enumerate(items):
|
|
||||||
txb(s, "▸ " + item, x + Inches(0.2), Inches(2.65 + j * 0.52), Inches(3.5), Inches(0.48),
|
|
||||||
size=16, color=BODY_CLR)
|
|
||||||
|
|
||||||
txb(s, "116 Street Fotos · CLIP ViT-B/32 · 512-dimensionale Vektoren",
|
|
||||||
Inches(0.5), Inches(6.6), Inches(12.33), Inches(0.3),
|
|
||||||
size=16, color=DIM_CLR, align=PP_ALIGN.CENTER)
|
|
||||||
|
|
||||||
# ════════════════════════════════════════════════════════════════════════════
|
|
||||||
# Slide 14 — Demo-Hinweis
|
|
||||||
# ════════════════════════════════════════════════════════════════════════════
|
# ════════════════════════════════════════════════════════════════════════════
|
||||||
s = add_slide()
|
s = add_slide()
|
||||||
section_header(s, "Demo", ACCENT_GRN)
|
section_header(s, "Demo", ACCENT_GRN)
|
||||||
@@ -548,10 +767,10 @@ s = add_slide()
|
|||||||
section_header(s, "Vergleich", ACCENT_PG)
|
section_header(s, "Vergleich", ACCENT_PG)
|
||||||
|
|
||||||
rows = [
|
rows = [
|
||||||
("Merkmal", "PostgreSQL + pgvector", "Oracle 26ai (Python)", "Oracle 26ai (In-DB)"),
|
("Merkmal", "PostgreSQL + pgvector", "Oracle · VECTORS_USER", "Oracle · VECTOR"),
|
||||||
("Fotos indiziert", "116", "116", "116"),
|
("Fotos indiziert", "116", "116", "116"),
|
||||||
("Indizierungszeit", "~26 Sek. (CPU)", "~16 Sek. (CPU)", "— (separat)"),
|
("Indizierungszeit", "Ø 12,1 Sek. (3 Läufe)", "Ø 12,1 Sek. (3 Läufe)", "Ø 13,6 Sek. (3 Läufe)"),
|
||||||
("Index-Typ", "HNSW (auf Disk)", "HNSW (im Speicher)", "Full Table Scan"),
|
("Index-Typ", "HNSW (auf Disk)", "HNSW (im Speicher)", "HNSW (im Speicher)"),
|
||||||
("RAM-Bedarf", "Keiner", "512 MB SGA", "512 MB SGA"),
|
("RAM-Bedarf", "Keiner", "512 MB SGA", "512 MB SGA"),
|
||||||
("CLIP zur Laufzeit", "Ja (Python)", "Ja (Python)", "Nein"),
|
("CLIP zur Laufzeit", "Ja (Python)", "Ja (Python)", "Nein"),
|
||||||
("Embedding-Ort", "Python-Prozess", "Python-Prozess", "In der Datenbank"),
|
("Embedding-Ort", "Python-Prozess", "Python-Prozess", "In der Datenbank"),
|
||||||
|
|||||||
@@ -0,0 +1,49 @@
|
|||||||
|
import os
|
||||||
|
import time
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from db_oracle import get_connection_indb
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
PHOTOS_DIR = os.getenv("PHOTOS_DIR")
|
||||||
|
|
||||||
|
def main():
    """Index every JPEG in PHOTOS_DIR into VECTOR.FOTO_VEKTOR, embedding in the database.

    For each *.jpg / *.jpeg file that is not yet present in the table (matched
    by filename) the raw image bytes are inserted and the embedding column is
    then filled by VECTOR_EMBEDDING(CLIP_IMG ...) in a second statement.
    Progress and the total elapsed time are printed to stdout.

    Raises:
        SystemExit: if the PHOTOS_DIR environment variable is not set.
    """
    # Fail fast: os.listdir(None) would silently scan the current directory.
    if not PHOTOS_DIR:
        raise SystemExit("PHOTOS_DIR is not set; cannot locate the photos to index.")

    conn = get_connection_indb()
    try:
        cur = conn.cursor()
        try:
            cur.execute("SELECT COUNT(*) FROM VECTOR.FOTO_VEKTOR")
            print(f"Rows before: {cur.fetchone()[0]}")

            files = [
                f for f in os.listdir(PHOTOS_DIR)
                if f.lower().endswith((".jpg", ".jpeg"))
            ]
            print(f"Found {len(files)} photos in {PHOTOS_DIR}")

            start = time.time()
            for i, filename in enumerate(files, 1):
                filepath = os.path.join(PHOTOS_DIR, filename)
                cur.execute(
                    "SELECT 1 FROM VECTOR.FOTO_VEKTOR WHERE filename = :1",
                    (filename,),
                )
                if cur.fetchone():
                    print(f"[{i}/{len(files)}] Skipping {filename} (already indexed)")
                    continue
                with open(filepath, "rb") as f:
                    blob_data = f.read()
                # ORA-24816: Oracle cannot bind the same BLOB as both column value and
                # VECTOR_EMBEDDING() input in one statement. Insert the BLOB first, then
                # let Oracle compute the embedding from the stored data in a second step.
                cur.execute(
                    "INSERT INTO VECTOR.FOTO_VEKTOR (filename, foto) VALUES (:1, :2)",
                    (filename, blob_data),
                )
                cur.execute(
                    """UPDATE VECTOR.FOTO_VEKTOR
                       SET foto_vek = VECTOR_EMBEDDING(CLIP_IMG USING foto AS data)
                       WHERE filename = :1""",
                    (filename,),
                )
                # Commit only after both statements succeeded, so a row is
                # never committed without its embedding.
                conn.commit()
                print(f"[{i}/{len(files)}] Indexed {filename}")

            elapsed = time.time() - start
            print(f"Done in {elapsed:.1f} seconds.")
        finally:
            # Release the cursor and the connection even when a statement or
            # a file read fails, matching the other indexing scripts.
            cur.close()
    finally:
        conn.close()
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -1,5 +1,6 @@
|
|||||||
import os
|
import os
|
||||||
import array
|
import array
|
||||||
|
import time
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from db_oracle import get_connection
|
from db_oracle import get_connection
|
||||||
from embedder import embed_image
|
from embedder import embed_image
|
||||||
@@ -47,6 +48,7 @@ def main():
|
|||||||
files = [f for f in os.listdir(PHOTOS_DIR) if f.lower().endswith((".jpg", ".jpeg"))]
|
files = [f for f in os.listdir(PHOTOS_DIR) if f.lower().endswith((".jpg", ".jpeg"))]
|
||||||
print(f"Found {len(files)} photos in {PHOTOS_DIR}")
|
print(f"Found {len(files)} photos in {PHOTOS_DIR}")
|
||||||
|
|
||||||
|
start = time.time()
|
||||||
for i, filename in enumerate(files, 1):
|
for i, filename in enumerate(files, 1):
|
||||||
filepath = os.path.join(PHOTOS_DIR, filename)
|
filepath = os.path.join(PHOTOS_DIR, filename)
|
||||||
cur.execute("SELECT 1 FROM images WHERE filename = :1", (filename,))
|
cur.execute("SELECT 1 FROM images WHERE filename = :1", (filename,))
|
||||||
@@ -61,7 +63,7 @@ def main():
|
|||||||
|
|
||||||
cur.close()
|
cur.close()
|
||||||
conn.close()
|
conn.close()
|
||||||
print("Done.")
|
print(f"Done in {time.time() - start:.1f} seconds.")
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
import os
|
import os
|
||||||
|
import time
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from db import get_connection
|
from db import get_connection
|
||||||
from embedder import embed_image
|
from embedder import embed_image
|
||||||
@@ -37,6 +38,7 @@ def main():
|
|||||||
files = [f for f in os.listdir(PHOTOS_DIR) if f.lower().endswith((".jpg", ".jpeg"))]
|
files = [f for f in os.listdir(PHOTOS_DIR) if f.lower().endswith((".jpg", ".jpeg"))]
|
||||||
print(f"Found {len(files)} photos in {PHOTOS_DIR}")
|
print(f"Found {len(files)} photos in {PHOTOS_DIR}")
|
||||||
|
|
||||||
|
start = time.time()
|
||||||
for i, filename in enumerate(files, 1):
|
for i, filename in enumerate(files, 1):
|
||||||
filepath = os.path.join(PHOTOS_DIR, filename)
|
filepath = os.path.join(PHOTOS_DIR, filename)
|
||||||
cur.execute("SELECT 1 FROM images WHERE filename = %s", (filename,))
|
cur.execute("SELECT 1 FROM images WHERE filename = %s", (filename,))
|
||||||
@@ -50,7 +52,7 @@ def main():
|
|||||||
|
|
||||||
cur.close()
|
cur.close()
|
||||||
conn.close()
|
conn.close()
|
||||||
print("Done.")
|
print(f"Done in {time.time() - start:.1f} seconds.")
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|||||||
Reference in New Issue
Block a user