Files
vector-search-demo/make_presentation.py
T

615 lines
32 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Generates "Vektoren in der Datenbank.pptx" — a LibreOffice-compatible presentation.
Run from the project root: python3 make_presentation.py
"""
from pptx import Presentation
from pptx.util import Inches, Pt, Emu
from pptx.dml.color import RGBColor
from pptx.enum.text import PP_ALIGN
from pptx.oxml.ns import qn
from pptx.oxml import parse_xml
from lxml import etree
_A_NS = "http://schemas.openxmlformats.org/drawingml/2006/main"


def OxmlElement(tag):
    """Create a bare DrawingML element such as ``a:buNone``.

    Args:
        tag: Element name, either prefixed (``"a:buNone"``) or plain
            (``"buNone"``). Whatever prefix is given, the element is always
            created in the DrawingML main namespace (``_A_NS``) and bound to
            the ``a`` prefix.

    Returns:
        A new, parentless lxml element.
    """
    # Take the part after the prefix; a tag without ":" is used as-is instead
    # of raising IndexError.
    local = tag.split(":", 1)[-1]
    # Build the element directly rather than formatting XML text and parsing it.
    return etree.Element(f"{{{_A_NS}}}{local}", nsmap={"a": _A_NS})
import copy
# ── Colour palette (dark theme) ──────────────────────────────────────────────
BG = RGBColor(0x1e, 0x1e, 0x2e)  # slide background
TITLE_CLR = RGBColor(0xcb, 0xd3, 0xff)  # slide titles
BODY_CLR = RGBColor(0xcd, 0xd6, 0xf4)  # body text
DIM_CLR = RGBColor(0x6c, 0x70, 0x86)  # dimmed / captions
ACCENT_PG = RGBColor(0x89, 0xb4, 0xfa)  # pgvector blue
ACCENT_ORA = RGBColor(0xf3, 0x8b, 0xa8)  # Oracle red/pink
ACCENT_IDB = RGBColor(0xcb, 0xa6, 0xf7)  # in-DB purple
ACCENT_GRN = RGBColor(0xa6, 0xe3, 0xa1)  # green for highlights
CODE_BG = RGBColor(0x31, 0x32, 0x44)  # code block background
CODE_CLR = RGBColor(0xa6, 0xe3, 0xa1)  # code text

# ── Slide geometry and typography ────────────────────────────────────────────
W = Inches(13.33)  # widescreen 16:9
H = Inches(7.5)
FONT = "Roboto"  # typeface used by txb() and bullet_box(); code_box() uses its own

# ── Presentation object shared by every helper and slide section below ───────
prs = Presentation()
prs.slide_width = W
prs.slide_height = H
blank_layout = prs.slide_layouts[6]  # completely blank

# ── Branding / event details ─────────────────────────────────────────────────
# Absolute path of the logo image; read by add_slide() and by the title slide.
LOGO_PATH = "/home/dierk/Bilder/Logo/Logo DLC Final.png"
# Event details shown on the title slide and in the footer drawn by add_slide().
CONFERENCE = "Quest Data Minds Konferenz"
EVENT_DATE = "28. Mai 2026"
EVENT_CITY = "Köln"

# Running number of slides that carry a footer; add_slide() increments it.
_slide_num = [0]  # mutable counter so nested calls can increment it
def add_slide(logo=True, footer=True):
    """Append a blank slide with the dark background to ``prs`` and return it.

    Args:
        logo: When true, place the image at ``LOGO_PATH`` in the bottom-right
            corner of the slide.
        footer: When true, advance the running slide counter and draw the
            footer: a thin separator line, the conference/city/date text on
            the left and the slide number on the right.

    Returns:
        The newly created slide.
    """
    new_slide = prs.slides.add_slide(blank_layout)

    # Paint the slide with the theme's solid background colour.
    background_fill = new_slide.background.fill
    background_fill.solid()
    background_fill.fore_color.rgb = BG

    if logo:
        new_slide.shapes.add_picture(
            LOGO_PATH, Inches(11.6), Inches(7.0), Inches(1.6), Inches(0.42)
        )

    if not footer:
        return new_slide

    # Only slides with a footer are counted, so numbering skips the others.
    _slide_num[0] += 1

    # Thin separator line above the footer text.
    rule = new_slide.shapes.add_shape(
        1, Inches(0.3), Inches(6.95), Inches(11.1), Pt(1)
    )
    rule.fill.solid()
    rule.fill.fore_color.rgb = DIM_CLR
    rule.line.fill.background()

    footer_top = Inches(7.02)
    footer_height = Inches(0.35)

    # Left: conference info.
    txb(new_slide, f"{CONFERENCE} · {EVENT_CITY}, {EVENT_DATE}",
        Inches(0.3), footer_top, Inches(9.5), footer_height,
        size=11, color=DIM_CLR)

    # Right: slide number, right-aligned so it ends just before the logo.
    txb(new_slide, str(_slide_num[0]),
        Inches(10.9), footer_top, Inches(0.6), footer_height,
        size=11, color=DIM_CLR, align=PP_ALIGN.RIGHT)

    return new_slide
def txb(slide, text, x, y, w, h,
        size=24, bold=False, color=BODY_CLR,
        align=PP_ALIGN.LEFT, italic=False):
    """Add a word-wrapping text box holding a single formatted run.

    Args:
        slide: Slide that receives the text box.
        text: Text assigned to the run.
        x, y, w, h: Position and size of the box.
        size: Font size in points.
        bold: Whether the run is bold.
        color: Font colour (``RGBColor``).
        align: Paragraph alignment (``PP_ALIGN`` member).
        italic: Whether the run is italic.

    Returns:
        The created text-box shape.
    """
    textbox = slide.shapes.add_textbox(x, y, w, h)

    frame = textbox.text_frame
    frame.word_wrap = True

    # A new text frame already holds one empty paragraph; reuse it.
    paragraph = frame.paragraphs[0]
    paragraph.alignment = align

    run = paragraph.add_run()
    run.text = text

    font = run.font
    font.size = Pt(size)
    font.bold = bold
    font.italic = italic
    font.color.rgb = color
    font.name = FONT

    return textbox
def title_slide_layout(slide, title, subtitle=None):
    """Place a large centred title and, if given, a dimmed subtitle below it.

    Args:
        slide: Slide to draw on.
        title: Title text (48 pt, bold, title colour).
        subtitle: Optional subtitle text (24 pt, dimmed); skipped when falsy.
    """
    left = Inches(1)
    width = Inches(11.33)

    txb(slide, title, left, Inches(2.8), width, Inches(1.2),
        size=48, bold=True, color=TITLE_CLR, align=PP_ALIGN.CENTER)

    if not subtitle:
        return

    txb(slide, subtitle, left, Inches(4.1), width, Inches(0.8),
        size=24, color=DIM_CLR, align=PP_ALIGN.CENTER)
def section_header(slide, title, accent=ACCENT_PG):
    """Draw a thin full-width accent bar along the top edge, then the title.

    Args:
        slide: Slide to draw on.
        title: Heading text (32 pt, bold, title colour).
        accent: Fill colour of the bar.
    """
    # Shape id 1 is the plain rectangle auto shape (MSO_SHAPE.RECTANGLE).
    accent_bar = slide.shapes.add_shape(1, Inches(0), Inches(0), W, Inches(0.12))

    bar_fill = accent_bar.fill
    bar_fill.solid()
    bar_fill.fore_color.rgb = accent

    # No visible outline around the bar.
    accent_bar.line.fill.background()

    txb(slide, title,
        Inches(0.5), Inches(0.2), Inches(12.33), Inches(0.8),
        size=32, bold=True, color=TITLE_CLR)
def bullet_box(slide, items, x, y, w, h, size=20, color=BODY_CLR, indent=False):
    """Add a word-wrapping text box with one paragraph per entry of ``items``.

    Args:
        slide: Slide that receives the text box.
        items: Strings to show, one paragraph each (an empty string yields an
            empty line).
        x, y, w, h: Position and size of the box.
        size: Font size in points for every line.
        color: Font colour for every line.
        indent: When true, each entry is prefixed with a single space.
    """
    frame = slide.shapes.add_textbox(x, y, w, h).text_frame
    frame.word_wrap = True

    prefix = " " if indent else ""

    for position, entry in enumerate(items):
        # The frame starts out with one empty paragraph; use it for the first
        # entry and append a new paragraph for each further one.
        paragraph = frame.paragraphs[0] if position == 0 else frame.add_paragraph()
        paragraph.space_before = Pt(4)

        run = paragraph.add_run()
        run.text = prefix + entry

        font = run.font
        font.size = Pt(size)
        font.color.rgb = color
        font.name = FONT
def code_box(slide, code, x, y, w, h, size=13):
    """Render ``code`` as a monospaced listing on a filled, outlined rectangle.

    Two shapes are added: a background rectangle without text, and on top of
    it a margin-free text box holding one paragraph per line of ``code``.

    Args:
        slide: Slide to draw on.
        code: Source text; surrounding whitespace is stripped and the rest is
            split on ``"\\n"``.
        x, y, w, h: Position and size of the background rectangle.
        size: Font size in points.
    """
    shapes = slide.shapes

    # Background panel (kept free of text).
    panel = shapes.add_shape(1, x, y, w, h)
    panel.fill.solid()
    panel.fill.fore_color.rgb = CODE_BG
    panel.line.color.rgb = RGBColor(0x58, 0x5b, 0x70)
    panel.text_frame.text = ""

    # The listing sits in a separate text box, inset from the panel on all
    # sides — text boxes have predictable left-aligned defaults.
    inset = Pt(7)
    listing = shapes.add_textbox(x + inset, y + inset, w - inset * 2, h - inset * 2)
    frame = listing.text_frame
    frame.word_wrap = False
    for side in ("left", "right", "top", "bottom"):
        setattr(frame, f"margin_{side}", Pt(0))

    # Qualified names of every bullet-related child a paragraph may carry.
    bullet_tags = tuple(
        qn(name)
        for name in ("a:buClr", "a:buClrTx", "a:buFont", "a:buFontTx",
                     "a:buChar", "a:buAutoNum", "a:buNone")
    )

    for index, source_line in enumerate(code.strip().split("\n")):
        para = frame.paragraphs[0] if index == 0 else frame.add_paragraph()
        para.alignment = PP_ALIGN.LEFT
        para.space_before = Pt(0)
        para.space_after = Pt(0)

        # Zero the left margin and hanging indent, drop any existing bullet
        # settings and state explicitly that the paragraph has no bullet.
        props = para._p.get_or_add_pPr()
        props.set("marL", "0")
        props.set("indent", "0")
        for qualified in bullet_tags:
            for stale in props.findall(qualified):
                props.remove(stale)
        props.append(OxmlElement("a:buNone"))

        run = para.add_run()
        run.text = source_line
        font = run.font
        font.size = Pt(size)
        font.color.rgb = CODE_CLR
        font.name = "Courier New"
def divider(slide, y, color=DIM_CLR):
    """Draw a 1 pt high horizontal rule across the content width at ``y``.

    Args:
        slide: Slide to draw on.
        y: Vertical position of the rule.
        color: Fill colour of the rule.
    """
    rule = slide.shapes.add_shape(1, Inches(0.5), y, Inches(12.33), Pt(1))

    rule_fill = rule.fill
    rule_fill.solid()
    rule_fill.fore_color.rgb = color

    # The rule is drawn by its fill alone; hide the outline.
    rule.line.fill.background()
# ════════════════════════════════════════════════════════════════════════════
# Slide 1 — Titelfolie
# ════════════════════════════════════════════════════════════════════════════
# Title slide: no corner logo and no footer, everything is placed by hand.
s = add_slide(logo=False, footer=False)
title_slide_layout(
    s,
    title="Vektoren in der Datenbank",
    subtitle="Semantische Bildsuche mit PostgreSQL/pgvector und Oracle 26ai",
)

# Conference name, then date and city, both centred below the subtitle.
for line_text, top, height, font_kwargs in (
    (CONFERENCE, 5.0, 0.5, {"size": 20, "bold": True, "color": ACCENT_PG}),
    (f"{EVENT_DATE} · {EVENT_CITY}", 5.5, 0.45, {"size": 18, "color": DIM_CLR}),
):
    txb(s, line_text,
        Inches(1), Inches(top), Inches(11.33), Inches(height),
        align=PP_ALIGN.CENTER, **font_kwargs)

# Larger, horizontally centred logo at the bottom.
s.shapes.add_picture(
    LOGO_PATH, Inches(4.67), Inches(6.1), Inches(4.0), Inches(1.06)
)
# ════════════════════════════════════════════════════════════════════════════
# Slide 2 — Agenda
# ════════════════════════════════════════════════════════════════════════════
s = add_slide()
section_header(s, "Agenda", ACCENT_PG)

# Topics in talk order; the two-digit numbering is generated below.
agenda_topics = (
    "Was ist ein Vektor?",
    "Semantische Suche — jenseits von Schlüsselwörtern",
    "Das CLIP-Modell",
    "Ähnlichkeit messen: Cosinus-Distanz",
    "PostgreSQL + pgvector",
    "Oracle 26ai — nativer Vektor-Support",
    "Oracle 26ai — Embedding in der Datenbank",
    "Architektur der Demo",
    "Demo",
    "Vergleich & Fazit",
)
bullet_box(
    s,
    [f"{number:02d} {topic}" for number, topic in enumerate(agenda_topics, start=1)],
    Inches(1.5), Inches(1.3), Inches(10), Inches(5.5),
    size=20,
)
# ════════════════════════════════════════════════════════════════════════════
# Slide 3 — Was ist ein Vektor?
# ════════════════════════════════════════════════════════════════════════════
s = add_slide()
section_header(s, "Was ist ein Vektor?", ACCENT_PG)

# Left column: key statements, each shown with a "▸" marker.
vector_points = (
    "Ein Vektor ist eine geordnete Liste von Zahlen: [0.12, -0.87, 0.44, …]",
    "Jede Zahl beschreibt eine Dimension im semantischen Raum",
    "Moderne KI-Modelle erzeugen Vektoren mit 512 bis 1536 Dimensionen",
    "Ähnliche Inhalte → ähnliche Vektoren → kleiner Abstand im Raum",
    "Texte, Bilder, Audio — alles lässt sich in denselben Vektorraum einbetten",
)
bullet_box(
    s,
    [f"▸ {point}" for point in vector_points],
    Inches(0.8), Inches(1.3), Inches(7.5), Inches(4),
    size=20,
)

# Right column: toy example with two similar vectors and one distant one.
vector_example = "\n".join((
    "# 4-dimensionaler Beispielvektor",
    "vec_hund = [0.91, 0.12, -0.44, 0.72]",
    "vec_katze = [0.87, 0.18, -0.39, 0.68]",
    "# ähnlich! Abstand ≈ 0.04",
    "vec_auto = [-0.3, -0.82, 0.91, -0.11]",
    "# weit entfernt",
))
code_box(s, vector_example,
         Inches(8.8), Inches(1.5), Inches(4.3), Inches(2.6), size=12)

# Take-away line at the bottom.
txb(s, "Vektoren machen Ähnlichkeit berechenbar.",
    Inches(0.8), Inches(5.8), Inches(11), Inches(0.7),
    size=22, bold=True, color=ACCENT_GRN)
# ════════════════════════════════════════════════════════════════════════════
# Slide 4 — Semantische Suche
# ════════════════════════════════════════════════════════════════════════════
s = add_slide()
section_header(s, "Semantische Suche — jenseits von Schlüsselwörtern", ACCENT_PG)

# Upper half: keyword search contrasted with semantic search.
search_contrast = [
    'Klassische Suche: "trees" findet nur Dokumente mit dem Wort "trees"',
    '',
    'Semantische Suche: "trees" findet Bilder von Wäldern, Parks, Natur —',
    ' ohne dass das Wort irgendwo steht',
]
bullet_box(s, search_contrast,
           Inches(0.8), Inches(1.3), Inches(11.5), Inches(2.2), size=20)

divider(s, Inches(3.7))

# Lower half: how the semantic search works, each line with a "▸" marker.
search_steps = (
    "Text-Anfrage wird in denselben Vektorraum eingebettet wie die Bilder",
    "Datenbankabfrage: finde die k nächsten Nachbarn (k-NN)",
    "Ergebnis: Bilder nach semantischer Ähnlichkeit gerankt",
    "Kein manuelles Tagging, keine Metadaten nötig",
)
bullet_box(s, [f"▸ {step}" for step in search_steps],
           Inches(0.8), Inches(3.9), Inches(11.5), Inches(2.8), size=20)
# ════════════════════════════════════════════════════════════════════════════
# Slide 5 — CLIP-Modell
# ════════════════════════════════════════════════════════════════════════════
s = add_slide()
section_header(s, "Das CLIP-Modell (OpenAI)", ACCENT_IDB)

# Left column: what CLIP is and which variant the demo uses.
bullet_box(s, [
    # The expansion of the acronym needs the hyphen between "Language" and
    # "Image"; without it the two words run together on the slide.
    "CLIP = Contrastive Language-Image Pretraining",
    "▸ Trainiert auf hunderten Millionen Bild-Text-Paaren",
    "▸ Bildet sowohl Bilder als auch Text in denselben 512-dimensionalen Raum ab",
    "▸ Modell: clip-ViT-B-32 (Vision Transformer, Patch-Größe 32×32)",
    "▸ Quell-Gewichte: Hugging Face Hub (sentence-transformers/clip-ViT-B-32)",
], Inches(0.8), Inches(1.3), Inches(7.5), Inches(3.2), size=20)

# Right column: encoding an image and a text with the same model.
code_box(s,
         'from sentence_transformers import (\n SentenceTransformer)\n\nmodel = SentenceTransformer(\n "clip-ViT-B-32")\n\n# Bild einbetten\nvec = model.encode(image)\n# → 512 floats\n\n# Text einbetten\nvec = model.encode("Bäume")\n# → 512 floats, gleicher Raum!',
         Inches(8.8), Inches(1.3), Inches(4.3), Inches(3.8), size=11)

# Closing remark below the bullets.
txb(s, "Bild-Vektor und Text-Vektor zeigen in dieselbe Richtung,\nwenn Bild und Text inhaltlich übereinstimmen.",
    Inches(0.8), Inches(5.0), Inches(11.5), Inches(1.0),
    size=18, italic=True, color=ACCENT_IDB)
# ════════════════════════════════════════════════════════════════════════════
# Slide 6 — Cosinus-Distanz
# ════════════════════════════════════════════════════════════════════════════
s = add_slide()
section_header(s, "Ähnlichkeit messen: Cosinus-Distanz", ACCENT_PG)

bullet_box(s, [
    "▸ CLIP-Vektoren haben unterschiedliche Beträge — daher kein euklidischer Abstand",
    "▸ Cosinus-Distanz misst nur den Winkel zwischen zwei Vektoren",
    "▸ Cosinus-Distanz = 0 → identisch",
    "▸ Cosinus-Distanz = 1 → völlig unähnlich",
    # The similarity score is one MINUS the distance, exactly as in the SQL
    # expressions shown in the code box below; the operator must be spelled out.
    "▸ Ähnlichkeitswert = 1 - Distanz → 1.0 = perfekte Übereinstimmung",
], Inches(0.8), Inches(1.3), Inches(8.5), Inches(3.5), size=20)

# The score expression in both SQL dialects.
code_box(s,
         "-- PostgreSQL\n1 - (embedding <=> query_vec)\n\n-- Oracle 26ai\n1 - VECTOR_DISTANCE(embedding, query_vec, COSINE)",
         Inches(0.8), Inches(5.0), Inches(6.0), Inches(1.9), size=13)

# How to read the percentages shown in the demo UI.
txb(s, "In der Demo:\nScore 28 % = schwache Übereinstimmung\nScore 75 % = starke Übereinstimmung",
    Inches(7.5), Inches(5.0), Inches(5.0), Inches(2.0),
    size=18, color=ACCENT_GRN)
# ════════════════════════════════════════════════════════════════════════════
# Slide 7 — PostgreSQL + pgvector: Voraussetzungen
# ════════════════════════════════════════════════════════════════════════════
s = add_slide()
section_header(s, "PostgreSQL + pgvector", ACCENT_PG)

# Upper half: prerequisites.
txb(s, "Was wird benötigt?", Inches(0.8), Inches(1.3), Inches(11), Inches(0.5),
    size=22, bold=True, color=ACCENT_PG)
bullet_box(s, [
    "▸ PostgreSQL (ab Version 13)",
    "▸ pgvector-Extension — docker image: pgvector/pgvector:pg18",
    "▸ Extension aktivieren: CREATE EXTENSION vector;",
    "▸ Python-Paket: psycopg2-binary",
    "▸ KI-Bibliothek: sentence-transformers (auf dem Anwendungsserver)",
], Inches(0.8), Inches(1.9), Inches(11.5), Inches(2.5), size=20)

# The rule has to sit between the bullet box (ends at 4.4 in) and the heading
# box (starts at 4.5 in); placed any lower it runs through the heading text.
divider(s, Inches(4.45))

# Lower half: table definition and index, with a short note on HNSW.
txb(s, "Schema & Index", Inches(0.8), Inches(4.5), Inches(11), Inches(0.5),
    size=22, bold=True, color=ACCENT_PG)
code_box(s,
         "CREATE TABLE images (\n id SERIAL PRIMARY KEY,\n filename TEXT NOT NULL UNIQUE,\n embedding vector(512) -- pgvector-Typ\n);\n\nCREATE INDEX ON images USING hnsw (embedding vector_cosine_ops);",
         Inches(0.8), Inches(5.0), Inches(7.5), Inches(1.85), size=13)
bullet_box(s, [
    "HNSW = Hierarchical Navigable Small World",
    "Approximativer k-NN Index",
    "Sehr schnell bei der Suche",
], Inches(8.8), Inches(5.0), Inches(4.3), Inches(1.85), size=18, color=DIM_CLR)
# ════════════════════════════════════════════════════════════════════════════
# Slide 8 — PostgreSQL: Suchanfrage
# ════════════════════════════════════════════════════════════════════════════
s = add_slide()
section_header(s, "PostgreSQL: Suchanfrage", ACCENT_PG)

# The three steps of a search request.
bullet_box(s, [
    "1. Text-Anfrage mit CLIP in Python in einen Vektor umwandeln",
    "2. Vektor an die SQL-Abfrage übergeben",
    "3. PostgreSQL findet die ähnlichsten Bilder via HNSW-Index",
], Inches(0.8), Inches(1.3), Inches(11.5), Inches(1.5), size=20)

# Python call plus the SQL statement with its two %s placeholders.
code_box(s,
         "# Python\nvec = model.encode(\"Bäume\") # → 512 floats\n\n# SQL\nSELECT filename,\n 1 - (embedding <=> %s::vector) AS score\nFROM images\nORDER BY embedding <=> %s::vector\nLIMIT 12;",
         Inches(0.8), Inches(3.0), Inches(7.5), Inches(3.5), size=16)

# Side notes explaining the syntax shown in the code box. The placeholder is
# quoted exactly as it appears there (%s), so the note and the listing agree.
bullet_box(s, [
    "<=> Cosinus-Distanz-Operator",
    "(pgvector-spezifisch)",
    "",
    "%s::vector expliziter Cast",
    "erforderlich",
    "",
    "LIMIT statt FETCH FIRST",
], Inches(9.0), Inches(3.0), Inches(4.0), Inches(3.5), size=18, color=DIM_CLR)
# ════════════════════════════════════════════════════════════════════════════
# Slide 9 — Oracle 26ai: Nativer Support
# ════════════════════════════════════════════════════════════════════════════
s = add_slide()
section_header(s, "Oracle 26ai — nativer Vektor-Support", ACCENT_ORA)

# Upper half: prerequisites, each shown with a "▸" marker.
txb(s, "Was wird benötigt?",
    Inches(0.8), Inches(1.3), Inches(11), Inches(0.5),
    size=22, bold=True, color=ACCENT_ORA)
oracle_requirements = (
    "Oracle AI Database 26ai Free (oder Enterprise)",
    "Keine Extension nötig — Vektoren sind eingebaut",
    "Vector Memory Area im SGA konfigurieren (für HNSW-Index)",
    "Python-Paket: oracledb (Thin Mode — kein Oracle Client nötig)",
    "KI-Bibliothek: sentence-transformers (auf dem Anwendungsserver)",
)
bullet_box(s, [f"▸ {requirement}" for requirement in oracle_requirements],
           Inches(0.8), Inches(1.9), Inches(11.5), Inches(2.2), size=20)

divider(s, Inches(4.2))

# Lower half: DDL on the left, a short note on the index on the right.
txb(s, "Schema & Index",
    Inches(0.8), Inches(4.3), Inches(11), Inches(0.45),
    size=20, bold=True, color=ACCENT_ORA)
oracle_ddl = "CREATE TABLE images (\n id NUMBER GENERATED ALWAYS AS IDENTITY PRIMARY KEY,\n filename VARCHAR2(255) NOT NULL UNIQUE,\n embedding VECTOR(512, FLOAT32) -- Typ + Dimension\n);\nCREATE VECTOR INDEX images_idx ON images(embedding)\n ORGANIZATION INMEMORY NEIGHBOR GRAPH\n WITH DISTANCE COSINE WITH TARGET ACCURACY 95;"
code_box(s, oracle_ddl,
         Inches(0.8), Inches(4.8), Inches(8.5), Inches(2.0), size=11)
oracle_index_notes = [
    "HNSW im SGA",
    "(Vector Memory Area)",
    "512 MB konfiguriert",
]
bullet_box(s, oracle_index_notes,
           Inches(9.8), Inches(4.8), Inches(3.3), Inches(2.0),
           size=17, color=DIM_CLR)
# ════════════════════════════════════════════════════════════════════════════
# Slide 10 — Oracle: Unterschiede zu pgvector
# ════════════════════════════════════════════════════════════════════════════
s = add_slide()
section_header(s, "Oracle vs. pgvector — Schema-Unterschiede", ACCENT_ORA)

# One tuple per table row: (aspect, PostgreSQL + pgvector, Oracle 26ai).
rows = [
    ("Extension", "CREATE EXTENSION vector", "Eingebaut, keine Extension"),
    ("Vektor-Spalte", "vector(512) — nur Dimension", "VECTOR(512, FLOAT32) — Dim + Typ"),
    ("Primary Key", "SERIAL", "NUMBER GENERATED ALWAYS AS IDENTITY"),
    ("Text-Spalte", "TEXT (unbegrenzt)", "VARCHAR2(n) — Länge erforderlich"),
    ("HNSW-Syntax", "USING hnsw (...ops)", "ORGANIZATION INMEMORY NEIGHBOR GRAPH"),
    ("Genauigkeit", "Implizit via Index-Parameter", "WITH TARGET ACCURACY 95 (explizit)"),
    ("Speicher", "Kein Sonder-Speicher nötig", "vector_memory_size im SGA"),
    ("Abstand-Op", "<=> (Operator)", "VECTOR_DISTANCE(col, vec, COSINE)"),
    ("Top-N", "LIMIT n", "FETCH FIRST n ROWS ONLY"),
]

# The table is drawn by hand: a filled band per row with three text boxes on
# top. Left edge and width of the three columns:
column_left = (Inches(0.4), Inches(2.7), Inches(7.6))
column_width = (Inches(2.2), Inches(4.8), Inches(5.4))

# Column header row.
y = Inches(1.3)
header_band = s.shapes.add_shape(1, Inches(0.3), y, Inches(12.7), Inches(0.55))
header_band.fill.solid()
header_band.fill.fore_color.rgb = RGBColor(0x18, 0x18, 0x28)
header_band.line.fill.background()
for caption, left, width, tint in zip(
    ("Aspekt", "PostgreSQL + pgvector", "Oracle 26ai"),
    column_left,
    column_width,
    (BODY_CLR, ACCENT_PG, ACCENT_ORA),
):
    txb(s, caption, left, y + Pt(6), width, Inches(0.5),
        size=14, bold=True, color=tint)
y += Inches(0.56)

# Data rows: band colour alternates; only the aspect column is bold.
stripes = (RGBColor(0x28, 0x29, 0x3d), RGBColor(0x24, 0x25, 0x38))
cell_styles = ((True, DIM_CLR), (False, ACCENT_PG), (False, ACCENT_ORA))
for i, cells in enumerate(rows):
    band = s.shapes.add_shape(1, Inches(0.3), y, Inches(12.7), Inches(0.52))
    band.fill.solid()
    band.fill.fore_color.rgb = stripes[i % 2]
    band.line.fill.background()
    for cell, left, width, (is_bold, tint) in zip(
        cells, column_left, column_width, cell_styles
    ):
        txb(s, cell, left, y + Pt(5), width, Inches(0.48),
            size=13, bold=is_bold, color=tint)
    y += Inches(0.53)
# ════════════════════════════════════════════════════════════════════════════
# Slide 11 — Oracle In-Database Embedding
# ════════════════════════════════════════════════════════════════════════════
s = add_slide()
section_header(s, "Oracle 26ai — Embedding in der Datenbank", ACCENT_IDB)

# Top: what in-database embedding means, each line with a "▸" marker.
in_db_points = (
    "Oracle kann ONNX-Modelle direkt in die Datenbank laden",
    "VECTOR_EMBEDDING() ruft das Modell innerhalb einer SQL-Abfrage auf",
    "Kein Python, keine KI-Bibliothek auf dem Anwendungsserver zur Laufzeit",
    "Der Text-String ist der einzige Parameter aus Python",
)
bullet_box(s, [f"▸ {point}" for point in in_db_points],
           Inches(0.8), Inches(1.3), Inches(11.5), Inches(2.2), size=20)

# Bottom left: the complete search as one SQL statement.
in_db_query = "-- Gesamte Logik in einem SQL-Statement\nSELECT filename,\n 1 - VECTOR_DISTANCE(\n foto_vek,\n VECTOR_EMBEDDING(CLIP_TXT USING :q AS data),\n COSINE\n ) AS score\nFROM VECTOR.FOTO_VEKTOR\nORDER BY VECTOR_DISTANCE(\n foto_vek,\n VECTOR_EMBEDDING(CLIP_TXT USING :q AS data), COSINE)\nFETCH FIRST 12 ROWS ONLY;"
code_box(s, in_db_query,
         Inches(0.8), Inches(3.6), Inches(7.5), Inches(3.3), size=13)

# Bottom right: notes on the statement.
in_db_notes = [
    ":q = reiner Text aus Python",
    "",
    "Oracle übernimmt:",
    " • Tokenisierung",
    " • ONNX-Inferenz",
    " • Vektorsuche",
    "",
    "→ Architektur vereinfacht sich",
]
bullet_box(s, in_db_notes,
           Inches(9.0), Inches(3.6), Inches(4.0), Inches(3.4),
           size=18, color=DIM_CLR)
# ════════════════════════════════════════════════════════════════════════════
# Slide 12 — ONNX in Oracle: Besonderheit
# ════════════════════════════════════════════════════════════════════════════
s = add_slide()
section_header(s, "ONNX in Oracle: Was zu beachten ist", ACCENT_IDB)

# Constraint of the ONNX validator and the workaround used for the demo.
onnx_notes = [
    "Oracle's ONNX-Validator stellt strenge Anforderungen an das Modell-Graph:",
    "",
    "▸ input_ids darf nur in einem einzigen Gather-Knoten verwendet werden",
    "▸ Standard-CLIP-Export verwendet input_ids auch in ArgMax → wird abgelehnt",
    "",
    "Lösung: CLIP_TXT mit CLS-Token-Pooling (Position 0) statt EOS-Token-Pooling",
    "▸ Einfacherer ONNX-Graph, den Oracle akzeptiert",
    "▸ Cosinus-Ähnlichkeit zwischen EOS- und CLS-Variante: ~0,70",
    "▸ Modell muss beim Export entsprechend angepasst werden",
]
bullet_box(s, onnx_notes,
           Inches(0.8), Inches(1.3), Inches(11.5), Inches(3.8), size=19)

# Statement that loads the exported model into the database.
load_model_stmt = "-- Modell laden (einmalig durch Administrator)\nEXEC DBMS_VECTOR.LOAD_ONNX_MODEL(\n 'VEC_DUMP', 'clip_txt.onnx', 'CLIP_TXT',\n JSON('{\"function\":\"embedding\",\"embeddingOutput\":\"output\",\n \"input\":{\"input\":[\"DATA\"]}}'));"
code_box(s, load_model_stmt,
         Inches(0.8), Inches(5.2), Inches(11.5), Inches(1.6), size=13)
# ════════════════════════════════════════════════════════════════════════════
# Slide 13 — Architektur der Demo
# ════════════════════════════════════════════════════════════════════════════
s = add_slide()
section_header(s, "Architektur der Demo", ACCENT_GRN)

# Three columns, one per demo variant:
# (heading, port caption, accent colour, stack listed from top to bottom).
# Keeping the stack next to its heading avoids rebuilding a lookup table on
# every iteration and keying it by the display text.
variants = [
    ("pgvector", "Port 8000", ACCENT_PG,
     ["Browser /ui/", "FastAPI", "CLIP (Python)", "PostgreSQL 18", "pgvector 0.8.2"]),
    ("Oracle 26ai\n(Python)", "Port 8001", ACCENT_ORA,
     ["Browser /ui/", "FastAPI", "CLIP (Python)", "Oracle 26ai", "HNSW (SGA)"]),
    ("Oracle 26ai\n(In-DB)", "Port 8002", ACCENT_IDB,
     ["Browser /ui/", "FastAPI", "(kein CLIP)", "Oracle 26ai", "VECTOR_EMBEDDING()"]),
]
for i, (label, port, color, items) in enumerate(variants):
    x = Inches(0.5 + i * 4.27)

    # Outlined panel in the variant's accent colour.
    box = s.shapes.add_shape(1, x, Inches(1.3), Inches(3.8), Inches(4.8))
    box.fill.solid()
    box.fill.fore_color.rgb = RGBColor(0x28, 0x29, 0x3d)
    box.line.color.rgb = color

    # Heading and port caption, centred inside the panel.
    txb(s, label, x + Inches(0.1), Inches(1.4), Inches(3.6), Inches(0.8),
        size=22, bold=True, color=color, align=PP_ALIGN.CENTER)
    txb(s, port, x + Inches(0.1), Inches(2.1), Inches(3.6), Inches(0.4),
        size=16, color=DIM_CLR, align=PP_ALIGN.CENTER)

    # Stack components, one text box per line.
    for j, item in enumerate(items):
        txb(s, item, x + Inches(0.2), Inches(2.65 + j * 0.52), Inches(3.5), Inches(0.48),
            size=16, color=BODY_CLR)

# Caption below the three panels.
txb(s, "116 Street Fotos · CLIP ViT-B/32 · 512-dimensionale Vektoren",
    Inches(0.5), Inches(6.6), Inches(12.33), Inches(0.3),
    size=16, color=DIM_CLR, align=PP_ALIGN.CENTER)
# ════════════════════════════════════════════════════════════════════════════
# Slide 14 — Demo-Hinweis
# ════════════════════════════════════════════════════════════════════════════
s = add_slide()
section_header(s, "Demo", ACCENT_GRN)

# One line per demo UI: the URL in the variant's accent colour on the left,
# a dimmed description next to it.
demo_targets = (
    (8000, "pgvector (blau)", ACCENT_PG, Inches(2.2)),
    (8001, "Oracle 26ai (rot)", ACCENT_ORA, Inches(3.5)),
    (8002, "Oracle In-DB (lila)", ACCENT_IDB, Inches(4.8)),
)
for port_number, description, accent, top in demo_targets:
    txb(s, f"http://localhost:{port_number}/ui/",
        Inches(1.5), top, Inches(6), Inches(0.5),
        size=22, bold=True, color=accent)
    txb(s, description,
        Inches(7.8), top + Inches(0.05), Inches(4.5), Inches(0.5),
        size=20, color=DIM_CLR)

# Suggested search terms at the bottom of the slide.
txb(s, "Suchbegriffe zum Ausprobieren:",
    Inches(1.5), Inches(5.9), Inches(10), Inches(0.5),
    size=18, color=BODY_CLR)
txb(s, "Bäume · Wasser · Menschen · Gebäude · Himmel · Nacht · Autos",
    Inches(1.5), Inches(6.3), Inches(10), Inches(0.6),
    size=20, bold=True, color=ACCENT_GRN)
# ════════════════════════════════════════════════════════════════════════════
# Slide 15 — Vergleich
# ════════════════════════════════════════════════════════════════════════════
s = add_slide()
section_header(s, "Vergleich", ACCENT_PG)

# First tuple is the header row; every row has one cell per column.
rows = [
    ("Merkmal", "PostgreSQL + pgvector", "Oracle 26ai (Python)", "Oracle 26ai (In-DB)"),
    ("Fotos indiziert", "116", "116", "116"),
    ("Indizierungszeit", "~26 Sek. (CPU)", "~16 Sek. (CPU)", "— (separat)"),
    ("Index-Typ", "HNSW (auf Disk)", "HNSW (im Speicher)", "Full Table Scan"),
    ("RAM-Bedarf", "Keiner", "512 MB SGA", "512 MB SGA"),
    ("CLIP zur Laufzeit", "Ja (Python)", "Ja (Python)", "Nein"),
    ("Embedding-Ort", "Python-Prozess", "Python-Prozess", "In der Datenbank"),
    ("VECTOR_EMBEDDING()", "", "", "Ja"),
    ("Extension nötig", "CREATE EXTENSION vector", "Nein", "Nein"),
]

# Column geometry in inches; identical for every row, so defined once.
widths = [2.5, 3.0, 3.1, 3.1]
xs = [0.4, 2.9, 6.0, 9.15]

y = Inches(1.3)
# enumerate() supplies the row position directly; looking it up with
# rows.index(row) rescans the list for every row and would return the wrong
# position for a row that occurs twice.
for i, row in enumerate(rows):
    header = i == 0

    # Dark band for the header, alternating shades for the data rows.
    if header:
        bg_color = RGBColor(0x18, 0x18, 0x28)
    elif i % 2 == 0:
        bg_color = RGBColor(0x28, 0x29, 0x3d)
    else:
        bg_color = RGBColor(0x24, 0x25, 0x38)

    row_bg = s.shapes.add_shape(1, Inches(0.3), y, Inches(12.7), Inches(0.52))
    row_bg.fill.solid()
    row_bg.fill.fore_color.rgb = bg_color
    row_bg.line.fill.background()

    # Only the first column's colour differs between header and data rows.
    colors = [DIM_CLR if header else BODY_CLR, ACCENT_PG, ACCENT_ORA, ACCENT_IDB]
    for cell, col, w, x in zip(row, colors, widths, xs):
        txb(s, cell, Inches(x), y + Pt(4), Inches(w), Inches(0.48),
            size=13, bold=header, color=col)

    y += Inches(0.53)
# ════════════════════════════════════════════════════════════════════════════
# Slide 16 — Fazit
# ════════════════════════════════════════════════════════════════════════════
s = add_slide()
section_header(s, "Fazit", ACCENT_GRN)

# Key conclusions, each shown with a "▸" marker.
conclusions = (
    "Beide Datenbanken unterstützen Vektorsuche produktionsreif",
    "pgvector: einfach, leichtgewichtig, kein zusätzlicher Speicher nötig",
    "Oracle 26ai: vollständig integriert, kein Extension-Management",
    "Oracle In-DB Embedding: Architektur ohne ML-Laufzeit im App-Server",
    "CLIP ermöglicht Bildersuche per Freitext — ohne Tagging oder Metadaten",
    "HNSW liefert schnelle approximative k-NN-Suche in beiden Datenbanken",
)
bullet_box(s, [f"▸ {conclusion}" for conclusion in conclusions],
           Inches(0.8), Inches(1.3), Inches(11.5), Inches(3.5), size=21)

divider(s, Inches(5.1))

# Pointer to the repository: heading, then the URL in the accent colour.
txb(s, "Quellcode & Dokumentation",
    Inches(0.8), Inches(5.2), Inches(11), Inches(0.5),
    size=20, bold=True, color=BODY_CLR)
txb(s, "https://gitea.dl-cons.de/dierk/vector-search-demo",
    Inches(0.8), Inches(5.7), Inches(11), Inches(0.5),
    size=20, color=ACCENT_PG)

# Small centred credit line at the very bottom.
txb(s, "Programmierung und Folien unterstützt durch Claude (Anthropic)",
    Inches(0.8), Inches(6.55), Inches(11.33), Inches(0.35),
    size=13, italic=True, color=DIM_CLR, align=PP_ALIGN.CENTER)
# ════════════════════════════════════════════════════════════════════════════
# Save
# ════════════════════════════════════════════════════════════════════════════
# Write the deck to the current working directory and report the slide count.
OUT = "Vektoren in der Datenbank.pptx"
prs.save(OUT)
# len() is the idiomatic spelling; calling the dunder method directly bypasses
# nothing and only obscures intent.
print(f"Saved: {OUT} ({len(prs.slides)} slides)")