diff --git a/.~lock.Vektoren in der Datenbank.pptx# b/.~lock.Vektoren in der Datenbank.pptx#
new file mode 100644
index 0000000..2fe7882
--- /dev/null
+++ b/.~lock.Vektoren in der Datenbank.pptx#
@@ -0,0 +1 @@
+,dierk,dlc-lnx-01,19.05.2026 13:51,file:///home/dierk/.config/libreoffice/4;
\ No newline at end of file
diff --git a/Vektoren in der Datenbank.pptx b/Vektoren in der Datenbank.pptx
new file mode 100644
index 0000000..f738af0
Binary files /dev/null and b/Vektoren in der Datenbank.pptx differ
diff --git a/make_presentation.py b/make_presentation.py
new file mode 100644
index 0000000..fb7cb7d
--- /dev/null
+++ b/make_presentation.py
@@ -0,0 +1,614 @@
+"""
+Generates "Vektoren in der Datenbank.pptx" — a LibreOffice-compatible presentation.
+Run from the project root: python3 make_presentation.py
+"""
+
+from pptx import Presentation
+from pptx.util import Inches, Pt, Emu
+from pptx.dml.color import RGBColor
+from pptx.enum.text import PP_ALIGN
+from pptx.oxml.ns import qn
+from pptx.oxml import parse_xml
+from lxml import etree
+
+_A_NS = "http://schemas.openxmlformats.org/drawingml/2006/main"
+
+def OxmlElement(tag):
+    """Create an empty DrawingML element from a prefixed tag such as "a:buNone".
+
+    Only the part after the colon is used; the element is always placed in the
+    DrawingML main namespace (_A_NS) and serialised with the "a" prefix.
+    """
+    local = tag.split(":")[1]
+    # Build the element directly instead of parsing a string: parsing an
+    # empty string raises, and no string escaping is needed this way.
+    return etree.Element(f"{{{_A_NS}}}{local}", nsmap={"a": _A_NS})
+
+import copy
+
+# ── Colour palette (dark theme) ──────────────────────────────────────────────
+BG = RGBColor(0x1e, 0x1e, 0x2e)          # slide background
+TITLE_CLR = RGBColor(0xcb, 0xd3, 0xff)   # slide titles
+BODY_CLR = RGBColor(0xcd, 0xd6, 0xf4)    # body text
+DIM_CLR = RGBColor(0x6c, 0x70, 0x86)     # dimmed / captions
+ACCENT_PG = RGBColor(0x89, 0xb4, 0xfa)   # pgvector blue
+ACCENT_ORA = RGBColor(0xf3, 0x8b, 0xa8)  # Oracle red/pink
+ACCENT_IDB = RGBColor(0xcb, 0xa6, 0xf7)  # in-DB purple
+ACCENT_GRN = RGBColor(0xa6, 0xe3, 0xa1)  # green for highlights
+CODE_BG = RGBColor(0x31, 0x32, 0x44)     # code block background
+CODE_CLR = RGBColor(0xa6, 0xe3, 0xa1)    # code text
+
+W = Inches(13.33)  # widescreen 16:9
+H = Inches(7.5)
+
+FONT = "Roboto"
+
+prs = Presentation()
+prs.slide_width = W
+prs.slide_height = H
+
+blank_layout = prs.slide_layouts[6]  # completely blank
+
+LOGO_PATH = "/home/dierk/Bilder/Logo/Logo DLC Final.png"
+CONFERENCE = "Quest Data Minds Konferenz"
+EVENT_DATE = "28. Mai 2026"
+EVENT_CITY = "Köln"
+
+_slide_num = [0]  # mutable counter so nested calls can increment it
+
+
+def add_slide(logo=True, footer=True):
+    """Append a blank slide with the dark background and return it.
+
+    logo:   place the logo picture (LOGO_PATH) in the bottom-right corner.
+    footer: draw the separator line, the conference text and the page number.
+            The page counter only advances for slides that have a footer, so
+            a footer-less slide does not consume a number.
+    """
+    slide = prs.slides.add_slide(blank_layout)
+    fill = slide.background.fill
+    fill.solid()
+    fill.fore_color.rgb = BG
+    if logo:
+        slide.shapes.add_picture(LOGO_PATH,
+                                 Inches(11.6), Inches(7.0), Inches(1.6), Inches(0.42))
+    if footer:
+        _slide_num[0] += 1
+        # thin separator line
+        sep = slide.shapes.add_shape(1, Inches(0.3), Inches(6.95), Inches(11.1), Pt(1))
+        sep.fill.solid()
+        sep.fill.fore_color.rgb = DIM_CLR
+        sep.line.fill.background()
+        # left: conference info
+        txb(slide, f"{CONFERENCE} · {EVENT_CITY}, {EVENT_DATE}",
+            Inches(0.3), Inches(7.02), Inches(9.5), Inches(0.35),
+            size=11, color=DIM_CLR)
+        # right: page number (before logo)
+        txb(slide, str(_slide_num[0]),
+            Inches(10.9), Inches(7.02), Inches(0.6), Inches(0.35),
+            size=11, color=DIM_CLR, align=PP_ALIGN.RIGHT)
+    return slide
+
+
+def txb(slide, text, x, y, w, h,
+        size=24, bold=False, color=BODY_CLR,
+        align=PP_ALIGN.LEFT, italic=False):
+    """Add a word-wrapped text box holding a single run and return the box.
+
+    x, y, w, h are the position and extent of the box; size is the font size
+    in points. The run always uses the presentation font (FONT).
+    """
+    box = slide.shapes.add_textbox(x, y, w, h)
+    tf = box.text_frame
+    tf.word_wrap = True
+    p = tf.paragraphs[0]
+    p.alignment = align
+    run = p.add_run()
+    run.text = text
+    run.font.size = Pt(size)
+    run.font.bold = bold
+    run.font.italic = italic
+    run.font.color.rgb = color
+    run.font.name = FONT
+    return box
+
+
+def title_slide_layout(slide, title, subtitle=None):
+    """Place a large centred title and, if given, a dimmed subtitle below it."""
+    txb(slide, title,
+        Inches(1), Inches(2.8), Inches(11.33), Inches(1.2),
+        size=48, bold=True, color=TITLE_CLR, align=PP_ALIGN.CENTER)
+    if subtitle:
+        txb(slide, subtitle,
+            Inches(1), Inches(4.1), Inches(11.33), Inches(0.8),
+            size=24, color=DIM_CLR, align=PP_ALIGN.CENTER)
+
+
+def section_header(slide, title, accent=ACCENT_PG):
+    """Full-width coloured bar at top, then title."""
+    bar = slide.shapes.add_shape(
+        1,  # MSO_SHAPE.RECTANGLE (auto-shape type id 1)
+        Inches(0), Inches(0), W, Inches(0.12)
+    )
+    bar.fill.solid()
+    bar.fill.fore_color.rgb = accent
+    bar.line.fill.background()
+
+    txb(slide, title,
+        Inches(0.5), Inches(0.2), Inches(12.33), Inches(0.8),
+        size=32, bold=True, color=TITLE_CLR)
+
+
+def bullet_box(slide, items, x, y, w, h, size=20, color=BODY_CLR, indent=False):
+    """Add a text box with one paragraph per entry of items and return the box.
+
+    items:  iterable of strings; an empty string yields an empty spacer line.
+    indent: prefix every entry with a leading space.
+    """
+    box = slide.shapes.add_textbox(x, y, w, h)
+    tf = box.text_frame
+    tf.word_wrap = True
+    prefix = " " if indent else ""
+    for idx, item in enumerate(items):
+        # A new text frame already contains one empty paragraph; reuse it.
+        p = tf.paragraphs[0] if idx == 0 else tf.add_paragraph()
+        p.space_before = Pt(4)
+        run = p.add_run()
+        run.text = prefix + item
+        run.font.size = Pt(size)
+        run.font.color.rgb = color
+        run.font.name = FONT
+    return box
+
+
+def code_box(slide, code, x, y, w, h, size=13):
+    """Draw a code listing: a filled rectangle with a monospaced text box on top.
+
+    code is stripped and split on newlines; every line becomes its own
+    left-aligned paragraph without bullet, margin or hanging indent.
+    """
+    # Background rectangle (no text)
+    bg = slide.shapes.add_shape(1, x, y, w, h)
+    bg.fill.solid()
+    bg.fill.fore_color.rgb = CODE_BG
+    bg.line.color.rgb = RGBColor(0x58, 0x5b, 0x70)
+    bg.text_frame.text = ""
+
+    # Text box on top — textboxes have predictable left-aligned defaults
+    pad = Pt(7)
+    tb = slide.shapes.add_textbox(x + pad, y + pad, w - pad * 2, h - pad * 2)
+    tf = tb.text_frame
+    tf.word_wrap = False
+    tf.margin_left = Pt(0)
+    tf.margin_right = Pt(0)
+    tf.margin_top = Pt(0)
+    tf.margin_bottom = Pt(0)
+
+    bullet_tags = ("a:buClr", "a:buClrTx", "a:buFont", "a:buFontTx",
+                   "a:buChar", "a:buAutoNum", "a:buNone")
+    for idx, line in enumerate(code.strip().split("\n")):
+        p = tf.paragraphs[0] if idx == 0 else tf.add_paragraph()
+        p.alignment = PP_ALIGN.LEFT
+        p.space_before = Pt(0)
+        p.space_after = Pt(0)
+        # Explicitly zero out left margin, hanging indent, and remove any bullet
+        pPr = p._p.get_or_add_pPr()
+        pPr.set("marL", "0")
+        pPr.set("indent", "0")
+        for tag in bullet_tags:
+            for el in pPr.findall(qn(tag)):
+                pPr.remove(el)
+        pPr.append(OxmlElement("a:buNone"))
+        run = p.add_run()
+        run.text = line
+        run.font.size = Pt(size)
+        run.font.color.rgb = CODE_CLR
+        run.font.name = "Courier New"
+
+
+def divider(slide, y, color=DIM_CLR):
+    """Draw a one-point-high horizontal rule across the content width at y."""
+    line = slide.shapes.add_shape(1, Inches(0.5), y, Inches(12.33), Pt(1))
+    line.fill.solid()
+    line.fill.fore_color.rgb = color
+    line.line.fill.background()
+
+
+# ════════════════════════════════════════════════════════════════════════════
+# Slide 1 — Title slide
+# ════════════════════════════════════════════════════════════════════════════
+s = add_slide(logo=False, footer=False)  # title slide: custom layout
+title_slide_layout(s,
+                   "Vektoren in der Datenbank",
+                   "Semantische Bildsuche mit PostgreSQL/pgvector und Oracle 26ai")
+# Conference details
+txb(s, CONFERENCE,
+    Inches(1), Inches(5.0), Inches(11.33), Inches(0.5),
+    size=20, bold=True, color=ACCENT_PG, align=PP_ALIGN.CENTER)
+txb(s, f"{EVENT_DATE} · {EVENT_CITY}",
+    Inches(1), Inches(5.5), Inches(11.33), Inches(0.45),
+    size=18, color=DIM_CLR, align=PP_ALIGN.CENTER)
+# Larger centred logo
+s.shapes.add_picture(LOGO_PATH, Inches(4.67), Inches(6.1), Inches(4.0), Inches(1.06))
+
+# ════════════════════════════════════════════════════════════════════════════
+# Slide 2 — Agenda
+# ════════════════════════════════════════════════════════════════════════════
+s = add_slide()
+section_header(s, "Agenda", ACCENT_PG)
+bullet_box(s, [
+    "01 Was ist ein Vektor?",
+    "02 Semantische Suche — jenseits von Schlüsselwörtern",
+    "03 Das CLIP-Modell",
+    "04 Ähnlichkeit messen: Cosinus-Distanz",
+    "05 PostgreSQL + pgvector",
+    "06 Oracle 26ai — nativer Vektor-Support",
+    "07 Oracle 26ai — Embedding in der Datenbank",
+    "08 Architektur der Demo",
+    "09 Demo",
+    "10 Vergleich & Fazit",
+], Inches(1.5), Inches(1.3), Inches(10), Inches(5.5), size=20)
+
+# ════════════════════════════════════════════════════════════════════════════
+# Slide 3 — Was ist ein Vektor?
+# ════════════════════════════════════════════════════════════════════════════
+s = add_slide()
+section_header(s, "Was ist ein Vektor?", ACCENT_PG)
+bullet_box(s, [
+    "▸ Ein Vektor ist eine geordnete Liste von Zahlen: [0.12, -0.87, 0.44, …]",
+    "▸ Jede Zahl beschreibt eine Dimension im semantischen Raum",
+    "▸ Moderne KI-Modelle erzeugen Vektoren mit 512 bis 1536 Dimensionen",
+    "▸ Ähnliche Inhalte → ähnliche Vektoren → kleiner Abstand im Raum",
+    "▸ Texte, Bilder, Audio — alles lässt sich in denselben Vektorraum einbetten",
+], Inches(0.8), Inches(1.3), Inches(7.5), Inches(4), size=20)
+
+code_box(s, '# 4-dimensionaler Beispielvektor\nvec_hund = [0.91, 0.12, -0.44, 0.72]\nvec_katze = [0.87, 0.18, -0.39, 0.68]\n# ähnlich! Abstand ≈ 0.04\nvec_auto = [-0.3, -0.82, 0.91, -0.11]\n# weit entfernt',
+         Inches(8.8), Inches(1.5), Inches(4.3), Inches(2.6), size=12)
+
+txb(s, "Vektoren machen Ähnlichkeit berechenbar.",
+    Inches(0.8), Inches(5.8), Inches(11), Inches(0.7),
+    size=22, bold=True, color=ACCENT_GRN)
+
+# ════════════════════════════════════════════════════════════════════════════
+# Slide 4 — Semantische Suche
+# ════════════════════════════════════════════════════════════════════════════
+s = add_slide()
+section_header(s, "Semantische Suche — jenseits von Schlüsselwörtern", ACCENT_PG)
+bullet_box(s, [
+    "Klassische Suche: \"trees\" findet nur Dokumente mit dem Wort \"trees\"",
+    "",
+    "Semantische Suche: \"trees\" findet Bilder von Wäldern, Parks, Natur —",
+    " ohne dass das Wort irgendwo steht",
+], Inches(0.8), Inches(1.3), Inches(11.5), Inches(2.2), size=20)
+
+divider(s, Inches(3.7))
+
+bullet_box(s, [
+    "▸ Text-Anfrage wird in denselben Vektorraum eingebettet wie die Bilder",
+    "▸ Datenbankabfrage: finde die k nächsten Nachbarn (k-NN)",
+    "▸ Ergebnis: Bilder nach semantischer Ähnlichkeit gerankt",
+    "▸ Kein manuelles Tagging, keine Metadaten nötig",
+], Inches(0.8), Inches(3.9), Inches(11.5), Inches(2.8), size=20)
+
+# ════════════════════════════════════════════════════════════════════════════
+# Slide 5 — CLIP-Modell
+# ════════════════════════════════════════════════════════════════════════════
+s = add_slide()
+section_header(s, "Das CLIP-Modell (OpenAI)", ACCENT_IDB)
+bullet_box(s, [
+    "CLIP = Contrastive Language–Image Pretraining",
+    "▸ Trainiert auf hunderten Millionen Bild-Text-Paaren",
+    "▸ Bildet sowohl Bilder als auch Text in denselben 512-dimensionalen Raum ab",
+    "▸ Modell: clip-ViT-B-32 (Vision Transformer, Patch-Größe 32×32)",
+    "▸ Quell-Gewichte: Hugging Face Hub (sentence-transformers/clip-ViT-B-32)",
+], Inches(0.8), Inches(1.3), Inches(7.5), Inches(3.2), size=20)
+
+code_box(s,
+         'from sentence_transformers import (\n SentenceTransformer)\n\nmodel = SentenceTransformer(\n "clip-ViT-B-32")\n\n# Bild einbetten\nvec = model.encode(image)\n# → 512 floats\n\n# Text einbetten\nvec = model.encode("Bäume")\n# → 512 floats, gleicher Raum!',
+         Inches(8.8), Inches(1.3), Inches(4.3), Inches(3.8), size=11)
+
+txb(s, "Bild-Vektor und Text-Vektor zeigen in dieselbe Richtung,\nwenn Bild und Text inhaltlich übereinstimmen.",
+    Inches(0.8), Inches(5.0), Inches(11.5), Inches(1.0),
+    size=18, italic=True, color=ACCENT_IDB)
+
+# ════════════════════════════════════════════════════════════════════════════
+# Slide 6 — Cosinus-Distanz
+# ════════════════════════════════════════════════════════════════════════════
+s = add_slide()
+section_header(s, "Ähnlichkeit messen: Cosinus-Distanz", ACCENT_PG)
+bullet_box(s, [
+    "▸ CLIP-Vektoren haben unterschiedliche Beträge — daher kein euklidischer Abstand",
+    "▸ Cosinus-Distanz misst nur den Winkel zwischen zwei Vektoren",
+    "▸ Cosinus-Distanz = 0 → identisch",
+    "▸ Cosinus-Distanz = 1 → völlig unähnlich",
+    "▸ Ähnlichkeitswert = 1 − Distanz → 1.0 = perfekte Übereinstimmung",
+], Inches(0.8), Inches(1.3), Inches(8.5), Inches(3.5), size=20)
+
+code_box(s,
+         "-- PostgreSQL\n1 - (embedding <=> query_vec)\n\n-- Oracle 26ai\n1 - VECTOR_DISTANCE(embedding, query_vec, COSINE)",
+         Inches(0.8), Inches(5.0), Inches(6.0), Inches(1.9), size=13)
+
+txb(s, "In der Demo:\nScore 28 % = schwache Übereinstimmung\nScore 75 % = starke Übereinstimmung",
+    Inches(7.5), Inches(5.0), Inches(5.0), Inches(2.0),
+    size=18, color=ACCENT_GRN)
+
+# ════════════════════════════════════════════════════════════════════════════
+# Slide 7 — PostgreSQL + pgvector: prerequisites
+# ════════════════════════════════════════════════════════════════════════════
+s = add_slide()
+section_header(s, "PostgreSQL + pgvector", ACCENT_PG)
+
+txb(s, "Was wird benötigt?", Inches(0.8), Inches(1.3), Inches(11), Inches(0.5),
+    size=22, bold=True, color=ACCENT_PG)
+bullet_box(s, [
+    "▸ PostgreSQL (ab Version 13)",
+    "▸ pgvector-Extension — docker image: pgvector/pgvector:pg18",
+    "▸ Extension aktivieren: CREATE EXTENSION vector;",
+    "▸ Python-Paket: psycopg2-binary",
+    "▸ KI-Bibliothek: sentence-transformers (auf dem Anwendungsserver)",
+], Inches(0.8), Inches(1.9), Inches(11.5), Inches(2.5), size=20)
+
+# Keep the rule above the heading (which starts at 4.5 in); at 4.6 in it
+# would be drawn through the heading text.
+divider(s, Inches(4.45))
+
+txb(s, "Schema & Index", Inches(0.8), Inches(4.5), Inches(11), Inches(0.5),
+    size=22, bold=True, color=ACCENT_PG)
+code_box(s,
+         "CREATE TABLE images (\n id SERIAL PRIMARY KEY,\n filename TEXT NOT NULL UNIQUE,\n embedding vector(512) -- pgvector-Typ\n);\n\nCREATE INDEX ON images USING hnsw (embedding vector_cosine_ops);",
+         Inches(0.8), Inches(5.0), Inches(7.5), Inches(1.85), size=13)
+
+bullet_box(s, [
+    "HNSW = Hierarchical Navigable Small World",
+    "Approximativer k-NN Index",
+    "Sehr schnell bei der Suche",
+], Inches(8.8), Inches(5.0), Inches(4.3), Inches(1.85), size=18, color=DIM_CLR)
+
+# ════════════════════════════════════════════════════════════════════════════
+# Slide 8 — PostgreSQL: search query
+# ════════════════════════════════════════════════════════════════════════════
+s = add_slide()
+section_header(s, "PostgreSQL: Suchanfrage", ACCENT_PG)
+
+bullet_box(s, [
+    "1. Text-Anfrage mit CLIP in Python in einen Vektor umwandeln",
+    "2. Vektor an die SQL-Abfrage übergeben",
+    "3. PostgreSQL findet die ähnlichsten Bilder via HNSW-Index",
+], Inches(0.8), Inches(1.3), Inches(11.5), Inches(1.5), size=20)
+
+code_box(s,
+         "# Python\nvec = model.encode(\"Bäume\") # → 512 floats\n\n# SQL\nSELECT filename,\n 1 - (embedding <=> %s::vector) AS score\nFROM images\nORDER BY embedding <=> %s::vector\nLIMIT 12;",
+         Inches(0.8), Inches(3.0), Inches(7.5), Inches(3.5), size=16)
+
+# The annotation names the placeholder exactly as it appears in the SQL
+# listing on this slide (%s), so the audience can match the two.
+bullet_box(s, [
+    "<=> Cosinus-Distanz-Operator",
+    "(pgvector-spezifisch)",
+    "",
+    "%s::vector expliziter Cast",
+    "erforderlich",
+    "",
+    "LIMIT statt FETCH FIRST",
+], Inches(9.0), Inches(3.0), Inches(4.0), Inches(3.5), size=18, color=DIM_CLR)
+
+# ════════════════════════════════════════════════════════════════════════════
+# Slide 9 — Oracle 26ai: native support
+# ════════════════════════════════════════════════════════════════════════════
+s = add_slide()
+section_header(s, "Oracle 26ai — nativer Vektor-Support", ACCENT_ORA)
+
+txb(s, "Was wird benötigt?", Inches(0.8), Inches(1.3), Inches(11), Inches(0.5),
+    size=22, bold=True, color=ACCENT_ORA)
+bullet_box(s, [
+    "▸ Oracle AI Database 26ai Free (oder Enterprise)",
+    "▸ Keine Extension nötig — Vektoren sind eingebaut",
+    "▸ Vector Memory Area im SGA konfigurieren (für HNSW-Index)",
+    "▸ Python-Paket: oracledb (Thin Mode — kein Oracle Client nötig)",
+    "▸ KI-Bibliothek: sentence-transformers (auf dem Anwendungsserver)",
+], Inches(0.8), Inches(1.9), Inches(11.5), Inches(2.2), size=20)
+
+divider(s, Inches(4.2))
+
+txb(s, "Schema & Index", Inches(0.8), Inches(4.3), Inches(11), Inches(0.45),
+    size=20, bold=True, color=ACCENT_ORA)
+code_box(s,
+         "CREATE TABLE images (\n id NUMBER GENERATED ALWAYS AS IDENTITY PRIMARY KEY,\n filename VARCHAR2(255) NOT NULL UNIQUE,\n embedding VECTOR(512, FLOAT32) -- Typ + Dimension\n);\nCREATE VECTOR INDEX images_idx ON images(embedding)\n ORGANIZATION INMEMORY NEIGHBOR GRAPH\n WITH DISTANCE COSINE WITH TARGET ACCURACY 95;",
+         Inches(0.8), Inches(4.8), Inches(8.5), Inches(2.0), size=11)
+
+bullet_box(s, [
+    "HNSW im SGA",
+    "(Vector Memory Area)",
+    "512 MB konfiguriert",
+], Inches(9.8), Inches(4.8), Inches(3.3), Inches(2.0), size=17, color=DIM_CLR)
+
+# ════════════════════════════════════════════════════════════════════════════
+# Slide 10 — Oracle: differences from pgvector
+# ════════════════════════════════════════════════════════════════════════════
+s = add_slide()
+section_header(s, "Oracle vs. pgvector — Schema-Unterschiede", ACCENT_ORA)
+
+rows = [
+    ("Extension", "CREATE EXTENSION vector", "Eingebaut, keine Extension"),
+    ("Vektor-Spalte", "vector(512) — nur Dimension", "VECTOR(512, FLOAT32) — Dim + Typ"),
+    ("Primary Key", "SERIAL", "NUMBER GENERATED ALWAYS AS IDENTITY"),
+    ("Text-Spalte", "TEXT (unbegrenzt)", "VARCHAR2(n) — Länge erforderlich"),
+    ("HNSW-Syntax", "USING hnsw (...ops)", "ORGANIZATION INMEMORY NEIGHBOR GRAPH"),
+    ("Genauigkeit", "Implizit via Index-Parameter", "WITH TARGET ACCURACY 95 (explizit)"),
+    ("Speicher", "Kein Sonder-Speicher nötig", "vector_memory_size im SGA"),
+    ("Abstand-Op", "<=> (Operator)", "VECTOR_DISTANCE(col, vec, COSINE)"),
+    ("Top-N", "LIMIT n", "FETCH FIRST n ROWS ONLY"),
+]
+
+# Column header row
+y = Inches(1.3)
+hdr_bg = s.shapes.add_shape(1, Inches(0.3), y, Inches(12.7), Inches(0.55))
+hdr_bg.fill.solid()
+hdr_bg.fill.fore_color.rgb = RGBColor(0x18, 0x18, 0x28)
+hdr_bg.line.fill.background()
+txb(s, "Aspekt", Inches(0.4), y + Pt(6), Inches(2.2), Inches(0.5), size=14, bold=True, color=BODY_CLR)
+txb(s, "PostgreSQL + pgvector", Inches(2.7), y + Pt(6), Inches(4.8), Inches(0.5), size=14, bold=True, color=ACCENT_PG)
+txb(s, "Oracle 26ai", Inches(7.6), y + Pt(6), Inches(5.4), Inches(0.5), size=14, bold=True, color=ACCENT_ORA)
+y += Inches(0.56)
+
+# Body rows with alternating background shades
+for i, (aspect, pg, ora) in enumerate(rows):
+    bg_color = RGBColor(0x28, 0x29, 0x3d) if i % 2 == 0 else RGBColor(0x24, 0x25, 0x38)
+    row_bg = s.shapes.add_shape(1, Inches(0.3), y, Inches(12.7), Inches(0.52))
+    row_bg.fill.solid()
+    row_bg.fill.fore_color.rgb = bg_color
+    row_bg.line.fill.background()
+
+    txb(s, aspect, Inches(0.4), y + Pt(5), Inches(2.2), Inches(0.48), size=13, bold=True, color=DIM_CLR)
+    txb(s, pg, Inches(2.7), y + Pt(5), Inches(4.8), Inches(0.48), size=13, color=ACCENT_PG)
+    txb(s, ora, Inches(7.6), y + Pt(5), Inches(5.4), Inches(0.48), size=13, color=ACCENT_ORA)
+    y += Inches(0.53)
+
+# ════════════════════════════════════════════════════════════════════════════
+# Slide 11 — Oracle In-Database Embedding
+# ════════════════════════════════════════════════════════════════════════════
+s = add_slide()
+section_header(s, "Oracle 26ai — Embedding in der Datenbank", ACCENT_IDB)
+
+bullet_box(s, [
+    "▸ Oracle kann ONNX-Modelle direkt in die Datenbank laden",
+    "▸ VECTOR_EMBEDDING() ruft das Modell innerhalb einer SQL-Abfrage auf",
+    "▸ Kein Python, keine KI-Bibliothek auf dem Anwendungsserver zur Laufzeit",
+    "▸ Der Text-String ist der einzige Parameter aus Python",
+], Inches(0.8), Inches(1.3), Inches(11.5), Inches(2.2), size=20)
+
+code_box(s,
+         "-- Gesamte Logik in einem SQL-Statement\nSELECT filename,\n 1 - VECTOR_DISTANCE(\n foto_vek,\n VECTOR_EMBEDDING(CLIP_TXT USING :q AS data),\n COSINE\n ) AS score\nFROM VECTOR.FOTO_VEKTOR\nORDER BY VECTOR_DISTANCE(\n foto_vek,\n VECTOR_EMBEDDING(CLIP_TXT USING :q AS data), COSINE)\nFETCH FIRST 12 ROWS ONLY;",
+         Inches(0.8), Inches(3.6), Inches(7.5), Inches(3.3), size=13)
+
+bullet_box(s, [
+    ":q = reiner Text aus Python",
+    "",
+    "Oracle übernimmt:",
+    " • Tokenisierung",
+    " • ONNX-Inferenz",
+    " • Vektorsuche",
+    "",
+    "→ Architektur vereinfacht sich",
+], Inches(9.0), Inches(3.6), Inches(4.0), Inches(3.4), size=18, color=DIM_CLR)
+
+# ════════════════════════════════════════════════════════════════════════════
+# Slide 12 — ONNX in Oracle: caveats
+# ════════════════════════════════════════════════════════════════════════════
+s = add_slide()
+section_header(s, "ONNX in Oracle: Was zu beachten ist", ACCENT_IDB)
+
+bullet_box(s, [
+    "Oracle's ONNX-Validator stellt strenge Anforderungen an das Modell-Graph:",
+    "",
+    "▸ input_ids darf nur in einem einzigen Gather-Knoten verwendet werden",
+    "▸ Standard-CLIP-Export verwendet input_ids auch in ArgMax → wird abgelehnt",
+    "",
+    "Lösung: CLIP_TXT mit CLS-Token-Pooling (Position 0) statt EOS-Token-Pooling",
+    "▸ Einfacherer ONNX-Graph, den Oracle akzeptiert",
+    "▸ Cosinus-Ähnlichkeit zwischen EOS- und CLS-Variante: ~0,70",
+    "▸ Modell muss beim Export entsprechend angepasst werden",
+], Inches(0.8), Inches(1.3), Inches(11.5), Inches(3.8), size=19)
+
+code_box(s,
+         "-- Modell laden (einmalig durch Administrator)\nEXEC DBMS_VECTOR.LOAD_ONNX_MODEL(\n 'VEC_DUMP', 'clip_txt.onnx', 'CLIP_TXT',\n JSON('{\"function\":\"embedding\",\"embeddingOutput\":\"output\",\n \"input\":{\"input\":[\"DATA\"]}}'));",
+         Inches(0.8), Inches(5.2), Inches(11.5), Inches(1.6), size=13)
+
+# ════════════════════════════════════════════════════════════════════════════
+# Slide 13 — Architektur der Demo
+# ════════════════════════════════════════════════════════════════════════════
+s = add_slide()
+section_header(s, "Architektur der Demo", ACCENT_GRN)
+
+# Three columns
+for i, (label, port, color) in enumerate([
+    ("pgvector", "Port 8000", ACCENT_PG),
+    ("Oracle 26ai\n(Python)", "Port 8001", ACCENT_ORA),
+    ("Oracle 26ai\n(In-DB)", "Port 8002", ACCENT_IDB),
+]):
+    x = Inches(0.5 + i * 4.27)
+    # Box
+    box = s.shapes.add_shape(1, x, Inches(1.3), Inches(3.8), Inches(4.8))
+    box.fill.solid()
+    box.fill.fore_color.rgb = RGBColor(0x28, 0x29, 0x3d)
+    box.line.color.rgb = color
+
+    txb(s, label, x + Inches(0.1), Inches(1.4), Inches(3.6), Inches(0.8),
+        size=22, bold=True, color=color, align=PP_ALIGN.CENTER)
+    txb(s, port, x + Inches(0.1), Inches(2.1), Inches(3.6), Inches(0.4),
+        size=16, color=DIM_CLR, align=PP_ALIGN.CENTER)
+
+    # Stack components per column, keyed by the column label
+    items = {
+        "pgvector": ["Browser /ui/", "FastAPI", "CLIP (Python)", "PostgreSQL 18", "pgvector 0.8.2"],
+        "Oracle 26ai\n(Python)": ["Browser /ui/", "FastAPI", "CLIP (Python)", "Oracle 26ai", "HNSW (SGA)"],
+        "Oracle 26ai\n(In-DB)": ["Browser /ui/", "FastAPI", "(kein CLIP)", "Oracle 26ai", "VECTOR_EMBEDDING()"],
+    }[label]
+
+    for j, item in enumerate(items):
+        txb(s, "▸ " + item, x + Inches(0.2), Inches(2.65 + j * 0.52), Inches(3.5), Inches(0.48),
+            size=16, color=BODY_CLR)
+
+txb(s, "116 Street Fotos · CLIP ViT-B/32 · 512-dimensionale Vektoren",
+    Inches(0.5), Inches(6.6), Inches(12.33), Inches(0.3),
+    size=16, color=DIM_CLR, align=PP_ALIGN.CENTER)
+
+# ════════════════════════════════════════════════════════════════════════════
+# Slide 14 — Demo pointers
+# ════════════════════════════════════════════════════════════════════════════
+s = add_slide()
+section_header(s, "Demo", ACCENT_GRN)
+
+for url, label, color, y in [
+    ("http://localhost:8000/ui/", "pgvector (blau)", ACCENT_PG, Inches(2.2)),
+    ("http://localhost:8001/ui/", "Oracle 26ai (rot)", ACCENT_ORA, Inches(3.5)),
+    ("http://localhost:8002/ui/", "Oracle In-DB (lila)", ACCENT_IDB, Inches(4.8)),
+]:
+    txb(s, url, Inches(1.5), y, Inches(6), Inches(0.5), size=22, bold=True, color=color)
+    txb(s, label, Inches(7.8), y + Inches(0.05), Inches(4.5), Inches(0.5), size=20, color=DIM_CLR)
+
+txb(s, "Suchbegriffe zum Ausprobieren:",
+    Inches(1.5), Inches(5.9), Inches(10), Inches(0.5), size=18, color=BODY_CLR)
+txb(s, "Bäume · Wasser · Menschen · Gebäude · Himmel · Nacht · Autos",
+    Inches(1.5), Inches(6.3), Inches(10), Inches(0.6), size=20, bold=True, color=ACCENT_GRN)
+
+# ════════════════════════════════════════════════════════════════════════════
+# Slide 15 — Vergleich
+# ════════════════════════════════════════════════════════════════════════════
+s = add_slide()
+section_header(s, "Vergleich", ACCENT_PG)
+
+rows = [
+    ("Merkmal", "PostgreSQL + pgvector", "Oracle 26ai (Python)", "Oracle 26ai (In-DB)"),
+    ("Fotos indiziert", "116", "116", "116"),
+    ("Indizierungszeit", "~26 Sek. (CPU)", "~16 Sek. (CPU)", "— (separat)"),
+    ("Index-Typ", "HNSW (auf Disk)", "HNSW (im Speicher)", "Full Table Scan"),
+    ("RAM-Bedarf", "Keiner", "512 MB SGA", "512 MB SGA"),
+    ("CLIP zur Laufzeit", "Ja (Python)", "Ja (Python)", "Nein"),
+    ("Embedding-Ort", "Python-Prozess", "Python-Prozess", "In der Datenbank"),
+    ("VECTOR_EMBEDDING()", "—", "—", "Ja"),
+    ("Extension nötig", "CREATE EXTENSION vector", "Nein", "Nein"),
+]
+
+widths = [2.5, 3.0, 3.1, 3.1]
+xs = [0.4, 2.9, 6.0, 9.15]
+y = Inches(1.3)
+# The first entry of rows is the header; the row position drives the
+# striping, so no per-row list search is needed.
+for i, row in enumerate(rows):
+    header = i == 0
+    if header:
+        bg_color = RGBColor(0x18, 0x18, 0x28)
+    elif i % 2 == 0:
+        bg_color = RGBColor(0x28, 0x29, 0x3d)
+    else:
+        bg_color = RGBColor(0x24, 0x25, 0x38)
+    row_bg = s.shapes.add_shape(1, Inches(0.3), y, Inches(12.7), Inches(0.52))
+    row_bg.fill.solid()
+    row_bg.fill.fore_color.rgb = bg_color
+    row_bg.line.fill.background()
+
+    first_col = DIM_CLR if header else BODY_CLR
+    colors = [first_col, ACCENT_PG, ACCENT_ORA, ACCENT_IDB]
+    for cell, col, w, x in zip(row, colors, widths, xs):
+        txb(s, cell, Inches(x), y + Pt(4), Inches(w), Inches(0.48),
+            size=13, bold=header, color=col)
+    y += Inches(0.53)
+
+# ════════════════════════════════════════════════════════════════════════════
+# Slide 16 — Fazit
+# ════════════════════════════════════════════════════════════════════════════
+s = add_slide()
+section_header(s, "Fazit", ACCENT_GRN)
+
+bullet_box(s, [
+    "▸ Beide Datenbanken unterstützen Vektorsuche produktionsreif",
+    "▸ pgvector: einfach, leichtgewichtig, kein zusätzlicher Speicher nötig",
+    "▸ Oracle 26ai: vollständig integriert, kein Extension-Management",
+    "▸ Oracle In-DB Embedding: Architektur ohne ML-Laufzeit im App-Server",
+    "▸ CLIP ermöglicht Bildersuche per Freitext — ohne Tagging oder Metadaten",
+    "▸ HNSW liefert schnelle approximative k-NN-Suche in beiden Datenbanken",
+], Inches(0.8), Inches(1.3), Inches(11.5), Inches(3.5), size=21)
+
+divider(s, Inches(5.1))
+
+txb(s, "Quellcode & Dokumentation",
+    Inches(0.8), Inches(5.2), Inches(11), Inches(0.5),
+    size=20, bold=True, color=BODY_CLR)
+txb(s, "https://gitea.dl-cons.de/dierk/vector-search-demo",
+    Inches(0.8), Inches(5.7), Inches(11), Inches(0.5),
+    size=20, color=ACCENT_PG)
+
+txb(s, "Programmierung und Folien unterstützt durch Claude (Anthropic)",
+    Inches(0.8), Inches(6.55), Inches(11.33), Inches(0.35),
+    size=13, italic=True, color=DIM_CLR, align=PP_ALIGN.CENTER)
+
+
+# ════════════════════════════════════════════════════════════════════════════
+# Save
+# ════════════════════════════════════════════════════════════════════════════
+OUT = "Vektoren in der Datenbank.pptx"
+prs.save(OUT)
+print(f"Saved: {OUT} ({len(prs.slides)} slides)")
diff --git a/oravector-demo/frontend/index_indb.html b/oravector-demo/frontend/index_indb.html
deleted file mode 100644
index f18e2ea..0000000
--- a/oravector-demo/frontend/index_indb.html
+++ /dev/null
@@ -1,179 +0,0 @@
 - - - - - - Vector Image Search — Oracle In-DB - - - -
-

Vector Image Search

- Oracle In-DB -
- -
-
- - -
-
- trees - water - people - buildings - sky - street - night - cars -
-
- -

-

Enter a search term above.

- - - -