"""
Generates "Vektoren in der Datenbank.pptx" — a LibreOffice-compatible presentation.

Run from the project root:
    python3 make_presentation.py
"""
from pptx import Presentation
from pptx.util import Inches, Pt, Emu
from pptx.dml.color import RGBColor
from pptx.enum.text import PP_ALIGN
from pptx.oxml.ns import qn
from pptx.oxml import parse_xml
from lxml import etree
import os
import numpy as np
import matplotlib
matplotlib.use("Agg")  # must be selected before pyplot is imported
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

_A_NS = "http://schemas.openxmlformats.org/drawingml/2006/main"


def OxmlElement(tag):
    """Create an empty DrawingML element for a prefixed tag such as ``"a:buNone"``.

    Only the local part after the colon is used; the element is always
    created in the DrawingML main namespace (``_A_NS``) under the ``a``
    prefix.
    """
    local = tag.split(":")[1]
    # Parsing an empty string raises XMLSyntaxError, so the element markup
    # has to be spelled out together with its namespace declaration.
    return etree.fromstring(f'<a:{local} xmlns:a="{_A_NS}"/>')


# ── Diagram generation (matplotlib → PNG → embedded in slide) ────────────────
DIAG_BG = "#1e1e2e"
DIAG_GRID = "#313244"
DIAG_AXIS = "#6c7086"


def _fig(w, h):
    """Return a ``(fig, ax)`` pair of size ``w`` x ``h`` inches on the dark background."""
    fig, ax = plt.subplots(figsize=(w, h))
    fig.patch.set_facecolor(DIAG_BG)
    ax.set_facecolor(DIAG_BG)
    return fig, ax


def _save(fig, name):
    """Write ``fig`` to ``diagrams/<name>`` as a 150-dpi PNG, close it, return the path.

    The ``diagrams`` directory is not created here; it must already exist.
    """
    path = os.path.join("diagrams", name)
    fig.savefig(path, dpi=150, bbox_inches="tight", facecolor=DIAG_BG)
    plt.close(fig)
    return path


def diagram_s3_vectors():
    """Draw the 2-D vector space with Hund / Katze / Auto and return the PNG path."""
    fig, ax = _fig(5, 5)
    ax.set_xlim(-1.3, 1.3)
    ax.set_ylim(-1.3, 1.3)
    ax.set_aspect("equal")
    ax.grid(True, color=DIAG_GRID, linewidth=0.5, alpha=0.6)
    ax.axhline(0, color=DIAG_AXIS, linewidth=1)
    ax.axvline(0, color=DIAG_AXIS, linewidth=1)
    ax.set_xticks([])
    ax.set_yticks([])
    for sp in ax.spines.values():
        sp.set_visible(False)
    ax.text(1.27, 0.05, "x₁", color=DIAG_AXIS, fontsize=12)
    ax.text(0.05, 1.27, "x₂", color=DIAG_AXIS, fontsize=12)

    vecs = [
        ((0.91, 0.12), "#89b4fa", "Hund"),
        ((0.87, 0.18), "#74c7ec", "Katze"),
        ((-0.30, 0.90), "#f38ba8", "Auto"),
    ]
    ox, oy = 0.10, 0.07  # label offset away from the arrow tip
    for (vx, vy), color, label in vecs:
        ax.annotate("", xy=(vx, vy), xytext=(0, 0),
                    arrowprops=dict(arrowstyle="->", color=color, lw=2.5))
        # ``or 1`` keeps the offset positive for a zero component.
        ax.text(vx + ox * np.sign(vx or 1), vy + oy * np.sign(vy or 1), label,
                color=color, fontsize=13, fontweight="bold")

    # Small arc: Hund ↔ Katze
    a1 = np.degrees(np.arctan2(0.12, 0.91))
    a2 = np.degrees(np.arctan2(0.18, 0.87))
    ax.add_patch(mpatches.Arc((0, 0), 0.32, 0.32, angle=0,
                              theta1=min(a1, a2), theta2=max(a1, a2),
                              color="#a6e3a1", lw=2))
    ax.text(0.22, -0.10, "klein", color="#a6e3a1", fontsize=10, ha="center")

    # Large arc: Hund ↔ Auto
    a3 = np.degrees(np.arctan2(0.90, -0.30))
    ax.add_patch(mpatches.Arc((0, 0), 0.52, 0.52, angle=0,
                              theta1=a1, theta2=a3, color="#fab387", lw=2))
    ax.text(-0.35, 0.28, "groß", color="#fab387", fontsize=10)

    plt.tight_layout(pad=0.3)
    return _save(fig, "s3_vectors.png")


def diagram_s4_flow():
    """Draw the semantic-search pipeline as a five-box flow diagram; return the PNG path."""
    fig, ax = _fig(12, 1.9)  # flat figure — matches slide aspect ratio
    ax.set_xlim(0, 12)
    ax.set_ylim(0, 1.9)
    ax.axis("off")

    # (box centre x, label, edge/text colour)
    steps = [
        (1.2, 'Text-Anfrage\n"Bäume"', "#89b4fa"),
        (3.6, "CLIP-Modell", "#cba6f7"),
        (6.0, "Vektor 512 floats", "#74c7ec"),
        (8.4, "Datenbank k-NN", "#f38ba8"),
        (10.8, "Ergebnisse\nnach Score", "#a6e3a1"),
    ]
    for x, label, color in steps:
        box = mpatches.FancyBboxPatch((x - 1.05, 0.22), 2.1, 1.4,
                                      boxstyle="round,pad=0.1",
                                      facecolor="#313244", edgecolor=color,
                                      linewidth=2)
        ax.add_patch(box)
        ax.text(x, 0.92, label, ha="center", va="center", color=color,
                fontsize=13, fontweight="bold", multialignment="center",
                fontfamily="sans-serif")

    # One arrow between each pair of neighbouring boxes.
    for (x_from, _, _), (x_to, _, _) in zip(steps, steps[1:]):
        x1 = x_from + 1.05
        x2 = x_to - 1.05
        ax.annotate("", xy=(x2, 0.92), xytext=(x1, 0.92),
                    arrowprops=dict(arrowstyle="->", color=DIAG_AXIS, lw=2.5))

    plt.tight_layout(pad=0.15)
    return _save(fig, "s4_flow.png")


def diagram_s6_cosine():
    """Draw two vectors with the angle θ between them; return the PNG path."""
    fig, ax = _fig(5, 4.5)
    ax.set_xlim(-0.2, 1.35)
    ax.set_ylim(-0.15, 1.35)
    ax.set_aspect("equal")
    ax.axis("off")

    vA = np.array([1.1, 0.25])  # image vector
    vB = np.array([0.55, 1.0])  # text vector
    for v, color, label, lpos in [
        (vA, "#89b4fa", "Bild-Vektor", (1.17, 0.08)),
        (vB, "#cba6f7", 'Text-Vektor\n"Bäume"', (0.56, 1.07)),
    ]:
        ax.annotate("", xy=v, xytext=(0, 0),
                    arrowprops=dict(arrowstyle="->", color=color, lw=3))
        ax.text(*lpos, label, color=color, fontsize=12, fontweight="bold",
                ha="center", multialignment="center")

    # Angle arc
    a1 = np.degrees(np.arctan2(vA[1], vA[0]))
    a2 = np.degrees(np.arctan2(vB[1], vB[0]))
    ax.add_patch(mpatches.Arc((0, 0), 0.45, 0.45, angle=0,
                              theta1=a1, theta2=a2, color="#a6e3a1", lw=2.5))
    mid_angle = np.radians((a1 + a2) / 2)
    ax.text(0.28 * np.cos(mid_angle), 0.28 * np.sin(mid_angle), "θ",
            color="#a6e3a1", fontsize=16, fontweight="bold",
            ha="center", va="center")

    # Origin dot
    ax.plot(0, 0, "o", color=DIAG_AXIS, markersize=6)

    # Formula. Similarity is cos(θ); "1 − cos(θ)" would be the cosine
    # *distance*, and the deck defines similarity as 1 − distance.
    ax.text(0.58, -0.12, "Ähnlichkeit = cos(θ)", color="#cdd6f4",
            fontsize=11, ha="center", fontfamily="monospace")

    plt.tight_layout(pad=0.3)
    return _save(fig, "s6_cosine.png")


def diagram_architecture():
    """Draw the three-column architecture diagram and return the PNG path.

    Each column shows an app-server box above a database box, with a marker
    for where the CLIP model runs (app server or database).
    """
    CLIP_CLR = "#a6e3a1"
    # (x, db_name, color, port, clip_app, clip_db, db_tech, vec_embed_fn, foto_storage)
    COLS = [
        (2.3, "PostgreSQL 18", "#89b4fa", "Port 8000", True, False,
         "pgvector 0.8.2\nHNSW (Disk)", None, "Fotos: Dateipfad (Filesystem)"),
        (6.65, "Oracle 26ai\nVECTORS_USER", "#f38ba8", "Port 8001", True, False,
         "HNSW (SGA)", None, "Fotos: Dateipfad (Filesystem)"),
        (11.0, "Oracle 26ai\nVECTOR", "#cba6f7", "Port 8002", False, True,
         "HNSW (SGA)", "VECTOR_EMBEDDING()", "Fotos: BLOB (in Oracle)"),
    ]
    BOX_H = 2.2  # all boxes same height
    DB_Y = 0.15  # database box bottom
    GAP = 0.60   # space between DB top and app server bottom
    APP_Y = DB_Y + BOX_H + GAP  # = 2.95
    # The box tops are the same for every column, so compute them once.
    APP_TOP = APP_Y + BOX_H  # = 5.15
    DB_TOP = DB_Y + BOX_H    # = 2.35

    fig, ax = _fig(13.5, 6.5)
    ax.set_xlim(0, 13.5)
    ax.set_ylim(-0.8, 5.9)
    ax.axis("off")

    for x, db_name, color, port, clip_app, clip_db, db_tech, vec_fn, foto_storage in COLS:
        # ── Port label
        ax.text(x, APP_TOP + 0.28, port, ha="center", color=color,
                fontsize=13, fontweight="bold")

        # ── App server box
        ax.add_patch(mpatches.FancyBboxPatch(
            (x - 1.7, APP_Y), 3.4, BOX_H, boxstyle="round,pad=0.1",
            facecolor="#28293d", edgecolor=color, lw=2))
        ax.text(x, APP_TOP - 0.22, "App-Server (FastAPI)", ha="center",
                color=color, fontsize=11, fontweight="bold")
        if clip_app:
            ax.add_patch(mpatches.FancyBboxPatch(
                (x - 1.2, APP_Y + 0.10), 2.4, 0.75, boxstyle="round,pad=0.08",
                facecolor="#1e1e2e", edgecolor=CLIP_CLR, lw=2))
            ax.text(x, APP_Y + 0.475, "CLIP-Modell\n(sentence-transformers)",
                    ha="center", va="center", color=CLIP_CLR, fontsize=9.5,
                    fontweight="bold", multialignment="center")
            ax.add_patch(mpatches.FancyBboxPatch(
                (x - 1.2, APP_Y + 0.95), 2.4, 0.42, boxstyle="round,pad=0.06",
                facecolor="#1e1e2e", edgecolor=DIAG_AXIS, lw=1,
                linestyle="dashed"))
            ax.text(x, APP_Y + 1.16, foto_storage, ha="center", va="center",
                    color=DIAG_AXIS, fontsize=9, style="italic")
        else:
            ax.add_patch(mpatches.FancyBboxPatch(
                (x - 1.2, APP_Y + 0.10), 2.4, 0.75, boxstyle="round,pad=0.08",
                facecolor="#1e1e2e", edgecolor=DIAG_AXIS, lw=1,
                linestyle="dashed"))
            ax.text(x, APP_Y + 0.475, "kein CLIP", ha="center", va="center",
                    color=DIAG_AXIS, fontsize=10, style="italic")

        # ── Arrow with comfortable gap
        ax.annotate("", xy=(x, DB_TOP + 0.05), xytext=(x, APP_Y - 0.05),
                    arrowprops=dict(arrowstyle="->", color=DIAG_AXIS, lw=2))
        arrow_lbl = "Vektor (512 floats)" if clip_app else "Text-String"
        ax.text(x + 0.2, (DB_TOP + APP_Y) / 2, arrow_lbl, ha="left",
                va="center", color=DIAG_AXIS, fontsize=9, style="italic")

        # ── Database box
        ax.add_patch(mpatches.FancyBboxPatch(
            (x - 1.7, DB_Y), 3.4, BOX_H, boxstyle="round,pad=0.1",
            facecolor="#28293d", edgecolor=color, lw=2))
        if clip_db:
            ax.add_patch(mpatches.FancyBboxPatch(
                (x - 1.2, DB_Y + 0.10), 2.4, 0.72, boxstyle="round,pad=0.08",
                facecolor="#1e1e2e", edgecolor=CLIP_CLR, lw=2))
            ax.text(x, DB_Y + 0.46, "CLIP-Modell\n(ONNX, in Oracle)",
                    ha="center", va="center", color=CLIP_CLR, fontsize=9.5,
                    fontweight="bold", multialignment="center")
            ax.add_patch(mpatches.FancyBboxPatch(
                (x - 1.2, DB_Y + 0.92), 2.4, 0.40, boxstyle="round,pad=0.06",
                facecolor="#1e1e2e", edgecolor=DIAG_AXIS, lw=1,
                linestyle="dashed"))
            ax.text(x, DB_Y + 1.12, foto_storage, ha="center", va="center",
                    color=DIAG_AXIS, fontsize=9, style="italic")
            ax.text(x, DB_Y + 1.50, vec_fn, ha="center", color="#fab387",
                    fontsize=10, fontweight="bold", fontfamily="monospace")

        # ── Database labels — derived from COLS for every column so the
        # three boxes line up and no column carries hard-coded copies.
        # Split db_name → ["PostgreSQL 18"] or ["Oracle 26ai", "VECTORS_USER"]
        # Split db_tech → ["pgvector 0.8.2", "HNSW (Disk)"] or ["HNSW (SGA)"]
        name_parts = db_name.split("\n")
        tech_parts = db_tech.split("\n")
        hnsw = tech_parts[-1]        # always last
        tech_extra = tech_parts[:-1]  # e.g. ["pgvector 0.8.2"]

        # HNSW — same height across all three DB boxes
        ax.text(x, DB_Y + 2.10, hnsw, ha="center", color=DIAG_AXIS, fontsize=9)

        # Middle line: schema name if there is one, else version info
        if len(name_parts) > 1:
            mid_label = "Schema: " + name_parts[1]
        elif tech_extra:
            mid_label = tech_extra[0]
        else:
            mid_label = ""
        if mid_label:
            ax.text(x, DB_Y + 1.92, mid_label, ha="center", color=color,
                    fontsize=9)

        # Main DB name
        ax.text(x, DB_Y + 1.72, name_parts[0], ha="center", color=color,
                fontsize=11, fontweight="bold")

    # ── Vertical separators
    for xsep in [4.5, 8.85]:
        ax.plot([xsep, xsep], [0.05, 5.55], color=DIAG_GRID, lw=1,
                linestyle="--")

    # ── Caption — separated from boxes, applies to all three columns
    ax.plot([0.3, 13.2], [-0.18, -0.18], color=DIAG_GRID, lw=1)
    ax.text(6.75, -0.5,
            "116 Street Fotos · CLIP ViT-B/32 · 512-dimensionale Vektoren",
            ha="center", va="center", color="#cdd6f4", fontsize=13,
            style="italic")

    plt.tight_layout(pad=0.2)
    return _save(fig, "architecture.png")


# Generate diagrams up front
os.makedirs("diagrams", exist_ok=True)
DIAG_S3 = diagram_s3_vectors()
DIAG_S4 = diagram_s4_flow()
DIAG_S6 = diagram_s6_cosine()
DIAG_ARCH = diagram_architecture()

import copy

# ── Colour palette (dark theme) ──────────────────────────────────────────────
BG = RGBColor(0x1e, 0x1e, 0x2e)          # slide background
TITLE_CLR = RGBColor(0xcb, 0xd3, 0xff)   # slide titles
BODY_CLR = RGBColor(0xcd, 0xd6, 0xf4)    # body text
DIM_CLR = RGBColor(0x6c, 0x70, 0x86)     # dimmed / captions
ACCENT_PG = RGBColor(0x89, 0xb4, 0xfa)   # pgvector blue
ACCENT_ORA = RGBColor(0xf3, 0x8b, 0xa8)  # Oracle red/pink
ACCENT_IDB = RGBColor(0xcb, 0xa6, 0xf7)  # in-DB purple
ACCENT_GRN = RGBColor(0xa6, 0xe3, 0xa1)  # green for highlights
CODE_BG = RGBColor(0x31, 0x32, 0x44)     # code block background
CODE_CLR = RGBColor(0xa6, 0xe3, 0xa1)    # code text

W = Inches(13.33)  # widescreen 16:9
H = Inches(7.5)
FONT = "Roboto"

prs = Presentation()
prs.slide_width = W
prs.slide_height = H
blank_layout = prs.slide_layouts[6]  # completely blank

# NOTE: machine-specific absolute path; the picture calls below need this file.
LOGO_PATH = "/home/dierk/Bilder/Logo/Logo DLC Final.png"
CONFERENCE = "Quest Data Minds Konferenz"
EVENT_DATE = "28. Mai 2026"
EVENT_CITY = "Köln"

_slide_num = [0]  # mutable counter so nested calls can increment it

# Auto-shape id passed to ``shapes.add_shape`` (MSO_SHAPE.RECTANGLE == 1).
_RECTANGLE = 1


def add_slide(logo=True, footer=True):
    """Append a blank dark-themed slide and return it.

    Args:
        logo: When true, place the logo picture in the bottom-right corner.
        footer: When true, draw the footer (separator line, conference
            info, page number). Only slides with a footer advance the page
            counter, so a slide without one is not counted.
    """
    slide = prs.slides.add_slide(blank_layout)
    fill = slide.background.fill
    fill.solid()
    fill.fore_color.rgb = BG

    if logo:
        slide.shapes.add_picture(LOGO_PATH, Inches(11.6), Inches(7.0),
                                 Inches(1.6), Inches(0.42))

    if footer:
        _slide_num[0] += 1
        # thin separator line
        sep = slide.shapes.add_shape(_RECTANGLE, Inches(0.3), Inches(6.95),
                                     Inches(11.1), Pt(1))
        sep.fill.solid()
        sep.fill.fore_color.rgb = DIM_CLR
        sep.line.fill.background()
        # left: conference info
        txb(slide, f"{CONFERENCE} · {EVENT_CITY}, {EVENT_DATE}",
            Inches(0.3), Inches(7.02), Inches(9.5), Inches(0.35),
            size=11, color=DIM_CLR)
        # right: page number (before logo)
        txb(slide, str(_slide_num[0]),
            Inches(10.9), Inches(7.02), Inches(0.6), Inches(0.35),
            size=11, color=DIM_CLR, align=PP_ALIGN.RIGHT)
    return slide


def txb(slide, text, x, y, w, h, size=24, bold=False, color=BODY_CLR,
        align=PP_ALIGN.LEFT, italic=False):
    """Add a word-wrapped single-run text box to ``slide`` and return the shape.

    ``x``, ``y``, ``w``, ``h`` are lengths (e.g. ``Inches(...)``); ``size``
    is the font size in points.
    """
    box = slide.shapes.add_textbox(x, y, w, h)
    tf = box.text_frame
    tf.word_wrap = True
    p = tf.paragraphs[0]
    p.alignment = align
    run = p.add_run()
    run.text = text
    run.font.size = Pt(size)
    run.font.bold = bold
    run.font.italic = italic
    run.font.color.rgb = color
    run.font.name = FONT
    return box


def title_slide_layout(slide, title, subtitle=None):
    """Place a large centred title and, if given, a dimmed subtitle below it."""
    txb(slide, title, Inches(1), Inches(2.8), Inches(11.33), Inches(1.2),
        size=48, bold=True, color=TITLE_CLR, align=PP_ALIGN.CENTER)
    if subtitle:
        txb(slide, subtitle, Inches(1), Inches(4.1), Inches(11.33), Inches(0.8),
            size=24, color=DIM_CLR, align=PP_ALIGN.CENTER)


def section_header(slide, title, accent=ACCENT_PG):
    """Full-width coloured bar at top, then title."""
    bar = slide.shapes.add_shape(_RECTANGLE, Inches(0), Inches(0), W,
                                 Inches(0.12))
    bar.fill.solid()
    bar.fill.fore_color.rgb = accent
    bar.line.fill.background()
    txb(slide, title, Inches(0.5), Inches(0.2), Inches(12.33), Inches(0.8),
        size=32, bold=True, color=TITLE_CLR)


def bullet_box(slide, items, x, y, w, h, size=20, color=BODY_CLR, indent=False):
    """Add a text box with one paragraph per entry of ``items``.

    Args:
        items: Strings to render, one paragraph each. An empty string gives
            a blank spacer line.
        indent: When true, prefix every item with a single space.

    Returns:
        The created text-box shape (same convention as ``txb``).
    """
    box = slide.shapes.add_textbox(x, y, w, h)
    tf = box.text_frame
    tf.word_wrap = True
    prefix = " " if indent else ""
    for idx, item in enumerate(items):
        # A new text frame already holds one empty paragraph; reuse it.
        p = tf.paragraphs[0] if idx == 0 else tf.add_paragraph()
        p.space_before = Pt(4)
        run = p.add_run()
        run.text = prefix + item
        run.font.size = Pt(size)
        run.font.color.rgb = color
        run.font.name = FONT
    return box


def code_box(slide, code, x, y, w, h, size=13):
    """Draw a code block: a filled rectangle with monospaced text on top.

    ``code`` is stripped and split on newlines; each line becomes its own
    paragraph with margins, indent and bullets explicitly cleared.
    """
    # Background rectangle (no text)
    bg = slide.shapes.add_shape(_RECTANGLE, x, y, w, h)
    bg.fill.solid()
    bg.fill.fore_color.rgb = CODE_BG
    bg.line.color.rgb = RGBColor(0x58, 0x5b, 0x70)
    bg.text_frame.text = ""

    # Text box on top — textboxes have predictable left-aligned defaults
    pad = Pt(7)
    tb = slide.shapes.add_textbox(x + pad, y + pad, w - pad * 2, h - pad * 2)
    tf = tb.text_frame
    tf.word_wrap = False
    tf.margin_left = Pt(0)
    tf.margin_right = Pt(0)
    tf.margin_top = Pt(0)
    tf.margin_bottom = Pt(0)

    bullet_tags = ("a:buClr", "a:buClrTx", "a:buFont", "a:buFontTx",
                   "a:buChar", "a:buAutoNum", "a:buNone")
    for idx, line in enumerate(code.strip().split("\n")):
        p = tf.paragraphs[0] if idx == 0 else tf.add_paragraph()
        p.alignment = PP_ALIGN.LEFT
        p.space_before = Pt(0)
        p.space_after = Pt(0)
        # Explicitly zero out left margin, hanging indent, and remove any bullet
        pPr = p._p.get_or_add_pPr()
        pPr.set("marL", "0")
        pPr.set("indent", "0")
        for tag in bullet_tags:
            for el in pPr.findall(qn(tag)):
                pPr.remove(el)
        pPr.append(OxmlElement("a:buNone"))
        run = p.add_run()
        run.text = line
        run.font.size = Pt(size)
        run.font.color.rgb = CODE_CLR
        run.font.name = "Courier New"


def divider(slide, y, color=DIM_CLR):
    """Draw a 1-pt horizontal rule across the content width at height ``y``."""
    line = slide.shapes.add_shape(_RECTANGLE, Inches(0.5), y, Inches(12.33),
                                  Pt(1))
    line.fill.solid()
    line.fill.fore_color.rgb = color
    line.line.fill.background()


# ════════════════════════════════════════════════════════════════════════════
# Slide 1 — Title slide
# ════════════════════════════════════════════════════════════════════════════
s = add_slide(logo=False, footer=False)  # title slide: custom layout
title_slide_layout(s, "Vektoren in der Datenbank",
                   "Der VECTOR-Datentyp in Oracle 26ai und PostgreSQL")
# Conference details
txb(s, CONFERENCE, Inches(1), Inches(5.0), Inches(11.33), Inches(0.5),
    size=20, bold=True, color=ACCENT_PG, align=PP_ALIGN.CENTER)
txb(s, f"{EVENT_DATE} · {EVENT_CITY}", Inches(1), Inches(5.5), Inches(11.33),
    Inches(0.45), size=18, color=DIM_CLR, align=PP_ALIGN.CENTER)
# Larger centred logo
s.shapes.add_picture(LOGO_PATH, Inches(4.67), Inches(6.1), Inches(4.0),
                     Inches(1.06))

# ════════════════════════════════════════════════════════════════════════════
# Slide 2 — Motivation: the VECTOR data type
# ════════════════════════════════════════════════════════════════════════════
s = add_slide()
section_header(s, "Der VECTOR-Datentyp", ACCENT_PG)
bullet_box(s, [
    "▸ VECTOR ist ein neuer nativer Datentyp in Oracle AI Database 26ai und PostgreSQL (pgvector)",
    "▸ Ermöglicht das Speichern hochdimensionaler Vektoren direkt in der Datenbank",
    "▸ Bringt optimierte Suchoperatoren und Indizes für Ähnlichkeitssuche (k-NN) mit",
    "▸ Macht KI-Embeddings zu einem First-Class-Citizen in relationalen Datenbanken",
], Inches(0.8), Inches(1.3), Inches(11.5), Inches(2.2), size=22)
divider(s, Inches(3.7))
txb(s, "Ziel dieses Vortrags", Inches(0.8), Inches(3.85), Inches(11.5),
    Inches(0.5), size=22, bold=True, color=ACCENT_PG)
bullet_box(s, [
    "▸ Den VECTOR-Datentyp erklären — was er ist, wie er funktioniert",
    "▸ Gemeinsamkeiten und Unterschiede zwischen Oracle 26ai und PostgreSQL/pgvector zeigen",
    "▸ Eine konkrete Demo: semantische Bildsuche mit 116 Street-Fotos",
    "▸ Drei Ansätze vergleichen: pgvector, Oracle (Python-Embedding), Oracle (In-Database-Embedding)",
], Inches(0.8), Inches(4.4), Inches(11.5), Inches(2.3), size=20)

# ════════════════════════════════════════════════════════════════════════════
# Slide 3 — Agenda
# ════════════════════════════════════════════════════════════════════════════
s = add_slide()
section_header(s, "Agenda", ACCENT_PG)
bullet_box(s, [
    "01 Was ist ein Vektor?",
    "02 Semantische Suche — jenseits von Schlüsselwörtern",
    "03 Das CLIP-Modell",
    "04 Ähnlichkeit messen: Cosinus-Distanz",
    "05 PostgreSQL + pgvector",
    "06 Oracle 26ai — nativer Vektor-Support",
    "07 Oracle 26ai — Embedding in der Datenbank",
    "08 Architektur der Demo",
    "09 Demo",
    "10 Vergleich & Fazit",
], Inches(1.5), Inches(1.3), Inches(10), Inches(5.5), size=20)

# ════════════════════════════════════════════════════════════════════════════
# Slide 4 — What is a vector?
# ════════════════════════════════════════════════════════════════════════════
s = add_slide()
section_header(s, "Was ist ein Vektor?", ACCENT_PG)
bullet_box(s, [
    "▸ Ein Vektor ist eine geordnete Liste von Zahlen: [0.12, -0.87, 0.44, …]",
    "▸ Jede Zahl beschreibt eine Dimension im semantischen Raum",
    "▸ Moderne KI-Modelle erzeugen Vektoren mit 512 bis 1536 Dimensionen",
    "▸ Ähnliche Inhalte → ähnliche Vektoren → kleiner Abstand im Raum",
    "▸ Texte, Bilder, Audio — alles lässt sich in denselben Vektorraum einbetten",
], Inches(0.8), Inches(1.3), Inches(7.2), Inches(4), size=20)
# 2-D vector diagram on the right
s.shapes.add_picture(DIAG_S3, Inches(7.8), Inches(1.1), Inches(5.3), Inches(5.3))
txb(s, "Vektoren machen Ähnlichkeit berechenbar.", Inches(0.3), Inches(5.75),
    Inches(7.4), Inches(0.8), size=26, bold=True, color=ACCENT_GRN)

# ════════════════════════════════════════════════════════════════════════════
# Slide 5 — Semantic search
# ════════════════════════════════════════════════════════════════════════════
s = add_slide()
section_header(s, "Semantische Suche — jenseits von Schlüsselwörtern", ACCENT_PG)
bullet_box(s, [
    "Klassische Suche: \"trees\" findet nur Dokumente mit dem Wort \"trees\"",
    "",
    "Semantische Suche: \"trees\" findet Bilder von Wäldern, Parks, Natur —",
    " ohne dass das Wort irgendwo steht",
], Inches(0.8), Inches(1.3), Inches(11.5), Inches(2.2), size=20)
divider(s, Inches(3.7))
# (semantic-search slide, continued) how the search works
bullet_box(s, [
    "▸ Text-Anfrage wird in denselben Vektorraum eingebettet wie die Bilder",
    "▸ Datenbankabfrage: finde die k nächsten Nachbarn (k-NN)",
    "▸ Ergebnis: Bilder nach semantischer Ähnlichkeit gerankt",
    "▸ Kein manuelles Tagging, keine Metadaten nötig",
], Inches(0.8), Inches(3.9), Inches(11.5), Inches(1.1), size=20)
# Flow diagram
s.shapes.add_picture(DIAG_S4, Inches(0.5), Inches(5.1), Inches(12.3), Inches(1.75))

# ════════════════════════════════════════════════════════════════════════════
# Slide 6 — The CLIP model
# ════════════════════════════════════════════════════════════════════════════
s = add_slide()
section_header(s, "Das CLIP-Modell (OpenAI)", ACCENT_IDB)
bullet_box(s, [
    "CLIP = Contrastive Language–Image Pretraining",
    "▸ Trainiert auf hunderten Millionen Bild-Text-Paaren",
    "▸ Bildet sowohl Bilder als auch Text in denselben 512-dimensionalen Raum ab",
    "▸ Modell: clip-ViT-B-32 (Vision Transformer, Patch-Größe 32×32)",
    "▸ Quell-Gewichte: Hugging Face Hub (sentence-transformers/clip-ViT-B-32)",
], Inches(0.8), Inches(1.3), Inches(7.5), Inches(3.2), size=20)
code_box(s,
         'from sentence_transformers import (\n'
         ' SentenceTransformer)\n'
         '\n'
         'model = SentenceTransformer(\n'
         ' "clip-ViT-B-32")\n'
         '\n'
         '# Bild einbetten\n'
         'vec = model.encode(image)\n'
         '# → 512 floats\n'
         '\n'
         '# Text einbetten\n'
         'vec = model.encode("Bäume")\n'
         '# → 512 floats, gleicher Raum!',
         Inches(8.8), Inches(1.3), Inches(4.3), Inches(3.8), size=11)
txb(s,
    "Bild-Vektor und Text-Vektor zeigen in dieselbe Richtung,\n"
    "wenn Bild und Text inhaltlich übereinstimmen.",
    Inches(0.8), Inches(5.0), Inches(11.5), Inches(1.0),
    size=18, italic=True, color=ACCENT_IDB)

# ════════════════════════════════════════════════════════════════════════════
# Slide 7 — Cosine distance
# ════════════════════════════════════════════════════════════════════════════
s = add_slide()
section_header(s, "Ähnlichkeit messen: Cosinus-Distanz", ACCENT_PG)
bullet_box(s, [
    "▸ CLIP-Vektoren haben unterschiedliche Beträge — daher kein euklidischer Abstand",
    "▸ Cosinus-Distanz misst nur den Winkel zwischen zwei Vektoren",
    "▸ Cosinus-Distanz = 0 → identisch",
    "▸ Cosinus-Distanz = 1 → völlig unähnlich",
    "▸ Ähnlichkeitswert = 1 − Distanz → 1.0 = perfekte Übereinstimmung",
], Inches(0.8), Inches(1.3), Inches(7.5), Inches(3.5), size=20)
# Cosine diagram on the right
s.shapes.add_picture(DIAG_S6, Inches(8.0), Inches(1.1), Inches(5.1), Inches(3.7))
code_box(s,
         "-- PostgreSQL\n"
         "1 - (embedding <=> query_vec)\n"
         "\n"
         "-- Oracle 26ai\n"
         "1 - VECTOR_DISTANCE(embedding, query_vec, COSINE)",
         Inches(0.8), Inches(5.0), Inches(6.0), Inches(1.85), size=13)
txb(s, "In der Demo:\nScore 28 % = schwach\nScore 75 % = stark",
    Inches(7.0), Inches(5.0), Inches(5.0), Inches(1.85),
    size=18, color=ACCENT_GRN)

# ════════════════════════════════════════════════════════════════════════════
# Slide 8 — PostgreSQL + pgvector: prerequisites
# ════════════════════════════════════════════════════════════════════════════
s = add_slide()
section_header(s, "PostgreSQL + pgvector", ACCENT_PG)
txb(s, "Was wird benötigt?", Inches(0.8), Inches(1.3), Inches(11), Inches(0.5),
    size=22, bold=True, color=ACCENT_PG)
bullet_box(s, [
    "▸ PostgreSQL (ab Version 13)",
    "▸ pgvector-Extension — docker image: pgvector/pgvector:pg18",
    "▸ Extension aktivieren: CREATE EXTENSION vector;",
    "▸ Python-Paket: psycopg2-binary",
    "▸ KI-Bibliothek: sentence-transformers (auf dem Anwendungsserver)",
], Inches(0.8), Inches(1.9), Inches(11.5), Inches(2.5), size=20)
# The rule sits at the bottom edge of the bullet box, just above the heading
# that starts at 4.5"; placed lower it would run through the heading text.
divider(s, Inches(4.4))
txb(s, "Schema & Index", Inches(0.8), Inches(4.5), Inches(11), Inches(0.5),
    size=22, bold=True, color=ACCENT_PG)
code_box(s,
         "CREATE TABLE images (\n"
         " id SERIAL PRIMARY KEY,\n"
         " filename TEXT NOT NULL UNIQUE,\n"
         " embedding vector(512) -- pgvector-Typ\n"
         ");\n"
         "\n"
         "CREATE INDEX ON images USING hnsw (embedding vector_cosine_ops);",
         Inches(0.8), Inches(5.0), Inches(7.5), Inches(1.85), size=13)
bullet_box(s, [
    "HNSW = Hierarchical Navigable Small World",
    "Approximativer k-NN Index",
    "Sehr schnell bei der Suche",
], Inches(8.8), Inches(5.0), Inches(4.3), Inches(1.85), size=18, color=DIM_CLR)

# ════════════════════════════════════════════════════════════════════════════
# Slide 9 — PostgreSQL: search query
# ════════════════════════════════════════════════════════════════════════════
s = add_slide()
section_header(s, "PostgreSQL: Suchanfrage", ACCENT_PG)
bullet_box(s, [
    "1. Text-Anfrage mit CLIP in Python in einen Vektor umwandeln",
    "2. Vektor an die SQL-Abfrage übergeben",
    "3. PostgreSQL findet die ähnlichsten Bilder via HNSW-Index",
], Inches(0.8), Inches(1.3), Inches(11.5), Inches(1.5), size=20)
code_box(s,
         "# Python\n"
         "vec = model.encode(\"Bäume\") # → 512 floats\n"
         "\n"
         "# SQL\n"
         "SELECT filename,\n"
         " 1 - (embedding <=> %s::vector) AS score\n"
         "FROM images\n"
         "ORDER BY embedding <=> %s::vector\n"
         "LIMIT 12;",
         Inches(0.8), Inches(3.0), Inches(7.5), Inches(3.5), size=16)
bullet_box(s, [
    "<=> Cosinus-Distanz-Operator",
    "(pgvector-spezifisch)",
    "",
    # Uses the same placeholder as the query in the code box on this slide.
    "%s::vector expliziter Cast",
    "erforderlich",
    "",
    "LIMIT statt FETCH FIRST",
], Inches(9.0), Inches(3.0), Inches(4.0), Inches(3.5), size=18, color=DIM_CLR)

# ════════════════════════════════════════════════════════════════════════════
# Slide 10 — Oracle 26ai: native support
# ════════════════════════════════════════════════════════════════════════════
s = add_slide()
section_header(s, "Oracle 26ai — nativer Vektor-Support", ACCENT_ORA)
txb(s, "Was wird benötigt?", Inches(0.8), Inches(1.3), Inches(11), Inches(0.5),
    size=22, bold=True, color=ACCENT_ORA)
bullet_box(s, [
    "▸ Oracle AI Database 26ai Free (oder Enterprise)",
    "▸ Keine Extension nötig — Vektoren sind eingebaut",
    "▸ Vector Memory Area im SGA konfigurieren (für HNSW-Index)",
    "▸ Python-Paket: oracledb (Thin Mode — kein Oracle Client nötig)",
    "▸ KI-Bibliothek: sentence-transformers (auf dem Anwendungsserver)",
], Inches(0.8), Inches(1.9), Inches(11.5), Inches(2.2), size=20)
divider(s, Inches(4.2))
txb(s, "Schema & Index", Inches(0.8), Inches(4.3), Inches(11), Inches(0.45),
    size=20, bold=True, color=ACCENT_ORA)
# (Oracle native-support slide, continued) schema and index definition
oracle_schema_sql = (
    "CREATE TABLE images (\n"
    " id NUMBER GENERATED ALWAYS AS IDENTITY PRIMARY KEY,\n"
    " filename VARCHAR2(255) NOT NULL UNIQUE,\n"
    " embedding VECTOR(512, FLOAT32) -- Typ + Dimension\n"
    ");\n"
    "CREATE VECTOR INDEX images_idx ON images(embedding)\n"
    " ORGANIZATION INMEMORY NEIGHBOR GRAPH\n"
    " WITH DISTANCE COSINE WITH TARGET ACCURACY 95;"
)
code_box(s, oracle_schema_sql,
         Inches(0.8), Inches(4.8), Inches(8.5), Inches(2.0), size=11)
bullet_box(s, [
    "HNSW im SGA",
    "(Vector Memory Area)",
    "512 MB konfiguriert",
], Inches(9.8), Inches(4.8), Inches(3.3), Inches(2.0), size=17, color=DIM_CLR)

# ════════════════════════════════════════════════════════════════════════════
# Slide 11 — Oracle vs. pgvector: schema differences
# ════════════════════════════════════════════════════════════════════════════
s = add_slide()
section_header(s, "Oracle vs. pgvector — Schema-Unterschiede", ACCENT_ORA)

# (aspect, PostgreSQL + pgvector, Oracle 26ai)
rows = [
    ("Extension", "CREATE EXTENSION vector", "Eingebaut, keine Extension"),
    ("Vektor-Spalte", "vector(512) — nur Dimension", "VECTOR(512, FLOAT32) — Dim + Typ"),
    ("Primary Key", "SERIAL", "NUMBER GENERATED ALWAYS AS IDENTITY"),
    ("Text-Spalte", "TEXT (unbegrenzt)", "VARCHAR2(n) — Länge erforderlich"),
    ("HNSW-Syntax", "USING hnsw (...ops)", "ORGANIZATION INMEMORY NEIGHBOR GRAPH"),
    ("Genauigkeit", "Implizit via Index-Parameter", "WITH TARGET ACCURACY 95 (explizit)"),
    ("Speicher", "Kein Sonder-Speicher nötig", "vector_memory_size im SGA"),
    ("Abstand-Op", "<=> (Operator)", "VECTOR_DISTANCE(col, vec, COSINE)"),
    ("Top-N", "LIMIT n", "FETCH FIRST n ROWS ONLY"),
]

# Column geometry shared by the header and every data row:
# (left edge in inches, width in inches)
table_columns = [(0.4, 2.2), (2.7, 4.8), (7.6, 5.4)]

# Column header row
y = Inches(1.3)
hdr_bg = s.shapes.add_shape(1, Inches(0.3), y, Inches(12.7), Inches(0.55))
hdr_bg.fill.solid()
hdr_bg.fill.fore_color.rgb = RGBColor(0x18, 0x18, 0x28)
hdr_bg.line.fill.background()
header_cells = [
    ("Aspekt", BODY_CLR),
    ("PostgreSQL + pgvector", ACCENT_PG),
    ("Oracle 26ai", ACCENT_ORA),
]
for (hdr_text, hdr_color), (col_x, col_w) in zip(header_cells, table_columns):
    txb(s, hdr_text, Inches(col_x), y + Pt(6), Inches(col_w), Inches(0.5),
        size=14, bold=True, color=hdr_color)
y += Inches(0.56)

# Data rows with alternating background stripes
stripe_even = RGBColor(0x28, 0x29, 0x3d)
stripe_odd = RGBColor(0x24, 0x25, 0x38)
for i, (aspect, pg, ora) in enumerate(rows):
    if i % 2 == 0:
        bg_color = stripe_even
    else:
        bg_color = stripe_odd
    row_bg = s.shapes.add_shape(1, Inches(0.3), y, Inches(12.7), Inches(0.52))
    row_bg.fill.solid()
    row_bg.fill.fore_color.rgb = bg_color
    row_bg.line.fill.background()
    # (text, bold, colour) for the three cells, left to right
    row_cells = [
        (aspect, True, DIM_CLR),
        (pg, False, ACCENT_PG),
        (ora, False, ACCENT_ORA),
    ]
    for (cell_text, cell_bold, cell_color), (col_x, col_w) in zip(row_cells, table_columns):
        txb(s, cell_text, Inches(col_x), y + Pt(5), Inches(col_w), Inches(0.48),
            size=13, bold=cell_bold, color=cell_color)
    y += Inches(0.53)

# ════════════════════════════════════════════════════════════════════════════
# Slide 12 — Oracle in-database embedding
# ════════════════════════════════════════════════════════════════════════════
s = add_slide()
section_header(s, "Oracle 26ai — Embedding in der Datenbank", ACCENT_IDB)
bullet_box(s, [
    "▸ Oracle kann ONNX-Modelle direkt in die Datenbank laden",
    " (ONNX = Open Neural Network Exchange)",
    "▸ VECTOR_EMBEDDING() ruft das Modell innerhalb einer SQL-Abfrage auf",
    "▸ Kein Python, keine KI-Bibliothek auf dem Anwendungsserver zur Laufzeit",
    "▸ Der Text-String ist der einzige Parameter aus Python",
    "▸ Schema: VECTOR — Tabelle: FOTO_VEKTOR — Bilder als BLOB gespeichert",
    "▸ HNSW-Index auf FOTO_VEKTOR (wie in Schema VECTORS_USER)",
], Inches(0.8), Inches(1.3), Inches(11.5), Inches(2.4), size=16)
in_db_query_sql = (
    "-- Gesamte Logik in einem SQL-Statement\n"
    "SELECT filename,\n"
    " 1 - VECTOR_DISTANCE(\n"
    " foto_vek,\n"
    " VECTOR_EMBEDDING(CLIP_TXT USING :q AS data),\n"
    " COSINE\n"
    " ) AS score\n"
    "FROM VECTOR.FOTO_VEKTOR\n"
    "ORDER BY VECTOR_DISTANCE(\n"
    " foto_vek,\n"
    " VECTOR_EMBEDDING(CLIP_TXT USING :q AS data), COSINE)\n"
    "FETCH FIRST 12 ROWS ONLY;"
)
code_box(s, in_db_query_sql,
         Inches(0.8), Inches(3.8), Inches(7.5), Inches(3.0), size=11)
bullet_box(s, [
    ":q = reiner Text aus Python",
    "",
    "Oracle übernimmt:",
    " • Tokenisierung",
    " • ONNX-Inferenz",
    " • Vektorsuche",
    "",
    "→ Architektur vereinfacht sich",
], Inches(9.0), Inches(3.8), Inches(4.0), Inches(3.0), size=16, color=DIM_CLR)

# ════════════════════════════════════════════════════════════════════════════
# Slide 13 — ONNX in Oracle: what to watch out for
# ════════════════════════════════════════════════════════════════════════════
s = add_slide()
section_header(s, "ONNX in Oracle: Was zu beachten ist", ACCENT_IDB)
bullet_box(s, [
    "Oracle's ONNX-Validator stellt strenge Anforderungen an das Modell-Graph:",
    "",
    "▸ input_ids darf nur in einem einzigen Gather-Knoten verwendet werden",
    "▸ Standard-CLIP-Export verwendet input_ids auch in ArgMax → wird abgelehnt",
    "",
    "Lösung: CLIP_TXT mit CLS-Token-Pooling (Position 0) statt EOS-Token-Pooling",
    "▸ Einfacherer ONNX-Graph, den Oracle akzeptiert",
    "▸ Cosinus-Ähnlichkeit zwischen EOS- und CLS-Variante: ~0,70",
    "▸ Modell muss beim Export entsprechend angepasst werden",
], Inches(0.8), Inches(1.3), Inches(11.5), Inches(3.8), size=19)
load_model_sql = (
    "-- Modell laden (einmalig durch Administrator)\n"
    "EXEC DBMS_VECTOR.LOAD_ONNX_MODEL(\n"
    " 'VEC_DUMP', 'clip_txt.onnx', 'CLIP_TXT',\n"
    " JSON('{\"function\":\"embedding\",\"embeddingOutput\":\"output\",\n"
    " \"input\":{\"input\":[\"DATA\"]}}'));"
)
code_box(s, load_model_sql,
         Inches(0.8), Inches(5.2), Inches(11.5), Inches(1.6), size=13)

# ════════════════════════════════════════════════════════════════════════════
# Slide 14 — Architecture: where is CLIP computed?
# ════════════════════════════════════════════════════════════════════════════
s = add_slide()
section_header(s, "Architektur der Demo", ACCENT_GRN)
s.shapes.add_picture(DIAG_ARCH, Inches(0.3), Inches(1.1), Inches(12.73), Inches(5.7))

# ════════════════════════════════════════════════════════════════════════════
# Slide 15 — Demo pointers
# ════════════════════════════════════════════════════════════════════════════
s = add_slide()
section_header(s, "Demo", ACCENT_GRN)
for url, label, color, y in [
    ("http://localhost:8000/ui/", "pgvector (blau)", ACCENT_PG, Inches(2.2)),
    ("http://localhost:8001/ui/", "Oracle 26ai (rot)", ACCENT_ORA, Inches(3.5)),
    ("http://localhost:8002/ui/", "Oracle In-DB (lila)", ACCENT_IDB, Inches(4.8)),
]:
    txb(s, url, Inches(1.5), y, Inches(6), Inches(0.5),
        size=22, bold=True, color=color)
    txb(s, label, Inches(7.8), y + Inches(0.05), Inches(4.5), Inches(0.5),
        size=20, color=DIM_CLR)
txb(s, "Suchbegriffe zum Ausprobieren:", Inches(1.5), Inches(5.9), Inches(10),
    Inches(0.5), size=18, color=BODY_CLR)
txb(s, "Bäume · Wasser · Menschen · Gebäude · Himmel · Nacht · Autos",
    Inches(1.5), Inches(6.3), Inches(10), Inches(0.6),
    size=20, bold=True, color=ACCENT_GRN)

# ════════════════════════════════════════════════════════════════════════════
# Slide 16 — Comparison
# ════════════════════════════════════════════════════════════════════════════
s = add_slide()
section_header(s, "Vergleich", ACCENT_PG)

# First tuple is the header row; the rest are data rows.
rows = [
    ("Merkmal", "PostgreSQL + pgvector", "Oracle · VECTORS_USER", "Oracle · VECTOR"),
    ("Fotos indiziert", "116", "116", "116"),
    ("Indizierungszeit", "Ø 12,1 Sek. (3 Läufe)", "Ø 12,1 Sek. (3 Läufe)", "Ø 13,6 Sek. (3 Läufe)"),
    ("Index-Typ", "HNSW (auf Disk)", "HNSW (im Speicher)", "HNSW (im Speicher)"),
    ("RAM-Bedarf", "Keiner", "512 MB SGA", "512 MB SGA"),
    ("CLIP zur Laufzeit", "Ja (Python)", "Ja (Python)", "Nein"),
    ("Embedding-Ort", "Python-Prozess", "Python-Prozess", "In der Datenbank"),
    ("VECTOR_EMBEDDING()", "—", "—", "Ja"),
    ("Extension nötig", "CREATE EXTENSION vector", "Nein", "Nein"),
]

# Column geometry in inches — identical for every row, so defined once.
widths = [2.5, 3.0, 3.1, 3.1]
xs = [0.4, 2.9, 6.0, 9.15]

y = Inches(1.3)
# Striping uses the position from enumerate rather than rows.index(row),
# which rescans the list per row and returns the first match for equal rows.
for i, row in enumerate(rows):
    header = i == 0
    if header:
        bg_color = RGBColor(0x18, 0x18, 0x28)
    elif i % 2 == 0:
        bg_color = RGBColor(0x28, 0x29, 0x3d)
    else:
        bg_color = RGBColor(0x24, 0x25, 0x38)
    row_bg = s.shapes.add_shape(1, Inches(0.3), y, Inches(12.7), Inches(0.52))
    row_bg.fill.solid()
    row_bg.fill.fore_color.rgb = bg_color
    row_bg.line.fill.background()

    # Only the first column's colour differs between header and data rows.
    first_col_color = DIM_CLR if header else BODY_CLR
    colors = [first_col_color, ACCENT_PG, ACCENT_ORA, ACCENT_IDB]
    for cell, col, w, x in zip(row, colors, widths, xs):
        txb(s, cell, Inches(x), y + Pt(4), Inches(w), Inches(0.48),
            size=13, bold=header, color=col)
    y += Inches(0.53)

# ════════════════════════════════════════════════════════════════════════════
# Slide 17 — Conclusion
# ════════════════════════════════════════════════════════════════════════════
s = add_slide()
section_header(s, "Fazit", ACCENT_GRN)
bullet_box(s, [
    "▸ Beide Datenbanken unterstützen Vektorsuche produktionsreif",
    "▸ pgvector: einfach, leichtgewichtig, kein zusätzlicher Speicher nötig",
    "▸ Oracle 26ai: vollständig integriert, kein Extension-Management",
    "▸ Oracle In-DB Embedding: Architektur ohne ML-Laufzeit im App-Server",
    "▸ CLIP ermöglicht Bildersuche per Freitext — ohne Tagging oder Metadaten",
    "▸ HNSW liefert schnelle approximative k-NN-Suche in beiden Datenbanken",
    "▸ VECTOR ist eine sehr willkommene Erweiterung — relationale Datenbanken",
    " nutzen damit KI-Embeddings als First-Class-Citizen",
], Inches(0.8), Inches(1.3), Inches(11.5), Inches(4.2), size=21)
divider(s, Inches(5.1))
txb(s, "Quellcode & Dokumentation", Inches(0.8), Inches(5.2), Inches(11),
    Inches(0.5), size=20, bold=True, color=BODY_CLR)
txb(s, "https://gitea.dl-cons.de/dierk/vector-search-demo", Inches(0.8),
    Inches(5.7), Inches(11), Inches(0.5), size=20, color=ACCENT_PG)
txb(s, "Programmierung und Folien unterstützt durch Claude (Anthropic)",
    Inches(0.8), Inches(6.55), Inches(11.33), Inches(0.35),
    size=13, italic=True, color=DIM_CLR, align=PP_ALIGN.CENTER)

# ════════════════════════════════════════════════════════════════════════════
# Save
# ════════════════════════════════════════════════════════════════════════════
OUT = "Vektoren in der Datenbank.pptx"
prs.save(OUT)
print(f"Saved: {OUT} ({len(prs.slides)} slides)")