diff --git a/Vektoren in der Datenbank.pptx b/Vektoren in der Datenbank.pptx index f738af0..23854db 100644 Binary files a/Vektoren in der Datenbank.pptx and b/Vektoren in der Datenbank.pptx differ diff --git a/make_presentation.py b/make_presentation.py index de00208..c501e46 100644 --- a/make_presentation.py +++ b/make_presentation.py @@ -83,7 +83,7 @@ def diagram_s3_vectors(): ax.add_patch(mpatches.Arc((0, 0), 0.52, 0.52, angle=0, theta1=a1, theta2=a3, color="#fab387", lw=2)) - ax.text(-0.10, 0.34, "groß", color="#fab387", fontsize=10) + ax.text(-0.35, 0.28, "groß", color="#fab387", fontsize=10) plt.tight_layout(pad=0.3) return _save(fig, "s3_vectors.png") @@ -132,7 +132,7 @@ def diagram_s6_cosine(): vB = np.array([0.55, 1.0 ]) # text vector for v, color, label, lpos in [ - (vA, "#89b4fa", "Bild-Vektor", (1.12, 0.18)), + (vA, "#89b4fa", "Bild-Vektor", (1.17, 0.08)), (vB, "#cba6f7", 'Text-Vektor\n"Bäume"', (0.56, 1.07)), ]: ax.annotate("", xy=v, xytext=(0, 0), @@ -167,82 +167,120 @@ def diagram_s6_cosine(): def diagram_architecture(): """Architecture slide: 3 columns showing app server, database, and where CLIP runs.""" CLIP_CLR = "#a6e3a1" - # (x, db_name, color, port, clip_app, clip_db, db_tech, vec_embed_fn) + # (x, db_name, color, port, clip_app, clip_db, db_tech, vec_embed_fn, foto_storage) COLS = [ - (2.3, "PostgreSQL 18", "#89b4fa", "Port 8000", True, False, "pgvector 0.8.2\nHNSW (Disk)", None), - (6.65, "Oracle 26ai\nVECTORS_USER", "#f38ba8", "Port 8001", True, False, "HNSW (SGA)", None), - (11.0, "Oracle 26ai\nVECTOR", "#cba6f7", "Port 8002", False, True, "HNSW (SGA)", "VECTOR_EMBEDDING()"), + (2.3, "PostgreSQL 18", "#89b4fa", "Port 8000", True, False, "pgvector 0.8.2\nHNSW (Disk)", None, "Fotos: Dateipfad (Filesystem)"), + (6.65, "Oracle 26ai\nVECTORS_USER", "#f38ba8", "Port 8001", True, False, "HNSW (SGA)", None, "Fotos: Dateipfad (Filesystem)"), + (11.0, "Oracle 26ai\nVECTOR", "#cba6f7", "Port 8002", False, True, "HNSW (SGA)", "VECTOR_EMBEDDING()", "Fotos: BLOB (in Oracle)"), ] + BOX_H = 2.2 # all boxes same height + DB_Y = 0.15 # database box bottom + GAP = 0.60 # space between DB top and app server bottom + APP_Y = DB_Y + BOX_H + GAP # = 2.95 + fig, ax = _fig(13.5, 6.5) - ax.set_xlim(0, 13.5); ax.set_ylim(-0.8, 6.0) + ax.set_xlim(0, 13.5); ax.set_ylim(-0.8, 5.9) ax.axis("off") - for x, db_name, color, port, clip_app, clip_db, db_tech, vec_fn in COLS: - # ── Column title + port - ax.text(x, 5.78, port, ha="center", color=color, fontsize=13, fontweight="bold") + for x, db_name, color, port, clip_app, clip_db, db_tech, vec_fn, foto_storage in COLS: + APP_TOP = APP_Y + BOX_H # = 5.15 + DB_TOP = DB_Y + BOX_H # = 2.35 + + # ── Port label + ax.text(x, APP_TOP + 0.28, port, ha="center", color=color, + fontsize=13, fontweight="bold") # ── App server box ax.add_patch(mpatches.FancyBboxPatch( - (x-1.7, 3.7), 3.4, 1.85, + (x-1.7, APP_Y), 3.4, BOX_H, boxstyle="round,pad=0.1", facecolor="#28293d", edgecolor=color, lw=2)) - ax.text(x, 5.38, "App-Server (FastAPI)", ha="center", + ax.text(x, APP_TOP - 0.22, "App-Server (FastAPI)", ha="center", color=color, fontsize=11, fontweight="bold") if clip_app: ax.add_patch(mpatches.FancyBboxPatch( - (x-1.2, 3.78), 2.4, 0.82, + (x-1.2, APP_Y + 0.10), 2.4, 0.75, boxstyle="round,pad=0.08", facecolor="#1e1e2e", edgecolor=CLIP_CLR, lw=2)) - ax.text(x, 4.19, "CLIP-Modell\n(sentence-transformers)", + ax.text(x, APP_Y + 0.475, "CLIP-Modell\n(sentence-transformers)", ha="center", va="center", color=CLIP_CLR, fontsize=9.5, fontweight="bold", multialignment="center") + ax.add_patch(mpatches.FancyBboxPatch( + (x-1.2, APP_Y + 0.95), 2.4, 0.42, + boxstyle="round,pad=0.06", facecolor="#1e1e2e", edgecolor=DIAG_AXIS, lw=1, + linestyle="dashed")) + ax.text(x, APP_Y + 1.16, foto_storage, + ha="center", va="center", color=DIAG_AXIS, fontsize=9, style="italic") else: ax.add_patch(mpatches.FancyBboxPatch( - (x-1.2, 3.78), 2.4, 0.82, + (x-1.2, APP_Y + 0.10), 2.4, 0.75, boxstyle="round,pad=0.08", facecolor="#1e1e2e", edgecolor=DIAG_AXIS, lw=1, linestyle="dashed")) - ax.text(x, 4.19, "kein CLIP", + ax.text(x, APP_Y + 0.475, "kein CLIP", ha="center", va="center", color=DIAG_AXIS, fontsize=10, style="italic") - # ── Arrow + what is sent - ax.annotate("", xy=(x, 3.05), xytext=(x, 3.65), + # ── Arrow with comfortable gap + ax.annotate("", xy=(x, DB_TOP + 0.05), xytext=(x, APP_Y - 0.05), arrowprops=dict(arrowstyle="->", color=DIAG_AXIS, lw=2)) arrow_lbl = "Vektor (512 floats)" if clip_app else "Text-String" - ax.text(x, 3.35, arrow_lbl, ha="center", va="center", + ax.text(x + 0.2, (DB_TOP + APP_Y) / 2, arrow_lbl, ha="left", va="center", color=DIAG_AXIS, fontsize=9, style="italic") # ── Database box - db_h = 2.8 if clip_db else 1.9 ax.add_patch(mpatches.FancyBboxPatch( - (x-1.7, 0.15), 3.4, db_h, + (x-1.7, DB_Y), 3.4, BOX_H, boxstyle="round,pad=0.1", facecolor="#28293d", edgecolor=color, lw=2)) if clip_db: - # CLIP ONNX box inside DB ax.add_patch(mpatches.FancyBboxPatch( - (x-1.2, 0.25), 2.4, 0.82, + (x-1.2, DB_Y + 0.10), 2.4, 0.72, boxstyle="round,pad=0.08", facecolor="#1e1e2e", edgecolor=CLIP_CLR, lw=2)) - ax.text(x, 0.66, "CLIP-Modell\n(ONNX, in Oracle)", + ax.text(x, DB_Y + 0.46, "CLIP-Modell\n(ONNX, in Oracle)", ha="center", va="center", color=CLIP_CLR, fontsize=9.5, fontweight="bold", multialignment="center") - # VECTOR_EMBEDDING() label - ax.text(x, 1.22, vec_fn, + ax.add_patch(mpatches.FancyBboxPatch( + (x-1.2, DB_Y + 0.92), 2.4, 0.40, + boxstyle="round,pad=0.06", facecolor="#1e1e2e", edgecolor=DIAG_AXIS, lw=1, + linestyle="dashed")) + ax.text(x, DB_Y + 1.12, foto_storage, + ha="center", va="center", color=DIAG_AXIS, fontsize=9, style="italic") + ax.text(x, DB_Y + 1.50, vec_fn, ha="center", color="#fab387", fontsize=10, fontweight="bold", fontfamily="monospace") - # DB name - ax.text(x, 1.65, db_name, ha="center", color=color, + ax.text(x, DB_Y + 1.72, "Oracle 26ai", ha="center", color=color, fontsize=11, fontweight="bold") - ax.text(x, 2.35, db_tech, ha="center", color=DIAG_AXIS, - fontsize=9, multialignment="center") + ax.text(x, DB_Y + 1.92, "Schema: VECTOR", ha="center", color=color, + fontsize=9) + ax.text(x, DB_Y + 2.10, db_tech, ha="center", color=DIAG_AXIS, + fontsize=9) else: - ax.text(x, 1.5, db_name, ha="center", color=color, + # Split db_name → ["PostgreSQL 18"] or ["Oracle 26ai", "VECTORS_USER"] + # Split db_tech → ["pgvector 0.8.2", "HNSW (Disk)"] or ["HNSW (SGA)"] + name_parts = db_name.split("\n") + tech_parts = db_tech.split("\n") + hnsw = tech_parts[-1] # always last + tech_extra = tech_parts[:-1] # e.g. ["pgvector 0.8.2"] + + # HNSW — same height across all three DB boxes + ax.text(x, DB_Y + 2.10, hnsw, ha="center", color=DIAG_AXIS, fontsize=9) + + # Middle line: schema name or version info (matches "Schema: VECTOR" in col 3) + if len(name_parts) > 1: + mid_label = "Schema: " + name_parts[1] + elif tech_extra: + mid_label = tech_extra[0] + else: + mid_label = "" + if mid_label: + ax.text(x, DB_Y + 1.92, mid_label, ha="center", color=color, fontsize=9) + + # Main DB name (matches "Oracle 26ai" in col 3) + ax.text(x, DB_Y + 1.72, name_parts[0], ha="center", color=color, fontsize=11, fontweight="bold") - ax.text(x, 0.72, db_tech, ha="center", color=DIAG_AXIS, - fontsize=9, multialignment="center") # ── Vertical separators for xsep in [4.5, 8.85]: - ax.plot([xsep, xsep], [0.05, 5.9], color=DIAG_GRID, lw=1, linestyle="--") + ax.plot([xsep, xsep], [0.05, 5.55], color=DIAG_GRID, lw=1, linestyle="--") # ── Caption — separated from boxes, applies to all three columns ax.plot([0.3, 13.2], [-0.18, -0.18], color=DIAG_GRID, lw=1) @@ -437,7 +475,7 @@ def divider(slide, y, color=DIM_CLR): s = add_slide(logo=False, footer=False) # title slide: custom layout title_slide_layout(s, "Vektoren in der Datenbank", - "Semantische Bildsuche mit PostgreSQL/pgvector und Oracle 26ai") + "Der VECTOR-Datentyp in Oracle 26ai und PostgreSQL") # Conference details txb(s, CONFERENCE, Inches(1), Inches(5.0), Inches(11.33), Inches(0.5), @@ -449,7 +487,30 @@ txb(s, f"{EVENT_DATE} · {EVENT_CITY}", s.shapes.add_picture(LOGO_PATH, Inches(4.67), Inches(6.1), Inches(4.0), Inches(1.06)) # ════════════════════════════════════════════════════════════════════════════ -# Slide 2 — Agenda +# Slide 2 — Motivation: Der VECTOR-Datentyp +# ════════════════════════════════════════════════════════════════════════════ +s = add_slide() +section_header(s, "Der VECTOR-Datentyp", ACCENT_PG) +bullet_box(s, [ + "▸ VECTOR ist ein neuer nativer Datentyp in Oracle AI Database 26ai und PostgreSQL (pgvector)", + "▸ Ermöglicht das Speichern hochdimensionaler Vektoren direkt in der Datenbank", + "▸ Bringt optimierte Suchoperatoren und Indizes für Ähnlichkeitssuche (k-NN) mit", + "▸ Macht KI-Embeddings zu einem First-Class-Citizen in relationalen Datenbanken", +], Inches(0.8), Inches(1.3), Inches(11.5), Inches(2.2), size=22) + +divider(s, Inches(3.7)) + +txb(s, "Ziel dieses Vortrags", Inches(0.8), Inches(3.85), Inches(11.5), Inches(0.5), + size=22, bold=True, color=ACCENT_PG) +bullet_box(s, [ + "▸ Den VECTOR-Datentyp erklären — was er ist, wie er funktioniert", + "▸ Gemeinsamkeiten und Unterschiede zwischen Oracle 26ai und PostgreSQL/pgvector zeigen", + "▸ Eine konkrete Demo: semantische Bildsuche mit 116 Street-Fotos", + "▸ Drei Ansätze vergleichen: pgvector, Oracle (Python-Embedding), Oracle (In-Database-Embedding)", +], Inches(0.8), Inches(4.4), Inches(11.5), Inches(2.3), size=20) + +# ════════════════════════════════════════════════════════════════════════════ +# Slide 3 — Agenda # ════════════════════════════════════════════════════════════════════════════ s = add_slide() section_header(s, "Agenda", ACCENT_PG) @@ -483,8 +544,8 @@ bullet_box(s, [ s.shapes.add_picture(DIAG_S3, Inches(7.8), Inches(1.1), Inches(5.3), Inches(5.3)) txb(s, "Vektoren machen Ähnlichkeit berechenbar.", - Inches(0.8), Inches(5.8), Inches(6.8), Inches(0.7), - size=22, bold=True, color=ACCENT_GRN) + Inches(0.3), Inches(5.75), Inches(7.4), Inches(0.8), + size=26, bold=True, color=ACCENT_GRN) # ════════════════════════════════════════════════════════════════════════════ # Slide 4 — Semantische Suche @@ -696,11 +757,11 @@ bullet_box(s, [ "▸ Der Text-String ist der einzige Parameter aus Python", "▸ Schema: VECTOR — Tabelle: FOTO_VEKTOR — Bilder als BLOB gespeichert", "▸ HNSW-Index auf FOTO_VEKTOR (wie in Schema VECTORS_USER)", -], Inches(0.8), Inches(1.3), Inches(11.5), Inches(3.0), size=19) +], Inches(0.8), Inches(1.3), Inches(11.5), Inches(2.4), size=16) code_box(s, "-- Gesamte Logik in einem SQL-Statement\nSELECT filename,\n 1 - VECTOR_DISTANCE(\n foto_vek,\n VECTOR_EMBEDDING(CLIP_TXT USING :q AS data),\n COSINE\n ) AS score\nFROM VECTOR.FOTO_VEKTOR\nORDER BY VECTOR_DISTANCE(\n foto_vek,\n VECTOR_EMBEDDING(CLIP_TXT USING :q AS data), COSINE)\nFETCH FIRST 12 ROWS ONLY;", - Inches(0.8), Inches(3.6), Inches(7.5), Inches(3.3), size=13) + Inches(0.8), Inches(3.8), Inches(7.5), Inches(3.0), size=11) bullet_box(s, [ ":q = reiner Text aus Python", @@ -711,7 +772,7 @@ bullet_box(s, [ " • Vektorsuche", "", "→ Architektur vereinfacht sich", -], Inches(9.0), Inches(3.6), Inches(4.0), Inches(3.4), size=18, color=DIM_CLR) +], Inches(9.0), Inches(3.8), Inches(4.0), Inches(3.0), size=16, color=DIM_CLR) # ════════════════════════════════════════════════════════════════════════════ # Slide 12 — ONNX in Oracle: Besonderheit @@ -809,7 +870,9 @@ bullet_box(s, [ "▸ Oracle In-DB Embedding: Architektur ohne ML-Laufzeit im App-Server", "▸ CLIP ermöglicht Bildersuche per Freitext — ohne Tagging oder Metadaten", "▸ HNSW liefert schnelle approximative k-NN-Suche in beiden Datenbanken", -], Inches(0.8), Inches(1.3), Inches(11.5), Inches(3.5), size=21) + "▸ VECTOR ist eine sehr willkommene Erweiterung — relationale Datenbanken", + " nutzen damit KI-Embeddings als First-Class-Citizen", +], Inches(0.8), Inches(1.3), Inches(11.5), Inches(4.2), size=21) divider(s, Inches(5.1))