From 4a82352391cecc9ca01f7a6166ae4ffc33097ee0 Mon Sep 17 00:00:00 2001
From: Dierk <dierk.lenz@dl-cons.de>
Date: Wed, 20 May 2026 12:04:10 +0200
Subject: [PATCH] Document CLIP model source in embedder.py and README

The model is downloaded automatically from the Hugging Face Hub on first use.
No manual download is required.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 README.md                          | 9 +++++++++
 oravector-demo/backend/embedder.py | 3 +++
 pgvector-demo/backend/embedder.py  | 3 +++
 3 files changed, 15 insertions(+)

diff --git a/README.md b/README.md
index 135a1aa..6860d73 100644
--- a/README.md
+++ b/README.md
@@ -439,6 +439,15 @@ pip3 install -r pgvector-demo/backend/requirements.txt --break-system-packages
 pip3 install -r oravector-demo/backend/requirements.txt --break-system-packages
 ```
 
+**CLIP model** — not included in the repository. It is downloaded automatically from
+Hugging Face Hub on first use (~600 MB, cached by default in `~/.cache/huggingface/hub/`):
+
+> `sentence-transformers/clip-ViT-B-32`
+> https://huggingface.co/sentence-transformers/clip-ViT-B-32
+
+No manual download is required — `sentence-transformers` handles this transparently
+the first time `index_images.py` or a backend loads the model (loading is lazy).
+
 ### 1. PostgreSQL
 
 **Start the container:**
diff --git a/oravector-demo/backend/embedder.py b/oravector-demo/backend/embedder.py
index 2b642ef..029fffa 100644
--- a/oravector-demo/backend/embedder.py
+++ b/oravector-demo/backend/embedder.py
@@ -6,6 +6,9 @@ _model = None
 def _get_model():
     # Lazy load: the CLIP model is ~600 MB and takes several seconds to initialise.
     # Loading on first call avoids the cost at import time and during indexing warmup.
+    # Downloaded automatically from Hugging Face Hub on first use:
+    # https://huggingface.co/sentence-transformers/clip-ViT-B-32
+    # Cached in ~/.cache/huggingface/hub/
     global _model
     if _model is None:
         _model = SentenceTransformer("clip-ViT-B-32")
diff --git a/pgvector-demo/backend/embedder.py b/pgvector-demo/backend/embedder.py
index 2b642ef..029fffa 100644
--- a/pgvector-demo/backend/embedder.py
+++ b/pgvector-demo/backend/embedder.py
@@ -6,6 +6,9 @@ _model = None
 def _get_model():
     # Lazy load: the CLIP model is ~600 MB and takes several seconds to initialise.
     # Loading on first call avoids the cost at import time and during indexing warmup.
+    # Downloaded automatically from Hugging Face Hub on first use:
+    # https://huggingface.co/sentence-transformers/clip-ViT-B-32
+    # Cached in ~/.cache/huggingface/hub/
     global _model
     if _model is None:
         _model = SentenceTransformer("clip-ViT-B-32")