From 389fee25988213159370b7a4e10f00b136e50751 Mon Sep 17 00:00:00 2001
From: Sebastian Gode <sebastian.gode@telekom.de>
Date: Tue, 3 Mar 2026 09:37:49 +0000
Subject: [PATCH 1/6] Script for LLM

---
 tools/generate_meta.py | 373 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 373 insertions(+)
 create mode 100755 tools/generate_meta.py
diff --git a/tools/generate_meta.py b/tools/generate_meta.py
new file mode 100755
index 0000000..20c10e9
--- /dev/null
+++ b/tools/generate_meta.py
@@ -0,0 +1,373 @@
+#!/usr/bin/python
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import base64
+import logging
+import pathlib
+import re
+import requests
+import subprocess
+import sys
+
+from git import exc
+from git import Repo
+
+import otc_metadata.services
+
+data = otc_metadata.services.Services()
+
+api_session = requests.Session()
+
+
+def remove_thinking_content(text):
+    """Remove thinking process content between thinking markers."""
+    # Remove everything between <think> and </think> markers
+    text = re.sub(r'(?is)<think>.*?</think>', '', text, flags=re.DOTALL | re.IGNORECASE)
+    return text.strip()
+
+
+def generate_description_with_llm(text, service_title, llm_api_url, model_name, api_username, api_password):
+    """Generate a meta description using the LLM API."""
+    prompt = (
+        "/no_think\n"
+        f"Generate a concise HTML meta description (maximum 160 characters, minimum 40 characters) "
+        f"for the following documentation content of the service '{service_title}'. "
+        f"The description should be suitable for search engines and summarize the content. "
+        f"Do not include any markdown formatting, quotes, or meta-commentary.\n\n"
+        f"Content:\n{text[:2000]}\n\n"
+        f"Meta description:"
+    )
+
+    try:
+        headers = {"Content-Type": "application/json"}
+        if api_username and api_password:
+            credentials = f"{api_username}:{api_password}"
+            encoded_credentials = base64.b64encode(credentials.encode()).decode()
+            headers["Authorization"] = f"Basic {encoded_credentials}"
+
+        response = requests.post(
+            llm_api_url,
+            json={
+                "prompt": prompt,
+                "model": model_name,
+                "temperature": 0.2,
+
+                "repeat_last_n": 128,
+                "repeat_penalty": 1.15,
+                "presence_penalty": 0.2,
+                "frequency_penalty": 0.2,
+
+                # optional DRY anti-looping (try only if it still loops)
+                "dry_multiplier": 0.5,
+                "dry_base": 1.75,
+                "dry_allowed_length": 2,
+
+            },
+            headers=headers,
+            timeout=15,
+        )
+        response.raise_for_status()
+        result = response.json()
+        if "choices" in result and len(result["choices"]) > 0:
+            description = result["choices"][0]["text"].strip()
+            description = remove_thinking_content(description)
+            # If description is empty or just whitespace, use fallback
+            if not description or description.isspace():
+                return f"{service_title} documentation"
+            # Extract only the first sentence (meta description should be one sentence)
+            parts = description.split(".")
+            first_sentence = parts[0].strip() + "."
+            if len(first_sentence) <= 1:
+                first_sentence = description[:160].strip() + "."
+            if len(first_sentence) > 160:
+                first_sentence = first_sentence[:157] + "..."
+            return first_sentence
+        elif "response" in result:
+            description = result["response"].strip()
+            description = remove_thinking_content(description)
+            if not description or description.isspace():
+                return f"{service_title} documentation"
+            first_sentence = description.split(".")[0].strip() + "."
+            if len(first_sentence) <= 1:
+                first_sentence = description[:160].strip() + "."
+            if len(first_sentence) > 160:
+                first_sentence = first_sentence[:157] + "..."
+            return first_sentence
+        elif isinstance(result, dict) and "text" in result:
+            description = result["text"].strip()
+            description = remove_thinking_content(description)
+            if not description or description.isspace():
+                return f"{service_title} documentation"
+            first_sentence = description.split(".")[0].strip() + "."
+            if len(first_sentence) <= 1:
+                first_sentence = description[:160].strip() + "."
+            if len(first_sentence) > 160:
+                first_sentence = first_sentence[:157] + "..."
+            return first_sentence
+    except requests.exceptions.RequestException as e:
+        logging.warning(f"LLM API request failed: {e}. Using fallback description.")
+    except (KeyError, ValueError, IndexError) as e:
+        logging.warning(f"LLM API response parsing failed: {e}. Using fallback description.")
+
+    # Fallback: Extract first sentence from content
+    lines = text.split("\n")
+    for line in lines:
+        line = line.strip()
+        if line and not line.startswith("-") and not line.startswith("#"):
+            first_sentence = line.split(".")[0] + "."
+            if len(first_sentence) > 160:
+                first_sentence = first_sentence[:157] + "..."
+            return first_sentence
+
+    return f"{service_title} documentation"
+
+
+def read_rst_content(file_path):
+    """Read and return the content of an RST file."""
+    with open(file_path, "r", encoding="utf-8") as f:
+        return f.read()
+
+
+def add_sphinx_metadata(file_path, meta_description):
+    """Add Sphinx-compatible meta block at the end of an RST file."""
+    content = read_rst_content(file_path)
+
+    meta_block = (
+        "\n"
+        ".. meta::\n"
+        "   :description: {}\n".format(meta_description)
+    )
+
+    # Check if meta block already exists
+    if ".. meta::" in content:
+        logging.debug(f"Meta block already exists in {file_path}. Skipping.")
+        return False
+
+    # Append meta block at the end of the file
+    new_content = content.rstrip() + meta_block
+
+    with open(file_path, "w", encoding="utf-8", newline="") as f:
+        f.write(new_content)
+
+    return True
+
+
+def process_service(args, service):
+    """Add generated meta blocks to the RST files of one service.
+
+    Checks out the service repository below args.work_dir, writes the
+    metadata on a new branch, then commits, pushes and (GitHub) opens a PR.
+    """
+    logging.debug(f"Processing service {service['service_title']}")
+    workdir = pathlib.Path(args.work_dir)
+    workdir.mkdir(parents=True, exist_ok=True)
+
+    repo = repo_dir = repo_url = git_repo = None
+    error_list = []
+    for r in service["repositories"]:
+        # Select the repository matching both cloud and target environment.
+        if (
+            r["cloud_environments"][0] == args.cloud_environment
+            and r["environment"] == args.target_environment
+        ):
+            repo = r
+            repo_dir = workdir / r["type"] / r["repo"]
+            break
+        logging.debug(f"Skipping repository {r}")
+
+    if repo is None:
+        logging.info(f"No repository found for service {service['service_title']}")
+        return
+
+    if repo_dir.exists():
+        logging.debug(f"Repository {repo_dir} already exists")
+        try:
+            git_repo = Repo(repo_dir)
+            git_repo.remotes.origin.fetch()
+            git_repo.heads.main.checkout()
+            git_repo.remotes.origin.pull()
+        except exc.InvalidGitRepositoryError:
+            logging.error("Existing repository checkout is bad")
+            import shutil
+            shutil.rmtree(repo_dir)
+            git_repo = None
+        except Exception as e:
+            error_list.append({"error": e, "repo": repo["repo"]})
+
+    if not repo_dir.exists() or git_repo is None:
+        if repo["type"] == "gitea":
+            repo_url = (
+                f"ssh://git@gitea.eco.tsi-dev.otc-service.com:2222/"
+                f"{repo['repo']}"
+            )
+        elif repo["type"] == "github":
+            repo_url = f"git@github.com:{repo['repo']}"
+        else:
+            logging.error(f"Repository type {repo['type']} is not supported")
+            error_list.append({"error": f"Repository type {repo['type']} is not supported", "repo": repo["repo"]})
+            return
+
+        try:
+            logging.debug(f"Cloning repository {repo_url}")
+            git_repo = Repo.clone_from(repo_url, repo_dir, branch="main")
+        except Exception as e:
+            logging.error(f"Error cloning repository {repo_url}: {e}")
+            error_list.append({"error": f"Error cloning repository {repo_url}", "repo": repo["repo"]})
+            return
+
+    branch_name = f"add-meta-{args.branch_name}"
+
+    try:
+        new_branch = git_repo.create_head(branch_name, "main")
+    except Exception as e:
+        logging.warning(f"Skipping service {service} due to {e}")
+        error_list.append({"error": e, "repo": repo["repo"]})
+        return
+
+    new_branch.checkout()
+
+    # Find all RST files in the documentation (doc/, umn/, api-ref/)
+    rst_files = list(repo_dir.rglob("doc/**/*.rst")) + \
+                list(repo_dir.rglob("umn/**/*.rst")) + \
+                list(repo_dir.rglob("api-ref/**/*.rst"))
+
+    processed_count = 0
+    updated_count = 0
+
+    for rst_file in rst_files:
+
+        logging.debug(f"Analyzing document {rst_file}")
+
+        try:
+            content = read_rst_content(rst_file)
+            description = generate_description_with_llm(
+                content,
+                service["service_title"],
+                args.llm_api_url,
+                args.llm_model,
+                args.llm_username,
+                args.llm_password
+            )
+
+            if add_sphinx_metadata(rst_file, description):
+                updated_count += 1
+                logging.info(f"Added meta description to {rst_file}")
+            else:
+                processed_count += 1
+
+            git_repo.index.add([str(rst_file)])
+
+        except Exception as e:
+            logging.error(f"Error processing {rst_file}: {e}")
+            error_list.append({"error": e, "repo": str(rst_file)})
+
+    if len(git_repo.index.diff("HEAD")) == 0:
+        logging.debug("No changes required for service %s", service["service_type"])
+        return
+
+    git_repo.index.commit(args.commit_description)
+
+    try:
+        git_repo.git.push("--set-upstream", "origin", branch_name)
+        logging.info(f"Pushed changes for service {service['service_title']}")
+    except Exception as e:
+        error_list.append({"error": e, "repo": repo["repo"]})
+
+    # Decide by repository type: repo_url is only set when a clone happened.
+    if repo["type"] == "github":
+        subprocess.run(
+            args=["gh", "pr", "create", "-f"], cwd=repo_dir, check=False
+        )
+    # Gitea pull requests are not implemented yet (args.token is unused).
+
+    if error_list:
+        logging.error("The following errors have happened:")
+        logging.error(error_list)
+
+    logging.info(f"Processed {processed_count} files, updated {updated_count} files")
+
+
+def main():
+    """Parse the command line and process the selected service(s)."""
+    parser = argparse.ArgumentParser(
+        description="Add Sphinx meta blocks to RST files using LLM-generated descriptions."
+    )
+    parser.add_argument(
+        "--target-environment",
+        required=True,
+        choices=["internal", "public"],
+        help="Environment to be used as a source",
+    )
+    parser.add_argument("--service-type", help="Service to update")
+    parser.add_argument(
+        "--work-dir",
+        required=True,
+        help="Working directory to use for repository checkout.",
+    )
+    parser.add_argument(
+        "--branch-name",
+        default="meta-generation",
+        help="Branch name to be used for changes.",
+    )
+    parser.add_argument("--token", metavar="token", help="API token")
+    parser.add_argument(
+        "--llm-api-url",
+        default="http://localhost:8080/v1/completions",
+        help="URL of the LLM API server. Default: http://localhost:8080/v1/completions",
+    )
+    parser.add_argument(
+        "--llm-model",
+        default="llama2",
+        help="LLM model name to use. Default: llama2",
+    )
+    parser.add_argument(
+        "--llm-username",
+        help="Username for Basic Authentication with LLM server",
+    )
+    parser.add_argument(
+        "--llm-password",
+        help="Password for Basic Authentication with LLM server",
+    )
+    parser.add_argument(
+        "--commit-description",
+        default=(
+            "Add Sphinx meta blocks to RST files\n\n"
+            "Generated by otc-metadata-rework/tools/generate_meta.py"
+        ),
+        help="Commit description for the commit",
+    )
+    parser.add_argument(
+        "--cloud-environment",
+        # Optional: the documented default applies when it is omitted.
+        default="eu_de",
+        help="Cloud Environment. Default: eu_de",
+    )
+
+    args = parser.parse_args()
+
+    logging.basicConfig(level=logging.DEBUG)
+
+    if args.service_type:
+        services = [data.get_service_with_repo_by_service_type(service_type=args.service_type)]
+    else:
+        services = data.services_with_repos()
+
+    for service in services:
+        process_service(args, service)
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
-- 
2.34.1


From 553e3d7e195a311cf6f8b8773b7a4445cca6d87d Mon Sep 17 00:00:00 2001
From: Sebastian Gode <sebastian.gode@telekom.de>
Date: Tue, 3 Mar 2026 10:54:38 +0000
Subject: [PATCH 2/6] working script

---
 tools/generate_meta.py | 158 +++++++++++++++++++++--------------------
 1 file changed, 81 insertions(+), 77 deletions(-)

diff --git a/tools/generate_meta.py b/tools/generate_meta.py
index 20c10e9..2306081 100755
--- a/tools/generate_meta.py
+++ b/tools/generate_meta.py
@@ -39,90 +39,94 @@ def remove_thinking_content(text):
     return text.strip()
 
 
+def extract_description(result):
+    """Extract description from API response and clean it."""
+    # Chat completion format: choices[0].message.content
+    if "choices" in result and len(result["choices"]) > 0:
+        message = result["choices"][0].get("message", {})
+        description = message.get("content", "")
+    elif "response" in result:
+        description = result["response"].strip()
+    elif isinstance(result, dict) and "text" in result:
+        description = result["text"].strip()
+    else:
+        return None
+
+    description = remove_thinking_content(description)
+    description = description.strip()
+
+    if not description or description.isspace():
+        return None
+
+    # Extract only the first sentence
+    parts = description.split(".")
+    first_sentence = parts[0].strip() + "."
+    if len(first_sentence) <= 1:
+        first_sentence = description[:160].strip() + "."
+    if len(first_sentence) > 160:
+        first_sentence = first_sentence[:157] + "..."
+    return first_sentence
+
+
 def generate_description_with_llm(text, service_title, llm_api_url, model_name, api_username, api_password):
-    """Generate a meta description using the LLM API."""
+    """Generate a meta description using the llama.cpp /completion endpoint with up to 3 retries."""
+    # Limit content to first 500 chars for speed, focus on content not schema
+    content_preview = text[:500].replace("\n", " ")
     prompt = (
-        "/no_think\n"
-        f"Generate a concise HTML meta description (maximum 160 characters, minimum 40 characters) "
-        f"for the following documentation content of the service '{service_title}'. "
-        f"The description should be suitable for search engines and summarize the content. "
-        f"Do not include any markdown formatting, quotes, or meta-commentary.\n\n"
-        f"Content:\n{text[:2000]}\n\n"
-        f"Meta description:"
+        f"Generate a meta description (40-160 chars) for: {service_title}."
+        f"Content preview: {content_preview}."
+        f"Output ONLY the description text, nothing else."
     )
 
-    try:
-        headers = {"Content-Type": "application/json"}
-        if api_username and api_password:
-            credentials = f"{api_username}:{api_password}"
-            encoded_credentials = base64.b64encode(credentials.encode()).decode()
-            headers["Authorization"] = f"Basic {encoded_credentials}"
+    headers = {"Content-Type": "application/json"}
+    if api_username and api_password:
+        credentials = f"{api_username}:{api_password}"
+        encoded_credentials = base64.b64encode(credentials.encode()).decode()
+        headers["Authorization"] = f"Basic {encoded_credentials}"
 
-        response = requests.post(
-            llm_api_url,
-            json={
-                "prompt": prompt,
-                "model": model_name,
-                "temperature": 0.2,
+    # Try up to 3 times
+    for attempt in range(3):
+        try:
+            response = requests.post(
+                llm_api_url,
+                json={
+                    "messages": [
+                        {"role": "user", "content": prompt},
+                    ],
+                    "model": model_name,
+                    "temperature": 0.2,
 
-                "repeat_last_n": 128,
-                "repeat_penalty": 1.15,
-                "presence_penalty": 0.2,
-                "frequency_penalty": 0.2,
+                    "top_k": 40,
+                    "top_p": 0.9,
+                    "min_p": 0.05,
 
-                # optional DRY anti-looping (try only if it still loops)
-                "dry_multiplier": 0.5,
-                "dry_base": 1.75,
-                "dry_allowed_length": 2,
+                    "repeat_last_n": 256,
+                    "repeat_penalty": 1.18,
+                    "presence_penalty": 0.2,
+                    "frequency_penalty": 0.2,
 
-            },
-            headers=headers,
-            timeout=15,
-        )
-        response.raise_for_status()
-        result = response.json()
-        if "choices" in result and len(result["choices"]) > 0:
-            description = result["choices"][0]["text"].strip()
-            description = remove_thinking_content(description)
-            # If description is empty or just whitespace, use fallback
-            if not description or description.isspace():
-                return f"{service_title} documentation"
-            # Extract only the first sentence (meta description should be one sentence)
-            parts = description.split(".")
-            first_sentence = parts[0].strip() + "."
-            if len(first_sentence) <= 1:
-                first_sentence = description[:160].strip() + "."
-            if len(first_sentence) > 160:
-                first_sentence = first_sentence[:157] + "..."
-            return first_sentence
-        elif "response" in result:
-            description = result["response"].strip()
-            description = remove_thinking_content(description)
-            if not description or description.isspace():
-                return f"{service_title} documentation"
-            first_sentence = description.split(".")[0].strip() + "."
-            if len(first_sentence) <= 1:
-                first_sentence = description[:160].strip() + "."
-            if len(first_sentence) > 160:
-                first_sentence = first_sentence[:157] + "..."
-            return first_sentence
-        elif isinstance(result, dict) and "text" in result:
-            description = result["text"].strip()
-            description = remove_thinking_content(description)
-            if not description or description.isspace():
-                return f"{service_title} documentation"
-            first_sentence = description.split(".")[0].strip() + "."
-            if len(first_sentence) <= 1:
-                first_sentence = description[:160].strip() + "."
-            if len(first_sentence) > 160:
-                first_sentence = first_sentence[:157] + "..."
-            return first_sentence
-    except requests.exceptions.RequestException as e:
-        logging.warning(f"LLM API request failed: {e}. Using fallback description.")
-    except (KeyError, ValueError, IndexError) as e:
-        logging.warning(f"LLM API response parsing failed: {e}. Using fallback description.")
+                    "dry_multiplier": 0.8,
+                    "dry_base": 1.75,
+                    "dry_allowed_length": 2,
+                    "dry_penalty_last_n": -1,
+                    "chat_template_kwargs": {"enable_thinking": False},
+                },
+                headers=headers,
+                timeout=15,
+            )
+            response.raise_for_status()
+            result = response.json()
+            description = extract_description(result)
+            if description:
+                return description
+            logging.warning(f"Attempt {attempt + 1}: Empty or invalid response from LLM API.")
+        except requests.exceptions.RequestException as e:
+            logging.warning(f"Attempt {attempt + 1}: LLM API request failed: {e}. Retrying...")
+        except (KeyError, ValueError, IndexError) as e:
+            logging.warning(f"Attempt {attempt + 1}: LLM API response parsing failed: {e}. Retrying...")
 
-    # Fallback: Extract first sentence from content
+    # After all retries failed, use fallback
+    logging.warning("All LLM API retries failed. Using fallback description.")
     lines = text.split("\n")
     for line in lines:
         line = line.strip()
@@ -324,8 +328,8 @@ def main():
     parser.add_argument("--token", metavar="token", help="API token")
     parser.add_argument(
         "--llm-api-url",
-        default="http://localhost:8080/v1/completions",
-        help="URL of the LLM API server. Default: http://localhost:8080/v1/completions",
+        default="http://localhost:8080/v1/chat/completions",
+        help="URL of the LLM API server. Default: http://localhost:8080/v1/chat/completions",
     )
     parser.add_argument(
         "--llm-model",
-- 
2.34.1


From 99f3ea4663951019d7b89fbc4cfc86ead28fd444 Mon Sep 17 00:00:00 2001
From: Sebastian Gode <sebastian.gode@telekom.de>
Date: Tue, 3 Mar 2026 11:27:37 +0000
Subject: [PATCH 3/6] Fix fallback

---
 tools/generate_meta.py | 32 +++++++++++++-------------------
 1 file changed, 13 insertions(+), 19 deletions(-)

diff --git a/tools/generate_meta.py b/tools/generate_meta.py
index 2306081..099704f 100755
--- a/tools/generate_meta.py
+++ b/tools/generate_meta.py
@@ -17,7 +17,6 @@ import argparse
 import base64
 import logging
 import pathlib
-import re
 import requests
 import subprocess
 import sys
@@ -32,16 +31,8 @@ data = otc_metadata.services.Services()
 api_session = requests.Session()
 
 
-def remove_thinking_content(text):
-    """Remove thinking process content between thinking markers."""
-    # Remove everything between <think> and </think> markers
-    text = re.sub(r'(?is)<think>.*?</think>', '', text, flags=re.DOTALL | re.IGNORECASE)
-    return text.strip()
-
-
 def extract_description(result):
     """Extract description from API response and clean it."""
-    # Chat completion format: choices[0].message.content
     if "choices" in result and len(result["choices"]) > 0:
         message = result["choices"][0].get("message", {})
         description = message.get("content", "")
@@ -52,7 +43,6 @@ def extract_description(result):
     else:
         return None
 
-    description = remove_thinking_content(description)
     description = description.strip()
 
     if not description or description.isspace():
@@ -125,16 +115,20 @@ def generate_description_with_llm(text, service_title, llm_api_url, model_name,
         except (KeyError, ValueError, IndexError) as e:
             logging.warning(f"Attempt {attempt + 1}: LLM API response parsing failed: {e}. Retrying...")
 
-    # After all retries failed, use fallback
-    logging.warning("All LLM API retries failed. Using fallback description.")
+    # After all retries failed, use fallback - extract first headline
+    logging.warning("All LLM API retries failed. Using fallback description from first headline.")
     lines = text.split("\n")
-    for line in lines:
-        line = line.strip()
-        if line and not line.startswith("-") and not line.startswith("#"):
-            first_sentence = line.split(".")[0] + "."
-            if len(first_sentence) > 160:
-                first_sentence = first_sentence[:157] + "..."
-            return first_sentence
+    for i, line in enumerate(lines):
+        line_stripped = line.strip()
+        if line_stripped and not line_stripped.startswith("-") and not line_stripped.startswith("#"):
+            # Check if next line is a headline underline (=== or ---)
+            if i + 1 < len(lines):
+                next_line = lines[i + 1].strip()
+                if next_line and all(c in "=-" for c in next_line):
+                    description = line_stripped
+                    if len(description) > 160:
+                        description = description[:157] + "..."
+                    return description
 
     return f"{service_title} documentation"
 
-- 
2.34.1


From f07b416baf5d5a4e55e13ed42b70deea3a08866b Mon Sep 17 00:00:00 2001
From: Sebastian Gode <sebastian.gode@telekom.de>
Date: Tue, 3 Mar 2026 13:31:19 +0000
Subject: [PATCH 4/6] Added keywords

---
 tools/generate_meta.py | 115 ++++++++++++++++++++++++++++++++++++-----
 1 file changed, 102 insertions(+), 13 deletions(-)

diff --git a/tools/generate_meta.py b/tools/generate_meta.py
index 099704f..d332904 100755
--- a/tools/generate_meta.py
+++ b/tools/generate_meta.py
@@ -58,10 +58,34 @@ def extract_description(result):
     return first_sentence
 
 
+def extract_keywords(result):
+    """Return up to five comma-separated keywords from an LLM response."""
+    if "choices" in result and len(result["choices"]) > 0:
+        message = result["choices"][0].get("message") or {}
+        keywords_text = message.get("content")
+    elif "response" in result:
+        keywords_text = result["response"]
+    elif isinstance(result, dict) and "text" in result:
+        keywords_text = result["text"]
+    else:
+        return None
+
+    # "content" may be JSON null (None); treat that like an empty answer.
+    keywords_text = (keywords_text or "").strip()
+    if not keywords_text:
+        return None
+
+    keywords = [kw.strip() for kw in keywords_text.split(",")]
+    keywords = [kw for kw in keywords if kw and len(kw) > 0]
+    
+    keywords = keywords[:5]
+    
+    return ", ".join(keywords)
+
+
 def generate_description_with_llm(text, service_title, llm_api_url, model_name, api_username, api_password):
     """Generate a meta description using the llama.cpp /completion endpoint with up to 3 retries."""
-    # Limit content to first 500 chars for speed, focus on content not schema
-    content_preview = text[:500].replace("\n", " ")
+    content_preview = text[:2000].replace("\n", " ")
     prompt = (
         f"Generate a meta description (40-160 chars) for: {service_title}."
         f"Content preview: {content_preview}."
@@ -74,7 +98,6 @@ def generate_description_with_llm(text, service_title, llm_api_url, model_name,
         encoded_credentials = base64.b64encode(credentials.encode()).decode()
         headers["Authorization"] = f"Basic {encoded_credentials}"
 
-    # Try up to 3 times
     for attempt in range(3):
         try:
             response = requests.post(
@@ -84,7 +107,7 @@ def generate_description_with_llm(text, service_title, llm_api_url, model_name,
                         {"role": "user", "content": prompt},
                     ],
                     "model": model_name,
-                    "temperature": 0.2,
+                    "temperature": 0.5,
 
                     "top_k": 40,
                     "top_p": 0.9,
@@ -133,21 +156,80 @@ def generate_description_with_llm(text, service_title, llm_api_url, model_name,
     return f"{service_title} documentation"
 
 
+def generate_keywords_with_llm(text, service_title, llm_api_url, model_name, api_username, api_password):
+    """Ask the LLM for keywords (3 attempts); fall back to the service title."""
+    content_preview = text[:2000].replace("\n", " ")
+    prompt = (
+        f"Generate up to 5 keywords (comma-separated) for: {service_title}. "
+        f"Content preview: {content_preview}. "
+        f"Output ONLY comma-separated keywords, nothing else."
+    )
+
+    headers = {"Content-Type": "application/json"}
+    if api_username and api_password:
+        credentials = f"{api_username}:{api_password}"
+        encoded_credentials = base64.b64encode(credentials.encode()).decode()
+        headers["Authorization"] = f"Basic {encoded_credentials}"
+
+    for attempt in range(3):
+        try:
+            response = requests.post(
+                llm_api_url,
+                json={
+                    "messages": [
+                        {"role": "user", "content": prompt},
+                    ],
+                    "model": model_name,
+                    "temperature": 0.7,
+
+                    "top_k": 40,
+                    "top_p": 0.9,
+                    "min_p": 0.05,
+
+                    "repeat_last_n": 256,
+                    "repeat_penalty": 1.18,
+                    "presence_penalty": 0.2,
+                    "frequency_penalty": 0.2,
+
+                    "dry_multiplier": 0.8,
+                    "dry_base": 1.75,
+                    "dry_allowed_length": 2,
+                    "dry_penalty_last_n": -1,
+                    "chat_template_kwargs": {"enable_thinking": False},
+                },
+                headers=headers,
+                timeout=15,
+            )
+            response.raise_for_status()
+            result = response.json()
+            keywords = extract_keywords(result)
+            if keywords:
+                return keywords
+            logging.warning(f"Attempt {attempt + 1}: Empty or invalid response from LLM API for keywords.")
+        except requests.exceptions.RequestException as e:
+            logging.warning(f"Attempt {attempt + 1}: LLM API request failed: {e}. Retrying...")
+        except (AttributeError, IndexError, KeyError, TypeError, ValueError) as e:
+            logging.warning(f"Attempt {attempt + 1}: LLM API response parsing failed: {e}. Retrying...")
+
+    logging.warning("All LLM API retries failed for keywords. Using fallback.")
+    return service_title.replace("-", " ").title()
+
+
 def read_rst_content(file_path):
     """Read and return the content of an RST file."""
     with open(file_path, "r", encoding="utf-8") as f:
         return f.read()
 
 
-def add_sphinx_metadata(file_path, meta_description):
+def add_sphinx_metadata(file_path, meta_description, meta_keywords=None):
     """Add Sphinx-compatible meta block at the end of an RST file."""
     content = read_rst_content(file_path)
 
-    meta_block = (
-        "\n"
-        ".. meta::\n"
-        "   :description: {}\n".format(meta_description)
-    )
+    meta_block = "\n.. meta::\n"
+    if meta_description:
+        meta_block += "   :description: {}\n".format(meta_description)
+    if meta_keywords:
+        meta_block += "   :keywords: {}\n".format(meta_keywords)
 
     # Check if meta block already exists
     if ".. meta::" in content:
@@ -237,7 +319,6 @@ def process_service(args, service):
 
     new_branch.checkout()
 
-    # Find all RST files in the documentation (doc/, umn/, api-ref/)
     rst_files = list(repo_dir.rglob("doc/**/*.rst")) + \
                 list(repo_dir.rglob("umn/**/*.rst")) + \
                 list(repo_dir.rglob("api-ref/**/*.rst"))
@@ -259,10 +340,18 @@ def process_service(args, service):
                 args.llm_username,
                 args.llm_password
             )
+            keywords = generate_keywords_with_llm(
+                content,
+                service["service_title"],
+                args.llm_api_url,
+                args.llm_model,
+                args.llm_username,
+                args.llm_password
+            )
 
-            if add_sphinx_metadata(rst_file, description):
+            if add_sphinx_metadata(rst_file, description, keywords):
                 updated_count += 1
-                logging.info(f"Added meta description to {rst_file}")
+                logging.info(f"Added meta description and keywords to {rst_file}")
             else:
                 processed_count += 1
 
-- 
2.34.1


From fff87e5cc651b0c60abc3f32d73f018cf51f7f21 Mon Sep 17 00:00:00 2001
From: Sebastian Gode <sebastian.gode@telekom.de>
Date: Tue, 3 Mar 2026 13:36:59 +0000
Subject: [PATCH 5/6] Fix pep8

---
 tools/generate_meta.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/tools/generate_meta.py b/tools/generate_meta.py
index d332904..f90470e 100755
--- a/tools/generate_meta.py
+++ b/tools/generate_meta.py
@@ -19,7 +19,6 @@ import logging
 import pathlib
 import requests
 import subprocess
-import sys
 
 from git import exc
 from git import Repo
@@ -77,9 +76,9 @@ def extract_keywords(result):
 
     keywords = [kw.strip() for kw in keywords_text.split(",")]
     keywords = [kw for kw in keywords if kw and len(kw) > 0]
-    
+
     keywords = keywords[:5]
-    
+
     return ", ".join(keywords)
 
 
@@ -319,9 +318,9 @@ def process_service(args, service):
 
     new_branch.checkout()
 
-    rst_files = list(repo_dir.rglob("doc/**/*.rst")) + \
-                list(repo_dir.rglob("umn/**/*.rst")) + \
-                list(repo_dir.rglob("api-ref/**/*.rst"))
+    rst_files = (list(repo_dir.rglob("doc/**/*.rst"))
+                 + list(repo_dir.rglob("umn/**/*.rst"))
+                 + list(repo_dir.rglob("api-ref/**/*.rst")))
 
     processed_count = 0
     updated_count = 0
@@ -457,4 +456,4 @@ def main():
 
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
-- 
2.34.1


From 9aa8be461e294183c7289715d476d3198a8ba860 Mon Sep 17 00:00:00 2001
From: Sebastian Gode <sebastian.gode@telekom.de>
Date: Wed, 4 Mar 2026 10:23:13 +0000
Subject: [PATCH 6/6] Add new line

---
 tools/generate_meta.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/generate_meta.py b/tools/generate_meta.py
index f90470e..71bbcff 100755
--- a/tools/generate_meta.py
+++ b/tools/generate_meta.py
@@ -224,7 +224,7 @@ def add_sphinx_metadata(file_path, meta_description, meta_keywords=None):
     """Add Sphinx-compatible meta block at the end of an RST file."""
     content = read_rst_content(file_path)
 
-    meta_block = "\n.. meta::\n"
+    meta_block = "\n\n.. meta::\n"
     if meta_description:
         meta_block += "   :description: {}\n".format(meta_description)
     if meta_keywords:
-- 
2.34.1