diff --git a/otc_doc_convertor/convertor.py b/otc_doc_convertor/convertor.py index 5cf42271..b8ad1a5c 100644 --- a/otc_doc_convertor/convertor.py +++ b/otc_doc_convertor/convertor.py @@ -73,6 +73,29 @@ class OTCDocConvertor: or fname.lower() + "#" + ref in self.doc_links ) + def rawize_me(self, soup, expressions): + for to_rawize in expressions: + for p in soup.body.find_all(string=re.compile(to_rawize)): + if p.string and p.parent.name not in [ + "b", "strong", "pre", "code"]: + curr = p.string + part = re.search(to_rawize, curr) + # We should not escape inside of bold - this is wrong + if len(part.groups()) > 0: + logging.debug( + "Found element to rawize %s", part.group(1) + ) + new = curr.replace( + part.group(1), f"{part.group(1)}" + ) + logging.debug("Replacing string with: %s", new) + p.replace_with(bs4.BeautifulSoup(new, "html.parser")) + logging.debug("Replacing string with: %s", p.string) + else: + logging.error( + "Cannot find string for rawization anymore" + ) + def streamline_html(self, soup, file_name): # Drop eventual header duplicated anchors fname = file_name.replace(".html", "").lower() @@ -307,6 +330,18 @@ class OTCDocConvertor: #
{em.string}"
                 em.replace_with(bs4.BeautifulSoup(new, "html.parser"))
 
-        escape_asterisk_re = r"\((\*)[\.,]"
-        for p in soup.body.find_all(string=re.compile(escape_asterisk_re)):
-            if p.string and p.parent.name not in ["b", "strong", "pre"]:
-                curr = p.string
-                part = re.search(escape_asterisk_re, curr)
-                # If we have ` all files (*.*)` - no need to escape
-                if len(part.groups()) > 0:
-                    logging.debug(
-                        "Found asterisks to escape: %s", part.group(1)
-                    )
-                    new = curr.replace(
-                        part.group(1), f"{part.group(1)}"
-                    )
-                    p.replace_with(bs4.BeautifulSoup(new, "html.parser"))
+        # Incredibly dirty hacks:
+        rawize_expressions = [
+            # DWS Dev Guide hardcodes
+            r"^(1\|\"iamtext\"\|\"iamvarchar\"\|2006-07-07\|12:00:00)$",
+            r"^(2\|\"iamtext\"\|\"iamvarchar\"\|2022-07-07\|19:00:02)$",
+            r"(\*max_files_per_process\*3)",
+            r"(&&, &&&, .* <#>)",
+            # r"(\*\+)",
+            r"(-\|-)",
+            r"(^-{8}$)"
+        ]
+        self.rawize_me(soup, rawize_expressions)
 
-        # And now specialities
+        # Special asterisks treatment
+        escape_asterisk_re = r"\((\*)[\.,]"
+        self.rawize_me(soup, [escape_asterisk_re])
+
+        # And now remaining specialities
         rawize_strings = [
             # "\*\*\*\*\*\*",
             # r"([\\\/\:\*\?\"\~|<>]{4,})"
@@ -363,27 +400,10 @@ class OTCDocConvertor:
             # it invalid for Sphinx. A bit weird regex:
             # "[:space:][:word:][:underscore:][:comma:]"
             r"\s([\w_]+_)[,]",
+            # DWS dirty-fixes part 2
+            r"/(\*\+)",
         ]
-        for to_rawize in rawize_strings:
-            for p in soup.body.find_all(string=re.compile(to_rawize)):
-                if p.string and p.parent.name not in ["b", "strong", "pre"]:
-                    curr = p.string
-                    part = re.search(to_rawize, curr)
-                    # We should not escape inside of bold - this is wrong
-                    if len(part.groups()) > 0:
-                        logging.debug(
-                            "Found element to rawize %s", part.group(1)
-                        )
-                        new = curr.replace(
-                            part.group(1), f"{part.group(1)}"
-                        )
-                        logging.debug("Replacing string with: %s", new)
-                        p.replace_with(bs4.BeautifulSoup(new, "html.parser"))
-                        logging.debug("Replacing string with: %s", p.string)
-                    else:
-                        logging.error(
-                            "Cannot find string for rawization anymore"
-                        )
+        self.rawize_me(soup, rawize_strings)
 
         # Pandoc seem to be not escaping properly asterists which are
         # immediately following non word chars
diff --git a/templates/conf.py b/templates/conf.py
index 55b6862f..8ce7e2dd 100644
--- a/templates/conf.py
+++ b/templates/conf.py
@@ -94,6 +94,9 @@ html_title = "{{ title }}"
 # so a file named "default.css" will overwrite the builtin "default.css".
 html_static_path = ['_static']
 
+# Do not include sources into the rendered results
+html_copy_source = False
+
 # -- Options for PDF output --------------------------------------------------
 latex_documents = [
 {%- if pdf_name is defined %}