diff --git a/otc_doc_convertor/convertor.py b/otc_doc_convertor/convertor.py
index 5cf42271..b8ad1a5c 100644
--- a/otc_doc_convertor/convertor.py
+++ b/otc_doc_convertor/convertor.py
@@ -73,6 +73,29 @@ class OTCDocConvertor:
or fname.lower() + "#" + ref in self.doc_links
)
+ def rawize_me(self, soup, expressions):  # For each regex in `expressions`, rewrite matching text nodes under soup.body in place.
+ for to_rawize in expressions:  # every entry is used as a regex pattern
+ for p in soup.body.find_all(string=re.compile(to_rawize)):  # text nodes whose content matches the pattern
+ if p.string and p.parent.name not in [  # skip nodes whose direct parent is one of the tags listed below
+ "b", "strong", "pre", "code"]:
+ curr = p.string
+ part = re.search(to_rawize, curr)  # re-run the pattern to obtain the match object
+ # We should not escape inside of bold - this is wrong
+ if len(part.groups()) > 0:  # the pattern must define at least one capture group
+ logging.debug(
+ "Found element to rawize %s", part.group(1)
+ )
+ new = curr.replace(  # str.replace rewrites every occurrence of the captured text in curr, not only the matched one
+ part.group(1), f"{part.group(1)}
"
+ )  # NOTE(review): the replacement literal above looks damaged in this view - confirm against the repository
+ logging.debug("Replacing string with: %s", new)
+ p.replace_with(bs4.BeautifulSoup(new, "html.parser"))  # parse the rewritten text as HTML and swap it in for the text node
+ logging.debug("Replacing string with: %s", p.string)  # NOTE(review): p was just replaced, so this presumably logs the old node - confirm
+ else:  # NOTE(review): indentation is lost in this view; confirm which `if` this branch pairs with
+ logging.error(
+ "Cannot find string for rawization anymore"
+ )
+
def streamline_html(self, soup, file_name):
# Drop eventual header duplicated anchors
fname = file_name.replace(".html", "").lower()
@@ -307,6 +330,18 @@ class OTCDocConvertor:
#
{em.string}" em.replace_with(bs4.BeautifulSoup(new, "html.parser")) - escape_asterisk_re = r"\((\*)[\.,]" - for p in soup.body.find_all(string=re.compile(escape_asterisk_re)): - if p.string and p.parent.name not in ["b", "strong", "pre"]: - curr = p.string - part = re.search(escape_asterisk_re, curr) - # If we have ` all files (*.*)` - no need to escape - if len(part.groups()) > 0: - logging.debug( - "Found asterisks to escape: %s", part.group(1) - ) - new = curr.replace( - part.group(1), f"{part.group(1)}
" - ) - p.replace_with(bs4.BeautifulSoup(new, "html.parser")) + # Incredibly dirty hacks: + rawize_expressions = [ + # DWS Dev Guide harcodes + r"^(1\|\"iamtext\"\|\"iamvarchar\"\|2006-07-07\|12:00:00)$", + r"^(2\|\"iamtext\"\|\"iamvarchar\"\|2022-07-07\|19:00:02)$", + r"(\*max_files_per_process\*3)", + r"(&&, &&&, .* <#>)", + # r"(\*\+)", + r"(-\|-)", + r"(^-{8}$)" + ] + self.rawize_me(soup, rawize_expressions) - # And now specialities + # Special asterisks treatement + escape_asterisk_re = r"\((\*)[\.,]" + self.rawize_me(soup, [escape_asterisk_re]) + + # And now remaining specialities rawize_strings = [ # "\*\*\*\*\*\*", # r"([\\\/\:\*\?\"\~|<>]{4,})" @@ -363,27 +400,10 @@ class OTCDocConvertor: # it invalid for Sphinx. A bit weird regex: # "[:space:][:word:][:underscore:][:comma:]" r"\s([\w_]+_)[,]", + # DWS dirty-fixes part 2 + r"/(\*\+)", ] - for to_rawize in rawize_strings: - for p in soup.body.find_all(string=re.compile(to_rawize)): - if p.string and p.parent.name not in ["b", "strong", "pre"]: - curr = p.string - part = re.search(to_rawize, curr) - # We should not escape inside of bold - this is wrong - if len(part.groups()) > 0: - logging.debug( - "Found element to rawize %s", part.group(1) - ) - new = curr.replace( - part.group(1), f"{part.group(1)}
" - ) - logging.debug("Replacing string with: %s", new) - p.replace_with(bs4.BeautifulSoup(new, "html.parser")) - logging.debug("Replacing string with: %s", p.string) - else: - logging.error( - "Cannot find string for rawization anymore" - ) + self.rawize_me(soup, rawize_strings) # Pandoc seem to be not escaping properly asterists which are # immediately following non word chars diff --git a/templates/conf.py b/templates/conf.py index 55b6862f..8ce7e2dd 100644 --- a/templates/conf.py +++ b/templates/conf.py @@ -94,6 +94,9 @@ html_title = "{{ title }}" # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] +# Do not include sources into the rendered results +html_copy_source = False + # -- Options for PDF output -------------------------------------------------- latex_documents = [ {%- if pdf_name is defined %}