From 4be65edac3b3eb1d39261d12cbcb9a63706931db Mon Sep 17 00:00:00 2001 From: gtema Date: Wed, 16 Nov 2022 11:11:09 +0000 Subject: [PATCH] Escape .*/ and -*/ asterisks inside paragraphs Reviewed-by: Hasko, Vladimir Co-authored-by: gtema Co-committed-by: gtema --- otc_doc_convertor/convertor.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/otc_doc_convertor/convertor.py b/otc_doc_convertor/convertor.py index 5a6032fc..3bfbe3a3 100644 --- a/otc_doc_convertor/convertor.py +++ b/otc_doc_convertor/convertor.py @@ -323,6 +323,13 @@ class OTCDocConvertor: if p.string and p.parent.name == "p": p.string.replace_with(p.string.replace("/*", "/``*``")) + # MRS UMN contains: /opt/Bigdata/FusionInsight_Porter_8.*/foo-*/ + # This is a pretty special case and we do not want to apply that widely + # therefore only looking for [.-]*/ combinations + for p in soup.body.find_all(string=re.compile(r"([\.-]\*/).+")): + if p.string and p.parent.name == "p": + p.string.replace_with(p.string.replace("*/", "``*``/")) + escape_asterisk_re = r"\((\*)[\.,]" for p in soup.body.find_all(string=re.compile(escape_asterisk_re)): if p.string and p.parent.name not in ["b", "strong", "pre"]: @@ -337,7 +344,6 @@ class OTCDocConvertor: part.group(1), f"{part.group(1)}" ) p.replace_with(bs4.BeautifulSoup(new, "html.parser")) - # And now specialities rawize_strings = [ # "\*\*\*\*\*\*",