Various fixes for MRS component operation guide

Reviewed-by: Hasko, Vladimir <vladimir.hasko@t-systems.com>
Co-authored-by: gtema <artem.goncharov@gmail.com>
Co-committed-by: gtema <artem.goncharov@gmail.com>
This commit is contained in:
gtema 2022-11-22 17:11:07 +00:00 committed by zuul
parent 6ffc35f074
commit c5ad20504f

View File

@ -221,7 +221,7 @@ class OTCDocConvertor:
else:
logging.debug(
"Not placing replaced anchor %s "
" since it already existed",
"since it already existed",
local_ref,
)
@ -319,17 +319,6 @@ class OTCDocConvertor:
new = f"<code>{em.string}</code>"
em.replace_with(bs4.BeautifulSoup(new, "html.parser"))
for p in soup.body.find_all(string=re.compile(r"(/\*).+")):
if p.string and p.parent.name == "p":
p.string.replace_with(p.string.replace("/*", "/``*``"))
# MRS UMN contains: /opt/Bigdata/FusionInsight_Porter_8.*/foo-*/
# This is a pretty special case and we do not want to apply that widely
# therefore only looking for [.-]*/ combinations
for p in soup.body.find_all(string=re.compile(r"([\.-]\*/).+")):
if p.string and p.parent.name == "p":
p.string.replace_with(p.string.replace("*/", "``*``/"))
escape_asterisk_re = r"\((\*)[\.,]"
for p in soup.body.find_all(string=re.compile(escape_asterisk_re)):
if p.string and p.parent.name not in ["b", "strong", "pre"]:
@ -356,6 +345,8 @@ class OTCDocConvertor:
r"\(([\W\x60_]{10,})\)",
# MRS UMN contains: /:*?"<>|\\;&,'`!{}[]$%+
r"\s([^a-zA-Z0-9\s]{8,})",
# MRS operation guide contains: /*+ MAPJOIN(join_table) \*/
r"\s(/\*.*\*/)",
# BMS API contains such a sequence in a dedicated paragraph
r"^([^a-zA-Z0-9\s]{10,})$",
# OBS special chars - "\$" "\\" etc
@ -364,6 +355,8 @@ class OTCDocConvertor:
r"\s(urn:smn:\(.*)\.",
# "-" only (in tables) is considered as list
r"^(-)$",
# MRS component guide has: "./mydate_\\\\d*/"
r"\w(_)\\",
]
for to_rawize in rawize_strings:
for p in soup.body.find_all(string=re.compile(to_rawize)):
@ -386,6 +379,20 @@ class OTCDocConvertor:
"Cannot find string for rawization anymore"
)
# Pandoc does not seem to properly escape asterisks which
# immediately follow non-word chars
# (https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#toc-entry-44)
# NOTE(gtema):
# 1. this is placed here on purpose, since we want the special
# escapings above to be applied first
# 2. we are not escaping asterisks at the end of paragraphs (pandoc
# deals correctly with those)
re_escape = re.compile(r"([-:/'\"<\([{])(\*+)(.+)")
for p in soup.body.find_all(string=re_escape):
if p.string and p.parent.name == "p":
p.string.replace_with(
re.sub(re_escape, r"\1``\2``\3", p.string))
# Drop parent link at the bottom of the page
for parent in soup.body.find_all("p", class_="familylinks"):
parent.decompose()
@ -555,8 +562,20 @@ class OTCDocConvertor:
writer.write(f".. _{f.name.replace('.html', '')}:\n\n")
# post process some usual stuff
for line in reader.readlines():
processed_line = re.sub(r"\.\.\\\\_", ".. _", line)
processed_line = re.sub(r"\.\.\\_", ".. _", processed_line)
processed_line = re.sub(
r"\.\.\\\\_(.*):$", r".. _\1:", line)
# replace anchor when it is itself inside some other block
# (i.e. table)
processed_line = re.sub(
r"\.\.\\\\_(.*):\s", r".. _\1: ", processed_line)
# For some reason the regex does not behave the same locally and in
# zuul - thus repeat the same substitutions with a single backslash
processed_line = re.sub(
r"\.\.\\_(.*):$", r".. _\1:", processed_line)
# replace anchor when it is itself inside some other block
# (i.e. table)
processed_line = re.sub(
r"\.\.\\_(.*):\s", r".. _\1: ", processed_line)
# We could get unwanted anchors from pandoc - get rid of
# them
anchor = re.search(r"\.\. \_(.*):", processed_line)
@ -566,7 +585,9 @@ class OTCDocConvertor:
):
# This is most likely some duplicated anchor. It is
# not referenced from any other place, so drop it
logging.info("Dropping not referred anchor")
logging.info(
"Dropping not referred anchor '%s'",
anchor.group(1))
continue
processed_line = re.sub(