forked from docs/doc-exports
Various fixes for MRS component operation guide
Reviewed-by: Hasko, Vladimir <vladimir.hasko@t-systems.com> Co-authored-by: gtema <artem.goncharov@gmail.com> Co-committed-by: gtema <artem.goncharov@gmail.com>
This commit is contained in:
parent
6ffc35f074
commit
c5ad20504f
@ -221,7 +221,7 @@ class OTCDocConvertor:
|
|||||||
else:
|
else:
|
||||||
logging.debug(
|
logging.debug(
|
||||||
"Not placing replaced anchor %s "
|
"Not placing replaced anchor %s "
|
||||||
" since it already existed",
|
"since it already existed",
|
||||||
local_ref,
|
local_ref,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -319,17 +319,6 @@ class OTCDocConvertor:
|
|||||||
new = f"<code>{em.string}</code>"
|
new = f"<code>{em.string}</code>"
|
||||||
em.replace_with(bs4.BeautifulSoup(new, "html.parser"))
|
em.replace_with(bs4.BeautifulSoup(new, "html.parser"))
|
||||||
|
|
||||||
for p in soup.body.find_all(string=re.compile(r"(/\*).+")):
|
|
||||||
if p.string and p.parent.name == "p":
|
|
||||||
p.string.replace_with(p.string.replace("/*", "/``*``"))
|
|
||||||
|
|
||||||
# MRS UMN contains: /opt/Bigdata/FusionInsight_Porter_8.*/foo-*/
|
|
||||||
# This is a pretty special case and we do not want to apply that widely
|
|
||||||
# therefore only looking for [.-]*/ combinations
|
|
||||||
for p in soup.body.find_all(string=re.compile(r"([\.-]\*/).+")):
|
|
||||||
if p.string and p.parent.name == "p":
|
|
||||||
p.string.replace_with(p.string.replace("*/", "``*``/"))
|
|
||||||
|
|
||||||
escape_asterisk_re = r"\((\*)[\.,]"
|
escape_asterisk_re = r"\((\*)[\.,]"
|
||||||
for p in soup.body.find_all(string=re.compile(escape_asterisk_re)):
|
for p in soup.body.find_all(string=re.compile(escape_asterisk_re)):
|
||||||
if p.string and p.parent.name not in ["b", "strong", "pre"]:
|
if p.string and p.parent.name not in ["b", "strong", "pre"]:
|
||||||
@ -356,6 +345,8 @@ class OTCDocConvertor:
|
|||||||
r"\(([\W\x60_]{10,})\)",
|
r"\(([\W\x60_]{10,})\)",
|
||||||
# MRS UMN contain: /:*?"<>|\\;&,'`!{}[]$%+
|
# MRS UMN contain: /:*?"<>|\\;&,'`!{}[]$%+
|
||||||
r"\s([^a-zA-Z0-9\s]{8,})",
|
r"\s([^a-zA-Z0-9\s]{8,})",
|
||||||
|
# MRS operation guide contains: /*+ MAPJOIN(join_table) \*/
|
||||||
|
r"\s(/\*.*\*/)",
|
||||||
# BMS API contain sequence in a dedicated paragraph
|
# BMS API contain sequence in a dedicated paragraph
|
||||||
r"^([^a-zA-Z0-9\s]{10,})$",
|
r"^([^a-zA-Z0-9\s]{10,})$",
|
||||||
# OBS special chars - "\$" "\\" etc
|
# OBS special chars - "\$" "\\" etc
|
||||||
@ -364,6 +355,8 @@ class OTCDocConvertor:
|
|||||||
r"\s(urn:smn:\(.*)\.",
|
r"\s(urn:smn:\(.*)\.",
|
||||||
# "-" only (in tables) is considered as list
|
# "-" only (in tables) is considered as list
|
||||||
r"^(-)$",
|
r"^(-)$",
|
||||||
|
# MRS component guide has: "./mydate_\\\\d*/"
|
||||||
|
r"\w(_)\\",
|
||||||
]
|
]
|
||||||
for to_rawize in rawize_strings:
|
for to_rawize in rawize_strings:
|
||||||
for p in soup.body.find_all(string=re.compile(to_rawize)):
|
for p in soup.body.find_all(string=re.compile(to_rawize)):
|
||||||
@ -386,6 +379,20 @@ class OTCDocConvertor:
|
|||||||
"Cannot find string for rawization anymore"
|
"Cannot find string for rawization anymore"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Pandoc seems not to properly escape asterisks which are
|
||||||
|
# immediately following non word chars
|
||||||
|
# (https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#toc-entry-44)
|
||||||
|
# NOTE(gtema):
|
||||||
|
# 1. this is on purpose placed here since we want to have some special
|
||||||
|
# escapings above
|
||||||
|
# 2. we are not escaping asterisks at the end of the paragraphs (pandoc
|
||||||
|
# deals correctly with that)
|
||||||
|
re_escape = re.compile(r"([-:/'\"<\([{])(\*+)(.+)")
|
||||||
|
for p in soup.body.find_all(string=re_escape):
|
||||||
|
if p.string and p.parent.name == "p":
|
||||||
|
p.string.replace_with(
|
||||||
|
re.sub(re_escape, r"\1``\2``\3", p.string))
|
||||||
|
|
||||||
# Drop parent link at the bottom of the page
|
# Drop parent link at the bottom of the page
|
||||||
for parent in soup.body.find_all("p", class_="familylinks"):
|
for parent in soup.body.find_all("p", class_="familylinks"):
|
||||||
parent.decompose()
|
parent.decompose()
|
||||||
@ -555,8 +562,20 @@ class OTCDocConvertor:
|
|||||||
writer.write(f".. _{f.name.replace('.html', '')}:\n\n")
|
writer.write(f".. _{f.name.replace('.html', '')}:\n\n")
|
||||||
# post process some usual stuff
|
# post process some usual stuff
|
||||||
for line in reader.readlines():
|
for line in reader.readlines():
|
||||||
processed_line = re.sub(r"\.\.\\\\_", ".. _", line)
|
processed_line = re.sub(
|
||||||
processed_line = re.sub(r"\.\.\\_", ".. _", processed_line)
|
r"\.\.\\\\_(.*):$", r".. _\1:", line)
|
||||||
|
# replace anchor when it is itself inside some other block
|
||||||
|
# (i.e. table)
|
||||||
|
processed_line = re.sub(
|
||||||
|
r"\.\.\\\\_(.*):\s", r".. _\1: ", processed_line)
|
||||||
|
# For some reason regexes locally and in Zuul do not
|
||||||
|
# behave the same - thus the same substitution with different escaping
|
||||||
|
processed_line = re.sub(
|
||||||
|
r"\.\.\\_(.*):$", r".. _\1:", processed_line)
|
||||||
|
# replace anchor when it is itself inside some other block
|
||||||
|
# (i.e. table)
|
||||||
|
processed_line = re.sub(
|
||||||
|
r"\.\.\\_(.*):\s", r".. _\1: ", processed_line)
|
||||||
# We could get unwanted anchors from pandoc - get rid of
|
# We could get unwanted anchors from pandoc - get rid of
|
||||||
# them
|
# them
|
||||||
anchor = re.search(r"\.\. \_(.*):", processed_line)
|
anchor = re.search(r"\.\. \_(.*):", processed_line)
|
||||||
@ -566,7 +585,9 @@ class OTCDocConvertor:
|
|||||||
):
|
):
|
||||||
# This is most likely some duplicated anchor. It is
|
# This is most likely some duplicated anchor. It is
|
||||||
# not referred from any other place so drop it
|
# not referred from any other place so drop it
|
||||||
logging.info("Dropping not referred anchor")
|
logging.info(
|
||||||
|
"Dropping not referred anchor '%s'",
|
||||||
|
anchor.group(1))
|
||||||
continue
|
continue
|
||||||
|
|
||||||
processed_line = re.sub(
|
processed_line = re.sub(
|
||||||
|
Loading…
x
Reference in New Issue
Block a user