forked from docs/doc-exports
Various fixes for MRS component operation guide
Reviewed-by: Hasko, Vladimir <vladimir.hasko@t-systems.com> Co-authored-by: gtema <artem.goncharov@gmail.com> Co-committed-by: gtema <artem.goncharov@gmail.com>
This commit is contained in:
parent
6ffc35f074
commit
c5ad20504f
@ -221,7 +221,7 @@ class OTCDocConvertor:
|
||||
else:
|
||||
logging.debug(
|
||||
"Not placing replaced anchor %s "
|
||||
" since it already existed",
|
||||
"since it already existed",
|
||||
local_ref,
|
||||
)
|
||||
|
||||
@ -319,17 +319,6 @@ class OTCDocConvertor:
|
||||
new = f"<code>{em.string}</code>"
|
||||
em.replace_with(bs4.BeautifulSoup(new, "html.parser"))
|
||||
|
||||
for p in soup.body.find_all(string=re.compile(r"(/\*).+")):
|
||||
if p.string and p.parent.name == "p":
|
||||
p.string.replace_with(p.string.replace("/*", "/``*``"))
|
||||
|
||||
# MRS UMN contains: /opt/Bigdata/FusionInsight_Porter_8.*/foo-*/
|
||||
# This is a pretty special case and we do not want to apply that widely
|
||||
# therefore only looking for [.-]*/ combinations
|
||||
for p in soup.body.find_all(string=re.compile(r"([\.-]\*/).+")):
|
||||
if p.string and p.parent.name == "p":
|
||||
p.string.replace_with(p.string.replace("*/", "``*``/"))
|
||||
|
||||
escape_asterisk_re = r"\((\*)[\.,]"
|
||||
for p in soup.body.find_all(string=re.compile(escape_asterisk_re)):
|
||||
if p.string and p.parent.name not in ["b", "strong", "pre"]:
|
||||
@ -356,6 +345,8 @@ class OTCDocConvertor:
|
||||
r"\(([\W\x60_]{10,})\)",
|
||||
# MRS UMN contains: /:*?"<>|\\;&,'`!{}[]$%+
|
||||
r"\s([^a-zA-Z0-9\s]{8,})",
|
||||
# MRS operation guide contains: /*+ MAPJOIN(join_table) \*/
|
||||
r"\s(/\*.*\*/)",
|
||||
# BMS API contains a sequence in a dedicated paragraph
|
||||
r"^([^a-zA-Z0-9\s]{10,})$",
|
||||
# OBS special chars - "\$" "\\" etc
|
||||
@ -364,6 +355,8 @@ class OTCDocConvertor:
|
||||
r"\s(urn:smn:\(.*)\.",
|
||||
# A lone "-" (in tables) is considered a list
|
||||
r"^(-)$",
|
||||
# MRS component guide has: "./mydate_\\\\d*/"
|
||||
r"\w(_)\\",
|
||||
]
|
||||
for to_rawize in rawize_strings:
|
||||
for p in soup.body.find_all(string=re.compile(to_rawize)):
|
||||
@ -386,6 +379,20 @@ class OTCDocConvertor:
|
||||
"Cannot find string for rawization anymore"
|
||||
)
|
||||
|
||||
# Pandoc seems not to properly escape asterisks which are
|
||||
# immediately following non-word chars
|
||||
# (https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#toc-entry-44)
|
||||
# NOTE(gtema):
|
||||
# 1. this is on purpose placed here since we want to have some special
|
||||
# escapings above
|
||||
# 2. we are not escaping asterisks at the end of the paragraphs (pandoc
|
||||
# deals correctly with that)
|
||||
re_escape = re.compile(r"([-:/'\"<\([{])(\*+)(.+)")
|
||||
for p in soup.body.find_all(string=re_escape):
|
||||
if p.string and p.parent.name == "p":
|
||||
p.string.replace_with(
|
||||
re.sub(re_escape, r"\1``\2``\3", p.string))
|
||||
|
||||
# Drop parent link at the bottom of the page
|
||||
for parent in soup.body.find_all("p", class_="familylinks"):
|
||||
parent.decompose()
|
||||
@ -555,8 +562,20 @@ class OTCDocConvertor:
|
||||
writer.write(f".. _{f.name.replace('.html', '')}:\n\n")
|
||||
# post process some usual stuff
|
||||
for line in reader.readlines():
|
||||
processed_line = re.sub(r"\.\.\\\\_", ".. _", line)
|
||||
processed_line = re.sub(r"\.\.\\_", ".. _", processed_line)
|
||||
processed_line = re.sub(
|
||||
r"\.\.\\\\_(.*):$", r".. _\1:", line)
|
||||
# replace anchor when it is itself inside some other block
|
||||
# (i.e. table)
|
||||
processed_line = re.sub(
|
||||
r"\.\.\\\\_(.*):\s", r".. _\1: ", processed_line)
|
||||
# For some reason regex locally and in zuul are not
|
||||
# behaving the same - thus the same substitution with a different pattern
|
||||
processed_line = re.sub(
|
||||
r"\.\.\\_(.*):$", r".. _\1:", processed_line)
|
||||
# replace anchor when it is itself inside some other block
|
||||
# (i.e. table)
|
||||
processed_line = re.sub(
|
||||
r"\.\.\\_(.*):\s", r".. _\1: ", processed_line)
|
||||
# We could get unwanted anchors from pandoc - get rid of
|
||||
# them
|
||||
anchor = re.search(r"\.\. \_(.*):", processed_line)
|
||||
@ -566,7 +585,9 @@ class OTCDocConvertor:
|
||||
):
|
||||
# This is most likely some duplicated anchor. It is
|
||||
# not referred from any other place so drop it
|
||||
logging.info("Dropping not referred anchor")
|
||||
logging.info(
|
||||
"Dropping not referred anchor '%s'",
|
||||
anchor.group(1))
|
||||
continue
|
||||
|
||||
processed_line = re.sub(
|
||||
|
Loading…
x
Reference in New Issue
Block a user