forked from docs/doc-exports
240 lines
8.4 KiB
Python
240 lines
8.4 KiB
Python
#!/usr/bin/env python3
|
||
|
||
import argparse
|
||
import bs4
|
||
import json
|
||
import os
|
||
import pathlib
|
||
import re
|
||
import subprocess
|
||
import warnings
|
||
|
||
|
||
def get_new_name(current_name):
    """Turn a document title into a lowercase name usable as a file name.

    The ' - ' separator, single blanks and slashes each become an
    underscore; quotes, backticks, acute accents, colons and question marks
    are removed; the result is lower-cased.

    :param current_name: title to convert.
    :returns: the converted name.
    """
    # Handled first so that ' - ' collapses into a single underscore instead
    # of one underscore per blank.
    joined = current_name.replace(' - ', '_')
    single_chars = str.maketrans({
        ' ': '_',
        '/': '_',
        '\'': None,
        '"': None,
        '`': None,
        '´': None,
        ':': None,
        '?': None,
    })
    return joined.translate(single_chars).lower()
|
||
|
||
|
||
def get_target_path(code, metadata, path=''):
    """Build the slash-separated path of the entry ``code``.

    The path is made of the ``new_name`` of every ancestor (followed through
    ``p_code``) and of the entry itself.

    :param code: key of the entry in ``metadata``.
    :param metadata: mapping of code to entry; entries provide ``new_name``
        and optionally ``p_code``.
    :param path: not used by the function; kept so existing calls still work.
    :returns: ``''`` when ``code`` is not in ``metadata``, otherwise the
        joined path. A parent code missing from ``metadata`` contributes an
        empty segment, so the result then starts with ``/``.
    """
    if code not in metadata:
        return ''

    entry = metadata[code]
    parent_code = entry.get('p_code')
    if not parent_code:
        # Top-level entry: the path is just its own name.
        return entry['new_name']

    parent_path = get_target_path(parent_code, metadata)
    return f"{parent_path}/{entry['new_name']}"
|
||
|
||
|
||
def build_doc_tree(metadata):
    """Group the metadata entries by the code of their parent.

    :param metadata: mapping whose values are entries with an optional
        ``p_code`` key.
    :returns: dict mapping each parent code to the list of its child
        entries; entries with a missing or empty ``p_code`` are collected
        under the key ``0``.
    """
    children_by_parent = {}
    for entry in metadata.values():
        parent_code = entry.get('p_code') or 0
        children_by_parent.setdefault(parent_code, []).append(entry)
    return children_by_parent
|
||
|
||
|
||
def _strip_id_and_icon(div):
    """Remove the ``id`` attribute and the first ``<img>`` inside ``div``."""
    del div['id']
    if div.img:
        div.img.decompose()


def flatten_html(soup):
    """Simplify the div and table markup of a parsed page in place.

    Every ``<div>`` of the body is handled according to its class:

    * ``note``: id and first image removed; a ``span.notetitle`` is replaced
      by ``<div class="title">Note:</div>``.
    * ``notice``: id and first image removed; class set to ``important``.
    * ``caution``: id and first image removed.
    * ``fignone``: replaced by a ``<figure>`` holding the caption
      (``span.figcap`` renamed to ``figcaption``) and the image, whose
      ``src`` gets the ``/_static/images/`` prefix.
    * anything else: the tag is renamed to ``<p>``.

    Afterwards ``-`` and ``_`` are removed from the ``id`` of every table.

    :param soup: parsed document (BeautifulSoup object) to modify.
    :returns: the ``body`` element of ``soup``.
    """
    for div in soup.body.find_all('div'):
        classes = div.get('class', [])
        if "note" in classes:
            _strip_id_and_icon(div)
            note_title = div.find('span', class_='notetitle')
            if note_title:
                heading = soup.new_tag('div')
                heading['class'] = 'title'
                heading.string = 'Note:'
                note_title.replace_with(heading)
        elif "notice" in classes:
            _strip_id_and_icon(div)
            div['class'] = 'important'
        elif "caution" in classes:
            _strip_id_and_icon(div)
        elif "fignone" in classes:
            figure = soup.new_tag('figure')
            image = div.find('img')
            caption = div.find('span', class_='figcap')
            if caption is not None:
                caption.name = 'figcaption'
                figure.append(caption)
            if image:
                image['src'] = '/_static/images/' + image['src']
                figure.append(image)
            div.replace_with(figure)
        else:
            div.name = 'p'

    for table in soup.body.find_all('table'):
        table_id = table.get('id')
        if table_id:
            table['id'] = re.sub('[-_]', '', table_id)

    return soup.body
|
||
|
||
|
||
# Matches a line opening a <table> that carries an id; group 1 is the id.
_TABLE_ID_RE = re.compile(r'.*<table[^>]+ id="([^"]+)"')

# (pattern, replacement) pairs applied, in this order, to every line of the
# RST rendered by pandoc.
_RST_LINE_FIXES = (
    # The table labels written during pre-processing show up as ``.. \_``
    # in the rendered file (presumably escaped by pandoc); turn them back
    # into RST targets standing in their own paragraph.
    (re.compile(r'\.\. \\_'), '\n\n.. _'),
    (re.compile(r'√'), 'Y'),
    (re.compile(r'public_sys-resources/'), ''),
    (re.compile(r'image:: '), 'image:: /_static/images/'),
    (re.compile(r' :name: .*$'), ''),
    (re.compile(r'\*\*Parent topic:.*$'), ''),
    (re.compile(r'\.\. code:: screen'), '.. code-block::'),
    (re.compile(r'\.\. code:: codeblock'), '.. code-block::'),
)


def _fix_links(body, rename_matrix, target_deepness):
    """Point cross-document links at the new layout and id named anchors."""
    seen_anchors = set()
    to_root = '../' * target_deepness
    for lnk in body.find_all("a"):
        href = lnk.get('href')
        if href:
            href_parts = href.split('#')
            page = href_parts[0]
            if page in rename_matrix:
                page_url = to_root + rename_matrix[page]
            else:
                page_url = page
            if len(href_parts) > 1:
                # Fragments are normalised the same way flatten_html
                # normalises table ids, and lower-cased.
                fragment = re.sub('[-_]', '', href_parts[1]).lower()
                lnk['href'] = f"{page_url}#{fragment}"
            else:
                lnk['href'] = page_url
        else:
            # Empty named anchor: expose the first occurrence of each name
            # as an id as well.
            lnk_name = lnk.get('name')
            if (
                lnk_name and not lnk.string
                and lnk_name not in seen_anchors
            ):
                lnk['id'] = lnk_name
                seen_anchors.add(lnk_name)


def _preprocess_html(source, target, rename_matrix, target_deepness):
    """Flatten one HTML page, fix its links, write it to temp/<target>.tmp."""
    # The temporary file is consumed by pandoc, which reads UTF-8.
    with (
        open(source, 'r') as reader,
        open(f"temp/{target}.tmp", 'w', encoding='utf-8') as writer
    ):
        print(f"Processing {target}")
        soup = bs4.BeautifulSoup(reader.read(), "lxml")
        proc = flatten_html(soup)
        _fix_links(proc, rename_matrix, target_deepness)
        for line in str(proc).splitlines():
            table_match = _TABLE_ID_RE.match(line)
            if table_match:
                # Emit a label in front of the table so it can be
                # referenced from the rendered RST.
                writer.write(f".. _{table_match.group(1)}:\n\n")
            if not line.startswith("<strong>Parent topic:</strong>"):
                writer.write(line + '\n')


def _render_rst(target, target_path):
    """Convert temp/<target>.tmp with pandoc and post-process the result."""
    rendered = f"tmp_result/{target_path}/{target}.rst"
    # Argument list instead of a shell string: no quoting issues with file
    # names, and a failing pandoc run stops the conversion right here.
    subprocess.run(
        [
            "pandoc", f"temp/{target}.tmp", "-f", "html",
            "-o", rendered,
            "--ascii", "-s", "--wrap", "none",
        ],
        check=True,
    )
    with (
        open(rendered, 'r', encoding='utf-8') as reader,
        open(f"result/{target_path}/{target}.rst", 'w',
             encoding='utf-8') as writer
    ):
        print(f"Post processing {target}")
        for line in reader:
            for pattern, replacement in _RST_LINE_FIXES:
                line = pattern.sub(replacement, line)
            writer.write(line)


def _write_indexes(tree, metadata_by_code):
    """Write an index.rst with a toctree for every parent in ``tree``."""
    for k, v in tree.items():
        path = ''
        title = 'Main Index'
        if k != 0:
            curr = metadata_by_code[k]
            title = curr['title']
            path = get_target_path(curr['code'], metadata_by_code)
        # The directory is missing when none of the children was converted.
        pathlib.Path(f"result/{path}").mkdir(parents=True, exist_ok=True)
        with open(f"result/{path}/index.rst", "w",
                  encoding='utf-8') as index:
            underline = '=' * len(title)
            index.write(underline + '\n')
            index.write(title + '\n')
            index.write(underline + '\n')
            index.write('\n')
            index.write('.. toctree::\n')
            index.write(' :maxdepth: 1\n\n')
            for child in v:
                new_name = child['new_name']
                if child['code'] in tree:
                    # If this is folder - add /index
                    new_name = new_name + '/index'
                index.write(f" {new_name}\n")

        # The page converted for the parent itself is superseded by the
        # index that was just written.
        p = pathlib.Path(f"result/{path}.rst")
        if p.exists():
            print(f"{p.resolve()} is removed in favour"
                  f" of result/{path}/index.rst")
            p.unlink()


def _convert_documents():
    """Convert the export located in the current working directory."""
    with open("CLASS.TXT.json") as metadata_file:
        meta_data = json.load(metadata_file)
    metadata_by_uri = dict()
    metadata_by_code = dict()
    rename_matrix = dict()
    for f in meta_data:
        f['new_name'] = get_new_name(f['title'])
        metadata_by_uri[f['uri']] = f
        metadata_by_code[f.get('code')] = f

    tree = build_doc_tree(metadata_by_code)

    for f in meta_data:
        # Construct link renaming matrix
        # NOTE(review): an entry with children is mapped to
        # "<parent path>/index.html", while its index is written to
        # "<own path>/index.rst" by _write_indexes - confirm the target.
        target_path = get_target_path(f.get('p_code'), metadata_by_code)
        name = f["new_name"] if not tree.get(f['code']) else "index"
        rename_matrix[f['uri']] = f"{target_path}/{name}.html"

    pathlib.Path("temp/").mkdir(parents=True, exist_ok=True)

    for f in pathlib.Path().glob("*.html"):
        if f.name not in metadata_by_uri:
            continue
        _target = metadata_by_uri[f.name]
        target = _target['new_name']
        target_path = get_target_path(
            _target.get('p_code'), metadata_by_code)
        # Number of "../" needed in front of rewritten links.
        # NOTE(review): this is 1 for an empty target_path as well -
        # confirm that is intended for top-level pages.
        target_deepness = target_path.count('/') + 1
        pathlib.Path("tmp_result/" + target_path).mkdir(
            parents=True, exist_ok=True)
        pathlib.Path("result/" + target_path).mkdir(
            parents=True, exist_ok=True)

        # Pre-processing of html content
        _preprocess_html(f, target, rename_matrix, target_deepness)
        # Convert html to rst and post-process the rendered rst
        _render_rst(target, target_path)

    # Generate indexes
    _write_indexes(tree, metadata_by_code)


def main():
    """Convert an HTML documentation export into a tree of RST files.

    The directory given on the command line must contain ``CLASS.TXT.json``
    (a list of entries with ``title``, ``uri``, ``code`` and ``p_code``) and
    the ``*.html`` pages named by the ``uri`` values. For every page listed
    in the metadata the HTML is flattened, its links are rewritten to the
    new layout, pandoc renders it to RST and the RST is cleaned up. Finally
    an ``index.rst`` with a toctree is written for every parent entry.

    Intermediate files go to ``temp/`` and ``tmp_result/``, the final files
    to ``result/``, all inside the given directory. Generated files are
    written as UTF-8. The working directory is changed to the given
    directory for the duration of the run and restored afterwards, also
    when the conversion fails.

    :raises subprocess.CalledProcessError: if pandoc exits with an error.
    """
    parser = argparse.ArgumentParser(description='Process links.')
    parser.add_argument(
        'path', type=str, help='path to the files')
    args = parser.parse_args()
    retval = os.getcwd()
    os.chdir(args.path)
    try:
        _convert_documents()
    finally:
        os.chdir(retval)
|
||
|
||
|
||
# Run the conversion only when the file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|