Update conversion script

- drop line wrapping (leaves lines long, but at least doesn't corrupt
  indentation)
- repair cross links by rewriting them to the relative path of the
  renamed file (sketched below)
- further extend new name calculation
- add input path argument
Artem Goncharov 2022-03-11 18:02:51 +01:00
parent 446860da26
commit d96bbbe08f
4 changed files with 62 additions and 141 deletions
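
The cross-link repair added here works in two steps: the script first maps every original page URI to its new location ("<target_path>/<new_name>.html"), and then, while writing each file, prefixes that location with one "../" per directory level of the file being written. A minimal standalone sketch of the idea, with made-up URI and path values rather than anything from CLASS.TXT.json:

    # Illustrative sketch of the link rewriting used below; the URI and
    # target_path values are invented, not taken from the real metadata.
    rename_matrix = {
        "en-us_topic_0000001.html": "ecs/instances/creating_an_ecs.html",
    }

    def rewrite_links(line, target_path):
        """Rewrite old cross links in one HTML line, relative to target_path."""
        # One '../' per directory level of the file currently being written.
        target_deepness = target_path.count('/') + 1
        for old, new in rename_matrix.items():
            line = line.replace(old, ('../' * target_deepness) + new)
        return line

    print(rewrite_links(
        '<a href="en-us_topic_0000001.html">ECS</a>', 'ecs/instances'))
    # -> <a href="../../ecs/instances/creating_an_ecs.html">ECS</a>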


@@ -1 +0,0 @@
-../../process.py

File diff suppressed because it is too large


@@ -1,19 +1,22 @@
 #!/usr/bin/env python3
+import argparse
 import json
 import os
 import pathlib
 import re
+import subprocess
 import warnings


 def get_new_name(current_name):
     new_name = current_name.replace(' - ', '_')
     new_name = new_name.replace(' ', '_')
     new_name = new_name.replace('/', '_')
     new_name = new_name.replace('\'', '')
-    new_name = new_name.replace('\"','')
-    new_name = new_name.replace('\`','')
-    new_name = new_name.replace('\´','')
+    new_name = new_name.replace('"', '')
+    new_name = new_name.replace('`', '')
+    new_name = new_name.replace('´', '')
     new_name = new_name.replace(':', '')
     new_name = new_name.replace('?', '')
     new_name = new_name.lower()
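
As a rough illustration (hypothetical title, covering only the replacements visible in this hunk; later steps of get_new_name below could adjust the result), the normalization turns a topic title into a file slug like this:

    # Hypothetical title traced through the replacements shown above.
    title = "Creating a VPC - Step 1?"
    slug = title.replace(' - ', '_').replace(' ', '_').replace('?', '').lower()
    print(slug)  # -> creating_a_vpc_step_1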
@@ -48,23 +51,37 @@ def build_doc_tree(metadata):
 def main():
+    parser = argparse.ArgumentParser(description='Process links.')
+    parser.add_argument(
+        'path', type=str, help='path to the files')
+    args = parser.parse_args()
+    retval = os.getcwd()
+    os.chdir(args.path)
     meta_data = json.loads(open("CLASS.TXT.json").read())
     metadata_by_uri = dict()
     metadata_by_code = dict()
+    rename_matrix = dict()
     table_re = re.compile(r'.*<table[^>]+ id="([^"]+)"')
     for f in meta_data:
         f['new_name'] = get_new_name(f['title'])
         metadata_by_uri[f['uri']] = f
         metadata_by_code[f.get('code')] = f
+        # Construct link renaming matrix
+        target_path = get_target_path(f['p_code'], metadata_by_code)
+        rename_matrix[f['uri']] = f"{target_path}/{f['new_name']}.html"
     tree = build_doc_tree(metadata_by_code)
+    pathlib.Path("temp/").mkdir(parents=True, exist_ok=True)
     for f in pathlib.Path().glob("*.html"):
         if not f.name in metadata_by_uri:
             continue
         _target = metadata_by_uri[f.name]
         target = _target['new_name']
         target_path = get_target_path(_target['p_code'], metadata_by_code)
+        target_deepness = target_path.count('/') + 1
         pathlib.Path("temp/").mkdir(parents=True, exist_ok=True)
         pathlib.Path("tmp_result/" + target_path).mkdir(parents=True, exist_ok=True)
         pathlib.Path("result/" + target_path).mkdir(parents=True, exist_ok=True)
@@ -79,12 +96,17 @@ def main():
                 if not line.startswith("<strong>Parent topic:</strong>"):
                     # Drop all divs
                     processed_line = re.sub(r'<[/]?div[^>]*>', '', line)
+                    # Replace links to point to renamed files
+                    for k, v in rename_matrix.items():
+                        replace = ('../' * target_deepness) + v
+                        processed_line = processed_line.replace(k, replace)
                     writer.write(processed_line)
         # Convert html to rst
         os.system(
             f"pandoc 'temp/{target}.tmp' -f html "
             f"-o 'tmp_result/{target_path}/{target}.rst' "
-            f"--column 120 --ascii -s --wrap preserve"
+            f"--ascii -s --wrap none"
         )
         # Post processing of rendered rst
         with (
@@ -98,7 +120,7 @@ def main():
             processed_line = re.sub(r'public_sys-resources/', '', processed_line)
             processed_line = re.sub(r'image:: ', 'image:: /_static/images/', processed_line)
             processed_line = re.sub(r' :name: .*$', '', processed_line)
-            processed_line = re.sub(r'**Parent topic:.*$', '', processed_line)
+            processed_line = re.sub(r'\*\*Parent topic:.*$', '', processed_line)
             processed_line = re.sub(r'.. code:: screen', '.. code-block::', processed_line)
             writer.write(processed_line)
     # Generate indexes
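
The escaping added on this line is not just cosmetic: with a leading unescaped "**" the first "*" has nothing to repeat, so the old pattern does not even compile and re.sub raises re.error at runtime. A quick check with an illustrative line:

    import re

    line = '**Parent topic:** Elastic Cloud Server'
    print(re.sub(r'\*\*Parent topic:.*$', '', line))  # -> '' (marker stripped)

    # Old, unescaped pattern: '*' has nothing to repeat, so it fails to compile.
    # re.sub(r'**Parent topic:.*$', '', line)  # raises re.error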
@@ -130,6 +152,8 @@ def main():
                 f" of result/{path}/index.rst")
             p.unlink()
+    os.chdir(retval)


 if __name__ == "__main__":
     main()

process_links.py (new file, 25 lines)

@@ -0,0 +1,25 @@
+#!/usr/bin/env python3
+import json
+import argparse
+import subprocess
+
+
+def main():
+    parser = argparse.ArgumentParser(description='Process links.')
+    parser.add_argument(
+        'path', type=str, help='path to the files')
+    args = parser.parse_args()
+
+    matrix = json.loads(open("matrix.json").read())
+    for k, v in matrix.items():
+        replace = v.replace('/', '\/')
+        subprocess.run(
+            f"find {args.path} -name *'.rst' -type f -print0 | xargs"
+            f" -0 sed -i '' 's/{k}/{replace}/g'",
+            shell=True
+        )
+        print(k, v)
+
+
+if __name__ == "__main__":
+    main()
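
For comparison only, not part of this commit: the same replacement pass could be done without shelling out to find and sed, which also sidesteps the '\/' sequence that newer Python releases flag as an invalid escape. A rough pure-Python sketch under the same matrix.json contract (old link target mapped to new relative path):

    #!/usr/bin/env python3
    # Hypothetical alternative to process_links.py: same matrix.json input,
    # but the replacements are applied in Python instead of via find | xargs sed.
    import argparse
    import json
    import pathlib


    def main():
        parser = argparse.ArgumentParser(description='Process links.')
        parser.add_argument('path', type=str, help='path to the files')
        args = parser.parse_args()

        matrix = json.loads(pathlib.Path("matrix.json").read_text())
        for rst in pathlib.Path(args.path).rglob("*.rst"):
            text = rst.read_text()
            for old, new in matrix.items():
                text = text.replace(old, new)
            rst.write_text(text)


    if __name__ == "__main__":
        main()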