forked from docs/doc-exports
Add --improve-table-headers option (enforce spaces around `/` in table headers)
This commit is contained in:
parent
6a28ae6c96
commit
a65a0c20b6
23
process.py
23
process.py
@ -54,7 +54,7 @@ def build_doc_tree(metadata):
|
|||||||
return flat_tree
|
return flat_tree
|
||||||
|
|
||||||
|
|
||||||
def flatten_html(soup):
|
def flatten_html(soup, args):
|
||||||
for i in soup.body.find_all('div'):
|
for i in soup.body.find_all('div'):
|
||||||
if "note" in i.get('class', []):
|
if "note" in i.get('class', []):
|
||||||
del i['id']
|
del i['id']
|
||||||
@ -88,10 +88,24 @@ def flatten_html(soup):
|
|||||||
i.replace_with(figure)
|
i.replace_with(figure)
|
||||||
else:
|
else:
|
||||||
i.name = 'p'
|
i.name = 'p'
|
||||||
|
if args.improve_table_headers:
|
||||||
|
for th in soup.body.find_all('th'):
|
||||||
|
if hasattr(th, 'p') and th.p.string:
|
||||||
|
th.p.string = re.sub(
|
||||||
|
r'\b/\b',
|
||||||
|
' / ',
|
||||||
|
th.p.string)
|
||||||
for tbl in soup.body.find_all('table'):
|
for tbl in soup.body.find_all('table'):
|
||||||
tbl_id = tbl.get('id')
|
tbl_id = tbl.get('id')
|
||||||
if tbl_id:
|
if tbl_id:
|
||||||
tbl['id'] = re.sub('[-_]', '', tbl_id)
|
tbl['id'] = re.sub('[-_]', '', tbl_id)
|
||||||
|
for lnk in soup.body.find_all("a"):
|
||||||
|
if (
|
||||||
|
lnk.string
|
||||||
|
and re.match(r'\d+', lnk.string)
|
||||||
|
and lnk['href'].startswith('#')
|
||||||
|
):
|
||||||
|
lnk.unwrap()
|
||||||
|
|
||||||
return soup.body
|
return soup.body
|
||||||
|
|
||||||
@ -99,7 +113,10 @@ def flatten_html(soup):
|
|||||||
def main():
|
def main():
|
||||||
parser = argparse.ArgumentParser(description='Process links.')
|
parser = argparse.ArgumentParser(description='Process links.')
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'path', type=str, help='path to the files')
|
'path', type=str, help='path to the files')
|
||||||
|
parser.add_argument(
|
||||||
|
'--improve-table-headers', action='store_true',
|
||||||
|
help='Improve table headers by enforcing spaces around `/`')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
retval = os.getcwd()
|
retval = os.getcwd()
|
||||||
os.chdir(args.path)
|
os.chdir(args.path)
|
||||||
@ -172,7 +189,7 @@ def main():
|
|||||||
doc_anchors = dict()
|
doc_anchors = dict()
|
||||||
content = reader.read()
|
content = reader.read()
|
||||||
soup = bs4.BeautifulSoup(content, "lxml")
|
soup = bs4.BeautifulSoup(content, "lxml")
|
||||||
proc = flatten_html(soup)
|
proc = flatten_html(soup, args)
|
||||||
# Fix cross links
|
# Fix cross links
|
||||||
for lnk in proc.find_all("a"):
|
for lnk in proc.find_all("a"):
|
||||||
href = lnk.get('href')
|
href = lnk.get('href')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user