forked from docs/doc-exports
Compare commits
1 Commits
main
...
split-conv
Author | SHA1 | Date | |
---|---|---|---|
9ced3b3fb0 |
@ -1,95 +0,0 @@
|
|||||||
import argparse
|
|
||||||
import logging
|
|
||||||
import requests
|
|
||||||
import pathlib
|
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
|
|
||||||
|
|
||||||
def body_filter(tag):
    """Tell whether *tag* is a ``<div>`` whose ``id`` starts with ``body``.

    Intended as a filter callable for ``BeautifulSoup.find``.
    """
    if tag.name != "div":
        return False
    if not tag.has_attr("id"):
        return False
    return tag["id"].startswith("body")
|
|
||||||
|
|
||||||
|
|
||||||
def simplify_body(data):
    """Return the text content of *data* with every space character removed.

    *data* must provide a ``get_text()`` method returning a string.
    """
    plain_text = data.get_text()
    return plain_text.replace(" ", "")
|
|
||||||
|
|
||||||
|
|
||||||
class OTCComparator:
    """Compare local HTML documents against the published help center.

    For every document the published page content is fetched from
    ``https://docs.otc.t-systems.com/<url_prefix>/<file_name>.json`` and the
    first element accepted by ``body_filter`` is compared with the same
    element of the local ``<file_path>/<file_name>.html`` file.
    """

    #: Seconds to wait for the help center before giving up on a request.
    REQUEST_TIMEOUT = 30

    def compare(self, url_prefix, file_path, file_name):
        """Compare one local document with its published counterpart.

        Args:
            url_prefix: URL prefix of the document in the help center
                (for example ``api/ecs``).
            file_path: Directory holding the local ``.html`` files.
            file_name: Document name without the ``.html`` suffix.

        Returns:
            ``True`` when the body elements are equal, or when only their
            markup/spaces differ (text with spaces removed is equal).
            ``False`` on a real mismatch or when fetching, reading or
            parsing fails. Problems are logged, never raised.
        """
        try:
            response = requests.get(
                f"https://docs.otc.t-systems.com/{url_prefix}/"
                f"{file_name}.json",
                # Without a timeout a stalled server blocks the run forever.
                timeout=self.REQUEST_TIMEOUT,
            )
            # Report HTTP errors as such instead of as a JSON decode error.
            response.raise_for_status()

            page_data = None
            for item in response.json():
                # Entries lacking "url" or "content" are skipped instead of
                # aborting the lookup with AttributeError/KeyError.
                if (
                    (item.get("url") or "").endswith(f"{file_name}.html")
                    and item.get("content")
                ):
                    page_data = item["content"]
                    break
            if page_data is None:
                logging.error(
                    "File %s has no published content to compare with",
                    file_name)
                return False
            original = BeautifulSoup(page_data, 'html.parser')

            # NOTE(review): local exports are assumed to be UTF-8 - confirm.
            with open(
                f"{file_path}/{file_name}.html", "r", encoding="utf-8"
            ) as f:
                new_content = f.read()
            new = BeautifulSoup(new_content, 'html.parser')

            t1 = original.find(body_filter)
            t2 = new.find(body_filter)
            if t1 is None and t2 is None:
                # Kept as a "match" for backward compatibility, but make it
                # visible that nothing was actually compared.
                logging.warning(
                    "File %s has no body element on either side, "
                    "nothing was compared", file_name)
                return True
            if t1 is None or t2 is None:
                logging.error(
                    "File %s: body element is missing in the %s content",
                    file_name, "current" if t1 is None else "proposed")
                return False

            if t1 == t2:
                logging.info("Content matches")
                return True
            if simplify_body(t1) == simplify_body(t2):
                # Markup differs but the visible text is the same.
                logging.error(
                    "File %s is not matching, but "
                    "plain text matches", file_name)
                return True

            logging.error("File %s mismatches", file_name)
            # unicode_escape makes invisible characters visible in the log.
            logging.debug(
                "Proposed content: %s",
                t2.get_text().encode("unicode_escape").decode("utf-8"))
            logging.debug(
                "Current content: %s",
                t1.get_text().encode("unicode_escape").decode("utf-8"))
            return False
        except Exception as ex:
            # Boundary handler: one broken document must not stop the run.
            logging.error("Content comparison error %s", ex)
            return False

    def main(self):
        """Compare every ``*.html`` file of a directory with the help center.

        Reads two positional command line arguments, ``path`` and ``url``,
        and runs ``compare`` for each ``*.html`` file directly inside
        ``path``.

        Raises:
            SystemExit: with status 1 when at least one document deviates.
        """
        logging.basicConfig(level=logging.DEBUG)
        parser = argparse.ArgumentParser(description="Compare document data.")
        parser.add_argument(
            "path",
            type=str,
            help="Path to the document content (i.e. docs/ecs/api-ref")
        parser.add_argument(
            "url",
            type=str,
            help="url prefix in the helpcenter (i.e. api/ecs)")
        args = parser.parse_args()
        match = True

        for f in pathlib.Path(args.path).glob("*.html"):
            logging.info("Comparing %s", f.name)
            # ``stem`` drops only the trailing ".html"; str.replace would
            # also strip the text from the middle of a file name.
            if not self.compare(args.url, args.path, f.stem):
                match = False

        if not match:
            logging.error("Comparison showed deviations")
            # The ``exit`` builtin comes from ``site`` and is not always
            # available; raising SystemExit works everywhere.
            raise SystemExit(1)
        logging.info("No deviations found")
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Console entry point: run the comparator's command line interface."""
    comparator = OTCComparator()
    comparator.main()
|
|
||||||
|
|
||||||
|
|
||||||
# Run the command line interface only when executed as a script.
if __name__ == "__main__":
    main()
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -5,12 +5,10 @@
|
|||||||
- ensure-pip
|
- ensure-pip
|
||||||
- ensure-virtualenv
|
- ensure-virtualenv
|
||||||
- role: "ensure-pandoc"
|
- role: "ensure-pandoc"
|
||||||
vars:
|
|
||||||
ensure_pandoc_version: "2.19.2"
|
|
||||||
tasks:
|
tasks:
|
||||||
- name: Install convertor
|
- name: Install convertor
|
||||||
pip:
|
pip:
|
||||||
chdir: "{{ zuul.project.src_dir }}"
|
chdir: "{{ zuul.project.src_dir }}"
|
||||||
virtualenv: "{{ ansible_user_dir }}/.venv"
|
virtualenv: "{{ ansible_user_dir }}/.venv"
|
||||||
name: .
|
name: "{{ ansible_user_dir }}/{{ zuul.projects['gitea.eco.tsi-dev.otc-service.com/docs/doc-convertor'].src_dir }}"
|
||||||
editable: "yes"
|
editable: "yes"
|
||||||
|
15
setup.cfg
15
setup.cfg
@ -1,11 +1,11 @@
|
|||||||
[metadata]
|
[metadata]
|
||||||
name = otc-doc-convertor
|
name = otc-doc-exports
|
||||||
author = Open Telekom Cloud - Ecosystem Squad
|
author = Open Telekom Cloud - Ecosystem Squad
|
||||||
author_email = dl-pbcotcdeleco@t-systems.com
|
author_email = dl-pbcotcdeleco@t-systems.com
|
||||||
description = Python program to convert docs exported in HTML into RST
|
description = Doc sources (HTML) to track changes in the vendors documentation system
|
||||||
description_file =
|
description_file =
|
||||||
README.md
|
README.md
|
||||||
home_page = https://github.com/opentelekomcloud-docs/doc-exports
|
home_page = https://gitea.eco.tsi-dev.otc-service.com/docs/doc-exports
|
||||||
classifier =
|
classifier =
|
||||||
License :: OSI Approved :: Apache Software License
|
License :: OSI Approved :: Apache Software License
|
||||||
Operating System :: POSIX :: Linux
|
Operating System :: POSIX :: Linux
|
||||||
@ -17,12 +17,3 @@ classifier =
|
|||||||
Programming Language :: Python :: 3
|
Programming Language :: Python :: 3
|
||||||
Programming Language :: Python :: 3.6
|
Programming Language :: Python :: 3.6
|
||||||
Programming Language :: Python :: 3.7
|
Programming Language :: Python :: 3.7
|
||||||
keywords = Sphinx, search, python
|
|
||||||
|
|
||||||
[options]
|
|
||||||
packages = otc_doc_convertor
|
|
||||||
|
|
||||||
[options.entry_points]
|
|
||||||
console_scripts =
|
|
||||||
otc-convert-doc = otc_doc_convertor.convertor:main
|
|
||||||
otc-convert-compare = otc_doc_convertor.comparator:main
|
|
||||||
|
@ -18,7 +18,7 @@ import os
|
|||||||
import sys
|
import sys
|
||||||
|
|
||||||
extensions = [
|
extensions = [
|
||||||
'otcdocstheme'
|
'otcdocstheme',
|
||||||
]
|
]
|
||||||
|
|
||||||
otcdocs_auto_name = False
|
otcdocs_auto_name = False
|
||||||
|
Loading…
x
Reference in New Issue
Block a user