forked from docs/doc-exports
Reviewed-by: Goncharov, Artem <artem.goncharov@t-systems.com> Co-authored-by: gtema <artem.goncharov@gmail.com> Co-committed-by: gtema <artem.goncharov@gmail.com>
96 lines
2.9 KiB
Python
96 lines
2.9 KiB
Python
import argparse
|
|
import logging
|
|
import requests
|
|
import pathlib
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
|
def body_filter(tag):
    """Tell whether *tag* is a ``div`` whose ``id`` attribute starts with ``body``.

    The function is passed as the matcher to ``find`` on the parsed
    documents, so it receives one candidate tag at a time.
    """
    if tag.name != "div":
        return False
    if not tag.has_attr("id"):
        return False
    return tag["id"].startswith("body")
|
|
|
|
|
|
def simplify_body(data):
    """Return the text of *data* with every space character removed.

    Only the plain space (``" "``) is dropped; newlines, tabs and other
    whitespace are kept as they are.
    """
    text = data.get_text()
    return "".join(text.split(" "))
|
|
|
|
|
|
class OTCComparator:
    """Compare locally stored HTML documents with their published versions.

    For each local ``*.html`` file the JSON document of the same name is
    downloaded from ``base_url`` and the first element accepted by
    ``body_filter`` in both versions is compared.
    """

    # Root of the published documentation the local files are checked against.
    base_url = "https://docs.otc.t-systems.com"
    # Seconds to wait for the server; without a timeout a stalled
    # connection would block the whole run indefinitely.
    request_timeout = 30

    @staticmethod
    def _find_page_content(items, file_name):
        """Return the published content for ``<file_name>.html``.

        Args:
            items: Iterable of mappings as decoded from the downloaded JSON.
            file_name: Document name without the ``.html`` extension.

        Returns:
            The ``content`` value of the first item whose ``url`` ends with
            ``<file_name>.html`` and whose content is non-empty, or ``None``
            when there is no such item.
        """
        suffix = f"{file_name}.html"
        for item in items:
            # Items lacking "url" or "content" are skipped instead of
            # raising and aborting the comparison of the whole file.
            url = item.get("url") or ""
            if url.endswith(suffix) and item.get("content"):
                return item["content"]
        return None

    def compare(self, url_prefix, file_path, file_name):
        """Compare one local HTML file with its published counterpart.

        Args:
            url_prefix: Path of the document below ``base_url``.
            file_path: Directory containing the local ``<file_name>.html``.
            file_name: Document name without the ``.html`` extension.

        Returns:
            ``True`` when the body elements are equal or differ only in
            markup and spaces. ``False`` when the text differs, when the
            published page or a body element cannot be found, or when any
            step of the comparison raises.
        """
        try:
            response = requests.get(
                f"{self.base_url}/{url_prefix}/{file_name}.json",
                timeout=self.request_timeout,
            )
            # Report HTTP errors as such rather than as a JSON decoding
            # failure further down.
            response.raise_for_status()

            page_data = self._find_page_content(response.json(), file_name)
            if page_data is None:
                logging.error(
                    "No published content found for %s", file_name)
                return False
            original = BeautifulSoup(page_data, "html.parser")

            local_file = pathlib.Path(file_path) / f"{file_name}.html"
            # Explicit encoding keeps the result independent of the locale.
            with open(local_file, "r", encoding="utf-8") as f:
                new = BeautifulSoup(f.read(), "html.parser")

            t1 = original.find(body_filter)
            t2 = new.find(body_filter)

            if t1 == t2:
                logging.info("Content matches")
                return True

            if t1 is None or t2 is None:
                # Exactly one side has no body element (both missing would
                # have compared equal above).
                logging.error(
                    "File %s has no body element in the %s document",
                    file_name,
                    "published" if t1 is None else "proposed")
                return False

            if simplify_body(t1) == simplify_body(t2):
                logging.error(
                    "File %s is not matching, but "
                    "plain text matches", file_name)
                return True

            logging.error("File %s mismatches", file_name)
            logging.debug(
                "Proposed content: %s",
                t2.get_text().encode("unicode_escape").decode("utf-8"))
            logging.debug(
                "Current content: %s",
                t1.get_text().encode("unicode_escape").decode("utf-8"))
            return False
        except Exception as ex:
            # Boundary for a single file: a failure here must not stop the
            # remaining files from being compared.
            logging.error("Content comparison error %s", ex)
            return False

    def main(self):
        """Compare every ``*.html`` file named on the command line.

        Reads the positional arguments ``path`` and ``url``, runs
        ``compare`` for each ``*.html`` file directly inside ``path`` and
        logs the outcome.

        Raises:
            SystemExit: With status 1 when at least one comparison failed.
        """
        logging.basicConfig(level=logging.DEBUG)
        parser = argparse.ArgumentParser(description="Compare document data.")
        parser.add_argument(
            "path",
            type=str,
            help="Path to the document content (i.e. docs/ecs/api-ref)")
        parser.add_argument(
            "url",
            type=str,
            help="url prefix in the helpcenter (i.e. api/ecs)")
        args = parser.parse_args()
        match = True

        # Sorted so that the log output is in a stable order.
        for f in sorted(pathlib.Path(args.path).glob("*.html")):
            logging.info("Comparing %s", f.name)
            # stem drops only the final ".html"; str.replace would also
            # remove ".html" occurring elsewhere in the name.
            if not self.compare(args.url, args.path, f.stem):
                match = False

        if not match:
            logging.error("Comparison showed deviations")
            # The exit() builtin is injected by the site module and is not
            # always available; SystemExit always is.
            raise SystemExit(1)
        logging.info("No deviations found")
|
|
|
|
|
|
def main():
    """Script entry point: create an ``OTCComparator`` and run its ``main``."""
    comparator = OTCComparator()
    comparator.main()
|
|
|
|
|
|
# Run the comparison only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|