# 1
# 0
# forked from docs/doc-exports
# doc-exports/tools/compare.py
# gtema 63e77a3648 Add document comparison utility
# Reviewed-by: Goncharov, Artem <artem.goncharov@t-systems.com>
# Co-authored-by: gtema <artem.goncharov@gmail.com>
# Co-committed-by: gtema <artem.goncharov@gmail.com>
# 2022-10-19 06:33:38 +00:00
#
# 63 lines
# 1.7 KiB
# Python

import argparse
import logging
import requests
import pathlib
from bs4 import BeautifulSoup
def body_filter(tag):
    """Tell whether *tag* is a ``div`` whose ``id`` starts with ``body``.

    Intended as a callable filter for ``BeautifulSoup.find``: the first
    element for which this returns a true value is the one selected.

    Args:
        tag: Element exposing ``name``, ``has_attr()`` and item access to
            its attributes.

    Returns:
        True for a ``div`` element carrying an ``id`` attribute that begins
        with ``"body"``, False otherwise.
    """
    # Reject anything that is not a <div> before touching its attributes.
    if tag.name != "div":
        return False
    # Indexing a missing attribute would raise, so check for presence first.
    if not tag.has_attr("id"):
        return False
    return tag["id"].startswith("body")
def compare(url_prefix, file_path, file_name, *, timeout=30):
    """Compare a local HTML document with its published counterpart.

    Downloads
    ``https://docs.otc.t-systems.com/{url_prefix}/{file_name}.json``, takes
    the ``content`` of the first entry whose ``url`` ends with
    ``{file_name}.html`` and whose ``content`` is non-empty, and compares
    the element selected by ``body_filter`` in that content with the one
    selected in ``{file_path}/{file_name}.html``.

    Args:
        url_prefix: URL prefix of the document in the help center.
        file_path: Directory that holds the local HTML file.
        file_name: Document name without the ``.html`` suffix.
        timeout: Seconds to wait for the server, passed to ``requests.get``.

    Returns:
        True when both body elements are equal. False when they differ,
        when the server answers with an HTTP error status, or when the
        published data contains no usable entry for the document.

    Raises:
        requests.RequestException: On a transport failure or timeout.
        ValueError: If the response body is not valid JSON.
        OSError: If the local HTML file cannot be read.
    """
    url = f"https://docs.otc.t-systems.com/{url_prefix}/{file_name}.json"
    # Without an explicit timeout requests waits indefinitely on a stalled
    # connection, which would hang the whole comparison run.
    response = requests.get(url, timeout=timeout)
    if not response.ok:
        # An error page is not JSON; report it instead of failing later with
        # a confusing decode error.
        logging.error("Cannot fetch %s: HTTP %s", url, response.status_code)
        return False

    page_name = f"{file_name}.html"
    # Tolerate entries that lack "url" or "content" rather than crashing.
    page_data = next(
        (
            item["content"]
            for item in response.json()
            if (item.get("url") or "").endswith(page_name)
            and item.get("content")
        ),
        None,
    )
    if page_data is None:
        logging.error("No published content for %s in %s", page_name, url)
        return False
    original = BeautifulSoup(page_data, "html.parser")

    # NOTE(review): assumes the exported HTML files are UTF-8; the previous
    # code relied on the platform's default encoding - confirm.
    local_file = pathlib.Path(file_path) / page_name
    new = BeautifulSoup(
        local_file.read_text(encoding="utf-8"), "html.parser")

    published_body = original.find(body_filter)
    local_body = new.find(body_filter)
    if published_body != local_body:
        logging.error("content %s != %s", published_body, local_body)
        return False
    return True
def main():
    """Compare every ``*.html`` file of a directory with the help center.

    Reads two positional command-line arguments: ``path``, the directory
    with the document content, and ``url``, the URL prefix of the document
    in the help center. Each ``*.html`` file directly inside ``path`` is
    passed to ``compare`` and the overall outcome is logged.
    """
    logging.basicConfig(level=logging.DEBUG)
    parser = argparse.ArgumentParser(description="Compare document data.")
    parser.add_argument(
        "path",
        type=str,
        help="Path to the document content (e.g. docs/ecs/api-ref)")
    parser.add_argument(
        "url",
        type=str,
        help="url prefix in the helpcenter (e.g. api/ecs)")
    args = parser.parse_args()

    # Sorted so that the log output is reproducible between runs.
    html_files = sorted(pathlib.Path(args.path).glob("*.html"))
    if not html_files:
        # A mistyped path would otherwise be reported as a clean comparison.
        logging.warning("No *.html files found in %s", args.path)

    result = True
    for f in html_files:
        logging.debug("Comparing %s", f.name)
        # stem drops only the final suffix, whereas str.replace would also
        # remove any ".html" occurring in the middle of the file name.
        if not compare(args.url, args.path, f.stem):
            # Keep going so that every deviating document gets reported.
            result = False

    if not result:
        logging.error("Comparison showed deviations")
    else:
        logging.info("No deviations found")
# Run the comparison only when the file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()