"""Compare local HTML documents with the content published online.

For every ``*.html`` file in a directory the published JSON for that
document is downloaded and the ``div`` whose ``id`` starts with ``body``
is compared between the published and the local version.
"""
import argparse
import logging
import pathlib

import requests
from bs4 import BeautifulSoup

# Base URL the per-document JSON files are downloaded from.
_HELPCENTER_URL = "https://docs.otc.t-systems.com"
# Seconds to wait for the server; without a timeout a stalled connection
# would hang the whole run.
_REQUEST_TIMEOUT = 30


def body_filter(tag):
    """Return True for a ``div`` tag whose ``id`` starts with ``body``.

    Intended to be passed to ``BeautifulSoup.find`` as a filter function.
    """
    return (
        tag.name == "div"
        and tag.has_attr("id")
        and tag["id"].startswith("body")
    )


def _fetch_published_content(url_prefix, file_name):
    """Return the published HTML content for *file_name*, or None.

    Downloads ``<base>/<url_prefix>/<file_name>.json`` and returns the
    ``content`` of the first item whose ``url`` ends with
    ``<file_name>.html`` and whose content is non-empty.

    Raises:
        requests.RequestException: on connection errors, timeouts or a
            non-success HTTP status.
        ValueError: if the response body is not valid JSON.
    """
    response = requests.get(
        f"{_HELPCENTER_URL}/{url_prefix}/{file_name}.json",
        timeout=_REQUEST_TIMEOUT,
    )
    # Fail with a clear HTTP error instead of a confusing JSON decode
    # error on an error page.
    response.raise_for_status()
    wanted_suffix = f"{file_name}.html"
    for item in response.json():
        # Tolerate items without "url" or "content" keys.
        url = item.get("url") or ""
        content = item.get("content")
        if url.endswith(wanted_suffix) and content:
            return content
    return None


def compare(url_prefix, file_path, file_name):
    """Compare the published and the local body of one document.

    Args:
        url_prefix: URL path prefix under which the document is published.
        file_path: Directory containing the local ``<file_name>.html``.
        file_name: Document name without the ``.html`` extension.

    Returns:
        True when both body elements are equal; False when they differ
        or when the published content could not be obtained.
    """
    try:
        page_data = _fetch_published_content(url_prefix, file_name)
    except (requests.RequestException, ValueError) as exc:
        # One unreachable document must not abort the comparison of the
        # remaining files; report it as a deviation instead.
        logging.error(
            "could not fetch published data for %s: %s", file_name, exc)
        return False
    if page_data is None:
        logging.error("no published content found for %s.html", file_name)
        return False

    original = BeautifulSoup(page_data, "html.parser")
    local_file = pathlib.Path(file_path) / f"{file_name}.html"
    with open(local_file, "r", encoding="utf-8") as f:
        new = BeautifulSoup(f.read(), "html.parser")

    # NOTE: if neither document contains a body div both lookups yield
    # None and the documents are treated as equal.
    t1 = original.find(body_filter)
    t2 = new.find(body_filter)
    if t1 != t2:
        logging.error("content %s != %s", t1, t2)
        return False
    return True


def main():
    """Compare every ``*.html`` file under the given path and log the result."""
    logging.basicConfig(level=logging.DEBUG)
    parser = argparse.ArgumentParser(description="Compare document data.")
    parser.add_argument(
        "path",
        type=str,
        help="Path to the document content (i.e. docs/ecs/api-ref)")
    parser.add_argument(
        "url",
        type=str,
        help="url prefix in the helpcenter (i.e. api/ecs)")
    args = parser.parse_args()

    result = True
    # Sorted so that the log output is stable between runs.
    for f in sorted(pathlib.Path(args.path).glob("*.html")):
        logging.debug("Comparing %s", f.name)
        # f.stem drops only the final ".html"; str.replace would also
        # remove ".html" occurring elsewhere in the file name.
        if not compare(args.url, args.path, f.stem):
            result = False

    if not result:
        logging.error("Comparison showed deviations")
    else:
        logging.info("No deviations found")


if __name__ == '__main__':
    main()