"""Compare locally generated HTML documents with their published versions.

For every ``*.html`` file in a directory, the published counterpart is looked
up in a JSON index fetched from ``https://docs.otc.t-systems.com`` and the
"body" ``div`` elements of both documents are compared.
"""

import argparse
import logging
import pathlib
import sys

import requests
from bs4 import BeautifulSoup

# Seconds to wait for the documentation server. Without a timeout
# ``requests.get`` may block indefinitely on an unresponsive host.
_REQUEST_TIMEOUT = 30


def body_filter(tag):
    """Return True for a ``div`` tag whose ``id`` starts with "body"."""
    return (
        tag.name == "div"
        and tag.has_attr("id")
        and tag["id"].startswith("body")
    )


def simplify_body(data):
    """Return the text of *data* with every space character removed."""
    return data.get_text().replace(" ", "")


class OTCComparator:
    """Compare local HTML files against the published documentation."""

    def compare(self, url_prefix, file_path, file_name):
        """Compare one local document with its published counterpart.

        Args:
            url_prefix: Path prefix of the document on the documentation
                server (placed between the host name and the file name).
            file_path: Directory containing the local ``<file_name>.html``.
            file_name: Document name without the ``.html`` extension.

        Returns:
            True when the body elements are equal, or differ only in markup
            or spaces while their plain text is the same; False on a real
            mismatch or when the comparison could not be carried out.
        """
        try:
            response = requests.get(
                f"https://docs.otc.t-systems.com/{url_prefix}/"
                f"{file_name}.json",
                timeout=_REQUEST_TIMEOUT,
            )
            # Report HTTP errors as such instead of as a JSON decoding error.
            response.raise_for_status()

            # First index entry that refers to this page and has content.
            # Entries lacking "url" or "content" are skipped, not fatal.
            page_suffix = f"{file_name}.html"
            page_data = next(
                (
                    item["content"]
                    for item in response.json()
                    if (item.get("url") or "").endswith(page_suffix)
                    and item.get("content")
                ),
                None,
            )
            if page_data is None:
                logging.error(
                    "File %s has no published content to compare with",
                    file_name)
                return False
            original = BeautifulSoup(page_data, 'html.parser')

            local_file = pathlib.Path(file_path) / f"{file_name}.html"
            # Explicit encoding so the result does not depend on the locale.
            with local_file.open("r", encoding="utf-8") as f:
                new = BeautifulSoup(f.read(), 'html.parser')

            t1 = original.find(body_filter)
            t2 = new.find(body_filter)
            # Two missing bodies would compare equal (None == None) and be
            # reported as a match, so a missing body is an explicit failure.
            if t1 is None or t2 is None:
                logging.error(
                    "File %s: body element not found in %s content",
                    file_name,
                    "current" if t1 is None else "proposed")
                return False

            if t1 == t2:
                logging.info("Content matches")
                return True

            if simplify_body(t1) == simplify_body(t2):
                logging.error(
                    "File %s is not matching, but "
                    "plain text matches", file_name)
                return True

            logging.error("File %s mismatches", file_name)
            # unicode_escape makes invisible and non-ASCII characters
            # visible in the log output.
            logging.debug(
                "Proposed content: %s",
                t2.get_text().encode("unicode_escape").decode("utf-8"))
            logging.debug(
                "Current content: %s",
                t1.get_text().encode("unicode_escape").decode("utf-8"))
            return False
        except Exception as ex:
            # Per-file boundary: a failure on one document must not stop the
            # remaining comparisons, so it is logged and counted as failed.
            logging.error("Content comparison error %s", ex)
            return False

    def main(self):
        """Parse the command line, compare every ``*.html`` file found.

        Exits the process with status 1 when at least one comparison fails.
        """
        logging.basicConfig(level=logging.DEBUG)
        parser = argparse.ArgumentParser(description="Compare document data.")
        parser.add_argument(
            "path",
            type=str,
            help="Path to the document content (i.e. docs/ecs/api-ref)")
        parser.add_argument(
            "url",
            type=str,
            help="url prefix in the helpcenter (i.e. api/ecs)")
        args = parser.parse_args()

        match = True
        for f in pathlib.Path(args.path).glob("*.html"):
            logging.info("Comparing %s", f.name)
            # ``stem`` drops only the final ".html"; str.replace would also
            # strip any ".html" occurring earlier in the file name.
            if not self.compare(args.url, args.path, f.stem):
                match = False

        if not match:
            logging.error("Comparison showed deviations")
            sys.exit(1)
        logging.info("No deviations found")


def main():
    """Console entry point: run the comparator."""
    OTCComparator().main()


if __name__ == '__main__':
    main()