From 3a84eaeec0347fa1bee7e0f4cbe62ee2684aa209 Mon Sep 17 00:00:00 2001 From: "Gode, Sebastian" Date: Tue, 27 Jun 2023 07:57:25 +0000 Subject: [PATCH] Index Tool for Search Reviewed-by: Hasko, Vladimir Co-authored-by: Gode, Sebastian Co-committed-by: Gode, Sebastian --- otc_metadata/services.py | 17 +++-- tools-requirements.txt | 1 + tools/index_metadata.py | 151 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 164 insertions(+), 5 deletions(-) create mode 100644 tools/index_metadata.py diff --git a/otc_metadata/services.py b/otc_metadata/services.py index f62ca3c0..5c7111f1 100644 --- a/otc_metadata/services.py +++ b/otc_metadata/services.py @@ -115,12 +115,12 @@ class Services(object): def service_types_with_doc_types(self, environment=None): """Retrieve type and title from services and corresponding docs. - As well as a list of all doc_types. + As well as a list of all available doc types with title. :param str environment: Optional service environment. """ service_list = [] - doc_types = [] + docs = [] for service in self.all_services: if not service["service_title"]: @@ -139,8 +139,15 @@ class Services(object): "type": doc["type"] }) - if doc["type"] not in doc_types: - doc_types.append(doc["type"]) + new_doc = { + "type": doc["type"], + "title": doc["title"] + } + type_exists = any( + doc_dict["type"] == new_doc["type"] for doc_dict in docs + ) + if not type_exists: + docs.append(new_doc) service_list.append({ "service_title": service["service_title"], @@ -150,7 +157,7 @@ class Services(object): res = { "services": service_list, - "doc_types": doc_types + "docs": docs } return res diff --git a/tools-requirements.txt b/tools-requirements.txt index 5fee53b0..1bddc556 100644 --- a/tools-requirements.txt +++ b/tools-requirements.txt @@ -4,3 +4,4 @@ requests jinja2 dirsync cookiecutter +opensearch-py diff --git a/tools/index_metadata.py b/tools/index_metadata.py new file mode 100644 index 00000000..12e4dd41 --- /dev/null +++ b/tools/index_metadata.py @@ 
-0,0 +1,205 @@
+"""Index otc_metadata service and document types into OpenSearch."""
+
+import argparse
+import logging
+
+from opensearchpy import OpenSearch
+
+import otc_metadata
+
+
+metadata = otc_metadata.Services()
+
+# Document types kept by filter_docs(), i.e. uploaded unless
+# --all-doc-types is given.
+DEFAULT_DOC_TYPES = ("umn", "api-ref", "dev")
+
+
+def parse_args():
+    """Build the command line parser and return the parsed arguments."""
+    parser = argparse.ArgumentParser(
+        description="Create Index data for search inside OpenSearch"
+    )
+    parser.add_argument(
+        "--target-environment",
+        required=True,
+        help="Environment to be used as a source",
+    )
+    parser.add_argument(
+        '--delete-index',
+        action='store_true',
+        help='Option deletes old index with the same name and creates new '
+             'one.'
+    )
+    parser.add_argument(
+        '--all-doc-types',
+        action='store_true',
+        help='Upload all doc-types instead of only umn, api-ref and dev'
+    )
+    parser.add_argument(
+        '--debug',
+        action='store_true',
+        help='Enable Debug mode'
+    )
+    parser.add_argument(
+        '--hosts',
+        metavar='',
+        nargs='+',
+        default=['localhost:9200'],
+        help='Provide one or multiple host:port values '
+             'separated by space for multiple hosts.\n'
+             'Default: localhost:9200'
+    )
+    parser.add_argument(
+        '--index',
+        metavar='',
+        default='test-index',
+        help="OpenSearch / ElasticSearch index name.\n"
+             'Default: test-index'
+    )
+    parser.add_argument(
+        '--username',
+        metavar='',
+        required=True,
+        help='Username for the connection.'
+    )
+    parser.add_argument(
+        '--password',
+        metavar='',
+        required=True,
+        help='Password for the connection.'
+    )
+
+    return parser.parse_args()
+
+
+def main():
+    """Read the metadata for the requested environment and index it."""
+    args = parse_args()
+
+    if args.debug:
+        logging.basicConfig(level=logging.DEBUG)
+
+    logging.debug("Obtaining data from otc_metadata")
+    data = getData(
+        environment=args.target_environment,
+        all_doc_types=args.all_doc_types
+    )
+
+    logging.debug("Indexing data into OpenSearch")
+    indexData(
+        deleteIndex=args.delete_index,
+        hosts=args.hosts,
+        index=args.index,
+        username=args.username,
+        password=args.password,
+        data=data
+    )
+
+
+def filter_docs(metadata):
+    """Return a copy of the data limited to the default doc types.
+
+    Only the top-level ``docs`` list is filtered. The dictionary passed
+    in is left untouched so that callers can keep using the full data.
+
+    :param dict metadata: Data with a ``docs`` list of dictionaries that
+        each carry a ``type`` key.
+    """
+    filtered = dict(metadata)
+    filtered['docs'] = [doc for doc in metadata['docs']
+                        if doc['type'] in DEFAULT_DOC_TYPES]
+    return filtered
+
+
+def getData(environment, all_doc_types):
+    """Collect the service and doc type data that gets indexed.
+
+    :param str environment: Service environment handed to otc_metadata.
+    :param bool all_doc_types: Keep every doc type instead of only the
+        default ones.
+    """
+    data = metadata.service_types_with_doc_types(environment=environment)
+    if all_doc_types:
+        return data
+    return filter_docs(data)
+
+
+def indexData(deleteIndex, hosts, index, username, password, data):
+    """Connect to OpenSearch and write the data into the index.
+
+    :param bool deleteIndex: Delete the index before it is created.
+    :param list hosts: Host strings in the format hostname:port.
+    :param str index: Name of the index.
+    :param str username: Username for the connection.
+    :param str password: Password for the connection.
+    :param dict data: Document body to be indexed.
+    """
+    client = OpenSearch(
+        hosts=generate_os_host_list(hosts),
+        http_compress=True,
+        http_auth=(username, password),
+        use_ssl=True,
+        verify_certs=True,
+        # NOTE(review): certificates are verified, but the host name
+        # check is switched off - confirm that this is intended.
+        ssl_assert_hostname=False,
+        ssl_show_warn=False
+    )
+
+    if deleteIndex:
+        logging.debug("Deleting Index")
+        delete_index(client, index)
+
+    logging.debug("Started creating Index")
+    create_index(client, index, data)
+    logging.debug("Finished creating Index")
+
+
+def generate_os_host_list(hosts):
+    """Convert host:port strings into host dictionaries for OpenSearch.
+
+    :param list hosts: Strings in the format hostname:port.
+    :returns: List of dictionaries with the keys ``host`` and ``port``.
+    :raises ValueError: If an entry is not of the form hostname:port or
+        its port is not an integer.
+    """
+    host_list = []
+    for host in hosts:
+        raw_host = host.split(':')
+        if len(raw_host) != 2:
+            raise ValueError('--hosts parameter does not match the '
+                             'following format: hostname:port')
+        try:
+            port = int(raw_host[1])
+        except ValueError:
+            raise ValueError('--hosts parameter has an invalid port: '
+                             '{}'.format(host)) from None
+        host_list.append({'host': raw_host[0], 'port': port})
+    return host_list
+
+
+def create_index(client, index, data):
+    """Create the index and store the data in it as one document.
+
+    :param client: OpenSearch client.
+    :param str index: Name of the index to create.
+    :param dict data: Document body.
+    :returns: Response of the ``client.index`` call.
+    """
+    client.indices.create(index=index)
+    return client.index(index=index, body=data)
+
+
+def delete_index(client, index):
+    """Delete the index; HTTP status 400 and 404 are ignored.
+
+    :param client: OpenSearch client.
+    :param str index: Name of the index to delete.
+    :returns: Response of the ``client.indices.delete`` call.
+    """
+    return client.indices.delete(index=index, ignore=[400, 404])
+
+
+if __name__ == "__main__":
+    main()