import os
import threading
import time

from hc_spider.model import SharedObjects


class Controller(threading.Thread):
    """Daemon thread that seeds the crawl and signals shutdown when work runs out.

    On start it reads ``starting_point`` from the shared config, records it in
    ``not_visited_nodes`` and puts it on ``job_queue``.  It then polls
    ``not_visited_nodes`` periodically and sets ``shutdown_event`` once the
    mapping has been observed empty on several consecutive polls.
    """

    # Seconds to wait after seeding the queue and between subsequent polls.
    _POLL_INTERVAL = 2
    # Number of consecutive empty polls required before shutdown is signalled.
    _EMPTY_POLLS_BEFORE_EXIT = 2

    _shared_objects: SharedObjects

    def __init__(self, **kwargs) -> None:
        """Create the controller.

        Args:
            **kwargs: Forwarded unchanged to ``SharedObjects``.
        """
        # Initialise the Thread part first so that ``self.name`` (read by
        # ``__del__``) is valid even if building SharedObjects raises.
        super().__init__(name="Controller", daemon=True)
        self._shared_objects = SharedObjects(**kwargs)

    def start(self) -> None:
        """Announce the start-up on stdout, then start the thread."""
        print(f"[{self.name}] is starting")
        super().start()

    def run(self) -> None:
        """Seed the job queue, then poll until there is nothing left to visit.

        The loop ends either when ``shutdown_event`` is set elsewhere or when
        this thread sets it itself after ``_EMPTY_POLLS_BEFORE_EXIT``
        consecutive polls found ``not_visited_nodes`` empty.
        """
        print(f"{self.name} started with pid [{os.getpid()}]")
        shared = self._shared_objects

        # Enqueue the starting point.
        # NOTE(review): if "starting_point" is absent from the config, None is
        # enqueued as-is; confirm how the consumers of job_queue treat that.
        starting_url = shared.config.get("starting_point")
        shared.not_visited_nodes[starting_url] = "from config.json"
        shared.job_queue.put(starting_url)

        # Give the workers a moment to place their first items in the queue.
        time.sleep(self._POLL_INTERVAL)

        empty_polls_left = self._EMPTY_POLLS_BEFORE_EXIT
        # Event.wait() doubles as the poll delay and returns as soon as the
        # event is set, so an external shutdown is not delayed by a full sleep.
        while not shared.shutdown_event.wait(self._POLL_INTERVAL):
            if shared.not_visited_nodes:
                # Work is pending again: a previous empty observation was only
                # transient, so start counting from scratch.
                empty_polls_left = self._EMPTY_POLLS_BEFORE_EXIT
                continue

            empty_polls_left -= 1
            if empty_polls_left == 0:
                shared.shutdown_event.set()
                print(f"[{self.name}] Ran out from not visited URLs, exiting...")

        print(f"[{self.name}] is shutting down", flush=True)

    def __del__(self) -> None:
        """Report on stdout that the controller object is being finalised."""
        print(f"[{self.name}] exited", flush=True)