44 lines
1.4 KiB
Python
44 lines
1.4 KiB
Python
import os
|
|
import threading
|
|
import time
|
|
|
|
from hc_spider.model import SharedObjects
|
|
|
|
|
|
class Controller(threading.Thread):
    """Daemon thread that seeds the job queue and requests shutdown when idle.

    On start it enqueues the configured ``starting_point`` URL, then
    periodically inspects ``not_visited_nodes`` on the shared objects. When
    that mapping is observed empty on several consecutive polls, the shared
    ``shutdown_event`` is set.
    """

    # Seconds slept before the first poll and between subsequent polls.
    _POLL_INTERVAL_SECONDS = 2
    # Number of consecutive empty polls required before shutdown is requested.
    _EMPTY_POLLS_BEFORE_SHUTDOWN = 2

    _shared_objects: SharedObjects

    def __init__(self, **kwargs) -> None:
        """Create the controller thread.

        Args:
            **kwargs: Forwarded unchanged to ``SharedObjects``.
        """
        # Initialise the Thread base first: if SharedObjects(**kwargs) raises,
        # __del__ still finds a fully initialised thread and can read
        # ``self.name`` without triggering a secondary error.
        super().__init__(name="Controller", daemon=True)
        self._shared_objects = SharedObjects(**kwargs)

    def start(self) -> None:
        """Print a start-up message and start the thread."""
        print(f"[{self.name}] is starting")
        super().start()

    def run(self) -> None:
        """Seed the queue, then poll until shutdown is requested or work runs out."""
        print(f"{self.name} started with pid [{os.getpid()}]")
        shared = self._shared_objects

        # Enqueue starting point.
        # NOTE(review): ``config.get`` yields None when "starting_point" is
        # missing, and that None would be enqueued as-is -- confirm the config
        # is validated before this thread starts.
        starting_url = shared.config.get("starting_point")
        shared.not_visited_nodes[starting_url] = "from config.json"
        shared.job_queue.put(starting_url)

        # Wait a bit until the first items from workers are placed in the queue.
        time.sleep(self._POLL_INTERVAL_SECONDS)

        remaining_empty_polls = self._EMPTY_POLLS_BEFORE_SHUTDOWN
        while not shared.shutdown_event.is_set():
            time.sleep(self._POLL_INTERVAL_SECONDS)

            if shared.not_visited_nodes:
                # Work is pending again: restart the idle countdown so that
                # isolated, transient empty observations cannot accumulate
                # into a premature shutdown.
                remaining_empty_polls = self._EMPTY_POLLS_BEFORE_SHUTDOWN
                continue

            remaining_empty_polls -= 1
            if remaining_empty_polls == 0:
                shared.shutdown_event.set()
                print(f"[{self.name}] Ran out from not visited URLs, exiting...")

        print(f"[{self.name}] is shutting down", flush=True)

    def __del__(self) -> None:
        """Print an exit message when the controller object is finalised."""
        print(f"[{self.name}] exited", flush=True)
|