Added controller module
This commit is contained in:
parent
9745a56b44
commit
2d8e03d2d5
43
hc_spider/controller.py
Normal file
43
hc_spider/controller.py
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
import os
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
|
||||||
|
from hc_spider.model import SharedObjects
|
||||||
|
|
||||||
|
|
||||||
|
class Controller(threading.Thread):
    """Daemon thread that seeds the job queue and signals shutdown when idle.

    On ``run`` the controller registers the configured ``starting_point`` as a
    not-yet-visited node, puts it on the job queue, and then polls
    ``not_visited_nodes``. Once that collection has been observed empty on
    ``_EMPTY_POLLS_BEFORE_SHUTDOWN`` consecutive polls, it sets the shared
    ``shutdown_event``.

    All keyword arguments given to the constructor are forwarded unchanged to
    ``SharedObjects``.
    """

    # Quoted so the annotation is not evaluated when the class is created.
    _shared_objects: "SharedObjects"

    # Seconds to wait after enqueuing the starting point before polling begins.
    _STARTUP_GRACE_SECONDS = 2
    # Seconds between two checks of ``not_visited_nodes``.
    _POLL_INTERVAL_SECONDS = 2
    # Consecutive empty observations required before shutdown is signalled.
    _EMPTY_POLLS_BEFORE_SHUTDOWN = 2

    def __init__(self, **kwargs) -> None:
        """Create the controller thread.

        Args:
            **kwargs: Forwarded verbatim to ``SharedObjects``.
        """
        # Initialise the Thread part first: if SharedObjects() raises,
        # __del__ can still read self.name without tripping Thread's
        # "__init__() not called" assertion.
        super().__init__(name="Controller", daemon=True)
        self._shared_objects = SharedObjects(**kwargs)

    def start(self) -> None:
        """Announce the start on stdout, then start the thread."""
        print(f"[{self.name}] is starting")
        super().start()

    def run(self) -> None:
        """Seed the queue with the starting point and wait for exhaustion.

        If the configuration has no (or an empty) ``starting_point`` nothing
        is enqueued and the shutdown event is set immediately.
        """
        shared = self._shared_objects
        print(f"{self.name} started with pid [{os.getpid()}]")

        starting_url = shared.config.get("starting_point")
        if not starting_url:
            # Without a starting point there is nothing to process; do not
            # enqueue None as if it were a URL.
            print(f"[{self.name}] No starting_point configured, exiting...")
            shared.shutdown_event.set()
        else:
            # Enqueue starting point
            shared.not_visited_nodes[starting_url] = "from config.json"
            shared.job_queue.put(starting_url)

            # Should wait a bit until the first items from workers will be
            # placed in the queue. Waiting on the event (instead of sleeping)
            # returns early if shutdown is requested in the meantime.
            # NOTE(review): assumes shutdown_event is Event-like and offers
            # wait(timeout) -- confirm against SharedObjects.
            shared.shutdown_event.wait(self._STARTUP_GRACE_SECONDS)
            self._poll_until_exhausted()

        print(f"[{self.name}] is shutting down", flush=True)

    def _poll_until_exhausted(self) -> None:
        """Poll ``not_visited_nodes`` until it stays empty or shutdown is set."""
        shared = self._shared_objects
        empty_polls = 0

        # wait() returns True as soon as the event is set, False on timeout.
        while not shared.shutdown_event.wait(self._POLL_INTERVAL_SECONDS):
            if shared.not_visited_nodes:
                # Work showed up again: only *consecutive* empty polls count.
                empty_polls = 0
                continue

            empty_polls += 1
            if empty_polls >= self._EMPTY_POLLS_BEFORE_SHUTDOWN:
                shared.shutdown_event.set()
                print(f"[{self.name}] Ran out from not visited URLs, exiting...")

    def __del__(self) -> None:
        """Report on stdout that the controller object is being finalised."""
        print(f"[{self.name}] exited", flush=True)
|
Loading…
x
Reference in New Issue
Block a user