user2693053 user2693053 - 16 days ago 9
Python Question

Can I assume my threads are done when threading.active_count() returns 1?

Given the following class:

from abc import ABCMeta, abstractmethod
from time import sleep
import threading
from threading import active_count, Thread

class ScraperPool(metaclass=ABCMeta):
    """Abstract pool of worker threads that drain a shared work queue.

    Subclasses implement ``scraperMethod`` to process one queue item. They
    must also provide ``HandleResults``, which ``run`` calls once the
    workers are done.
    """

    # Class-level fallbacks kept for backward compatibility. ``__init__``
    # gives every instance its own lists so pools do not share state.
    Queue = []
    ResultList = []

    def __init__(self, Queue, MaxNumWorkers=0, ItemsPerWorker=50):
        """Store the pool configuration and the work queue.

        Args:
            Queue: items to process; the workers consume it in place.
            MaxNumWorkers: stored on the instance (not read in this class).
            ItemsPerWorker: stored on the instance (not read in this class).
        """
        # Initialize attributes
        self.MaxNumWorkers = MaxNumWorkers
        self.ItemsPerWorker = ItemsPerWorker
        self.Queue = Queue  # For testing purposes.
        # Per-instance result list: appending to the class-level list would
        # mix the results of every pool created in the same process.
        self.ResultList = []

    def initWorkerPool(self, PrintIDs=True):
        """Start ``NumWorkers()`` threads running ``worker`` with 1-based ids."""
        for w in range(self.NumWorkers()):
            Thread(target=self.worker, args=(w + 1, PrintIDs)).start()
            sleep(1)  # Explicitly wait one second for this worker to start.

    def run(self):
        """Start the workers, wait until they finish, then handle results."""
        self.initWorkerPool()

        # Wait until all workers (i.e. threads) are done.
        # NOTE: active_count() counts every live thread in the process, so
        # this loop ends only once a single thread remains alive.
        while active_count() > 1:
            print("Active threads: " + str(active_count()))
            sleep(5)

        self.HandleResults()

    def worker(self, id, printID):
        """Call ``scraperMethod`` repeatedly until the queue is empty.

        Args:
            id: worker number, used only in the printed messages.
            printID: when true, print a message on start and on exit.
        """
        if printID:
            print("Starting worker " + str(id) + ".")

        while self.Queue:
            self.scraperMethod()

        if printID:
            print("Worker " + str(id) + " is quiting.")

        # Returning from the thread's target ends the thread, so no explicit
        # kill is needed here.

    def NumWorkers(self):
        """Return the number of worker threads to start."""
        return 1  # Simplified for testing purposes.

    @abstractmethod
    def scraperMethod(self):
        """Process one item from ``self.Queue``; implemented by subclasses."""

class TestScraper(ScraperPool):
    """Minimal concrete pool that moves queue items into the result list."""

    def scraperMethod(self):
        """Remove the last queue item and record it as a result."""
        # print("I am scraping.")
        # print("Scraping. Threads#: " + str(active_count()))
        # pop() removes and returns the last item in a single call, instead
        # of reading Queue[-1] and popping in two separate steps.
        self.ResultList.append(self.Queue.pop())

    def HandleResults(self):
        """Print the collected results."""
        print(self.ResultList)

# TestScraper already inherits from ScraperPool, so registering it as a
# virtual subclass via ScraperPool.register() is unnecessary and is omitted.
scraper = TestScraper(Queue=["Jaap", "Piet"])
scraper.run()
# Number of threads still alive once run() has returned.
print(threading.active_count())
# print(scraper.ResultList)


When all the threads are done, there's still one active thread: `threading.active_count()` on the last line gives me that number.

The active thread is `<_MainThread(MainThread, started 12960)>`, as printed by `threading.enumerate()`.

Can I assume that all my threads are done when `active_count() == 1`?
Or can, for instance, imported modules start additional threads of their own, so that my threads may actually be done while `active_count() > 1` still holds? That is also the condition of the loop I'm using in the `run` method.

Answer

You can assume that your threads are done when active_count() reaches 1. The problem is that if any other module creates a thread that keeps running, you'll never get to 1. You should manage your threads explicitly.

Example: You can put the threads in a list and join them one at a time. The relevant changes to your code are:

def __init__(self, Queue, MaxNumWorkers=0, ItemsPerWorker=50):
    """Set up the pool's thread registry, work queue and configuration.

    Args:
        Queue: the work queue, stored on the instance as given.
        MaxNumWorkers: stored on the instance as given.
        ItemsPerWorker: stored on the instance as given.
    """
    # Threads appended by initWorkerPool end up here so that run() can
    # join them explicitly.
    self.WorkerThreads = []
    self.Queue = Queue  # For testing purposes.
    self.ItemsPerWorker = ItemsPerWorker
    self.MaxNumWorkers = MaxNumWorkers

def initWorkerPool(self, PrintIDs=True):
    """Create, register and start one thread per configured worker.

    Each thread runs ``self.worker`` with a 1-based worker number and the
    ``PrintIDs`` flag, and is appended to ``self.WorkerThreads`` before it
    is started.
    """
    total = self.NumWorkers()
    for worker_no in range(1, total + 1):
        handle = Thread(target=self.worker, args=(worker_no, PrintIDs))
        self.WorkerThreads.append(handle)
        handle.start()
        sleep(1)  # Explicitly wait one second for this worker to start.

def run(self):
    """Start the worker pool, wait for every worker, then handle results.

    Joins the threads recorded in ``self.WorkerThreads`` one at a time and
    removes each from the list, so the list is empty when
    ``HandleResults`` is called.
    """
    self.initWorkerPool()

    # Wait until all workers (i.e. threads) are done. Waiting in order,
    # so some threads further in the list may finish first, but we
    # will get to all of them eventually.
    while self.WorkerThreads:
        # pop(0) removes the thread from the list before joining it.
        # Joining WorkerThreads[0] without removing it never empties the
        # list, so the loop would never terminate.
        self.WorkerThreads.pop(0).join()

    self.HandleResults()