I wrote a script that should resolve multiple hostnames into IP addresses using multithreading.
However, it fails and freezes at some random point. How can this be solved?
# Number of worker threads in the resolver pool.
num_threads = 100

# Single module-level MySQL connection; mexec() opens its cursors on it.
# NOTE(review): both host and unix_socket are given -- confirm which one the
# driver is expected to use.
conn = pymysql.connect(
    host='xx.xx.xx.xx',
    unix_socket='/tmp/mysql.sock',
    user='user',
    passwd='pw',
    db='database',
)

# Module-level cursor; the code shown here does not use it, because mexec()
# creates its own cursor on every call.
cur = conn.cursor()
import threading

# All worker threads send their statements through the one module-level
# connection, so access to it is serialized with this lock.
_conn_lock = threading.Lock()


def mexec(befehl, params=None):
    """Execute one SQL statement on the shared connection and commit it.

    Args:
        befehl: SQL statement text ("Befehl" is German for "command").
        params: Optional values for ``%s`` placeholders in ``befehl``. When
            given, the driver quotes them, so callers do not have to build
            the statement by string concatenation.

    Raises:
        Any exception raised by the driver while executing or committing;
        the cursor is closed and the lock released before it propagates.
    """
    with _conn_lock:
        cursor = conn.cursor()
        try:
            cursor.execute(befehl, params)
            # Without an explicit commit the change is not made permanent
            # unless the connection runs in autocommit mode.
            conn.commit()
        finally:
            cursor.close()
# Hostnames to resolve. Only the first two entries are shown; the real list
# holds about 10,000 websites.
websites = ['facebook.com', 'facebook.org']

# Work queue shared between the main thread (producer) and the workers.
queue = Queue()
def getips(i, q):
    """Worker loop: resolve hostnames taken from ``q`` and record the result.

    Runs forever, so it is meant to be the target of a daemon thread. Every
    item taken from the queue is acknowledged with exactly one
    ``q.task_done()``, even when resolving or the database update fails;
    otherwise ``q.join()`` in the main thread would never return.

    Args:
        i: Index of the worker thread; not used by the loop itself.
        q: Queue of hostname strings to resolve.
    """
    while True:
        # Block until a hostname is available; this is the only place the
        # worker learns which site it has to resolve.
        site = q.get()
        try:
            try:
                result = socket.gethostbyname_ex(site)
            except socket.gaierror:
                print("no ip")
                mexec("UPDATE sites2block SET ip='no ip', updated='yes'")
            else:
                print(result)
                # gethostbyname_ex() returns (hostname, aliases, addresses);
                # only the address list can be stored as text.
                addresses = ",".join(result[2])
                # NOTE(review): neither UPDATE has a WHERE clause, so each
                # call rewrites every row of sites2block. Restrict it to the
                # row for `site` once the key column name is confirmed.
                mexec("UPDATE sites2block SET ip='" + addresses + "', updated='yes' ")
        except Exception as exc:
            # Thread boundary: report the failure and keep the worker alive.
            # A dead worker silently shrinks the pool and can leave queued
            # items unprocessed forever.
            print("error %s reason: %s" % (site, exc))
        finally:
            q.task_done()
# Spawn thread pool
for i in range(num_threads):
    worker = Thread(target=getips, args=(i, queue))
    # The workers loop forever, so they are marked as daemon threads to let
    # the process exit once the queue has been drained. The `daemon`
    # attribute replaces the deprecated setDaemon() call.
    worker.daemon = True
    worker.start()

# Place work in queue
for site in websites:
    queue.put(site)

# Wait until every queued site has been acknowledged with task_done()
queue.join()
Instead of using queue.task_done() and queue.join(), you could put a sentinel value on the queue to signal to the threads that there is no more work, and then join the threads themselves:
#!/usr/bin/env python
import socket
from Queue import Queue
from threading import Thread
def getips(queue):
    """Resolve hostnames taken from ``queue`` until a ``None`` sentinel arrives.

    Each hostname is resolved with ``socket.gethostbyname_ex()`` and the
    outcome is printed. The function returns when it reads ``None`` from the
    queue, which lets the caller stop the worker by joining its thread.

    Args:
        queue: Queue yielding hostname strings, terminated by ``None``.
    """
    # iter(callable, sentinel) keeps calling queue.get() and stops as soon as
    # it returns None.
    for site in iter(queue.get, None):
        try:  # resolve hostname
            result = socket.gethostbyname_ex(site)
        except IOError as e:  # "as" form is valid on Python 2.6+ and Python 3
            print("error %s reason: %s" % (site, e))
        else:
            print("done %s %s" % (site, result))
def main():
    """Resolve six sample ``.com`` hostnames with a pool of 20 worker threads.

    Work items are followed by one ``None`` sentinel per worker, so every
    thread leaves its loop and can be joined.
    """
    stems = "youtube google non-existent.example facebook yahoo live".split()
    hostnames = []
    for stem in stems:
        hostnames.append(stem + '.com')

    # Start the pool; the workers block on the empty queue until work arrives.
    work = Queue()
    pool = []
    for _ in range(20):
        pool.append(Thread(target=getips, args=(work,)))
    for worker in pool:
        worker.daemon = True
        worker.start()

    # Hand out the hostnames.
    for hostname in hostnames:
        work.put(hostname)

    # One sentinel per worker marks the end of the work.
    for _ in pool:
        work.put(None)

    # Block until every worker has seen its sentinel and returned.
    for worker in pool:
        worker.join()
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
The socket.gethostbyname_ex() function is obsolete: it does not support IPv6 name resolution. To support both IPv4 and IPv6 addresses, you could use socket.getaddrinfo() instead.