The script's code quality is … debatable, but since it has already done its job I won't clean it up; as it may provide some hints for others trying to do the same … here you go.
The script expects a file /tmp/dnsbls.txt listing the domains of the DNSBLs you want to use (one per line), and it queries the dionaea logsql SQLite database.
#!/usr/bin/python
# requires python-adns
import io
import sqlite3
import adns
from time import time
class dn(dict):
    """Node of the reversed-label domain trie.

    Children are stored as dict items keyed by DNS label.  Attributes:

    * ``eod``      -- True when a blacklist zone name ends at this node.
    * ``result``   -- mapping ``host -> bool`` holding the lookup outcome of
                      every host queried against this node's blacklist.
    * ``duration`` -- seconds between the last start()/stop() pair; 0.0
                      until stop() has been called.
    """

    def __init__(self, eod):
        super(dn, self).__init__()
        self.eod = eod
        self.result = {}
        # Initialised here so that nodes which were never timed can still be
        # reported without raising AttributeError.
        self.duration = 0.0

    def start(self):
        """Record the wall-clock time at which querying begins."""
        self._start = time()

    def stop(self):
        """Record the end time and derive ``duration`` from start()."""
        self._stop = time()
        self.duration = self._stop - self._start

    def subs(self):
        """Return the set of hosts listed by this node or any descendant."""
        listed = {host for host, hit in self.result.items() if hit}
        for child in self.values():
            listed |= child.subs()
        return listed

    def usize(self):
        """Return the number of unique hosts listed in this whole subtree."""
        return len(self.subs())

    def size(self):
        """Return the number of hosts listed by this node itself.

        Nodes that are not the end of a blacklist name always report 0.
        """
        if not self.eod:
            return 0
        # Counting with sum() works whether or not filter() returns a list.
        return sum(1 for hit in self.result.values() if hit)
def mkdomaintrie(path):
    """Build a trie of blacklist zone names read from *path*.

    The file is expected to hold one zone name per line.  Each name is split
    on '.' and inserted label by label in reversed order, so zones sharing a
    parent domain share a branch.

    Args:
        path: text file listing the blacklist zones.

    Returns:
        A ``(trie, dnsbls)`` tuple: ``trie`` is the root ``dn`` node and
        ``dnsbls`` is a list of ``(zone_name, node)`` pairs in file order,
        where ``node`` is the trie node at which that zone name ends.
    """
    trie = dn(False)
    dnsbls = []
    # The context manager closes the file even if parsing fails; iterating
    # the handle avoids cutting long lines at a fixed byte count.
    with io.open(path, "r") as handle:
        for raw in handle:
            line = raw.strip()
            if not line:
                # A blank line would otherwise register a blacklist named ''.
                continue
            cur = trie
            for part in reversed(line.split('.')):
                if part not in cur:
                    cur[part] = dn(False)
                cur = cur[part]
            if cur.eod:
                # Zone already registered: do not query the same list twice.
                continue
            cur.eod = True
            cur.bl = line
            dnsbls.append((line, cur))
    return (trie, dnsbls)
#print(trie)
#for i in dnsbls:
# print("%s %s" % (i[0], i[1].eod ) )
def querybls(dnsbls, dbpath='/opt/dionaea/var/dionaea/logsql.sqlite'):
    """Look up the hosts logged by dionaea in every blacklist of *dnsbls*.

    Up to 10 distinct, non-empty ``remote_host`` values are read from the
    ``connections`` table of the SQLite database at *dbpath*.  A leading
    ``::ffff:`` is stripped and the remaining values are de-duplicated.
    Every host is then resolved against each blacklist zone through adns.
    For each blacklist the node's ``result`` attribute is replaced by a
    ``host -> bool`` dict (True when the lookup status is 0), the lookup is
    timed via the node's start()/stop(), and the timing is printed.

    Args:
        dnsbls: iterable of ``(zone_name, node)`` pairs.
        dbpath: location of the dionaea logsql SQLite database.

    Returns:
        Sorted list of the unique host strings that were queried.
    """

    class AsyncResolver(object):
        """Resolve many hosts against one blacklist zone with adns."""

        def __init__(self, hosts, dnsblhost, intensity=100):
            self.dnsblhost = dnsblhost
            self.hosts = hosts
            # Upper bound on the number of queries in flight at once.
            self.intensity = intensity
            self.adns = adns.init()
            self.collected = 0

        def resolve(self):
            """Return ``{host: bool}`` for every host in ``self.hosts``."""
            resolved_hosts = {}
            active_queries = {}
            host_queue = list(self.hosts)
            submitted = set()

            def collect_results():
                # NOTE(review): the argument to completed() is presumably a
                # timeout in seconds -- confirm against the adns bindings.
                for query in self.adns.completed(1):
                    self.collected += 1
                    answer = query.check()
                    host = active_queries.pop(query)
                    if host in resolved_hosts:
                        raise Exception("host %s is already resolved %s " % (host, resolved_hosts[host]))
                    # Status 0 is treated as "listed"; anything else is not.
                    resolved_hosts[host] = (answer[0] == 0)

            while len(resolved_hosts) < len(self.hosts):
                while host_queue and len(active_queries) < self.intensity:
                    host = host_queue.pop()
                    # DNSBL lookups use the reversed address labels followed
                    # by the blacklist zone.
                    domain = '.'.join(reversed(host.split('.')))
                    domain = domain + '.' + self.dnsblhost
                    query = self.adns.submit(domain, adns.rr.A)
                    if host in submitted:
                        raise Exception("host %s is already cached" % host)
                    submitted.add(host)
                    active_queries[query] = host
                collect_results()
            return resolved_hosts

    dbh = sqlite3.connect(dbpath)
    try:
        cursor = dbh.cursor()
        cursor.execute("""SELECT
                DISTINCT remote_host
            FROM
                connections
            WHERE
                -- connection_timestamp >= strftime('%s','now') - 48*3600 AND
                remote_host != ''
            ORDER BY
                remote_host
            LIMIT 10""")
        rows = cursor.fetchall()
    finally:
        # Release the database handle even when the query fails.
        dbh.close()

    unique = set()
    for (wormhole,) in rows:
        if wormhole.startswith('::ffff:'):
            wormhole = wormhole[7:]
        unique.add(wormhole)
    # A real list: it is copied, measured and returned to the caller.
    hosts = sorted(unique)

    for bl in dnsbls:
        dnsblhost = bl[0]
        dnsbl = bl[1]
        # Hard-coded exclusion kept from the original script; the reason is
        # not recorded here.
        if dnsblhost == 'bl.emailbasura.org':
            continue
        ar = AsyncResolver(hosts, dnsblhost, intensity=50)
        dnsbl.start()
        dnsbl.result = ar.resolve()
        dnsbl.stop()
        print("%f %f %f" % (dnsbl.duration, dnsbl._start, dnsbl._stop))
    print("number of unique queried hosts %i" % len(hosts))
    return hosts
def printdn(d, indent):
    """Print the blacklist trie *d* as an indented tree.

    Children are listed by descending number of unique listed hosts in
    their subtree; children whose subtree lists no host are omitted.  A
    node that ends a blacklist name shows its own hit count and lookup
    duration (plus the subtree's unique count when it has children); other
    nodes only show the unique count.

    Args:
        d: mapping ``label -> node``; nodes provide ``eod``, ``size()``,
           ``usize()`` and are themselves mappings of their children.
        indent: number of spaces to print before each label at this level.
    """
    # usize() walks the whole subtree, so evaluate it once per child instead
    # of on every comparison made by the sort.
    uniq = dict((label, d[label].usize()) for label in d)
    for label in sorted(d, key=uniq.get, reverse=True):
        if uniq[label] == 0:
            continue
        node = d[label]
        if node.eod:
            # Nodes that were never timed carry no measured duration.
            duration = getattr(node, 'duration', 0.0)
            if len(node) > 0:
                print("%*s%s\t * self=%i uniq=%i %.1fs" % (indent, "", label, node.size(), uniq[label], duration))
            else:
                print("%*s%s\t * self=%i %.1fs" % (indent, "", label, node.size(), duration))
        else:
            print("%*s%s\t uniq=%i" % (indent, "", label, uniq[label]))
        printdn(node, indent + 4)
def xdnsbls(dnsbls, hosts):
    """Print how many hosts pairs of blacklists cover together.

    For every unordered pair of blacklists the share of *hosts* listed by at
    least one of the two is computed as a percentage.  The five best pairs
    are printed, then an upper-triangular table for up to four blacklists
    taken from the best pairs, ordered by their own number of listed hosts.
    Blacklists that list no host are left out of the table.  Nothing is
    tabulated when no selected blacklist lists a host.

    Args:
        dnsbls: sequence of ``(zone_name, node)`` pairs; ``node.subs()``
                returns the set of hosts listed by that blacklist.
        hosts:  collection of all queried hosts (the 100% reference).
    """
    total = float(len(hosts))

    def coverage(first, second):
        # With no queried hosts there is nothing to cover; avoid dividing
        # by zero.
        if not total:
            return 0.0
        return float(len(first | second)) / total * 100.0

    # subs() walks the trie recursively: evaluate once per blacklist.
    entries = [(entry[0], entry[1].subs()) for entry in dnsbls]

    # Each blacklist is paired with every blacklist that precedes it.
    pairs = []
    for idx, later in enumerate(entries):
        for earlier in entries[:idx]:
            pairs.append((coverage(later[1], earlier[1]), later, earlier))
    # Sort on the percentage only; the sort is stable for equal values.
    pairs.sort(key=lambda pair: pair[0], reverse=True)

    for pair in pairs[:5]:
        print("%.1f %s %s" % (pair[0], pair[1][0], pair[2][0]))

    # Take the first four distinct blacklists in best-pair order.
    selected = []
    seen = set()
    for pair in pairs:
        for entry in (pair[1], pair[2]):
            if len(selected) < 4 and entry[0] not in seen:
                seen.add(entry[0])
                selected.append(entry)
    selected.sort(key=lambda entry: len(entry[1]), reverse=True)

    shown = [entry for entry in selected if entry[1]]
    if not shown:
        return
    xlen = max(len(name) for name, _ in shown)

    # Cells are joined with a single space so every row is emitted by one
    # print call.
    header = ["| %*s " % (xlen, "")]
    header.extend("| %*s " % (len(name), name) for name, _ in shown)
    header.append("|")
    print(" ".join(header))

    for row, (row_name, row_hits) in enumerate(shown):
        cells = ["| %*s " % (xlen, row_name)]
        for col, (col_name, col_hits) in enumerate(shown):
            if col >= row:
                cells.append("| %*.1f " % (len(col_name), coverage(row_hits, col_hits)))
            else:
                # Lower triangle mirrors the upper one, so leave it blank.
                cells.append("| %*s " % (len(col_name), ""))
        cells.append("|")
        print(" ".join(cells))
# Driver: load the blacklist zones, query every logged host against them,
# then print the per-zone tree followed by the pairwise coverage table.
trie, dnsbls = mkdomaintrie("/tmp/dnsbls.txt")
hosts = querybls(dnsbls)

# Hang the trie below a synthetic "." node so that the top-level labels are
# printed as children of a single root.
root = dn(False)
root.bl = "."
root['.'] = trie
printdn(root, indent=0)

xdnsbls(dnsbls, hosts)