dnsbl test

Given the rumor that infected Windows boxes send spam, I wanted to check how many of the IPs attacking my honeypot are blacklisted on various dnsbls. I used this list of dnsbls and wrote a script to query all of them for the last 1000 hosts that contacted my honeypot.

the result(s)

Basically there is good coverage: pbl.spamhaus.org had about 62% of the hosts listed as not appropriate for sending mail, but xbl.spamhaus.org, the Spamhaus Exploits Block List, had only 7.7% coverage.

Given the main focus of dnsbls - spam - I think I should have used hosts gathered from spamtraps instead …

combine lists

Combining multiple dnsbls can give better coverage; here are my numbers for the best combinations of two dnsbls, followed by the pairwise coverage (in %) of the four dnsbls that appear in them.

coverage in % dnsbl A dnsbl B
71.5 zen.spamhaus.org blackholes.five-ten-sg.com
69.8 pbl.spamhaus.org blackholes.five-ten-sg.com
67.5 zen.spamhaus.org dnsbl-3.uceprotect.net
67.0 zen.spamhaus.org b.barracudacentral.org
66.8 pbl.spamhaus.org b.barracudacentral.org
| | zen.spamhaus.org | pbl.spamhaus.org | dnsbl-3.uceprotect.net | blackholes.five-ten-sg.com |
| zen.spamhaus.org | 64.0 | 64.0 | 67.5 | 71.5 |
| pbl.spamhaus.org | | 62.0 | 65.9 | 69.8 |
| dnsbl-3.uceprotect.net | | | 45.7 | 60.3 |
| blackholes.five-ten-sg.com | | | | 19.0 |

trie it

  1. only dnsbls which had at least one host listed got into the trie
  2. . is the virtual root element, so one can see the coverage for all domains
  3. * indicates a dnsbl
  4. self is the number of listed hosts for dnsbls
  5. uniq is the number of unique listed hosts for the subtree
  6. X.Ys is the amount of time in seconds it took to query the dnsbl for 1000 hosts
  • . uniq=807
    • org uniq=683
      • spamhaus uniq=640
        • zen * self=640 6.5s
        • pbl * self=620 6.0s
        • xbl * self=77 5.8s
        • sbl * self=1 6.1s
      • barracudacentral uniq=378
        • b * self=378 0.4s
      • abuseat uniq=77
        • cbl * self=77 3.0s
      • spamcannibal uniq=23
        • bl * self=23 4.8s
      • njabl uniq=6
        • dnsbl * self=6 4.0s
      • ahbl uniq=5
        • dnsbl * self=5 4.5s
      • backscatterer uniq=5
        • ips * self=5 6.9s
    • net uniq=584
      • uceprotect uniq=503
        • dnsbl-3 * self=457 7.4s
        • dnsbl-2 * self=417 7.8s
        • dnsbl-1 * self=61 6.6s
      • sorbs uniq=224
        • dnsbl * self=193 uniq=224 5.2s
          • dul * self=163 4.6s
          • spam * self=49 4.7s
          • web * self=32 4.6s
      • services uniq=26
        • korea * self=26 10.0s
      • spamcop uniq=15
        • bl * self=15 4.2s
      • manitu uniq=12
        • dnsbl uniq=12
          • ix * self=12 3.2s
    • com uniq=442
      • spamrats uniq=296
        • dyna * self=228 4.9s
        • noptr * self=64 5.5s
        • spam * self=8 5.5s
      • five-ten-sg uniq=190
        • blackholes * self=190 6.0s
      • surriel uniq=26
        • psbl * self=26 3.5s
      • cymru uniq=23
        • bogons * self=23 5.6s
      • unsubscore uniq=11
        • ubl * self=11 5.7s
    • nl uniq=163
      • transip uniq=163
        • block uniq=163
          • residential * self=163 2.7s
    • cn uniq=117
      • org uniq=117
        • anti-spam uniq=117
          • cdl * self=117 9.0s
    • ch uniq=35
      • abuse uniq=35
        • combined * self=35 2.7s
        • spam * self=31 3.8s
        • drone * self=2 2.6s
    • info uniq=15
      • wpbl uniq=15
        • db * self=15 4.4s
    • de uniq=14
      • inps uniq=11
        • dnsbl * self=11 2.6s
      • sectoor uniq=3
        • dnsbl uniq=3
          • tor * self=3 uniq=3 3.0s
    • kr uniq=5
      • or uniq=5
        • spamlist * self=5 9.0s
    • jp uniq=1
      • rbl uniq=1
        • short * self=1 7.0s

script

The script's code quality is … debatable, but as it has already done its job I won't clean it up, and as it may provide some hints for others trying to do the same … here you go.

The script expects a file /tmp/dnsbls.txt with the domains of the dnsbls you want to use, one per line, and it queries the dionaea logsql SQLite database.

#!/usr/bin/python
 
# requires python-adns
 
import io
import sqlite3
import adns
from time import time
 
 
class dn(dict):
	"""One node of the reversed-domain trie of DNS blacklists.

	Child nodes are stored as dict items, keyed by domain label.

	Attributes:
		eod: True when a blacklist domain ends at this node.
		result: maps a queried host to True (listed) or False (not listed).
		duration: seconds between the last start() and stop() call;
			0.0 as long as the node has not been timed.
	"""

	def __init__(self, eod):
		dict.__init__(self)
		self.eod = eod
		self.result = {}
		# Defined up front so reporting code can read it even for a node
		# that was never timed.
		self.duration = 0.0

	def start(self):
		"""Record the current time as the start of a measurement."""
		self._start = time()

	def stop(self):
		"""Record the current time as the end and compute the duration."""
		self._stop = time()
		self.duration = self._stop - self._start

	def _listed(self):
		"""Return the set of hosts that this node's own result marks as listed."""
		return set(host for host, listed in self.result.items() if listed)

	def subs(self):
		"""Return the set of listed hosts of this node and its whole subtree."""
		hosts = self._listed()
		for child in self.values():
			hosts |= child.subs()
		return hosts

	def usize(self):
		"""Return the number of distinct listed hosts in the subtree."""
		return len(self.subs())

	def size(self):
		"""Return the number of hosts listed by this node itself.

		A node that is not the end of a blacklist domain always reports 0.
		"""
		if not self.eod:
			return 0
		# Counting a set works no matter whether filter() would return a
		# list or a lazy iterator on the running interpreter.
		return len(self._listed())
 
 
def mkdomaintrie(path):
	"""Build a trie of blacklist domains read from a text file.

	The file is expected to hold one blacklist domain per line.  Each
	domain is split on '.' and inserted label by label in reverse order
	(top-level label first).  The node of the last label is flagged with
	eod = True and gets the full domain stored in its bl attribute.

	Blank lines are ignored, and a domain that occurs more than once is
	only returned once.

	Args:
		path: path of the file to read.

	Returns:
		A tuple (trie, dnsbls): the root node and a list of
		(domain, node) pairs in file order.
	"""
	trie = dn(False)
	dnsbls = []
	# The with block closes the file even if building the trie fails.
	with io.open(path, "r") as f:
		for line in f:
			line = line.strip()
			if not line:
				# Would otherwise turn into a blacklist with an empty name.
				continue
			node = trie
			for part in reversed(line.split('.')):
				if part not in node:
					node[part] = dn(False)
				node = node[part]
			if node.eod:
				# Duplicate entry: already registered, do not query it twice.
				continue
			node.eod = True
			node.bl = line
			dnsbls.append((line, node))
	return (trie, dnsbls)
#print(trie)
 
#for i in dnsbls:
#	print("%s %s" % (i[0], i[1].eod ) )
 
 
def querybls(dnsbls, dbpath='/opt/dionaea/var/dionaea/logsql.sqlite', limit=10):
	"""Look up the hosts logged by dionaea on every given blacklist.

	Distinct remote_host values are read from the connections table of
	the SQLite database, a leading '::ffff:' prefix is stripped and
	duplicates are dropped.  Every host is then queried on each
	blacklist; the outcome is stored in the result attribute of the
	blacklist's node and the time taken is printed.

	Args:
		dnsbls: list of (domain, node) pairs as built by mkdomaintrie().
		dbpath: path of the dionaea logsql SQLite database.
		limit: maximum number of distinct hosts read from the database.

	Returns:
		The list of unique hosts that were queried.
	"""
	def resolve_result(resultcursor):
		"""Turn the rows of a cursor into a list of column-name -> value dicts."""
		names = [column[0] for column in resultcursor.description]
		return [dict(zip(names, row)) for row in resultcursor]

	class AsyncResolver(object):
		"""Checks a list of hosts against one blacklist using adns."""

		def __init__(self, hosts, dnsblhost, intensity=100):
			self.dnsblhost = dnsblhost
			self.hosts = hosts
			# Upper bound for the number of queries in flight at once.
			self.intensity = intensity
			self.adns = adns.init()
			self.collected = 0

		def resolve(self):
			"""Query all hosts and return a dict host -> True (listed) / False."""
			resolved_hosts = {}
			active_queries = {}
			host_queue = list(self.hosts)
			submitted = set()

			def collect_results():
				# NOTE(review): completed(1) presumably waits up to one
				# second for finished queries -- confirm against adns docs.
				for query in self.adns.completed(1):
					self.collected += 1
					answer = query.check()
					host = active_queries.pop(query)
					if host in resolved_hosts:
						raise Exception("host %s is already resolved %s " % (host, resolved_hosts[host]))
					# A status of 0 is treated as "the name exists", i.e.
					# the host is listed (presumably adns' ok status).
					resolved_hosts[host] = (answer[0] == 0)

			while len(resolved_hosts) < len(self.hosts):
				# Keep up to `intensity` queries in flight.
				while host_queue and len(active_queries) < self.intensity:
					host = host_queue.pop()
					if host in submitted:
						raise Exception("host %s is already cached" % host)
					# Name to look up: the address labels in reverse order,
					# followed by the blacklist zone.
					domain = '.'.join(reversed(host.split('.'))) + '.' + self.dnsblhost
					query = self.adns.submit(domain, adns.rr.A)
					submitted.add(host)
					active_queries[query] = host
				collect_results()

			return resolved_hosts

	dbh = sqlite3.connect(dbpath)
	try:
		cursor = dbh.cursor()
		# All rows are fetched here so the connection can be closed
		# before the (slow) DNS lookups start.
		result = resolve_result(cursor.execute("""SELECT
	DISTINCT remote_host
FROM
	connections
WHERE
--	connection_timestamp >= strftime('%s','now') - 48*3600 AND
	remote_host != ''
ORDER BY
	remote_host
LIMIT ?""", (limit,)))
	finally:
		dbh.close()

	# Stripping the prefix can make two rows equal, so de-duplicate
	# afterwards while keeping the database order.
	hosts = []
	seen = set()
	for record in result:
		host = record['remote_host']
		if host.startswith('::ffff:'):
			host = host[7:]
		if host not in seen:
			seen.add(host)
			hosts.append(host)

	for dnsblhost, dnsbl in dnsbls:
		# NOTE(review): this blacklist is skipped unconditionally; the
		# reason is not visible in this file.
		if dnsblhost == 'bl.emailbasura.org':
			continue

		ar = AsyncResolver(hosts, dnsblhost, intensity=50)
		dnsbl.start()
		dnsbl.result = ar.resolve()
		dnsbl.stop()
		print("%f %f %f" % (dnsbl.duration, dnsbl._start, dnsbl._stop))

	print("number of unique queried hosts %i" % len(hosts))
	return hosts
 
def printdn(d, indent):
	"""Print the subtree below d as an indented outline, biggest entries first.

	Children are ordered by the number of distinct listed hosts in their
	subtree (usize()), descending; children without any listed host are
	left out together with everything below them.  A node that ends a
	blacklist domain is marked with '*' and shows its own hit count and
	query duration, plus the subtree count when it has children.

	Args:
		d: mapping of label -> node whose children are printed.
		indent: number of spaces to put in front of each printed child;
			every level below adds four more.
	"""
	# One subtree walk per child instead of one per comparison and print.
	sizes = dict((name, d[name].usize()) for name in d)
	for name in sorted(d, key=sizes.get, reverse=True):
		uniq = sizes[name]
		if uniq == 0:
			continue
		node = d[name]
		if node.eod:
			if len(node) > 0:
				print("%*s%s\t * self=%i uniq=%i %.1fs" % (indent, "", name, node.size(), uniq, node.duration))
			else:
				print("%*s%s\t * self=%i %.1fs" % (indent, "", name, node.size(), node.duration))
		else:
			print("%*s%s\t uniq=%i" % (indent, "", name, uniq))
		printdn(node, indent + 4)
 
 
def xdnsbls(dnsbls, hosts):
	"""Print how well pairs of blacklists cover the queried hosts.

	Two things are printed:

	1. the (at most) five best pairs, one per line, as
	   "<coverage in %> <blacklist A> <blacklist B>";
	2. a '|'-separated table with the pairwise coverage of up to four
	   blacklists taken from the best pairs.  Only the diagonal and the
	   upper triangle are filled in; blacklists without any listed host
	   are left out.

	Coverage is the share of hosts listed by at least one of the two
	blacklists (subtree included).  Nothing is printed for the table
	when no blacklist qualifies, and the coverage is 0.0 when hosts is
	empty.

	Args:
		dnsbls: list of (domain, node) pairs; node.subs() must return the
			set of listed hosts.
		hosts: the hosts that were queried.
	"""
	total = float(len(hosts))

	def coverage(listed_a, listed_b):
		"""Return the percentage of hosts found in either set."""
		if not total:
			return 0.0
		return len(listed_a | listed_b) / total * 100.0

	# Walk each subtree once; everything below works on the cached sets.
	entries = [(entry[0], entry[1].subs()) for entry in dnsbls]

	# Every unordered pair once: each entry with all entries before it.
	pairs = []
	for i, a in enumerate(entries):
		for b in entries[:i]:
			pairs.append((coverage(a[1], b[1]), a, b))
	pairs.sort(key=lambda pair: pair[0], reverse=True)

	for pair in pairs[:5]:
		print("%.1f %s %s" % (pair[0], pair[1][0], pair[2][0]))

	# Pick up to four distinct blacklists, going through the best pairs.
	chosen = []
	names = set()
	for pair in pairs:
		for entry in pair[1:]:
			if len(chosen) < 4 and entry[0] not in names:
				names.add(entry[0])
				chosen.append(entry)
	chosen.sort(key=lambda entry: len(entry[1]), reverse=True)

	table = [entry for entry in chosen if entry[1]]
	if not table:
		return
	xlen = max(len(entry[0]) for entry in table)

	# Cells are joined with a single space, which reproduces what a
	# sequence of comma-terminated print statements writes.
	header = ["| %*s " % (xlen, "")]
	header.extend("| %*s " % (len(entry[0]), entry[0]) for entry in table)
	header.append("|")
	print(" ".join(header))

	for row, (row_name, row_listed) in enumerate(table):
		cells = ["| %*s " % (xlen, row_name)]
		for col, (col_name, col_listed) in enumerate(table):
			if col >= row:
				cells.append("| %*.1f " % (len(col_name), coverage(row_listed, col_listed)))
			else:
				# Lower triangle mirrors the upper one, leave it blank.
				cells.append("| %*s " % (len(col_name), ""))
		cells.append("|")
		print(" ".join(cells))
 
 
# Read the blacklist domains and look up the logged hosts on each of them.
trie, dnsbls = mkdomaintrie("/tmp/dnsbls.txt")
hosts = querybls(dnsbls)

# Hang the trie below a virtual "." root so that the outline starts with
# one line giving the coverage over all blacklists.
root = dn(False)
root['.'] = trie
root.bl = "."
printdn(root, 0)

# Finally print the best pairs and the pairwise coverage table.
xdnsbls(dnsbls, hosts)

Comments



2010/06/06/dnsbl_test.txt · Last modified: 2010/06/15 01:00 by common
chimeric.de = chi`s home Creative Commons License Valid CSS Driven by DokuWiki do yourself a favour and use a real browser - get firefox!! Recent changes RSS feed Valid XHTML 1.0