-
Notifications
You must be signed in to change notification settings - Fork 171
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
TESTING: Enrich dns indicators #112
Changes from 14 commits
d4f835c
781b2cc
0fb5b13
d1136d6
4a47265
8ae4343
317aaa4
319a3f0
c5cb876
ace40b1
8d7d221
40fae19
9cd54f1
ba6a6c8
ed398a6
3a89648
94feb29
239f0c3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,20 +6,19 @@ | |
import json | ||
import pygeoip | ||
import re | ||
import sys | ||
|
||
from netaddr import IPAddress, IPRange, IPSet | ||
from sortedcontainers import SortedDict | ||
|
||
from logger import get_logger | ||
import logging | ||
|
||
logger = get_logger('winnower') | ||
|
||
# from http://en.wikipedia.org/wiki/Reserved_IP_addresses: | ||
reserved_ranges = IPSet(['0.0.0.0/8', '100.64.0.0/10', '127.0.0.0/8', '192.88.99.0/24', | ||
'198.18.0.0/15', '198.51.100.0/24', '203.0.113.0/24', '233.252.0.0/24']) | ||
gi_org = SortedDict() | ||
geo_data = pygeoip.GeoIP('data/GeoIP.dat', pygeoip.MEMORY_CACHE) | ||
|
||
|
||
def load_gi_org(filename): | ||
|
@@ -43,32 +42,49 @@ def org_by_addr(address): | |
|
||
|
||
def maxhits(dns_records): | ||
max = 0 | ||
hmax = 0 | ||
hostname = None | ||
for record in dns_records: | ||
if record['count'] > max: | ||
max = record['count'] | ||
#logger.info("examining %s" % record) | ||
if record['count'] > hmax: | ||
hmax = record['count'] | ||
hostname = record['rrname'].rstrip('.') | ||
return hostname | ||
|
||
|
||
def enrich_IPv4(address, geo_data, dnsdb=None): | ||
def maxhits_rdata(dns_records): | ||
hmax = 0 | ||
hostname = None | ||
for record in dns_records: | ||
# logger.info("Examining %s" % record) | ||
if record['count'] > hmax: | ||
hmax = record['count'] | ||
hostname = record['rdata'][0].rstrip('.') | ||
return hostname | ||
|
||
|
||
def enrich_IPv4(address, dnsdb=None, hostname=None): | ||
as_num, as_name = org_by_addr(address) | ||
country = geo_data.country_code_by_addr('%s' % address) | ||
if dnsdb: | ||
hostname = maxhits(dnsdb.query_rdata_ip('%s' % address)) | ||
rhost = maxhits(dnsdb.query_rdata_ip('%s' % address)) | ||
else: | ||
hostname = None | ||
return (as_num, as_name, country, None, hostname) | ||
rhost = None | ||
return (as_num, as_name, country, hostname, rhost) | ||
|
||
|
||
def enrich_FQDN(address, date, dnsdb): | ||
records = dnsdb.query_rrset(address, rrtype='A') | ||
records = filter_date(records, date) | ||
ip_addr = maxhits(records) | ||
yesterday = dt.datetime.strptime(date, '%Y-%m-%d') - dt.timedelta(days=1) | ||
yesterday_str = yesterday.strftime('%Y-%m-%d') | ||
records = filter_date(records, yesterday_str) | ||
ip_addr = maxhits_rdata(records) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. And here is the second, and hardest, "logical issue". We should not be getting […] This will obviously mess up the way the enrichment process is built right now. |
||
if ip_addr: | ||
logger.info('Mapped %s to %s' % (address, ip_addr)) | ||
return ip_addr | ||
# logger.info('Mapped %s to %s on %s' % (address, ip_addr, date)) | ||
ip_addr_data = enrich_IPv4(IPAddress(ip_addr), dnsdb, address) | ||
return (ip_addr,) + ip_addr_data | ||
else: | ||
return None | ||
|
||
|
||
def filter_date(records, date): | ||
|
@@ -113,15 +129,15 @@ def winnow(in_file, out_file, enr_file): | |
server = config.get('Winnower', 'dnsdb_server') | ||
api = config.get('Winnower', 'dnsdb_api') | ||
enrich_ip = config.get('Winnower', 'enrich_ip') | ||
if enrich_ip == '1': | ||
if enrich_ip == '1' or enrich_ip == 'True': | ||
enrich_ip = True | ||
logger.info('Enriching IPv4 indicators: TRUE') | ||
else: | ||
enrich_ip = False | ||
logger.info('Enriching IPv4 indicators: FALSE') | ||
|
||
enrich_dns = config.get('Winnower', 'enrich_dns') | ||
if enrich_dns == '1': | ||
if enrich_dns == '1' or enrich_dns == 'True': | ||
enrich_dns = True | ||
logger.info('Enriching DNS indicators: TRUE') | ||
else: | ||
|
@@ -132,7 +148,7 @@ def winnow(in_file, out_file, enr_file): | |
|
||
# handle the case where we aren't using DNSDB | ||
dnsdb = dnsdb_query.DnsdbClient(server, api) | ||
if len(dnsdb.query_rdata_name('google.com')) == 0: | ||
if api == 'YOUR_API_KEY_HERE' or len(dnsdb.query_rdata_name('google.com')) == 0: | ||
dnsdb = None | ||
logger.info('Invalid DNSDB configuration found') | ||
|
||
|
@@ -142,33 +158,36 @@ def winnow(in_file, out_file, enr_file): | |
# TODO: make these locations configurable? | ||
logger.info('Loading GeoIP data') | ||
gi_org = load_gi_org('data/GeoIPASNum2.csv') | ||
geo_data = pygeoip.GeoIP('data/GeoIP.dat', pygeoip.MEMORY_CACHE) | ||
|
||
wheat = [] | ||
enriched = [] | ||
|
||
logger.info('Beginning winnowing process') | ||
for each in crop: | ||
(addr, addr_type, direction, source, note, date) = each | ||
# this should be refactored into appropriate functions | ||
if addr_type == 'IPv4' and is_ipv4(addr): | ||
#logger.info('Enriching %s' % addr) | ||
ipaddr = IPAddress(addr) | ||
if not reserved(ipaddr): | ||
wheat.append(each) | ||
if enrich_ip: | ||
e_data = (addr, addr_type, direction, source, note, date) + enrich_IPv4(ipaddr, geo_data, dnsdb) | ||
e_data = (addr, addr_type, direction, source, note, date) + enrich_IPv4(ipaddr, dnsdb) | ||
enriched.append(e_data) | ||
else: | ||
e_data = (addr, addr_type, direction, source, note, date) + enrich_IPv4(ipaddr, geo_data) | ||
e_data = (addr, addr_type, direction, source, note, date) + enrich_IPv4(ipaddr) | ||
enriched.append(e_data) | ||
else: | ||
logger.error('Found invalid address: %s from: %s' % (addr, source)) | ||
elif addr_type == 'FQDN' and is_fqdn(addr): | ||
#logger.info('Enriching %s' % addr) | ||
wheat.append(each) | ||
if enrich_dns and dnsdb: | ||
e_data = (addr, addr_type, direction, source, note, date, enrich_FQDN(addr, date, dnsdb)) | ||
enriched.append(e_data) | ||
# print "Enriching %s" % addr | ||
e_data = enrich_FQDN(addr, date, dnsdb) | ||
if e_data: | ||
e_data = (e_data[0], "IPv4", direction, source, note, date) + e_data[1:] | ||
enriched.append(e_data) | ||
else: | ||
logger.error('Could not determine address type for %s listed as %s' % (addr, addr_type)) | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is the first "logical issue" - we should be gathering the PTR records here, so it should actually be a `dnsdb.query_rrset` using the `in-addr.arpa` address of the IP address. I have changed the order of the `hostname` and `rhost` variables to reflect what they are on the CSV so it can be clearer what is expected of this function.

We are going for `maxhits` all right. It should be rare that there is more than 1 PTR record associated with an `in-addr.arpa` address. We might get a CNAME that points to a PTR, and we should just let the CNAME go in for now.