Skip to content

Commit

Permalink
Merge pull request #28 from clearcode/master
Browse files Browse the repository at this point in the history
Adds replay-tracking option to import_logs script.

Thanks @clearcode!
  • Loading branch information
halfdan committed Feb 20, 2013
2 parents 803944c + 17166e5 commit dfd779b
Show file tree
Hide file tree
Showing 3 changed files with 155 additions and 0 deletions.
16 changes: 16 additions & 0 deletions misc/log-analytics/import_logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import time
import urllib
import urllib2
import urlparse

try:
import json
Expand Down Expand Up @@ -351,6 +352,11 @@ def _create_parser(self):
'--recorder-max-payload-size', dest='recorder_max_payload_size', default=200, type='int',
help="Maximum number of log entries to record in one tracking request (default: %default). "
)
option_parser.add_option(
'--replay-tracking', dest='replay_tracking',
action='store_true', default=False,
help="Replay piwik.php requests found in custom logs (only piwik.php requests expected)"
)
option_parser.add_option(
'--output', dest='output',
help="Redirect output (stdout and stderr) to the specified file"
Expand Down Expand Up @@ -1408,6 +1414,16 @@ def invalid_line(line, reason):
Recorder.add_hits(hits)
hits = []

if config.options.replay_tracking:
# we need a query string and we only consider requests with piwik.php
if hit.query_string and hit.path.lower().endswith('piwik.php'):
query_arguments = urlparse.parse_qs(hit.query_string)
if "idsite" in query_arguments:
try:
hit.args.update((k, v.pop().encode('raw_unicode_escape').decode(config.options.encoding)) for k, v in query_arguments.iteritems())
except UnicodeDecodeError:
invalid_line(line, 'invalid encoding')
continue
# add last chunk of hits
if len(hits) > 0:
Recorder.add_hits(hits)
Expand Down
3 changes: 3 additions & 0 deletions misc/log-analytics/tests/logs_to_tests.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
203.38.78.246 - - [05/Feb/2013:07:01:26 +0000] "GET /piwik.php?action_name=Clearcode%20-%20Web%20and%20Mobile%20Development%20%7C%20Technology%20With%20Passion&idsite=1&rec=1&r=983420&h=17&m=31&s=25&url=http%3A%2F%2Fclearcode.cc%2F&urlref=http%3A%2F%2Fclearcode.cc%2Fwelcome&_id=1da79fc743e8bcc4&_idts=1360047661&_idvc=1&_idn=0&_refts=1360047661&_viewts=1360047661&_ref=http%3A%2F%2Fpiwik.org%2Fthank-you-all%2F&pdf=1&qt=1&realp=0&wma=1&dir=1&fla=1&java=1&gears=0&ag=1&cookie=1&res=1680x1050 HTTP/1.1" 200 192 "http://clearcode.cc/" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17"
203.38.78.246 - - [05/Feb/2013:07:01:41 +0000] "GET /piwik.php?action_name=AdviserBrief%20-%20Track%20Your%20Investments%20and%20Plan%20Financial%20Future%20%7C%20Clearcode&idsite=1&rec=1&r=109464&h=17&m=31&s=40&url=http%3A%2F%2Fclearcode.cc%2Fcase%2Fadviserbrief-track-your-investments-and-plan-financial-future%2F&urlref=http%3A%2F%2Fclearcode.cc%2Fwelcome&_id=1da79fc743e8bcc4&_idts=1360047661&_idvc=1&_idn=0&_refts=1360047661&_viewts=1360047661&_ref=http%3A%2F%2Fpiwik.org%2Fthank-you-all%2F&pdf=1&qt=1&realp=0&wma=1&dir=1&fla=1&java=1&gears=0&ag=1&cookie=1&res=1680x1050 HTTP/1.1" 200 192 "http://clearcode.cc/case/adviserbrief-track-your-investments-and-plan-financial-future" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17"
203.38.78.246 - - [05/Feb/2013:07:01:46 +0000] "GET /piwik.php?action_name=ATL%20Apps%20-%20American%20Tailgating%20League%20Mobile%20Android%20IOS%20Games%20%7C%20Clearcode&idsite=1&rec=1&r=080064&h=17&m=31&s=46&url=http%3A%2F%2Fclearcode.cc%2Fcase%2Fatl-apps-mobile-android-ios-games%2F&urlref=http%3A%2F%2Fclearcode.cc%2Fwelcome&_id=1da79fc743e8bcc4&_idts=1360047661&_idvc=1&_idn=0&_refts=1360047661&_viewts=1360047661&_ref=http%3A%2F%2Fpiwik.org%2Fthank-you-all%2F&pdf=1&qt=1&realp=0&wma=1&dir=1&fla=1&java=1&gears=0&ag=1&cookie=1&res=1680x1050 HTTP/1.1" 200 192 "http://clearcode.cc/case/atl-apps-mobile-android-ios-games" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17"
136 changes: 136 additions & 0 deletions misc/log-analytics/tests/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,139 @@ def _test(format_name):
f = functools.partial(_test, format_name)
f.description = 'Testing autodetection of format ' + format_name
yield f


class Options(object):
    """Mock of the parsed command-line options used by ``import_logs``.

    Provides just the attributes needed to run the checkers of the
    ``Parser`` class without going through real option parsing.
    """

    # Turns on the replay-tracking branch exercised by the test below.
    replay_tracking = True

    # Scalar settings.
    piwik_url = 'http://example.com'
    piwik_token_auth = False
    recorder_max_payload_size = 200
    encoding = 'utf-8'
    log_hostname = 'foo'
    query_string_delimiter = '?'

    # Switches that are all off for the test run.
    debug = False
    show_progress = False
    skip = False

    # Empty (falsy) collections.
    hostnames = []
    excluded_paths = []
    excluded_useragents = []
    enable_bots = []


class Config(object):
    """Mock configuration object installed as ``import_logs.config``.

    Pins the log format to ``ncsa_extended`` and exposes one shared
    ``Options`` instance as ``options``.
    """

    format = import_logs.FORMATS['ncsa_extended']
    options = Options()


class Resolver(object):
    """Mock resolver which doesn't check the connection to a real Piwik."""

    def check_format(self, format_):
        """Accept any ``format_`` without performing a check."""
        return None


class Recorder(object):
    """Mock recorder which collects hits but doesn't put them in the database."""

    # Class-level list: every call to add_hits() accumulates into this one
    # shared object, regardless of which instance (or the class) is used.
    recorders = []

    @classmethod
    def add_hits(cls, hits):
        """Append each element of ``hits`` to the shared ``recorders`` list."""
        collected = cls.recorders
        for hit in hits:
            collected.append(hit)


def test_replay_tracking_arguments():
    """Test data parsing from sample log file.

    Parses ``logs_to_tests.log`` with replay tracking enabled (see the
    mocked ``Options``) and checks that the query-string arguments of each
    logged ``piwik.php`` request end up, URL-decoded, in the ``args`` of the
    corresponding recorded hit.
    """
    import os

    # Locate the fixture next to this module so the test does not depend on
    # the directory the test runner was started from.
    file_ = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'logs_to_tests.log'
    )

    # The mock recorder stores hits in a class-level list; empty it so hits
    # left over from an earlier parse cannot shift the indices checked below.
    del Recorder.recorders[:]

    # Replace the module-level collaborators of import_logs with mocks.
    import_logs.stats = import_logs.Statistics()
    import_logs.config = Config()
    import_logs.resolver = Resolver()
    import_logs.Recorder = Recorder()
    import_logs.parser = import_logs.Parser()
    import_logs.parser.parse(file_)

    hits = [hit.args for hit in import_logs.Recorder.recorders]

    # Arguments that are identical in all three logged requests.
    common = {
        '_id': '1da79fc743e8bcc4',
        '_idn': '0',
        '_idts': '1360047661',
        '_idvc': '1',
        '_ref': 'http://piwik.org/thank-you-all/',
        '_refts': '1360047661',
        '_viewts': '1360047661',
        'ag': '1',
        'cookie': '1',
        'dir': '1',
        'fla': '1',
        'gears': '0',
        'h': '17',
        'idsite': '1',
        'java': '1',
        'm': '31',
        'pdf': '1',
        'qt': '1',
        'realp': '0',
        'rec': '1',
        'res': '1680x1050',
        'urlref': 'http://clearcode.cc/welcome',
        'wma': '1',
    }

    # Arguments that differ per request, in log-file order.
    per_hit = [
        {
            'action_name': 'Clearcode - Web and Mobile Development | Technology With Passion',
            'url': 'http://clearcode.cc/',
            's': '25',
            'r': '983420',
        },
        {
            'action_name': 'AdviserBrief - Track Your Investments and Plan Financial Future | Clearcode',
            'url': 'http://clearcode.cc/case/adviserbrief-track-your-investments-and-plan-financial-future/',
            's': '40',
            'r': '109464',
        },
        {
            'action_name': 'ATL Apps - American Tailgating League Mobile Android IOS Games | Clearcode',
            'url': 'http://clearcode.cc/case/atl-apps-mobile-android-ios-games/',
            's': '46',
            'r': '080064',
        },
    ]

    assert len(hits) == len(per_hit), (
        'expected %d recorded hits, got %d' % (len(per_hit), len(hits))
    )

    for index, specific in enumerate(per_hit):
        expected = dict(common)
        expected.update(specific)
        args = hits[index]
        for key in sorted(expected):
            assert key in args, 'hit %d: missing argument %r' % (index, key)
            assert args[key] == expected[key], (
                'hit %d: argument %r is %r, expected %r'
                % (index, key, args[key], expected[key])
            )

0 comments on commit dfd779b

Please sign in to comment.