Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds replay-tracking option to import_logs script. #28

Merged
merged 3 commits into from
Feb 20, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions misc/log-analytics/import_logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import time
import urllib
import urllib2
import urlparse

try:
import json
Expand Down Expand Up @@ -351,6 +352,11 @@ def _create_parser(self):
'--recorder-max-payload-size', dest='recorder_max_payload_size', default=200, type='int',
help="Maximum number of log entries to record in one tracking request (default: %default). "
)
option_parser.add_option(
'--replay-tracking', dest='replay_tracking',
action='store_true', default=False,
help="Replay piwik.php requests found in custom logs (only piwik.php requests expected)"
)
option_parser.add_option(
'--output', dest='output',
help="Redirect output (stdout and stderr) to the specified file"
Expand Down Expand Up @@ -1408,6 +1414,16 @@ def invalid_line(line, reason):
Recorder.add_hits(hits)
hits = []

if config.options.replay_tracking:
# we need a query string and we only consider requests with piwik.php
if hit.query_string and hit.path.lower().endswith('piwik.php'):
query_arguments = urlparse.parse_qs(hit.query_string)
if "idsite" in query_arguments:
try:
hit.args.update((k, v.pop().encode('raw_unicode_escape').decode(config.options.encoding)) for k, v in query_arguments.iteritems())
except UnicodeDecodeError:
invalid_line(line, 'invalid encoding')
continue
# add last chunk of hits
if len(hits) > 0:
Recorder.add_hits(hits)
Expand Down
3 changes: 3 additions & 0 deletions misc/log-analytics/tests/logs_to_tests.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
203.38.78.246 - - [05/Feb/2013:07:01:26 +0000] "GET /piwik.php?action_name=Clearcode%20-%20Web%20and%20Mobile%20Development%20%7C%20Technology%20With%20Passion&idsite=1&rec=1&r=983420&h=17&m=31&s=25&url=http%3A%2F%2Fclearcode.cc%2F&urlref=http%3A%2F%2Fclearcode.cc%2Fwelcome&_id=1da79fc743e8bcc4&_idts=1360047661&_idvc=1&_idn=0&_refts=1360047661&_viewts=1360047661&_ref=http%3A%2F%2Fpiwik.org%2Fthank-you-all%2F&pdf=1&qt=1&realp=0&wma=1&dir=1&fla=1&java=1&gears=0&ag=1&cookie=1&res=1680x1050 HTTP/1.1" 200 192 "http://clearcode.cc/" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17"
203.38.78.246 - - [05/Feb/2013:07:01:41 +0000] "GET /piwik.php?action_name=AdviserBrief%20-%20Track%20Your%20Investments%20and%20Plan%20Financial%20Future%20%7C%20Clearcode&idsite=1&rec=1&r=109464&h=17&m=31&s=40&url=http%3A%2F%2Fclearcode.cc%2Fcase%2Fadviserbrief-track-your-investments-and-plan-financial-future%2F&urlref=http%3A%2F%2Fclearcode.cc%2Fwelcome&_id=1da79fc743e8bcc4&_idts=1360047661&_idvc=1&_idn=0&_refts=1360047661&_viewts=1360047661&_ref=http%3A%2F%2Fpiwik.org%2Fthank-you-all%2F&pdf=1&qt=1&realp=0&wma=1&dir=1&fla=1&java=1&gears=0&ag=1&cookie=1&res=1680x1050 HTTP/1.1" 200 192 "http://clearcode.cc/case/adviserbrief-track-your-investments-and-plan-financial-future" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17"
203.38.78.246 - - [05/Feb/2013:07:01:46 +0000] "GET /piwik.php?action_name=ATL%20Apps%20-%20American%20Tailgating%20League%20Mobile%20Android%20IOS%20Games%20%7C%20Clearcode&idsite=1&rec=1&r=080064&h=17&m=31&s=46&url=http%3A%2F%2Fclearcode.cc%2Fcase%2Fatl-apps-mobile-android-ios-games%2F&urlref=http%3A%2F%2Fclearcode.cc%2Fwelcome&_id=1da79fc743e8bcc4&_idts=1360047661&_idvc=1&_idn=0&_refts=1360047661&_viewts=1360047661&_ref=http%3A%2F%2Fpiwik.org%2Fthank-you-all%2F&pdf=1&qt=1&realp=0&wma=1&dir=1&fla=1&java=1&gears=0&ag=1&cookie=1&res=1680x1050 HTTP/1.1" 200 192 "http://clearcode.cc/case/atl-apps-mobile-android-ios-games" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17"
136 changes: 136 additions & 0 deletions misc/log-analytics/tests/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,139 @@ def _test(format_name):
f = functools.partial(_test, format_name)
f.description = 'Testing autodetection of format ' + format_name
yield f


class Options(object):
    """Fake option set exposing the attributes Parser's checkers read."""

    # Option exercised by the replay-tracking test.
    replay_tracking = True

    # Piwik endpoint settings (never contacted: the resolver is mocked too).
    piwik_url = 'http://example.com'
    piwik_token_auth = False

    # Log decoding / parsing settings.
    encoding = 'utf-8'
    log_hostname = 'foo'
    query_string_delimiter = '?'
    recorder_max_payload_size = 200
    skip = False

    # Filters, all left empty so that no hit is excluded.
    hostnames = []
    excluded_paths = []
    excluded_useragents = []
    enable_bots = []

    # Output settings.
    debug = False
    show_progress = False


class Config(object):
    """Fake configuration object: mock options plus a fixed log format."""

    # The sample log lines are parsed with the 'ncsa_extended' format.
    format = import_logs.FORMATS['ncsa_extended']
    options = Options()


class Resolver(object):
    """Fake resolver, so that no connection to a real piwik is checked."""

    def check_format(self, format_):
        """Accept any format: nothing is validated and None is returned."""
        return None


class Recorder(object):
    """Fake recorder that keeps the hits in memory instead of storing them."""

    # Shared, class-level list holding every hit passed to add_hits().
    recorders = []

    @classmethod
    def add_hits(cls, hits):
        """Append all of ``hits`` to the shared ``recorders`` list."""
        collected = cls.recorders
        collected.extend(hits)


def test_replay_tracking_arguments():
    """Test data parsing from sample log file.

    Parses the three piwik.php requests of ``logs_to_tests.log`` with the
    mocked configuration (``replay_tracking`` enabled) and checks that every
    query-string argument of each request is found, URL-decoded, in the
    ``args`` of the corresponding recorded hit.
    """
    import os

    # The fixture lives next to this test module; resolve it from there so the
    # test does not depend on the directory the runner was started from.
    file_ = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'logs_to_tests.log')

    # The mock recorder accumulates hits in a class-level list. Empty it so
    # that hits[0..2] below are the ones parsed by this test.
    del Recorder.recorders[:]

    import_logs.stats = import_logs.Statistics()
    import_logs.config = Config()
    import_logs.resolver = Resolver()
    # add_hits is a classmethod, so it works when called through an instance.
    import_logs.Recorder = Recorder()
    import_logs.parser = import_logs.Parser()
    import_logs.parser.parse(file_)

    hits = [hit.args for hit in import_logs.Recorder.recorders]

    # Arguments that are identical in the three sample requests.
    shared = {
        '_idn': '0',
        'ag': '1',
        '_viewts': '1360047661',
        'urlref': 'http://clearcode.cc/welcome',
        '_ref': 'http://piwik.org/thank-you-all/',
        '_idts': '1360047661',
        'java': '1',
        'res': '1680x1050',
        'idsite': '1',
        'realp': '0',
        'wma': '1',
        '_idvc': '1',
        'cookie': '1',
        'rec': '1',
        'qt': '1',
        'h': '17',
        'm': '31',
        'gears': '0',
        'fla': '1',
        'pdf': '1',
        '_id': '1da79fc743e8bcc4',
        'dir': '1',
        '_refts': '1360047661',
    }

    # Arguments that differ from one request to the next, in log order.
    specific = [
        {
            'action_name': 'Clearcode - Web and Mobile Development | Technology With Passion',
            'url': 'http://clearcode.cc/',
            's': '25',
            'r': '983420',
        },
        {
            'action_name': 'AdviserBrief - Track Your Investments and Plan Financial Future | Clearcode',
            'url': 'http://clearcode.cc/case/adviserbrief-track-your-investments-and-plan-financial-future/',
            's': '40',
            'r': '109464',
        },
        {
            'action_name': 'ATL Apps - American Tailgating League Mobile Android IOS Games | Clearcode',
            'url': 'http://clearcode.cc/case/atl-apps-mobile-android-ios-games/',
            's': '46',
            'r': '080064',
        },
    ]

    assert len(hits) == len(specific), \
        'expected %d recorded hits, got %d' % (len(specific), len(hits))

    for index, (args, own) in enumerate(zip(hits, specific)):
        expected = dict(shared)
        expected.update(own)
        for key, value in sorted(expected.items()):
            assert args[key] == value, \
                'hit %d, argument %r: %r != %r' % (index, key, args[key], value)