diff --git a/misc/log-analytics/import_logs.py b/misc/log-analytics/import_logs.py index b1196f7731a..265a0685510 100755 --- a/misc/log-analytics/import_logs.py +++ b/misc/log-analytics/import_logs.py @@ -30,6 +30,7 @@ import time import urllib import urllib2 +import urlparse try: import json @@ -351,6 +352,11 @@ def _create_parser(self): '--recorder-max-payload-size', dest='recorder_max_payload_size', default=200, type='int', help="Maximum number of log entries to record in one tracking request (default: %default). " ) + option_parser.add_option( + '--replay-tracking', dest='replay_tracking', + action='store_true', default=False, + help="Replay piwik.php requests found in custom logs (only piwik.php requests expected)" + ) option_parser.add_option( '--output', dest='output', help="Redirect output (stdout and stderr) to the specified file" @@ -1408,6 +1414,16 @@ def invalid_line(line, reason): Recorder.add_hits(hits) hits = [] + if config.options.replay_tracking: + # we need a query string and we only consider requests with piwik.php + if hit.query_string and hit.path.lower().endswith('piwik.php'): + query_arguments = urlparse.parse_qs(hit.query_string) + if "idsite" in query_arguments: + try: + hit.args.update((k, v.pop().encode('raw_unicode_escape').decode(config.options.encoding)) for k, v in query_arguments.iteritems()) + except UnicodeDecodeError: + invalid_line(line, 'invalid encoding') + continue # add last chunk of hits if len(hits) > 0: Recorder.add_hits(hits) diff --git a/misc/log-analytics/tests/logs_to_tests.log b/misc/log-analytics/tests/logs_to_tests.log new file mode 100644 index 00000000000..bbb08d6cf68 --- /dev/null +++ b/misc/log-analytics/tests/logs_to_tests.log @@ -0,0 +1,3 @@ +203.38.78.246 - - [05/Feb/2013:07:01:26 +0000] "GET 
/piwik.php?action_name=Clearcode%20-%20Web%20and%20Mobile%20Development%20%7C%20Technology%20With%20Passion&idsite=1&rec=1&r=983420&h=17&m=31&s=25&url=http%3A%2F%2Fclearcode.cc%2F&urlref=http%3A%2F%2Fclearcode.cc%2Fwelcome&_id=1da79fc743e8bcc4&_idts=1360047661&_idvc=1&_idn=0&_refts=1360047661&_viewts=1360047661&_ref=http%3A%2F%2Fpiwik.org%2Fthank-you-all%2F&pdf=1&qt=1&realp=0&wma=1&dir=1&fla=1&java=1&gears=0&ag=1&cookie=1&res=1680x1050 HTTP/1.1" 200 192 "http://clearcode.cc/" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17" +203.38.78.246 - - [05/Feb/2013:07:01:41 +0000] "GET /piwik.php?action_name=AdviserBrief%20-%20Track%20Your%20Investments%20and%20Plan%20Financial%20Future%20%7C%20Clearcode&idsite=1&rec=1&r=109464&h=17&m=31&s=40&url=http%3A%2F%2Fclearcode.cc%2Fcase%2Fadviserbrief-track-your-investments-and-plan-financial-future%2F&urlref=http%3A%2F%2Fclearcode.cc%2Fwelcome&_id=1da79fc743e8bcc4&_idts=1360047661&_idvc=1&_idn=0&_refts=1360047661&_viewts=1360047661&_ref=http%3A%2F%2Fpiwik.org%2Fthank-you-all%2F&pdf=1&qt=1&realp=0&wma=1&dir=1&fla=1&java=1&gears=0&ag=1&cookie=1&res=1680x1050 HTTP/1.1" 200 192 "http://clearcode.cc/case/adviserbrief-track-your-investments-and-plan-financial-future" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17" +203.38.78.246 - - [05/Feb/2013:07:01:46 +0000] "GET /piwik.php?action_name=ATL%20Apps%20-%20American%20Tailgating%20League%20Mobile%20Android%20IOS%20Games%20%7C%20Clearcode&idsite=1&rec=1&r=080064&h=17&m=31&s=46&url=http%3A%2F%2Fclearcode.cc%2Fcase%2Fatl-apps-mobile-android-ios-games%2F&urlref=http%3A%2F%2Fclearcode.cc%2Fwelcome&_id=1da79fc743e8bcc4&_idts=1360047661&_idvc=1&_idn=0&_refts=1360047661&_viewts=1360047661&_ref=http%3A%2F%2Fpiwik.org%2Fthank-you-all%2F&pdf=1&qt=1&realp=0&wma=1&dir=1&fla=1&java=1&gears=0&ag=1&cookie=1&res=1680x1050 HTTP/1.1" 200 192 
"http://clearcode.cc/case/atl-apps-mobile-android-ios-games" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17"
diff --git a/misc/log-analytics/tests/tests.py b/misc/log-analytics/tests/tests.py
index 963dfd243ca..55b71943172 100644
--- a/misc/log-analytics/tests/tests.py
+++ b/misc/log-analytics/tests/tests.py
@@ -12,3 +12,126 @@ def _test(format_name):
         f = functools.partial(_test, format_name)
         f.description = 'Testing autodetection of format ' + format_name
         yield f
+
+
+class Options(object):
+    """Mock config options necessary to run checkers from Parser class."""
+    debug = False
+    encoding = 'utf-8'
+    log_hostname = 'foo'
+    query_string_delimiter = '?'
+    piwik_token_auth = False
+    piwik_url = 'http://example.com'
+    recorder_max_payload_size = 200
+    replay_tracking = True
+    show_progress = False
+    skip = False
+    hostnames = []
+    excluded_paths = []
+    excluded_useragents = []
+    enable_bots = []
+
+
+class Config(object):
+    """Mock configuration."""
+    options = Options()
+    format = import_logs.FORMATS['ncsa_extended']
+
+
+class Resolver(object):
+    """Mock resolver which doesn't check connection to real piwik."""
+    def check_format(self, format_):
+        pass
+
+
+class Recorder(object):
+    """Mock recorder which collects hits but doesn't put them in database."""
+    recorders = []
+
+    @classmethod
+    def add_hits(cls, hits):
+        cls.recorders.extend(hits)
+
+
+# Tracking arguments shared by all three requests in logs_to_tests.log.
+_COMMON_ARGS = {
+    '_id': '1da79fc743e8bcc4',
+    '_idn': '0',
+    '_idts': '1360047661',
+    '_idvc': '1',
+    '_ref': 'http://piwik.org/thank-you-all/',
+    '_refts': '1360047661',
+    '_viewts': '1360047661',
+    'ag': '1',
+    'cookie': '1',
+    'dir': '1',
+    'fla': '1',
+    'gears': '0',
+    'h': '17',
+    'idsite': '1',
+    'java': '1',
+    'm': '31',
+    'pdf': '1',
+    'qt': '1',
+    'realp': '0',
+    'rec': '1',
+    'res': '1680x1050',
+    'urlref': 'http://clearcode.cc/welcome',
+    'wma': '1',
+}
+
+# Arguments that differ between the requests, in the order they are logged.
+_PER_HIT_ARGS = [
+    {
+        'action_name': 'Clearcode - Web and Mobile Development | Technology With Passion',
+        'url': 'http://clearcode.cc/',
+        's': '25',
+        'r': '983420',
+    },
+    {
+        'action_name': 'AdviserBrief - Track Your Investments and Plan Financial Future | Clearcode',
+        'url': 'http://clearcode.cc/case/adviserbrief-track-your-investments-and-plan-financial-future/',
+        's': '40',
+        'r': '109464',
+    },
+    {
+        'action_name': 'ATL Apps - American Tailgating League Mobile Android IOS Games | Clearcode',
+        'url': 'http://clearcode.cc/case/atl-apps-mobile-android-ios-games/',
+        's': '46',
+        'r': '080064',
+    },
+]
+
+
+def test_replay_tracking_arguments():
+    """Test data parsing from sample log file.
+
+    Parses logs_to_tests.log with replay_tracking enabled (see Options) and
+    checks that the query string arguments of each piwik.php request end up
+    in the args of the recorded hit.
+    """
+    import os
+
+    # Locate the fixture next to this module so the test does not depend on
+    # the current working directory.
+    file_ = os.path.join(
+        os.path.dirname(os.path.abspath(__file__)), 'logs_to_tests.log')
+
+    # recorders is a class-level list; start from a clean slate so hits from
+    # an earlier run cannot satisfy (or break) the assertions below.
+    Recorder.recorders = []
+
+    import_logs.stats = import_logs.Statistics()
+    import_logs.config = Config()
+    import_logs.resolver = Resolver()
+    import_logs.Recorder = Recorder()
+    import_logs.parser = import_logs.Parser()
+    import_logs.parser.parse(file_)
+
+    hits = [hit.args for hit in import_logs.Recorder.recorders]
+
+    assert len(hits) == len(_PER_HIT_ARGS)
+    for index, (args, specific) in enumerate(zip(hits, _PER_HIT_ARGS)):
+        expected = dict(_COMMON_ARGS, **specific)
+        for key, value in expected.items():
+            assert args[key] == value, (index, key, args[key])