-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcreate_flagged_data.py
159 lines (134 loc) · 5.09 KB
/
create_flagged_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
"""
This module creates statistical data about flagged revisions.
It stores all data in the specified path. The data can be separated by
month and day (total) or by month (category-wise). It produces a
tab-separated file with the number of active revisions (not the passive ones
that you get by changing an article) and the user id over the specified time
period and articles.
create_data_daily Daily revision data for all articles
create_data_monthly Monthly revision data for all articles
create_data_monthly_cat Monthly revision data for all articles in category
tree. It needs a table pages_catname which contains
the ids of all pages in question. This table can be
generated using create_cat_tables
Variables:
path -- where to store the files
slow_ok_text -- text to put into the query when slow queries are allowed
"""
import db_api
#import MySQLdb, create_flagged_data
#db = MySQLdb.connect(read_default_file="/home/hroest/.my.cnf")
##create_flagged_data.create_cat_tables( db, 'Chemie' )
#create_flagged_data.create_data_monthly_cat( db, 2010, 5, 'Chemie' )
from general_lib import flagged_data_path as path
# Text appended to every query, keyed by whether slow queries are allowed.
slow_ok_text = {
    False: "",
    True: " /* SLOW_OK */ ",
}
def create_data_daily(db, year, month, day, slow_ok = True):
    """Creates the file with the per-user data of one single day.

    Counts the rows of dewiki_p.flaggedrevs per fr_user whose fr_timestamp
    starts with the given year, month and day and hands the query to
    _create_data, which stores the result in
    path + 'all_month_day<year><month><day>'.

    db      -- database connection, passed on to _create_data
    year    -- year part of the timestamp prefix
    month   -- month, zero padded to two digits in query and file name
    day     -- day, zero padded to two digits in query and file name
    slow_ok -- key into slow_ok_text, selects the text appended to the query
    """
    sql_lines = [
        "",
        "select count(*),fr_user from dewiki_p.flaggedrevs",
        "where",
        "( fr_flags = 'dynamic' or fr_flags = ',dynamic' )",
        "and fr_timestamp like '%s%02d%02d%%'",
        "group by fr_user",
        "order by count(*)",
        "%s",
        "",
    ]
    hint = slow_ok_text[slow_ok]
    query = "\n".join(sql_lines) % (year, month, day, hint)
    myfile = path + 'all_month_day%s%02d%02d' % (year, month, day)
    _create_data(db, query, myfile)
def create_data_monthly(db, year, month, slow_ok = True):
    """Creates the file with the per-user data of one month.

    Counts the rows of dewiki_p.flaggedrevs per fr_user whose fr_timestamp
    starts with the given year and month and hands the query to
    _create_data, which stores the result in
    path + 'all_month_users_<year><month>'.

    db      -- database connection, passed on to _create_data
    year    -- year part of the timestamp prefix
    month   -- month, zero padded to two digits in query and file name
    slow_ok -- key into slow_ok_text, selects the text appended to the query
    """
    sql_lines = [
        "",
        "select count(*),fr_user from dewiki_p.flaggedrevs",
        "where",
        "( fr_flags = 'dynamic' or fr_flags = ',dynamic' )",
        "and fr_timestamp like '%s%02d%%'",
        "group by fr_user",
        "order by count(*)",
        "%s",
        "",
    ]
    hint = slow_ok_text[slow_ok]
    query = "\n".join(sql_lines) % (year, month, hint)
    myfile = path + 'all_month_users_%s%02d' % (year, month)
    _create_data(db, query, myfile)
def create_data_all_year(db, year, slow_ok = True):
    """Creates the file with the per-user data of one whole year.

    Counts the rows of dewiki_p.flaggedrevs per fr_user whose fr_timestamp
    starts with the given year and hands the query to _create_data, which
    stores the result in path + 'all_year_users_<year>'.

    db      -- database connection, passed on to _create_data
    year    -- year used as the timestamp prefix
    slow_ok -- key into slow_ok_text, selects the text appended to the query
    """
    sql_lines = [
        "",
        "select count(*),fr_user from dewiki_p.flaggedrevs",
        "where",
        "( fr_flags = 'dynamic' or fr_flags = ',dynamic' )",
        "and fr_timestamp like '%s%%'",
        "group by fr_user",
        "order by count(*)",
        "%s",
        "",
    ]
    hint = slow_ok_text[slow_ok]
    query = "\n".join(sql_lines) % (year, hint)
    myfile = path + 'all_year_users_%s' % year
    _create_data(db, query, myfile)
def create_data_all_time(db, slow_ok = True):
    """Creates the file with the per-user data over all time.

    Counts the rows of dewiki_p.flaggedrevs per fr_user without any
    restriction on fr_timestamp and hands the query to _create_data, which
    stores the result in path + 'all_time'.

    db      -- database connection, passed on to _create_data
    slow_ok -- key into slow_ok_text, selects the text appended to the query
    """
    sql_lines = [
        "",
        "select count(*),fr_user from dewiki_p.flaggedrevs",
        "where",
        "( fr_flags = 'dynamic' or fr_flags = ',dynamic' )",
        "group by fr_user",
        "order by count(*)",
        "%s",
        "",
    ]
    hint = slow_ok_text[slow_ok]
    query = "\n".join(sql_lines) % hint
    myfile = path + 'all_time'
    _create_data(db, query, myfile)
def create_data_monthly_cat(db, year, month, cat, slow_ok = True):
    """Creates the file with the per-month data in it. Restricted to one cat.

    Counts the rows of dewiki_p.flaggedrevs per fr_user whose fr_timestamp
    starts with the given year and month and whose fr_page_id is contained
    in the table u_hroest.pages_<cat>. The result is stored in
    path + 'all_month_users_<year><month><cat>' by _create_data.

    The table u_hroest.pages_<cat> has to exist already; it can be generated
    using create_cat_tables.

    db      -- database connection, passed on to _create_data
    year    -- year part of the timestamp prefix
    month   -- month, zero padded to two digits in query and file name
    cat     -- category name; it is pasted unescaped into the table name and
               the file name, so it must be a trusted, plain identifier
    slow_ok -- key into slow_ok_text, selects the text appended to the query
    """
    query = \
"""
select count(*),fr_user from dewiki_p.flaggedrevs
where
( fr_flags = 'dynamic' or fr_flags = ',dynamic' )
and fr_timestamp like '%s%02d%%'
and fr_page_id in (select * from %s)
group by fr_user
order by count(*)
%s
""" % (year, month, "u_hroest.pages_" + cat, slow_ok_text[slow_ok])
    myfile = path + 'all_month_users_%s%02d%s' % (year, month, cat)
    # Use the shared writer like the other create_data_* functions instead
    # of carrying a private copy of its body.
    _create_data(db, query, myfile)
def _create_data(db, query, myfile):
    """Runs query and stores its result as a tab separated file in myfile.

    The file starts with the header line 'count(*)<TAB>fr_user' followed by
    one line per result row holding the first two columns of that row.
    The data is first written to myfile + '_tmp' and only moved to myfile
    after the temporary file has been written and closed.

    db     -- database connection; only db.cursor() is used
    query  -- SQL text handed to cursor.execute as is
    myfile -- final location of the output file
    """
    import shutil

    # Fetch first: a failing query must not leave a truncated tmp file.
    cursor = db.cursor()
    cursor.execute(query)
    rows = cursor.fetchall()

    tmpfile = myfile + '_tmp'
    print("writing into  %s" % tmpfile)
    # The with block closes the file even when a write raises.
    with open(tmpfile, 'w') as f:
        f.write('count(*)\tfr_user\n')
        for r in rows:
            f.write('%s\t%s\n' % (r[0], r[1]))

    # after closing we move the tmp file to the real location; shutil.move
    # copes with spaces and shell metacharacters in the path, which the
    # former shell 'mv' command line did not, and raises on failure.
    print('moving now')
    print('%s -> %s' % (tmpfile, myfile))
    shutil.move(tmpfile, myfile)
def create_cat_tables(db, name):
    """(Re)creates the table u_hroest.pages_<name> with the page ids of a cat.

    An existing table of that name is dropped. The new table has the single
    column id_page and receives the id of every page object returned by
    db_api.db_get_articles_in_category_object for the category name.

    db   -- database connection; only db.cursor() is used
    name -- category name; it is pasted unescaped into the table name, so it
            must be a trusted, plain identifier
    """
    table = 'u_hroest.pages_%s' % name
    c = db.cursor()
    c.execute('drop table if exists ' + table)
    c.execute("create table " + table + "( id_page INT)")
    # NOTE(review): depth = -100 presumably means "follow all subcategories";
    # confirm against db_api.
    result = db_api.db_get_articles_in_category_object('de', name, c, depth = -100)
    # executemany expects one parameter sequence per row, so every id is
    # wrapped into a 1-tuple instead of being passed as a bare scalar.
    rows = [(page.id,) for page in result]
    if rows:
        c.executemany("insert into " + table + " (id_page) values (%s)", rows)
    # NOTE(review): nothing is committed here; on a transactional storage
    # engine the inserted rows may be lost when the connection is closed.
    # Confirm whether the caller commits.