diff --git a/paramgenerator/generateparamsbi.jl b/paramgenerator/generateparamsbi.jl
new file mode 100644
index 000000000..0a3e0d597
--- /dev/null
+++ b/paramgenerator/generateparamsbi.jl
@@ -0,0 +1,204 @@
+# class ParamsWriter:
+#    def __init__(self, outdir, number, param_nameFactors):
+#       self.file = codecs.open(outdir+"/bi_"+str(number)+"_param.txt", "w",encoding="utf-8")
+#       for i in range(0,len(param_nameFactors)):
+#          if i>0:
+#             self.file.write("|")
+#          self.file.write(param_nameFactors[i])
+#       self.file.write("\n")
+
+#    def append(self, params):
+#       for i, param in enumerate(params):
+#          if i>0:
+#             self.file.write("|")
+#          self.file.write(param)
+#       self.file.write("\n")
+
+# def key_params(sample, lower_bound, upper_bound):
+#    results = []
+#    for key, count in sample:
+#       if count > lower_bound and count < upper_bound:
+#          results.append([key, count])
+#    return results
+
+# def serialize_q6(outdir, tagFactors):
+#    writer = ParamsWriter(outdir, 6, ["tag"])
+#    for tag, count in tagFactors:
+#       writer.append([tag])
+
+#    # read precomputed counts from files
+#    (personFactors, countryFactors, tagFactors, tagClassFactors, nameFactors, givennameFactors,  ts, postsHisto) = \
+#       readfactors.load(personFactorFiles,activityFactorFiles, friendsFiles)
+
+#    tag_posts = tagFactors
+#    tag_posts.sort(key=lambda x: x[1], reverse=True)
+
+#    total_posts = 0
+#    for day, count in tag_posts:
+#       total_posts += count
+
+#    serialize_q6 (outdir, key_params(tag_posts, total_posts/1300, total_posts/900))
+
+
+## readfactors.py
+
+
+# for inputFileName in activityFactorFiles:
+#     with codecs.open(inputFileName, "r", "utf-8") as f:
+#         countryCount = int(f.readline())
+#         for i in range(countryCount):
+#             line = f.readline().split(",")
+#             country = line[0]
+#             if not countryFactors.existParam(country):
+#                 countryFactors.addNewParam(country)
+#             countryFactors.addValue(country, "p", int(line[1]))
+
+#         tagClassCount = int(f.readline())
+#         for i in range(tagClassCount):
+#             line = f.readline().split(",")
+#             tagClass = line[0]
+#             if not tagClass in tagClassFactors:
+#                 tagClassFactors[tagClass] = 0
+#             tagClassFactors[tagClass] += int(line[2])
+
+#         tagCount = int(f.readline())
+#         for i in range(tagCount):
+#             line = f.readline()
+#             count = line[1+line.rfind(","):]
+#             name = line[:line.rfind(",")]
+#             if not name in tagFactors:
+#                 tagFactors[name] = 0
+#             tagFactors[name] += int(count)
+
+#         nameCount = int(f.readline())
+#         for i in range(nameCount):
+#             line = f.readline().split(",")
+#             name = line[0]
+#             if not name in nameFactors:
+#                 nameFactors[name] = 0
+#             nameFactors[name] += int(line[1])
+
+#         for i in range(4):
+#             t = f.readline().rstrip()
+#             if timestamp[i] == 0 and t != 'null':
+#                 timestamp[i] = int(t)
+
+# loadFriends(friendFiles, results)
+
+# return (results, countryFactors, tagFactors.items(), tagClassFactors.items(), nameFactors.items(), givennameFactors,timestamp, postsHisto)
+
+
+
+##############################################################################
+# Julia code starts from here
+##############################################################################
+
+# if length(ARGS) < 2
+#     println("arguments: <input dir> <output dir>")
+#     exit(1)
+# end
+
+# indir = ARGS[1] * "/"
+# outdir = ARGS[2] * "/"
+
+cd("/home/szarnyasg/git/ldbc/snb/ldbc_snb_datagen/paramgenerator")
+indir = "../hadoop/"
+outdir = "../substitution_out/"
+
+files = readdir(indir)
+activityFactorFiles = filter(f -> endswith(f, "activityFactors.txt"), files)
+#personFactorFiles = filter!(r"personFactors\.txt$", files)
+#friendsFiles = filter!(r"^m0friendList", files)
+
+println(activityFactorFiles)
+#println(personFactorFiles)
+#println(friendsFiles)
+
+# DefaultDict
+#import Pkg; Pkg.add("DataStructures")
+using DataStructures
+
+countryFactors = Dict{String,Int64}()
+tagClassFactors = DefaultDict{String,Int64}(0)
+tagFactors = DefaultDict{String,Int64}(0)
+nameFactors = DefaultDict{String,Int64}(0)
+
+activityFactorFile = activityFactorFiles[1]
+open(indir * activityFactorFile) do f
+    # read countryFactors
+    # example: India,464151
+    countryCount = parse(Int64, readline(f))
+    for i = 1:countryCount
+        line = split(readline(f), ",")
+        country = line[1]
+        population = parse(Int64, line[2])
+        countryFactors[country] = population
+    end
+
+    # read tag classes
+    # example: Thing,Thing,29737
+    tagClassCount = parse(Int64, readline(f))
+    for i = 1:tagClassCount
+        line = split(readline(f), ",")
+        tagClass = line[1]
+        count = parse(Int64, line[3])
+        tagClassFactors[tagClass] += count
+    end
+
+    # read tagFactors
+    # example: Hamid_Karzai,8815
+    # example: Frederick_III,_Holy_Roman_Emperor,19
+    tagCount = parse(Int64, readline(f))
+    for i = 1:tagCount
+        line = split(readline(f), ",")
+        tag = line[1]
+        count = parse(Int64, line[end])
+        tagFactors[tag] += + count
+    end
+
+    # read nameFactors
+    # example: Daisuke,20
+    nameCount = parse(Int64, readline(f))
+    for i = 1:nameCount
+        line = split(readline(f), ",")
+        name = line[1]
+        count = parse(Int64, line[2])
+        nameFactors[name] += + count
+    end
+
+    # the last 4 lines are timestamps
+    # TODO: copy the behaviour of the Py code
+    # if timestamp[i] == 0 and t != 'null':
+    #     timestamp[i] = int(t)
+    for i = 1:4
+        parse(Int64, readline(f))
+    end
+end
+
+countryFactors
+tagClassFactors
+tagFactors
+nameFactors
+
+tag_posts = tagFactors
+tag_posts = sort(collect(tag_posts), by=x->x[2], rev=true)
+
+total_posts = 0
+for (tag, count) in tag_posts
+   global total_posts += count
+end
+
+
+#def key_params(sample, lower_bound, upper_bound):
+#   results = []
+#   for key, count in sample:
+#      if count > lower_bound and count < upper_bound:
+#         results.append([key, count])
+#   return results
+
+function key_params(sample, lower_bound, upper_bound)
+    filter(e -> (lower_bound < e[2] && e[2] < upper_bound), sample)
+end
+
+bi6 = key_params(tag_posts, total_posts/1300, total_posts/900)
+bi6
diff --git a/paramgenerator/generateparamsbi.py b/paramgenerator/generateparamsbi.py
index a46831c28..7687ffcd3 100755
--- a/paramgenerator/generateparamsbi.py
+++ b/paramgenerator/generateparamsbi.py
@@ -10,11 +10,11 @@
 import readfactors
 from timeparameters import *
 
-START_DATE=datetime.strptime("2010-01-01", "%Y-%m-%d")
-END_DATE=datetime.strptime("2013-01-01", "%Y-%m-%d")
-
-def format_date(date):
-   return int(time.mktime(date.timetuple())*1000)
+# START_DATE=datetime.strptime("2010-01-01", "%Y-%m-%d")
+# END_DATE=datetime.strptime("2013-01-01", "%Y-%m-%d")
+#
+# def format_date(date):
+#   return int(time.mktime(date.timetuple())*1000)
 
 
 class ParamsWriter:
@@ -34,62 +34,62 @@ def append(self, params):
       self.file.write("\n")
 
 
-def post_date_right_open_range_params(sample, lower_bound, upper_bound):
-   results = []
-   for ix in range(0, len(sample)):
-      start_offset = sample[ix][0]
-      count_sum = 0
-      for offset, count in sample[ix:]:
-         count_sum += count
-      if count_sum > lower_bound and count_sum < upper_bound:
-         results.append([start_offset, count_sum])
-   return results
-
-def post_date_range_params(sample, lower_bound, upper_bound):
-   results = []
-   for ix in range(0, len(sample)):
-      start_offset = sample[ix][0]
-      count_sum = 0
-      for offset, count in sample[ix:]:
-         count_sum += count
-         if count_sum > lower_bound and count_sum < upper_bound:
-            results.append([[start_offset, offset], count_sum])
-   return results
-
-def post_month_params(sample, lower_bound, upper_bound):
-   results = []
-   for ix in range(0, len(sample)/4):
-      start_ix = ix*4
-      count_sum = 0
-      for offset, count in sample[start_ix:start_ix+4]:
-         count_sum += count
-      if count_sum > lower_bound and count_sum < upper_bound:
-         start_day = sample[start_ix][0]
-         end_day = sample[start_ix+4][0]
-         results.append([[start_day, end_day], count_sum])
-   return results
-
-def enumerate_path_bounds(minLength,maxLength,minDifference):
-  results = []
-  for i in range(minLength, maxLength):
-     for j in range(i+minDifference,maxLength):
-        results.append([i,j])
-  return results
-
-def prob_language_codes():
-  results = []
-  results.append(["ar"])
-  for i in range(0, 2):
-     results.append(["tk"])
-  for i in range(0, 8):
-     results.append(["uz"])
-  for i in range(0, 2):
-     results.append(["uz","tk"])
-  return results
-
-def prob_post_lengths():
-  results = [20,40,113,97,240]
-  return results
+# def post_date_right_open_range_params(sample, lower_bound, upper_bound):
+#    results = []
+#    for ix in range(0, len(sample)):
+#       start_offset = sample[ix][0]
+#       count_sum = 0
+#       for offset, count in sample[ix:]:
+#          count_sum += count
+#       if count_sum > lower_bound and count_sum < upper_bound:
+#          results.append([start_offset, count_sum])
+#    return results
+#
+# def post_date_range_params(sample, lower_bound, upper_bound):
+#    results = []
+#    for ix in range(0, len(sample)):
+#       start_offset = sample[ix][0]
+#       count_sum = 0
+#       for offset, count in sample[ix:]:
+#          count_sum += count
+#          if count_sum > lower_bound and count_sum < upper_bound:
+#             results.append([[start_offset, offset], count_sum])
+#    return results
+#
+# def post_month_params(sample, lower_bound, upper_bound):
+#    results = []
+#    for ix in range(0, len(sample)/4):
+#       start_ix = ix*4
+#       count_sum = 0
+#       for offset, count in sample[start_ix:start_ix+4]:
+#          count_sum += count
+#       if count_sum > lower_bound and count_sum < upper_bound:
+#          start_day = sample[start_ix][0]
+#          end_day = sample[start_ix+4][0]
+#          results.append([[start_day, end_day], count_sum])
+#    return results
+#
+# def enumerate_path_bounds(minLength,maxLength,minDifference):
+#   results = []
+#   for i in range(minLength, maxLength):
+#      for j in range(i+minDifference,maxLength):
+#         results.append([i,j])
+#   return results
+
+# def prob_language_codes():
+#   results = []
+#   results.append(["ar"])
+#   for i in range(0, 2):
+#      results.append(["tk"])
+#   for i in range(0, 8):
+#      results.append(["uz"])
+#   for i in range(0, 2):
+#      results.append(["uz","tk"])
+#   return results
+#
+# def prob_post_lengths():
+#   results = [20,40,113,97,240]
+#   return results
 
 def key_params(sample, lower_bound, upper_bound):
    results = []
@@ -98,210 +98,210 @@ def key_params(sample, lower_bound, upper_bound):
          results.append([key, count])
    return results
 
-def serialize_q1(outdir, post_weeks):
-   writer = ParamsWriter(outdir, 1, ["date"])
-   for week, count in post_weeks:
-      writer.append([str(week)])
-
-def serialize_q2(outdir, countries, post_day_ranges):
-   writer = ParamsWriter(outdir, 2, ["date1", "date2", "country1", "country2"])
-   for day_range, count_post in post_day_ranges:
-      for ix in range(0,len(countries)):
-         country_1, count_1 = countries[ix]
-         for country_2, count_2 in countries[ix+1:]:
-            writer.append([str(day_range[0]),str(day_range[1]),country_1,country_2])
-
-def serialize_q3(outdir, post_months):
-   writer = ParamsWriter(outdir, 3, ["year", "month"] )
-   for post_month in post_months:
-      t = time.gmtime(post_month[0][0]/1000)
-      writer.append([str(t.tm_year), str(t.tm_mon)])
-
-def serialize_q4(outdir, tagclasses, countries):
-   writer = ParamsWriter(outdir, 4, ["tagClass", "country"])
-   for tag, count_a in tagclasses:
-      for country, count_b in countries:
-         writer.append([tag,country])
-
-def serialize_q5(outdir, countries):
-   writer = ParamsWriter(outdir, 5, ["country"])
-   for country, count in countries:
-      writer.append([country])
-
+# def serialize_q1(outdir, post_weeks):
+#    writer = ParamsWriter(outdir, 1, ["date"])
+#    for week, count in post_weeks:
+#       writer.append([str(week)])
+#
+# def serialize_q2(outdir, countries, post_day_ranges):
+#    writer = ParamsWriter(outdir, 2, ["date1", "date2", "country1", "country2"])
+#    for day_range, count_post in post_day_ranges:
+#       for ix in range(0,len(countries)):
+#          country_1, count_1 = countries[ix]
+#          for country_2, count_2 in countries[ix+1:]:
+#             writer.append([str(day_range[0]),str(day_range[1]),country_1,country_2])
+#
+# def serialize_q3(outdir, post_months):
+#    writer = ParamsWriter(outdir, 3, ["year", "month"] )
+#    for post_month in post_months:
+#       t = time.gmtime(post_month[0][0]/1000)
+#       writer.append([str(t.tm_year), str(t.tm_mon)])
+#
+# def serialize_q4(outdir, tagclasses, countries):
+#    writer = ParamsWriter(outdir, 4, ["tagClass", "country"])
+#    for tag, count_a in tagclasses:
+#       for country, count_b in countries:
+#          writer.append([tag,country])
+#
+# def serialize_q5(outdir, countries):
+#    writer = ParamsWriter(outdir, 5, ["country"])
+#    for country, count in countries:
+#       writer.append([country])
+#
 
 def serialize_q6(outdir, tags):
    writer = ParamsWriter(outdir, 6, ["tag"])
    for tag, count in tags:
       writer.append([tag])
 
-def serialize_q7(outdir, tags):
-   writer = ParamsWriter(outdir, 7, ["tag"])
-   for tag, count in tags:
-      writer.append([tag])
-
-def serialize_q8(outdir, tags):
-   writer = ParamsWriter(outdir, 8, ["tag"])
-   for tag, count in tags:
-      writer.append([tag])
-
-def serialize_q9(outdir, tagclasses):
-   writer = ParamsWriter(outdir, 9, ["tagClass1", "tagClass2", "threshold"])
-   for ix in range(0,len(tagclasses)):
-      tag_class_a, count_a = tagclasses[ix]
-      for tag_class_b, count_b in tagclasses[ix+1:]:
-         writer.append([tag_class_a, tag_class_b, str(200)])
-
-def serialize_q10(outdir, tags, post_weeks):
-   writer = ParamsWriter(outdir, 10, ["tag", "date"])
-   for tag, count in tags:
-      for week, count in post_weeks:
-         writer.append([tag, str(week)])
-
-def serialize_q11(outdir, countries, bad_words):
-   writer = ParamsWriter(outdir, 11, ["country", "blacklist"])
-   random.seed(1988+1)
-   # note: this approach keeps shuffling the bad_words list
-   for country, count in countries:
-      num_words = random.randint(1,min(len(bad_words),4));
-      random.shuffle(bad_words)
-      blacklist = bad_words[0:num_words]
-      writer.append([country,";".join(blacklist)])
-
-      num_words = random.randint(1,min(len(bad_words),10));
-      random.shuffle(bad_words)
-      blacklist = bad_words[0:num_words]
-      writer.append([country,";".join(blacklist)])
-
-      num_words = random.randint(1,min(len(bad_words),7));
-      random.shuffle(bad_words)
-      blacklist = bad_words[0:num_words]
-      writer.append([country,";".join(blacklist)])
-
-def serialize_q12(outdir, post_weeks):
-   writer = ParamsWriter(outdir, 12, ["date", "likeThreshold"])
-   for week, count in post_weeks:
-      writer.append([str(week),str(400)])
-
-def serialize_q13(outdir, countries):
-   writer = ParamsWriter(outdir, 13, ["country"])
-   for country, count in countries:
-      writer.append([country])
-
-def serialize_q14(outdir, creationdates):
-   writer = ParamsWriter(outdir, 14, ["startDate", "endDate"])
-   for creation, count in creationdates:
-      writer.append([str(creation[0]),str(creation[1])])
-
-def serialize_q15(outdir, countries):
-   writer = ParamsWriter(outdir, 15, ["country"])
-   for country, count in countries:
-      writer.append([country])
-
-def serialize_q16(outdir, persons, tagclasses, countries, path_bounds):
-   writer = ParamsWriter(outdir, 16, ["person", "country", "tagClass", "minPathDistance", "maxPathDistance"])
-   random.seed(1988+2)
-   for country, count_b in countries:
-      for tagClass, count_a in tagclasses:
-         for minDist, maxDist in path_bounds:
-            writer.append([str(persons[random.randint(0, len(persons))]), country, tagClass, str(minDist), str(maxDist)])
-
-def serialize_q17(outdir, countries):
-   writer = ParamsWriter(outdir, 17, ["country"])
-   for country, count in countries:
-      writer.append([country])
-
-def serialize_q18(outdir, post_weeks, lengths, languages):
-   writer = ParamsWriter(outdir, 18, ["date", "lengthThreshold", "languages"])
-   for week, count in post_weeks:
-      for length in lengths:
-         for language_set in languages:
-            writer.append([str(week), str(length), ";".join(language_set)])
-
-def serialize_q19(outdir, tagclasses):
-   PERS_DATE=datetime.strptime("1989-1-1", "%Y-%m-%d")
-   writer = ParamsWriter(outdir, 19, ["date", "tagClass1", "tagClass2"])
-   for ix in range(0,len(tagclasses)):
-      tag_class_a, count_a = tagclasses[ix]
-      for tag_class_b, count_b in tagclasses[ix+1:]:
-         writer.append([str(format_date(PERS_DATE)),tag_class_a, tag_class_b])
-
-def serialize_q20(outdir, tagclasses):
-   random.seed(1988+3)
-   writer = ParamsWriter(outdir, 20, ["tagClasses"])
-
-   tagclasses = [tc[0] for tc in tagclasses]
-
-   # I'm not sure this is the correct way to approach this problem,
-   # but it should work reasonably well
-   num_words = random.randint(1,min(len(tagclasses),4));
-   random.shuffle(tagclasses)
-   tcs = tagclasses[0:num_words]
-   writer.append([";".join(tcs)])
-
-   num_words = random.randint(1,min(len(tagclasses),10));
-   random.shuffle(tagclasses)
-   tcs = tagclasses[0:num_words]
-   writer.append([";".join(tcs)])
-
-   num_words = random.randint(1,min(len(tagclasses),7));
-   random.shuffle(tagclasses)
-   tcs = tagclasses[0:num_words]
-   writer.append([";".join(tcs)])
-
-def serialize_q21(outdir, countries):
-   writer = ParamsWriter(outdir, 21, ["country", "endDate"])
-   for country, count in countries:
-      writer.append([country,str(format_date(END_DATE))])
-
-def serialize_q22(outdir, countries):
-   writer = ParamsWriter(outdir, 22, ["country1", "country2"])
-   for ix in range(0,len(countries)):
-      country_a, count_a = countries[ix]
-      for country_b, count_b in countries[ix+1:]:
-         writer.append([country_a, country_b])
-
-def serialize_q23(outdir, countries):
-   writer = ParamsWriter(outdir, 23, ["country"])
-   for country, count in countries:
-      writer.append([country])
-
-def serialize_q24(outdir, tagclasses):
-   writer = ParamsWriter(outdir, 24, ["tagClass"])
-   for tagclass, count in tagclasses:
-      writer.append([tagclass])
-
-def serialize_q25(outdir, persons, post_month_ranges):
-   writer = ParamsWriter(outdir, 25, ["person1Id", "person2Id", "startDate", "endDate"])
-   for day_range, count_post in post_month_ranges:
-      count = min(len(persons), 10)
-      for _ in range(0, count):
-         person1Id = persons[random.randint(0, len(persons) - 1)]
-         while True:
-            person2Id = persons[random.randint(0, len(persons) - 1)]
-            if person2Id != person1Id:
-               writer.append([str(person1Id), str(person2Id), str(day_range[0]), str(day_range[1])])
-               break
-
-
-def add_months(sourcedate,months):
-   month = sourcedate.month - 1 + months
-   year = int(sourcedate.year + month / 12 )
-   month = month % 12 + 1
-   day = min(sourcedate.day,calendar.monthrange(year,month)[1])
-   return sourcedate.replace(year, month, day)
-
-def convert_posts_histo(histogram):
-   week_posts = []
-   month = 0
-   while (histogram.existParam(month)):
-      monthTotal = histogram.getValue(month, "p")
-      baseDate=add_months(START_DATE,month)
-      week_posts.append([format_date(baseDate), monthTotal/4])
-      week_posts.append([format_date(baseDate+timedelta(days=7)), monthTotal/4])
-      week_posts.append([format_date(baseDate+timedelta(days=14)), monthTotal/4])
-      week_posts.append([format_date(baseDate+timedelta(days=21)), monthTotal/4])
-      month = month + 1
-   return week_posts
+# def serialize_q7(outdir, tags):
+#    writer = ParamsWriter(outdir, 7, ["tag"])
+#    for tag, count in tags:
+#       writer.append([tag])
+#
+# def serialize_q8(outdir, tags):
+#    writer = ParamsWriter(outdir, 8, ["tag"])
+#    for tag, count in tags:
+#       writer.append([tag])
+#
+# def serialize_q9(outdir, tagclasses):
+#    writer = ParamsWriter(outdir, 9, ["tagClass1", "tagClass2", "threshold"])
+#    for ix in range(0,len(tagclasses)):
+#       tag_class_a, count_a = tagclasses[ix]
+#       for tag_class_b, count_b in tagclasses[ix+1:]:
+#          writer.append([tag_class_a, tag_class_b, str(200)])
+#
+# def serialize_q10(outdir, tags, post_weeks):
+#    writer = ParamsWriter(outdir, 10, ["tag", "date"])
+#    for tag, count in tags:
+#       for week, count in post_weeks:
+#          writer.append([tag, str(week)])
+#
+# def serialize_q11(outdir, countries, bad_words):
+#    writer = ParamsWriter(outdir, 11, ["country", "blacklist"])
+#    random.seed(1988+1)
+#    # note: this approach keeps shuffling the bad_words list
+#    for country, count in countries:
+#       num_words = random.randint(1,min(len(bad_words),4));
+#       random.shuffle(bad_words)
+#       blacklist = bad_words[0:num_words]
+#       writer.append([country,";".join(blacklist)])
+#
+#       num_words = random.randint(1,min(len(bad_words),10));
+#       random.shuffle(bad_words)
+#       blacklist = bad_words[0:num_words]
+#       writer.append([country,";".join(blacklist)])
+#
+#       num_words = random.randint(1,min(len(bad_words),7));
+#       random.shuffle(bad_words)
+#       blacklist = bad_words[0:num_words]
+#       writer.append([country,";".join(blacklist)])
+#
+# def serialize_q12(outdir, post_weeks):
+#    writer = ParamsWriter(outdir, 12, ["date", "likeThreshold"])
+#    for week, count in post_weeks:
+#       writer.append([str(week),str(400)])
+#
+# def serialize_q13(outdir, countries):
+#    writer = ParamsWriter(outdir, 13, ["country"])
+#    for country, count in countries:
+#       writer.append([country])
+#
+# def serialize_q14(outdir, creationdates):
+#    writer = ParamsWriter(outdir, 14, ["startDate", "endDate"])
+#    for creation, count in creationdates:
+#       writer.append([str(creation[0]),str(creation[1])])
+#
+# def serialize_q15(outdir, countries):
+#    writer = ParamsWriter(outdir, 15, ["country"])
+#    for country, count in countries:
+#       writer.append([country])
+#
+# def serialize_q16(outdir, persons, tagclasses, countries, path_bounds):
+#    writer = ParamsWriter(outdir, 16, ["person", "country", "tagClass", "minPathDistance", "maxPathDistance"])
+#    random.seed(1988+2)
+#    for country, count_b in countries:
+#       for tagClass, count_a in tagclasses:
+#          for minDist, maxDist in path_bounds:
+#             writer.append([str(persons[random.randint(0, len(persons))]), country, tagClass, str(minDist), str(maxDist)])
+#
+# def serialize_q17(outdir, countries):
+#    writer = ParamsWriter(outdir, 17, ["country"])
+#    for country, count in countries:
+#       writer.append([country])
+#
+# def serialize_q18(outdir, post_weeks, lengths, languages):
+#    writer = ParamsWriter(outdir, 18, ["date", "lengthThreshold", "languages"])
+#    for week, count in post_weeks:
+#       for length in lengths:
+#          for language_set in languages:
+#             writer.append([str(week), str(length), ";".join(language_set)])
+#
+# def serialize_q19(outdir, tagclasses):
+#    PERS_DATE=datetime.strptime("1989-1-1", "%Y-%m-%d")
+#    writer = ParamsWriter(outdir, 19, ["date", "tagClass1", "tagClass2"])
+#    for ix in range(0,len(tagclasses)):
+#       tag_class_a, count_a = tagclasses[ix]
+#       for tag_class_b, count_b in tagclasses[ix+1:]:
+#          writer.append([str(format_date(PERS_DATE)),tag_class_a, tag_class_b])
+#
+# def serialize_q20(outdir, tagclasses):
+#    random.seed(1988+3)
+#    writer = ParamsWriter(outdir, 20, ["tagClasses"])
+#
+#    tagclasses = [tc[0] for tc in tagclasses]
+#
+#    # I'm not sure this is the correct way to approach this problem,
+#    # but it should work reasonably well
+#    num_words = random.randint(1,min(len(tagclasses),4));
+#    random.shuffle(tagclasses)
+#    tcs = tagclasses[0:num_words]
+#    writer.append([";".join(tcs)])
+#
+#    num_words = random.randint(1,min(len(tagclasses),10));
+#    random.shuffle(tagclasses)
+#    tcs = tagclasses[0:num_words]
+#    writer.append([";".join(tcs)])
+#
+#    num_words = random.randint(1,min(len(tagclasses),7));
+#    random.shuffle(tagclasses)
+#    tcs = tagclasses[0:num_words]
+#    writer.append([";".join(tcs)])
+#
+# def serialize_q21(outdir, countries):
+#    writer = ParamsWriter(outdir, 21, ["country", "endDate"])
+#    for country, count in countries:
+#       writer.append([country,str(format_date(END_DATE))])
+#
+# def serialize_q22(outdir, countries):
+#    writer = ParamsWriter(outdir, 22, ["country1", "country2"])
+#    for ix in range(0,len(countries)):
+#       country_a, count_a = countries[ix]
+#       for country_b, count_b in countries[ix+1:]:
+#          writer.append([country_a, country_b])
+#
+# def serialize_q23(outdir, countries):
+#    writer = ParamsWriter(outdir, 23, ["country"])
+#    for country, count in countries:
+#       writer.append([country])
+#
+# def serialize_q24(outdir, tagclasses):
+#    writer = ParamsWriter(outdir, 24, ["tagClass"])
+#    for tagclass, count in tagclasses:
+#       writer.append([tagclass])
+#
+# def serialize_q25(outdir, persons, post_month_ranges):
+#    writer = ParamsWriter(outdir, 25, ["person1Id", "person2Id", "startDate", "endDate"])
+#    for day_range, count_post in post_month_ranges:
+#       count = min(len(persons), 10)
+#       for _ in range(0, count):
+#          person1Id = persons[random.randint(0, len(persons) - 1)]
+#          while True:
+#             person2Id = persons[random.randint(0, len(persons) - 1)]
+#             if person2Id != person1Id:
+#                writer.append([str(person1Id), str(person2Id), str(day_range[0]), str(day_range[1])])
+#                break
+
+
+# def add_months(sourcedate,months):
+#    month = sourcedate.month - 1 + months
+#    year = int(sourcedate.year + month / 12 )
+#    month = month % 12 + 1
+#    day = min(sourcedate.day,calendar.monthrange(year,month)[1])
+#    return sourcedate.replace(year, month, day)
+
+# def convert_posts_histo(histogram):
+#    week_posts = []
+#    month = 0
+#    while (histogram.existParam(month)):
+#       monthTotal = histogram.getValue(month, "p")
+#       baseDate=add_months(START_DATE,month)
+#       week_posts.append([format_date(baseDate), monthTotal/4])
+#       week_posts.append([format_date(baseDate+timedelta(days=7)), monthTotal/4])
+#       week_posts.append([format_date(baseDate+timedelta(days=14)), monthTotal/4])
+#       week_posts.append([format_date(baseDate+timedelta(days=21)), monthTotal/4])
+#       month = month + 1
+#    return week_posts
 
 def main(argv=None):
    if argv is None:
@@ -325,24 +325,24 @@ def main(argv=None):
       if file.startswith("m0friendList"):
          friendsFiles.append(indir+file)
 
-   # read precomputed counts from files   
+   # read precomputed counts from files
    (personFactors, countryFactors, tagFactors, tagClassFactors, nameFactors, givenNames,  ts, postsHisto) = \
       readfactors.load(personFactorFiles,activityFactorFiles, friendsFiles)
-   week_posts = convert_posts_histo(postsHisto)
-
-   persons = []
-   for key, _ in personFactors.values.iteritems():
-      persons.append(key)
-   random.seed(1988)
-   random.shuffle(persons)
-
-   country_sample = []
-   for key, value in countryFactors.values.iteritems():
-      country_sample.append([key, value.getValue("p")])
-   country_sample.sort(key=lambda x: x[1], reverse=True)
-
-   tagclass_posts = tagClassFactors
-   tagclass_posts.sort(key=lambda x: x[1], reverse=True)
+   # week_posts = convert_posts_histo(postsHisto)
+
+   # persons = []
+   # for key, _ in personFactors.values.iteritems():
+   #    persons.append(key)
+   # random.seed(1988)
+   # random.shuffle(persons)
+   #
+   # country_sample = []
+   # for key, value in countryFactors.values.iteritems():
+   #    country_sample.append([key, value.getValue("p")])
+   # country_sample.sort(key=lambda x: x[1], reverse=True)
+
+   # tagclass_posts = tagClassFactors
+   # tagclass_posts.sort(key=lambda x: x[1], reverse=True)
 
    tag_posts = tagFactors
    tag_posts.sort(key=lambda x: x[1], reverse=True)
@@ -351,60 +351,60 @@ def main(argv=None):
    for day, count in tag_posts:
       total_posts += count
 
-   person_sum = 0
-   for country, count in country_sample:
-      person_sum += count
-
-   post_lower_threshold = 0.1*total_posts*0.9
-   post_upper_threshold = 0.1*total_posts*1.1
-   post_day_ranges = post_date_range_params(week_posts, post_lower_threshold, post_upper_threshold)
+   # person_sum = 0
+   # for country, count in country_sample:
+   #    person_sum += count
+   #
+   # post_lower_threshold = 0.1*total_posts*0.9
+   # post_upper_threshold = 0.1*total_posts*1.1
+   # post_day_ranges = post_date_range_params(week_posts, post_lower_threshold, post_upper_threshold)
    
-   bad_words = ['Augustine','William','James','with','Henry','Robert','from','Pope','Hippo','album','David','has','one','also','Green','which','that']
-   #post_lower_threshold = (total_posts/(week_posts[len(week_posts)-1][0]/7/4))*0.8
-   #post_upper_threshold = (total_posts/(week_posts[len(week_posts)-1][0]/7/4))*1.2
-   non_empty_weeks=len(week_posts)
-   for ix in range(0,len(week_posts)):
-      if week_posts[ix][1]==0:
-         non_empty_weeks-= 1
-
-   post_lower_threshold = (total_posts/(non_empty_weeks/4))*0.8
-   post_upper_threshold = (total_posts/(non_empty_weeks/4))*1.2
-   post_months = post_month_params(week_posts, post_lower_threshold, post_upper_threshold)
-
-   # the lower bound is inclusive and the upper bound is exclusive
-   path_bounds = enumerate_path_bounds(3, 6, 2)
-   language_codes = prob_language_codes()
-   post_lengths = prob_post_lengths()
-
-   serialize_q2 (outdir, key_params(country_sample, total_posts/200, total_posts/100), post_day_ranges) # TODO determine constants
-   serialize_q3 (outdir, post_months)
-   serialize_q14(outdir, post_months)
-
-   serialize_q1 (outdir, post_date_right_open_range_params(week_posts, 0.3*total_posts, 0.6*total_posts))
-   serialize_q12(outdir, post_date_right_open_range_params(week_posts, 0.3*total_posts, 0.6*total_posts))
-   serialize_q18(outdir, post_date_right_open_range_params(week_posts, 0.3*total_posts, 0.6*total_posts), post_lengths, language_codes)
-   serialize_q10(outdir, key_params(tag_posts, total_posts/900, total_posts/600), post_date_right_open_range_params(week_posts, 0.3*total_posts, 0.6*total_posts))
-
-   serialize_q4 (outdir, key_params(tagclass_posts, total_posts/20, total_posts/10), key_params(country_sample, total_posts/150, total_posts/50))
-   serialize_q5 (outdir, key_params(country_sample, total_posts/200, total_posts/100))
+   # bad_words = ['Augustine','William','James','with','Henry','Robert','from','Pope','Hippo','album','David','has','one','also','Green','which','that']
+   # #post_lower_threshold = (total_posts/(week_posts[len(week_posts)-1][0]/7/4))*0.8
+   # #post_upper_threshold = (total_posts/(week_posts[len(week_posts)-1][0]/7/4))*1.2
+   # non_empty_weeks=len(week_posts)
+   # for ix in range(0,len(week_posts)):
+   #    if week_posts[ix][1]==0:
+   #       non_empty_weeks-= 1
+   #
+   # post_lower_threshold = (total_posts/(non_empty_weeks/4))*0.8
+   # post_upper_threshold = (total_posts/(non_empty_weeks/4))*1.2
+   # post_months = post_month_params(week_posts, post_lower_threshold, post_upper_threshold)
+   #
+   # # the lower bound is inclusive and the upper bound is exclusive
+   # path_bounds = enumerate_path_bounds(3, 6, 2)
+   # language_codes = prob_language_codes()
+   # post_lengths = prob_post_lengths()
+   #
+   # serialize_q2 (outdir, key_params(country_sample, total_posts/200, total_posts/100), post_day_ranges) # TODO determine constants
+   # serialize_q3 (outdir, post_months)
+   # serialize_q14(outdir, post_months)
+   #
+   # serialize_q1 (outdir, post_date_right_open_range_params(week_posts, 0.3*total_posts, 0.6*total_posts))
+   # serialize_q12(outdir, post_date_right_open_range_params(week_posts, 0.3*total_posts, 0.6*total_posts))
+   # serialize_q18(outdir, post_date_right_open_range_params(week_posts, 0.3*total_posts, 0.6*total_posts), post_lengths, language_codes)
+   # serialize_q10(outdir, key_params(tag_posts, total_posts/900, total_posts/600), post_date_right_open_range_params(week_posts, 0.3*total_posts, 0.6*total_posts))
+   #
+   # serialize_q4 (outdir, key_params(tagclass_posts, total_posts/20, total_posts/10), key_params(country_sample, total_posts/150, total_posts/50))
+   # serialize_q5 (outdir, key_params(country_sample, total_posts/200, total_posts/100))
    serialize_q6 (outdir, key_params(tag_posts, total_posts/1300, total_posts/900))
-   serialize_q7 (outdir, key_params(tag_posts, total_posts/900, total_posts/600))
-   serialize_q8 (outdir, key_params(tag_posts, total_posts/600, total_posts/300))
-   serialize_q9 (outdir, key_params(tagclass_posts, 6000, 25000))
-   serialize_q13(outdir, key_params(country_sample, total_posts/200, total_posts/100))
-   serialize_q15(outdir, key_params(country_sample, total_posts/200, total_posts/100))
-   serialize_q16(outdir, persons, key_params(tagclass_posts, total_posts/30, total_posts/10), key_params(country_sample, total_posts/80, total_posts/20), path_bounds)
-   serialize_q17(outdir, key_params(country_sample, total_posts/200, total_posts/100))
-   serialize_q19(outdir, key_params(tagclass_posts, total_posts/60, total_posts/10))
-   serialize_q21(outdir, key_params(country_sample, total_posts/200, total_posts/100))
-   serialize_q22(outdir, key_params(country_sample, total_posts/120, total_posts/40))
-   serialize_q23(outdir, key_params(country_sample, total_posts/200, total_posts/100))
-   serialize_q24(outdir, key_params(tagclass_posts, total_posts/140, total_posts/5))
-   serialize_q25(outdir, persons, post_months)
-
-   # TODO: Refine
-   serialize_q20(outdir, key_params(tagclass_posts, total_posts/20, total_posts/2))
-   serialize_q11(outdir, key_params(country_sample, total_posts/80, total_posts/20), bad_words)
-
-if __name__ == "__main__":
-   sys.exit(main())
+   # serialize_q7 (outdir, key_params(tag_posts, total_posts/900, total_posts/600))
+   # serialize_q8 (outdir, key_params(tag_posts, total_posts/600, total_posts/300))
+   # serialize_q9 (outdir, key_params(tagclass_posts, 6000, 25000))
+   # serialize_q13(outdir, key_params(country_sample, total_posts/200, total_posts/100))
+   # serialize_q15(outdir, key_params(country_sample, total_posts/200, total_posts/100))
+   # serialize_q16(outdir, persons, key_params(tagclass_posts, total_posts/30, total_posts/10), key_params(country_sample, total_posts/80, total_posts/20), path_bounds)
+   # serialize_q17(outdir, key_params(country_sample, total_posts/200, total_posts/100))
+   # serialize_q19(outdir, key_params(tagclass_posts, total_posts/60, total_posts/10))
+   # serialize_q21(outdir, key_params(country_sample, total_posts/200, total_posts/100))
+   # serialize_q22(outdir, key_params(country_sample, total_posts/120, total_posts/40))
+   # serialize_q23(outdir, key_params(country_sample, total_posts/200, total_posts/100))
+   # serialize_q24(outdir, key_params(tagclass_posts, total_posts/140, total_posts/5))
+   # serialize_q25(outdir, persons, post_months)
+   #
+   # # TODO: Refine
+   # serialize_q20(outdir, key_params(tagclass_posts, total_posts/20, total_posts/2))
+   # serialize_q11(outdir, key_params(country_sample, total_posts/80, total_posts/20), bad_words)
+
+#if __name__ == "__main__":
+#   sys.exit(main())
diff --git a/paramgenerator/readfactors.py b/paramgenerator/readfactors.py
index 6427a072f..ee0c2df7d 100755
--- a/paramgenerator/readfactors.py
+++ b/paramgenerator/readfactors.py
@@ -67,26 +67,6 @@ def load(personFactorFiles,activityFactorFiles, friendFiles):
 	names = {}
 	timestamp = [0,0,0,0]
 
-	for inputfileName in personFactorFiles:
-		with codecs.open(inputfileName, "r", "utf-8") as f:
-			for line in f.readlines():
-				line = line.split(",")
-				person = int(line[0])
-				if not results.existParam(person):
-					results.addNewParam(person)
-				name = line[1]
-				givenNames.setValue(person, name)
-				results.addValue(person, "f", int(line[2]))
-				results.addValue(person, "p", int(line[3]))
-				results.addValue(person, "pl", int(line[4]))
-				results.addValue(person, "pt", int(line[5]))
-				results.addValue(person, "g", int(line[6]))
-				results.addValue(person, "w", int(line[7]))
-				results.addValue(person, "pr", int(line[8]))
-				for i in range((len(line)-9)/2):
-					if not postsHisto.existParam(i):
-						postsHisto.addNewParam(i)
-					postsHisto.addValue(i, "p", int(line[9+i]))
 
 	for inputFileName in activityFactorFiles:
 		with codecs.open(inputFileName, "r", "utf-8") as f:
@@ -98,13 +78,13 @@ def load(personFactorFiles,activityFactorFiles, friendFiles):
 					countries.addNewParam(country)
 				countries.addValue(country, "p", int(line[1]))
 
-			tagCount = int(f.readline())
-			for i in range(tagCount):
+			tagClassCount = int(f.readline())
+			for i in range(tagClassCount):
 				line = f.readline().split(",")
-				tag = line[0]
-				if not tag in tagClasses:
-					tagClasses[tag] = 0
-				tagClasses[tag] += int(line[2])
+				tagClass = line[0]
+				if not tagClass in tagClasses:
+					tagClasses[tagClass] = 0
+				tagClasses[tagClass] += int(line[2])
 
 			tagCount = int(f.readline())
 			for i in range(tagCount):
@@ -178,48 +158,48 @@ def getColumns(factors, columnNames):
 	return res
 
 
-def getFactorsForQuery(queryId, factors):
-
-	queryFactorDict = {
-		1: getColumns(factors, ["f", "ff"]),
-		2: getColumns(factors, [ "f", "fp"]),
-		3: getColumns(factors, ["ff", "ffp"]),
-		4: getColumns(factors, ["fp", "f",  "fpt"]),
-		5: getColumns(factors, ["ff", "ffg"]),	
-		6: getColumns(factors, ["f","ff", "ffp", "ffpt"]),
-		7: getColumns(factors, ["pl", "p"]),
-		8: getColumns(factors, ["pr","p"]), ### add "pr"
-		9: getColumns(factors, ["f", "ffp", "ff"]),
-		10: getColumns(factors, ["f","ff", "ffp", "ffpt"]),
-		11: getColumns(factors, ["f","ff", "ffw"]),
-		12: getColumns(factors, ["f", "fp"]), ### add "fpr"
-		13: getColumns(factors, ["ff"]),
-		14: getColumns(factors, ["ff"])
-	}
-
-	return queryFactorDict[queryId]
-
-def getCountryFactorsForQuery(queryId, factors):
-	queryFactorDict = {
-		3: getColumns(factors, ["p"]),
-		11: getColumns(factors, ["p"]) ### replace with "org"
-	}
-
-	return queryFactorDict[queryId]
-
-def getTagFactorsForQuery(queryId, factors):
-	queryFactorDict = {
-		6: getColumns(factors, ["p"]),
-	}
-
-	return queryFactorDict[queryId]
-
-if __name__ == "__main__":
-	argv = sys.argv
-	if len(argv)< 3:
-		print "arguments: <m0factors.txt> <m0friendsList.txt>"
-		sys.exit(1)
-
-	sys.exit(load(argv[1], argv[2]))
-
-
+# def getFactorsForQuery(queryId, factors):
+#
+# 	queryFactorDict = {
+# 		1: getColumns(factors, ["f", "ff"]),
+# 		2: getColumns(factors, [ "f", "fp"]),
+# 		3: getColumns(factors, ["ff", "ffp"]),
+# 		4: getColumns(factors, ["fp", "f",  "fpt"]),
+# 		5: getColumns(factors, ["ff", "ffg"]),
+# 		6: getColumns(factors, ["f","ff", "ffp", "ffpt"]),
+# 		7: getColumns(factors, ["pl", "p"]),
+# 		8: getColumns(factors, ["pr","p"]), ### add "pr"
+# 		9: getColumns(factors, ["f", "ffp", "ff"]),
+# 		10: getColumns(factors, ["f","ff", "ffp", "ffpt"]),
+# 		11: getColumns(factors, ["f","ff", "ffw"]),
+# 		12: getColumns(factors, ["f", "fp"]), ### add "fpr"
+# 		13: getColumns(factors, ["ff"]),
+# 		14: getColumns(factors, ["ff"])
+# 	}
+#
+# 	return queryFactorDict[queryId]
+#
+# def getCountryFactorsForQuery(queryId, factors):
+# 	queryFactorDict = {
+# 		3: getColumns(factors, ["p"]),
+# 		11: getColumns(factors, ["p"]) ### replace with "org"
+# 	}
+#
+# 	return queryFactorDict[queryId]
+#
+# def getTagFactorsForQuery(queryId, factors):
+# 	queryFactorDict = {
+# 		6: getColumns(factors, ["p"]),
+# 	}
+#
+# 	return queryFactorDict[queryId]
+#
+# if __name__ == "__main__":
+# 	argv = sys.argv
+# 	if len(argv)< 3:
+# 		print "arguments: <m0factors.txt> <m0friendsList.txt>"
+# 		sys.exit(1)
+#
+# 	sys.exit(load(argv[1], argv[2]))
+#
+#
diff --git a/substitution_out/.gitignore b/substitution_out/.gitignore
new file mode 100644
index 000000000..d6b7ef32c
--- /dev/null
+++ b/substitution_out/.gitignore
@@ -0,0 +1,2 @@
+*
+!.gitignore