diff --git a/apprecommender/data_collect/collect_data.sh b/apprecommender/data_collect/collect_data.sh deleted file mode 100755 index 87f5d31..0000000 --- a/apprecommender/data_collect/collect_data.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash - -echo "Para executar o AppRecommender as seguintes dependencias serao instaladas:" -echo "" -echo "python python-xapian python-apt python-cluster python-webpy" -echo "python-simplejson python-numpy apt-xapian-index python-xdg debtags" -echo "python-pip python-sklearn python-matplotlib python-stemmer" -echo "" -echo "Apos a instalação das dependencias os pacotes serao indexados ao xapian, que é o banco de dados utilizado pelo AppRecommender" -echo "" - -cd bin/data_collect/ -./install_dependencies.sh -cd - - -cd bin/ -echo "" -echo "Agora os dados do AppRecommender serao inicializados" -./apprec.py --init - -cd data_collect/ -echo "" -echo "Iniciando a coleta de dados" -./collect_user_data.py - -echo "" -echo "Desinstalando as dependencias do AppRecommender" -./remove_dependencies.sh - -echo "" -echo "" -echo "" -echo "Compacte o arquivo de log que está na home" -echo "o nome do arquivo comeca com 'app_recommender_log'" -echo "$ cd ~" -echo "$ tar -zcvf [nome_da_pasta].tar.gz [nome_da_pasta]" -echo "" -echo "Envie o arquivo compactado para um dos seguintes emails:" -echo "lucianopcbr@gmail.com" -echo "lucas.moura128@gmail.com" -echo "" -echo "Como titulo do e-mail utilize 'coleta de dados'" -echo "" -echo "" -echo "Obrigado por colaborar com nosso trabalho" -echo "" -echo "Att," -echo "Lucas Moura e Luciano Prestes" -echo "" diff --git a/apprecommender/data_collect/install_dependencies.sh b/apprecommender/data_collect/install_dependencies.sh deleted file mode 100755 index 0eb0c6b..0000000 --- a/apprecommender/data_collect/install_dependencies.sh +++ /dev/null @@ -1,3 +0,0 @@ -sudo apt-get install python-xapian python-cluster python-simplejson python-numpy apt-xapian-index debtags -y - -sudo update-apt-xapian-index diff --git a/apprecommender/data_collect/load_cross_validations.py b/apprecommender/data_collect/load_cross_validations.py deleted file mode 100755 index ca63d59..0000000 --- a/apprecommender/data_collect/load_cross_validations.py +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/env python - -import commands - -from load_data import get_folder_path, get_all_folders_path - - -def get_cross_validations_path(folders_path): - files = [] - for folder_path in folders_path: - all_files = commands.getoutput( - "ls {}".format(folder_path)).splitlines() - files += [folder_path + f for f in all_files - if f.startswith('cross_validation_result')] - - return files - - -def get_metrics_values(files_path): - metrics_values = {'S_Accuracy': [], 'Precision': [], 'Recall': [], - 'FPR': [], 'F(1.0)': []} - - for file_path in files_path: - with open(file_path, 'rb') as text: - lines = [line.strip() for line in text] - for line in lines: - line_split = line.split(':') - metric = line_split[0].strip() - - if metric in metrics_values.keys() and len(line_split[1]) > 0: - value = float(line_split[1]) - metrics_values[metric].append(value) - - return metrics_values - - -def convert_to_csv(metrics_values): - rows = [] - metrics = ';'.join(metrics_values.keys()) - rows.append(metrics) - - for index in range(len(metrics_values.values()[0])): - row = [] - for metric in metrics_values.keys(): - row.append(metrics_values[metric][index]) - - row = ';'.join(str(element) for element in row) - rows.append(row) - - return rows - - -def main(): - folder_path = get_folder_path() - all_folders_path = get_all_folders_path(folder_path) - files_path = get_cross_validations_path(all_folders_path) - metrics_values = get_metrics_values(files_path) - csv_rows = convert_to_csv(metrics_values) - - for row in csv_rows: - print row - - -if __name__ == '__main__': - main() diff --git a/apprecommender/data_collect/load_data.py b/apprecommender/data_collect/load_data.py deleted file mode 100644 index e55ea3e..0000000 --- a/apprecommender/data_collect/load_data.py +++ /dev/null @@ -1,59 +0,0 @@ -import commands -import os -import sys - - -def get_folder_path(): - usage_message = "Usage: {} [folder_path]".format(sys.argv[0]) - - if len(sys.argv) < 2: - print usage_message - exit(1) - - folder_path = sys.argv[1] - folder_path = os.path.expanduser(folder_path) - if not folder_path.endswith('/'): - folder_path += '/' - - if not os.path.exists(folder_path): - print usage_message - print "Folder do not exist" - exit(1) - - return folder_path - - -def get_all_folders_path(folder_path): - folders_path = commands.getoutput("ls {}".format(folder_path)).splitlines() - folders_path = [folder for folder in folders_path - if folder.startswith('app_recommender_log')] - folders_path = ["{}{}/".format(folder_path, folder) - for folder in folders_path] - - return folders_path - - -def get_csv_file_path(): - usage_message = "Usage: {} [csv_file_path]".format(sys.argv[0]) - - if len(sys.argv) < 2: - print usage_message - exit(1) - - csv_file_path = sys.argv[1] - csv_file_path = os.path.expanduser(csv_file_path) - - if not os.path.exists(csv_file_path): - print usage_message - print "CSV file not exists" - exit(1) - - return csv_file_path - - -def get_lines_from_csv_file(csv_file_path): - with open(csv_file_path, 'rb') as text: - lines = [line.strip() for line in text] - lines = [line.split(';') for line in lines] - - return lines diff --git a/apprecommender/data_collect/load_user_preferences.py b/apprecommender/data_collect/load_user_preferences.py deleted file mode 100755 index a584d43..0000000 --- a/apprecommender/data_collect/load_user_preferences.py +++ /dev/null @@ -1,157 +0,0 @@ -#!/usr/bin/env python - -import commands - -from load_data import get_folder_path, get_all_folders_path - - -def load_user_preferences(folder_path): - preferences_file = "{}user_preferences.txt".format(folder_path) - - user_preferences = {} - with open(preferences_file, 'rb') as text: - lines = [line.strip() for line in text] - user_preferences = dict([(line.split(':')[0], int(line.split(':')[1])) - for line in lines]) - - return user_preferences - - -def load_strategies(folder_path): - all_files = commands.getoutput("ls {}".format(folder_path)).splitlines() - files = [f for f in all_files if f.endswith('recommendation.txt')] - - strategies = {} - strategy_names = [f.split('_')[0] for f in files] - for strategy in strategy_names: - strategy_file = "{}{}_{}".format(folder_path, strategy, - 'recommendation.txt') - with open(strategy_file, 'rb') as text: - strategies[strategy] = [line.strip() for line in text] - - return strategies - - -def load_pc_informations(folder_path): - all_files = commands.getoutput("ls {}".format(folder_path)).splitlines() - files = [f for f in all_files if f.endswith('informations.txt')] - - informations = {} - pc_informations_file = '{}{}'.format(folder_path, files[0]) - valid_info = set(['distributor_id', 'codename']) - - with open(pc_informations_file, 'rb') as text: - for line in text: - if ':' not in line: - continue - - info = line.split(':') - info[0] = info[0].lower().replace(' ', '_') - - if info[0] in valid_info: - informations[info[0]] = info[1].strip() - - return informations - - -def get_strategies_score(strategies, user_preferences): - classifications = {1: 'bad', 2: 'redundant', 3: 'useful', - 4: 'useful_surprise'} - - strategies_score = {} - for strategy, pkgs in strategies.iteritems(): - strategies_score[strategy] = {'bad': 0, 'redundant': 0, 'useful': 0, - 'useful_surprise': 0} - - for pkg in pkgs: - classification = classifications[user_preferences[pkg]] - strategies_score[strategy][classification] += 1 - - return strategies_score - - -def print_strategies_score(strategies_score): - classifications = ['bad', 'redundant', 'useful', 'useful_surprise'] - - for strategy, score in strategies_score.iteritems(): - print "\nStrategy: {}".format(strategy) - - for classification in classifications: - print " {}: {}".format(classification, score[classification]) - print '\n' - - -def get_all_strategies_score(all_folders_path): - all_strategies_score = [] - for folder_path in all_folders_path: - strategies = load_strategies(folder_path) - user_preferences = load_user_preferences(folder_path) - strategies_score = get_strategies_score(strategies, user_preferences) - - all_strategies_score.append(strategies_score) - - return all_strategies_score - - -def get_all_pc_informations(all_folders_path): - all_pc_informations = [] - - for folder_path in all_folders_path: - pc_information = load_pc_informations(folder_path) - all_pc_informations.append(pc_information) - - return all_pc_informations - - -def convert_to_csv(all_strategies_score, all_pc_informations): - rows = [] - possible_strategies = sorted(all_strategies_score[0].keys()) - pc_info_header = sorted(all_pc_informations[0].keys()) - classifications = ['bad', 'redundant', 'useful', 'useful_surprise'] - - csv_header = "" - - for strategy in possible_strategies: - for classification in classifications: - csv_header += '{}_{};'.format(strategy, classification) - - for info in pc_info_header: - csv_header += '{};'.format(info) - - rows.append(csv_header[:-1]) - - for strategies_score in all_strategies_score: - row = [] - - for strategy, scores in sorted(strategies_score.items()): - for classification in classifications: - row.append(scores[classification]) - - row = ';'.join(str(element) for element in row) - rows.append(row) - - index = 1 - for pc_informations, row in zip(all_pc_informations, rows[1:]): - distributor_id = pc_informations['distributor_id'] - codename = pc_informations['codename'] - row = row + ';{};{}'.format(codename, distributor_id) - - rows[index] = row - index += 1 - - return rows - - -def main(): - folder_path = get_folder_path() - all_folders_path = get_all_folders_path(folder_path) - all_strategies_score = get_all_strategies_score(all_folders_path) - all_pc_informations = get_all_pc_informations(all_folders_path) - - csv_rows = convert_to_csv(all_strategies_score, all_pc_informations) - for row in csv_rows: - print row - - -if __name__ == '__main__': - main() diff --git a/apprecommender/data_collect/plot_cross_validations.py b/apprecommender/data_collect/plot_cross_validations.py deleted file mode 100755 index e9dcc56..0000000 --- a/apprecommender/data_collect/plot_cross_validations.py +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env python - -import matplotlib.pyplot as plt -import numpy as np - -from load_data import get_csv_file_path, get_lines_from_csv_file - - -def plot_cross_validation_averages(metrics_values): - values_plot = [] - metrics_plot = [] - for metric, values in metrics_values.iteritems(): - metrics_plot.append(metric) - values_plot.append(values) - - fig = plt.figure() - width = .35 - ind = np.arange(len(values_plot)) - plt.bar(ind, values_plot, width=width) - plt.xticks(ind + width / 2, metrics_plot) - plt.yticks(np.arange(0.0, 1.1, 0.1)) - - for a, b in zip(ind, values_plot): - plt.text(a + 0.17, b + 0.02, str(b)[0:5], ha='center') - - fig.autofmt_xdate() - - plt.show() - - -def load_csv_file(csv_file_path): - lines = get_lines_from_csv_file(csv_file_path) - - metrics = [metric for metric in lines[0]] - values = [map(float, line) for line in lines[1:]] - - return values, metrics - - -def get_metrics_values(values, metrics): - metrics_values = dict((metric, 0) for metric in metrics) - - for i in range(len(values)): - for j in range(len(values[i])): - metrics_values[metrics[j]] += values[i][j] - - for metric in metrics: - metrics_values[metric] /= len(values) - - return metrics_values - - -def main(): - csv_file_path = get_csv_file_path() - values, metrics = load_csv_file(csv_file_path) - metrics_values = get_metrics_values(values, metrics) - - plot_cross_validation_averages(metrics_values) - - -if __name__ == '__main__': - main() diff --git a/apprecommender/data_collect/plot_user_preferences.py b/apprecommender/data_collect/plot_user_preferences.py deleted file mode 100755 index ff246c4..0000000 --- a/apprecommender/data_collect/plot_user_preferences.py +++ /dev/null @@ -1,90 +0,0 @@ -#!/usr/bin/env python - -import matplotlib.pyplot as plt -import numpy as np - -from load_data import get_csv_file_path, get_lines_from_csv_file - -STRATEGIES = ['cbh', 'cbml', 'cbtm'] -CLASSIFICATIONS = ['Bad', 'Redundant', 'Useful', 'Useful Surprise'] - - -def autolabel(ax, rects, string_format): - for rect in rects: - height = rect.get_height() - ax.text(rect.get_x() + rect.get_width() / 2., - 1.02 * height, string_format % height, - ha='center', va='bottom') - - -def plot_strategies_score(strategies_score, classifications, title, ylabel, - plot_min, plot_max, plot_step, string_format='%d'): - colors = ['red', 'orange', 'yellow', 'green'] - - groups_number = len(strategies_score) - ind = np.arange(groups_number) - width = 0.2 - - rects = [] - fig, ax = plt.subplots() - for index, classification in enumerate(classifications): - values = [] - for _, score in strategies_score.iteritems(): - values.append(score[index]) - rects.append(ax.bar(ind + (width * index), values, width, - color=colors[index])) - - ax.set_ylabel(ylabel) - ax.set_title(title) - ax.set_xticks(ind + width) - ax.set_xticklabels(strategies_score.keys()) - - ax.legend([r[0] for r in rects], classifications) - - for rect in rects: - autolabel(ax, rect, string_format) - - plt.yticks(np.arange(plot_min, plot_max, plot_step)) - plt.show() - - -def load_csv_file(csv_file_path): - lines = get_lines_from_csv_file(csv_file_path) - - scores = [] - for line in lines[1:]: - begin = 0 - for strategy in STRATEGIES: - score = [strategy] + map(int, line[begin: begin + 4]) - begin += 4 - scores.append(score) - - return scores, CLASSIFICATIONS - - -def get_sum_of_scores(scores): - sum_scores = {} - - for score in scores: - classification = score[0] - if classification not in sum_scores.keys(): - sum_scores[classification] = [0] * (len(score) - 1) - - for index, value in enumerate(score[1:]): - sum_scores[classification][index] += value - - return sum_scores - - -def main(): - csv_file_path = get_csv_file_path() - scores, classifications = load_csv_file(csv_file_path) - sum_scores = get_sum_of_scores(scores) - - plot_strategies_score(sum_scores, classifications, - 'Amount by classification', 'Amount', 0.0, 55.0, - 5.0) - - -if __name__ == '__main__': - main()