Skip to content

Commit

Permalink
Merge branch 'add_because_pkgs' into 'master'
Browse files Browse the repository at this point in the history
Add because pkgs



See merge request !14
  • Loading branch information
LucianoPC committed Jun 27, 2016
2 parents 42ec558 + 008f416 commit 9a01816
Show file tree
Hide file tree
Showing 8 changed files with 112 additions and 85 deletions.
7 changes: 5 additions & 2 deletions apprecommender/app_recommender.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,26 @@

from apprecommender.recommender import Recommender
from apprecommender.user import LocalSystem
from apprecommender.config import Config


class AppRecommender:
def __init__(self):
    """Create the recommender and the configuration used by this object."""
    self.recommender = Recommender()
    self.config = Config()

def make_recommendation(self, recommendation_size):
def make_recommendation(self):
begin_time = datetime.datetime.now()
logging.info("Computation started at %s" % begin_time)
# user = RandomPopcon(cfg.popcon_dir,os.path.join(cfg.filters_dir,
# "desktopapps"))
user = LocalSystem()
recommendation_size = Config().num_recommendations
user_reccomendation = (self.recommender.get_recommendation(
user, recommendation_size))

logging.info("Recommending applications for user %s" % user.user_id)
logging.info(user_reccomendation)
print (user_reccomendation)

end_time = datetime.datetime.now()
logging.info("Computation completed at %s" % end_time)
Expand Down
2 changes: 2 additions & 0 deletions apprecommender/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ def __init__(self):
self.bm25_nl = 0.5
# user content profile size
self.profile_size = 10
self.num_recommendations = 8
self.because = False
# neighborhood size
self.k_neighbors = 50
# popcon profiling method: full, voted
Expand Down
63 changes: 20 additions & 43 deletions apprecommender/load_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,67 +15,40 @@ def __init__(self):

def load(self):
config = Config()
short_options = "hdvo:f:b:a:e:p:m:u:l:c:x:w:s:z:r:n:idvo:tdvo"
long_options = ["help", "debug", "verbose", "output=", "filtersdir=",
"pkgsfilter=", "axi=", "dde=", "popconindex=",
"popcondir=", "indexmode=", "clustersdir=",
"kmedoids=", "maxpopcon=", "weight=", "strategy=",
"profile_size=", "profiling=", "neighbors=", "init",
"train"]
short_options = 'hdvo:f:b:a:e:p:m:u:l:c:x:w:s:z:r:n:idvo:tdvo'
long_options = ['help', 'debug', 'verbose', 'kmedoids=', 'maxpopcon=',
'weight=', 'strategy=', 'profile_size=', 'init',
'train', 'because', 'nrecommendation']
try:
opts, args = getopt.getopt(sys.argv[1:], short_options,
long_options)
self.options = opts
except getopt.GetoptError as error:
config.set_logger()
logging.error("Bad syntax: %s" % str(error))
logging.error('Bad syntax: {}'.format(str(error)))
self.usage()
sys.exit()

for o, p in opts:
if o in ("-h", "--help"):
if o in ('-h', '--help'):
self.usage()
sys.exit()
elif o in ("-d", "--debug"):
elif o in ('-d', '--debug'):
config.debug = 1
elif o in ("-v", "--verbose"):
elif o in ('-v', '--verbose'):
config.verbose = 1
elif o in ("-o", "--output"):
config.output = p
elif o in ("-f", "--filtersdir"):
config.filters_dir = p
elif o in ("-b", "--pkgsfilter"):
config.pkgs_filter = p
elif o in ("-a", "--axi"):
config.axi = p
elif o in ("-e", "--dde"):
config.dde_url = p
elif o in ("-p", "--popconindex"):
config.popcon_index = p
elif o in ("-m", "--popcondir"):
config.popcon_dir = p
elif o in ("-u", "--index_mode"):
config.index_mode = p
elif o in ("-l", "--clustersdir"):
config.clusters_dir = p
elif o in ("-c", "--kmedoids"):
config.k_medoids = int(p)
elif o in ("-x", "--max_popcon"):
config.max_popcon = int(p)
elif o in ("-w", "--weight"):
config.weight = p
elif o in ("-s", "--strategy"):
elif o in ('-s', '--strategy'):
config.strategy = p
elif o in ("-z", "--profile_size"):
elif o in ('-z', '--profile_size'):
config.profile_size = int(p)
elif o in ("-z", "--profiling"):
config.profiling = p
elif o in ("-n", "--neighbors"):
config.k_neighbors = int(p)
elif o in ("-i", "--init"):
elif o in ('-i', '--init'):
continue
elif o in ("-t", "--train"):
elif o in ('-t', '--train'):
continue
elif o in ('-b', '--because'):
config.because = True
elif o in ('-n', '--num-recommendations'):
config.num_recommendations = int(p)
else:
assert False, "unhandled option"

Expand All @@ -89,6 +62,10 @@ def usage(self):
print " -i, --init Initialize AppRecommender data"
print " -t, --train Make training of AppRecommender" \
" machine learning"
print " -n, --num-recommendations Set the number of packages that" \
" will be recommended"
print " -b, --because Display user packages that" \
" generated a given recommendation"
print " -d, --debug Set logging level to debug"
print " -v, --verbose Set logging level to verbose"
print " -o, --output=PATH Path to file to save output"
Expand Down
59 changes: 48 additions & 11 deletions apprecommender/recommender.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,17 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""

import apt
import heapq
import logging
import os
import xapian
import operator
import os
import strategy
import xapian

from collections import namedtuple
from fuzzywuzzy import fuzz
from operator import attrgetter

from apprecommender.config import Config

Expand All @@ -32,29 +38,60 @@ class RecommendationResult:
"""
Class designed to describe a recommendation result: items and scores.
"""
def __init__(self, item_score, ranking=0, limit=0):
def __init__(self, item_score, ranking=0, limit=0, user_profile=None):
"""
Set initial parameters.
"""
self.item_score = item_score
self.size = len(item_score)
self.limit = limit
self.cache = apt.Cache()
self.pkg_descriptions = {}

if ranking:
self.ranking = ranking

if user_profile:
self.fill_pkg_descriptions(user_profile)

def fill_pkg_descriptions(self, user_profile):
    """
    Store the lower-cased candidate description of each profile package.

    For every package name in `user_profile`, the description of its
    candidate version is read from `self.cache` and saved, lower-cased,
    in `self.pkg_descriptions` under the package name.

    Packages that are not present in the cache, or that have no
    candidate version, are skipped instead of aborting the whole
    recommendation with a KeyError/AttributeError.
    """
    for pkg in user_profile:
        try:
            candidate = self.cache[pkg].candidate
        except KeyError:
            # Package name unknown to the cache: nothing to describe.
            continue

        if candidate is None:
            # No candidate version available, hence no description.
            continue

        self.pkg_descriptions[pkg] = candidate.description.lower()

def __str__(self):
"""
String representation of the object.
"""
# [FIXME] try alternative way to get pkgs summarys (efficiency)
# cache = apt.Cache()
result = self.get_prediction(self.limit)
str = "\n"
for i in range(len((list(result)))):
# summary = cache[result[i][0]].candidate.summary
# str += "%2d: %s\t\t- %s\n" % (i,result[i][0],summary)
str += "%2d: %s\n" % (i, result[i][0])
return str
rec_str = '\n'
index = 1

for pkg, _ in result:
summary = self.cache[pkg].candidate.summary
description = self.cache[pkg].candidate.description
rec_str += '{}: {} \t {}\n'.format(
index, pkg.ljust(20), summary)

if self.pkg_descriptions:
because_pkgs = self.get_because(description.lower())
rec_str += ' because you installed: \t {}\n\n'.format(
', '.join(because_pkgs))

index += 1

return rec_str

def get_because(self, rec_description, limit=4):
    """
    Return the user packages most similar to a recommended package.

    Each description stored in `self.pkg_descriptions` is compared with
    `rec_description` through `fuzz.ratio`, and the names of the
    packages with the highest ratios are returned, best match first.

    rec_description -- description text of the recommended package.
    limit -- maximum number of package names to return (default 4).
    """
    # Plain (pkg, ratio) tuples: avoids building a namedtuple class on
    # every call, which happens once per recommended package.
    scored = ((pkg, fuzz.ratio(rec_description, description))
              for pkg, description in self.pkg_descriptions.items())

    best = heapq.nlargest(limit, scored, key=operator.itemgetter(1))
    return [pkg for pkg, _ in best]

def get_prediction(self, limit=0):
"""
Expand Down
33 changes: 24 additions & 9 deletions apprecommender/strategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import operator
import os
import pickle
import re
import recommender
import xapian

Expand Down Expand Up @@ -70,11 +71,12 @@ def __init__(self, content, profile_size):
self.profile_size = profile_size

def get_sugestion_from_profile(self, rec, user, profile,
recommendation_size):
recommendation_size, because=True):
query = xapian.Query(xapian.Query.OP_OR, profile)
enquire = xapian.Enquire(rec.items_repository)
enquire.set_weighting_scheme(rec.weight)
enquire.set_query(query)
user_profile = None
# Retrieve matching packages
try:
mset = enquire.get_mset(0, recommendation_size, None,
Expand All @@ -89,7 +91,11 @@ def get_sugestion_from_profile(self, rec, user, profile,
item_score[m.document.get_data()] = m.weight
ranking.append(m.document.get_data())

result = recommender.RecommendationResult(item_score, ranking)
if because and Config().because:
user_profile = user.pkg_profile

result = recommender.RecommendationResult(
item_score, ranking, user_profile=user_profile)
return result

def run(self, rec, user, rec_size):
Expand Down Expand Up @@ -412,12 +418,17 @@ def get_pkgs_and_scores(self, rec, user):
profile = user.content_profile(rec.items_repository, self.content,
self.suggestion_size, rec.valid_tags)

content_based = self.get_sugestion_from_profile(rec, user,
profile,
self.suggestion_size)
content_based = self.get_sugestion_from_profile(
rec, user, profile, self.suggestion_size, because=False)
pkgs, pkgs_score = [], {}

for pkg_line in str(content_based).splitlines()[1:]:
pkg = pkg_line.split(':')[1][1:]
pkg = re.search(r'\d+:\s([\w-]+)', pkg_line)

if not pkg.groups():
continue

pkg = pkg.groups()[0]
pkg_score = int(pkg_line.split(':')[0].strip())

pkgs.append(pkg)
Expand All @@ -435,7 +446,6 @@ def get_pkgs_classifications(self, pkgs, terms_name, debtags_name):
kwargs['ml_strategy'] = ml_strategy

for pkg in pkgs:

if pkg not in self.cache:
continue

Expand Down Expand Up @@ -496,6 +506,8 @@ def run_train(cls, pkgs_classifications):
raise NotImplementedError("Method not implemented.")

def run(self, rec, user, rec_size):
user_profile = None

terms_name, debtags_name = self.load_terms_and_debtags()

pkgs, pkgs_score = self.get_pkgs_and_scores(rec, user)
Expand All @@ -504,9 +516,12 @@ def run(self, rec, user, rec_size):
debtags_name)

item_score = self.get_item_score(pkgs_score, pkgs_classifications)
result = recommender.RecommendationResult(item_score, limit=rec_size)

return result
if Config().because:
user_profile = user.pkg_profile

return recommender.RecommendationResult(
item_score, limit=rec_size, user_profile=user_profile)


class MachineLearningBVA(MachineLearning):
Expand Down
10 changes: 6 additions & 4 deletions apprecommender/tests/test_recommender.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,21 +29,25 @@


class RecommendationResultTests(unittest.TestCase):

@classmethod
def setUpClass(self):
self.result = RecommendationResult({"gimp": 1.5, "inkscape": 3.0,
"eog": 1})

def test_str(self):
string = "\n 0: inkscape\n 1: gimp\n 2: eog\n"
self.assertEqual(self.result.__str__(), string)
rec = '\n1: inkscape \t vector-based drawing program\n'
rec += '2: gimp \t The GNU Image Manipulation Program\n' # noqa
rec += '3: eog \t Eye of GNOME graphics viewer program\n' # noqa
self.assertEqual(self.result.__str__(), rec)

def test_get_prediction(self):
prediction = [("inkscape", 3.0), ("gimp", 1.5), ("eog", 1)]
self.assertEqual(self.result.get_prediction(), prediction)


class RecommenderTests(unittest.TestCase):

@classmethod
def setUpClass(self):
cfg = Config()
Expand Down Expand Up @@ -78,8 +82,6 @@ def test_set_strategy(self):
self.rec.set_strategy("mlbow_eset")
self.assertIsInstance(self.rec.strategy, MachineLearningBOW)
self.assertEqual(self.rec.strategy.content, "mlbow_mix_eset")
# self.rec.set_strategy("knn")
# self.assertIsInstance(self.rec.strategy,Collaborative)

def test_get_recommendation(self):
user = User({"inkscape": 1, "gimp": 1, "eog": 1})
Expand Down
Loading

0 comments on commit 9a01816

Please sign in to comment.