Merge branch 'add_because_pkgs' into 'master'

Add because pkgs. See merge request !14
tassia · Jun 27, 2016 · 9a01816 · 9a01816
2 parents 42ec558 + 008f416
commit 9a01816
Show file tree

Hide file tree

Showing 8 changed files with 112 additions and 85 deletions.
diff --git a/apprecommender/app_recommender.py b/apprecommender/app_recommender.py
@@ -5,23 +5,26 @@
 
 from apprecommender.recommender import Recommender
 from apprecommender.user import LocalSystem
+from apprecommender.config import Config
 
 
 class AppRecommender:
     def __init__(self):
         self.recommender = Recommender()
+        self.config = Config()
 
-    def make_recommendation(self, recommendation_size):
+    def make_recommendation(self):
         begin_time = datetime.datetime.now()
         logging.info("Computation started at %s" % begin_time)
         # user = RandomPopcon(cfg.popcon_dir,os.path.join(cfg.filters_dir,
         #                                                 "desktopapps"))
         user = LocalSystem()
+        recommendation_size = Config().num_recommendations
         user_reccomendation = (self.recommender.get_recommendation(
                                user, recommendation_size))
 
         logging.info("Recommending applications for user %s" % user.user_id)
-        logging.info(user_reccomendation)
+        print (user_reccomendation)
 
         end_time = datetime.datetime.now()
         logging.info("Computation completed at %s" % end_time)

diff --git a/apprecommender/config.py b/apprecommender/config.py
@@ -96,6 +96,8 @@ def __init__(self):
             self.bm25_nl = 0.5
             # user content profile size
             self.profile_size = 10
+            self.num_recommendations = 8
+            self.because = False
             # neighborhood size
             self.k_neighbors = 50
             # popcon profiling method: full, voted

diff --git a/apprecommender/load_options.py b/apprecommender/load_options.py
@@ -15,67 +15,40 @@ def __init__(self):
 
     def load(self):
         config = Config()
-        short_options = "hdvo:f:b:a:e:p:m:u:l:c:x:w:s:z:r:n:idvo:tdvo"
-        long_options = ["help", "debug", "verbose", "output=", "filtersdir=",
-                        "pkgsfilter=", "axi=", "dde=", "popconindex=",
-                        "popcondir=", "indexmode=", "clustersdir=",
-                        "kmedoids=", "maxpopcon=", "weight=", "strategy=",
-                        "profile_size=", "profiling=", "neighbors=", "init",
-                        "train"]
+        # A trailing ':' (short) or '=' (long) marks an option that takes a
+        # value: -b/--because is a plain flag, -n/--num-recommendations
+        # carries the number of packages to recommend.
+        short_options = 'hdvo:f:ba:e:p:m:u:l:c:x:w:s:z:r:n:idvo:tdvo'
+        long_options = ['help', 'debug', 'verbose', 'kmedoids=', 'maxpopcon=',
+                        'weight=', 'strategy=', 'profile_size=', 'init',
+                        'train', 'because', 'num-recommendations=']
         try:
             opts, args = getopt.getopt(sys.argv[1:], short_options,
                                        long_options)
             self.options = opts
         except getopt.GetoptError as error:
             config.set_logger()
-            logging.error("Bad syntax: %s" % str(error))
+            logging.error('Bad syntax: {}'.format(str(error)))
             self.usage()
             sys.exit()
 
         for o, p in opts:
-            if o in ("-h", "--help"):
+            if o in ('-h', '--help'):
                 self.usage()
                 sys.exit()
-            elif o in ("-d", "--debug"):
+            elif o in ('-d', '--debug'):
                 config.debug = 1
-            elif o in ("-v", "--verbose"):
+            elif o in ('-v', '--verbose'):
                 config.verbose = 1
-            elif o in ("-o", "--output"):
-                config.output = p
-            elif o in ("-f", "--filtersdir"):
-                config.filters_dir = p
-            elif o in ("-b", "--pkgsfilter"):
-                config.pkgs_filter = p
-            elif o in ("-a", "--axi"):
-                config.axi = p
-            elif o in ("-e", "--dde"):
-                config.dde_url = p
-            elif o in ("-p", "--popconindex"):
-                config.popcon_index = p
-            elif o in ("-m", "--popcondir"):
-                config.popcon_dir = p
-            elif o in ("-u", "--index_mode"):
-                config.index_mode = p
-            elif o in ("-l", "--clustersdir"):
-                config.clusters_dir = p
-            elif o in ("-c", "--kmedoids"):
-                config.k_medoids = int(p)
-            elif o in ("-x", "--max_popcon"):
-                config.max_popcon = int(p)
-            elif o in ("-w", "--weight"):
-                config.weight = p
-            elif o in ("-s", "--strategy"):
+            elif o in ('-s', '--strategy'):
                 config.strategy = p
-            elif o in ("-z", "--profile_size"):
+            elif o in ('-z', '--profile_size'):
                 config.profile_size = int(p)
-            elif o in ("-z", "--profiling"):
-                config.profiling = p
-            elif o in ("-n", "--neighbors"):
-                config.k_neighbors = int(p)
-            elif o in ("-i", "--init"):
+            elif o in ('-i', '--init'):
                 continue
-            elif o in ("-t", "--train"):
+            elif o in ('-t', '--train'):
                 continue
+            elif o in ('-b', '--because'):
+                config.because = True
+            elif o in ('-n', '--num-recommendations'):
+                config.num_recommendations = int(p)
             else:
                 assert False, "unhandled option"
 
@@ -89,6 +62,10 @@ def usage(self):
         print "  -i, --init                 Initialize AppRecommender data"
         print "  -t, --train                Make training of AppRecommender" \
               " machine learning"
+        print "  -n, --num-recommendations  Set the number of packages that" \
+              " will be recommended"
+        print "  -b, --because              Display user packages that" \
+              " generated a given recommendation"
         print "  -d, --debug                Set logging level to debug"
         print "  -v, --verbose              Set logging level to verbose"
         print "  -o, --output=PATH          Path to file to save output"

diff --git a/apprecommender/recommender.py b/apprecommender/recommender.py
@@ -19,11 +19,17 @@
     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 """
 
+import apt
+import heapq
 import logging
-import os
-import xapian
 import operator
+import os
 import strategy
+import xapian
+
+from collections import namedtuple
+from fuzzywuzzy import fuzz
+from operator import attrgetter
 
 from apprecommender.config import Config
 
@@ -32,29 +38,60 @@ class RecommendationResult:
     """
     Class designed to describe a recommendation result: items and scores.
     """
-    def __init__(self, item_score, ranking=0, limit=0):
+    def __init__(self, item_score, ranking=0, limit=0, user_profile=None):
         """
         Set initial parameters.
         """
         self.item_score = item_score
         self.size = len(item_score)
         self.limit = limit
+        self.cache = apt.Cache()
+        self.pkg_descriptions = {}
+
         if ranking:
             self.ranking = ranking
 
+        if user_profile:
+            self.fill_pkg_descriptions(user_profile)
+
+    def fill_pkg_descriptions(self, user_profile):
+        """
+        Store the lower-cased description of each package in user_profile.
+
+        Packages that the apt cache does not know, or that have no
+        candidate version, are skipped because they have no description
+        to compare against.
+        """
+        for pkg in user_profile:
+            if pkg not in self.cache:
+                continue
+
+            candidate = self.cache[pkg].candidate
+            if candidate is None:
+                continue
+
+            self.pkg_descriptions[pkg] = candidate.description.lower()
+
     def __str__(self):
         """
         String representation of the object.
         """
-        # [FIXME] try alternative way to get pkgs summarys (efficiency)
-        # cache = apt.Cache()
         result = self.get_prediction(self.limit)
-        str = "\n"
-        for i in range(len((list(result)))):
-            # summary = cache[result[i][0]].candidate.summary
-            # str += "%2d: %s\t\t- %s\n" % (i,result[i][0],summary)
-            str += "%2d: %s\n" % (i, result[i][0])
-        return str
+        # Collect the pieces and join once instead of growing a string.
+        lines = ['\n']
+
+        for index, (pkg, _) in enumerate(result, 1):
+            candidate = self.cache[pkg].candidate
+            lines.append('{}: {} \t {}\n'.format(
+                index, pkg.ljust(20), candidate.summary))
+
+            # The full description is only needed to explain the
+            # recommendation, so skip the lookup otherwise.
+            if self.pkg_descriptions:
+                because_pkgs = self.get_because(candidate.description.lower())
+                lines.append('   because you installed: \t {}\n\n'.format(
+                    ', '.join(because_pkgs)))
+
+        return ''.join(lines)
+
+    def get_because(self, rec_description):
+        """
+        Return up to four user packages whose descriptions are most similar
+        to rec_description, most similar first.
+
+        Similarity is the fuzz.ratio score between rec_description and each
+        description stored in self.pkg_descriptions.
+        """
+        def similarity(pkg):
+            return fuzz.ratio(rec_description, self.pkg_descriptions[pkg])
+
+        # Rank the package names directly; no per-call namedtuple class is
+        # needed just to carry the score next to the name.
+        return heapq.nlargest(4, self.pkg_descriptions, key=similarity)
 
     def get_prediction(self, limit=0):
         """

diff --git a/apprecommender/strategy.py b/apprecommender/strategy.py
@@ -26,6 +26,7 @@
 import operator
 import os
 import pickle
+import re
 import recommender
 import xapian
 
@@ -70,11 +71,12 @@ def __init__(self, content, profile_size):
         self.profile_size = profile_size
 
     def get_sugestion_from_profile(self, rec, user, profile,
-                                   recommendation_size):
+                                   recommendation_size, because=True):
         query = xapian.Query(xapian.Query.OP_OR, profile)
         enquire = xapian.Enquire(rec.items_repository)
         enquire.set_weighting_scheme(rec.weight)
         enquire.set_query(query)
+        user_profile = None
         # Retrieve matching packages
         try:
             mset = enquire.get_mset(0, recommendation_size, None,
@@ -89,7 +91,11 @@ def get_sugestion_from_profile(self, rec, user, profile,
             item_score[m.document.get_data()] = m.weight
             ranking.append(m.document.get_data())
 
-        result = recommender.RecommendationResult(item_score, ranking)
+        if because and Config().because:
+            user_profile = user.pkg_profile
+
+        result = recommender.RecommendationResult(
+            item_score, ranking, user_profile=user_profile)
         return result
 
     def run(self, rec, user, rec_size):
@@ -412,12 +418,17 @@ def get_pkgs_and_scores(self, rec, user):
         profile = user.content_profile(rec.items_repository, self.content,
                                        self.suggestion_size, rec.valid_tags)
 
-        content_based = self.get_sugestion_from_profile(rec, user,
-                                                        profile,
-                                                        self.suggestion_size)
+        content_based = self.get_sugestion_from_profile(
+            rec, user, profile, self.suggestion_size, because=False)
         pkgs, pkgs_score = [], {}
+
         for pkg_line in str(content_based).splitlines()[1:]:
-            pkg = pkg_line.split(':')[1][1:]
+            # Package names may contain '+' and '.' (e.g. libstdc++6), so
+            # take everything up to the padding that follows the name.
+            match = re.search(r'\d+:\s(\S+)', pkg_line)
+
+            # re.search returns None, not an empty match, for other lines.
+            if match is None:
+                continue
+
+            pkg = match.group(1)
             pkg_score = int(pkg_line.split(':')[0].strip())
 
             pkgs.append(pkg)
@@ -435,7 +446,6 @@ def get_pkgs_classifications(self, pkgs, terms_name, debtags_name):
         kwargs['ml_strategy'] = ml_strategy
 
         for pkg in pkgs:
-
             if pkg not in self.cache:
                 continue
 
@@ -496,6 +506,8 @@ def run_train(cls, pkgs_classifications):
         raise NotImplementedError("Method not implemented.")
 
     def run(self, rec, user, rec_size):
+        user_profile = None
+
         terms_name, debtags_name = self.load_terms_and_debtags()
 
         pkgs, pkgs_score = self.get_pkgs_and_scores(rec, user)
@@ -504,9 +516,12 @@ def run(self, rec, user, rec_size):
                                                              debtags_name)
 
         item_score = self.get_item_score(pkgs_score, pkgs_classifications)
-        result = recommender.RecommendationResult(item_score, limit=rec_size)
 
-        return result
+        if Config().because:
+            user_profile = user.pkg_profile
+
+        return recommender.RecommendationResult(
+            item_score, limit=rec_size, user_profile=user_profile)
 
 
 class MachineLearningBVA(MachineLearning):

diff --git a/apprecommender/tests/test_recommender.py b/apprecommender/tests/test_recommender.py
@@ -29,21 +29,25 @@
 
 
 class RecommendationResultTests(unittest.TestCase):
+
     @classmethod
     def setUpClass(self):
         self.result = RecommendationResult({"gimp": 1.5, "inkscape": 3.0,
                                             "eog": 1})
 
     def test_str(self):
-        string = "\n 0: inkscape\n 1: gimp\n 2: eog\n"
-        self.assertEqual(self.result.__str__(), string)
+        rec = '\n1: inkscape             \t vector-based drawing program\n'
+        rec += '2: gimp                 \t The GNU Image Manipulation Program\n'  # noqa
+        rec += '3: eog                  \t Eye of GNOME graphics viewer program\n'  # noqa
+        self.assertEqual(self.result.__str__(), rec)
 
     def test_get_prediction(self):
         prediction = [("inkscape", 3.0), ("gimp", 1.5), ("eog", 1)]
         self.assertEqual(self.result.get_prediction(), prediction)
 
 
 class RecommenderTests(unittest.TestCase):
+
     @classmethod
     def setUpClass(self):
         cfg = Config()
@@ -78,8 +82,6 @@ def test_set_strategy(self):
         self.rec.set_strategy("mlbow_eset")
         self.assertIsInstance(self.rec.strategy, MachineLearningBOW)
         self.assertEqual(self.rec.strategy.content, "mlbow_mix_eset")
-        # self.rec.set_strategy("knn")
-        # self.assertIsInstance(self.rec.strategy,Collaborative)
 
     def test_get_recommendation(self):
         user = User({"inkscape": 1, "gimp": 1, "eog": 1})