Skip to content

Commit

Permalink
Merge branch 'sklearn_stopwords' into 'master'
Browse files (browse the repository at this point in the history)
Use sklearn stopwords instead of external file



See merge request !5
  • Loading branch information
LucianoPC committed May 29, 2016
2 parents ee45ae3 + 3bebf21 commit 45784ae
Show file tree
Hide file tree
Showing 6 changed files with 12 additions and 188 deletions.
1 change: 0 additions & 1 deletion apprecommender/config.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -64,7 +64,6 @@ def __init__(self):
self.axi_programs = os.path.join(self.base_dir, "axi_programs")
self.axi_desktopapps = os.path.join(self.base_dir,
"axi_desktopapps")
self.stopwords = os.path.join(self.filters_dir, 'stopwords')
# popcon indexes
self.index_mode = "old"
# check if there are popcon indexes available
Expand Down
19 changes: 0 additions & 19 deletions apprecommender/data.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -41,7 +41,6 @@
from apprecommender.data_classification import time_weight
from apprecommender.dissimilarity import JaccardDistance
from apprecommender.error import Error
from apprecommender.singleton import Singleton


def axi_get_pkgs(axi):
Expand Down Expand Up @@ -214,24 +213,6 @@ def split_pkg_data(user_pkg, partition_size):
return round_partition


class StopWords(Singleton):
    """Set of stop words, read on demand from the configured file."""

    def __init__(self):
        # Starts empty; filled by the first access to ``stopwords``.
        self._stopwords = set()

    @property
    def stopwords(self):
        """Return the stop-word set, loading it first while it is empty.

        The file at ``Config().stopwords`` is read line by line and each
        line, stripped of surrounding whitespace, becomes one entry.
        """
        if self._stopwords:
            return self._stopwords

        location = Config().stopwords
        with open(location, 'r') as source:
            self._stopwords.update(line.strip() for line in source)

        return self._stopwords


class FilteredXapianIndex(xapian.WritableDatabase):

"""
Expand Down
4 changes: 2 additions & 2 deletions apprecommender/decider.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,7 +3,7 @@
import re
import xapian

from apprecommender.data import StopWords
from sklearn.feature_extraction.stop_words import ENGLISH_STOP_WORDS


class PkgMatchDecider(xapian.MatchDecider):
Expand Down Expand Up @@ -109,7 +109,7 @@ class FilterDescription(xapian.ExpandDecider):

def __init__(self):
    """Set up the decider with scikit-learn's English stop words."""
    xapian.ExpandDecider.__init__(self)
    # Only the sklearn list is assigned: the former
    # ``StopWords().stopwords`` value was overwritten immediately and
    # only cost a read of the external stop-words file.
    self.stop_words = ENGLISH_STOP_WORDS

def __call__(self, term):
"""
Expand Down
5 changes: 0 additions & 5 deletions apprecommender/initialize.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -187,7 +187,6 @@ def prepare_data(self):
self.save_list(pkgs, pkgs_path)

self.indexer_axi('sample', pkgs_path)
self.move_stopwords()

def get_role_program_pkgs(self):
command = "cat /var/lib/debtags/package-tags | " \
Expand All @@ -200,7 +199,3 @@ def get_role_program_pkgs(self):
def save_list(self, elements, path):
    """Write ``elements`` to ``path``, newline-separated and terminated.

    The file is opened in ``'w'`` mode, so existing content is replaced.
    """
    with open(path, 'w') as output:
        joined = '\n'.join(elements)
        output.write(joined + '\n')

def move_stopwords(self):
    """Copy ``../data/stopwords`` into ``self.config.filters_dir``.

    The source path is relative, so it is resolved against the current
    working directory of the running process.
    """
    destination = self.config.filters_dir + '/stopwords'
    shutil.copyfile('../data/stopwords', destination)
18 changes: 10 additions & 8 deletions apprecommender/tests/test_ml/test_pkg_classification.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -29,16 +29,18 @@ def test_get_pkg_debtags(self):
self.assertTrue(debtag in vim_debtags_result)

def test_get_pkg_terms(self):
    """Check that every expected term is returned for the 'vim' package.

    The expected list matches the terms produced once filtering uses
    the sklearn English stop words. Only membership is checked, so
    repeated entries in the list are harmless.
    """
    vim_terms = [u'vim', u'compat', u'version', u'unix', u'editor', u'vi',
                 u'new', u'featur', u'ad', u'multi', u'level', u'undo',
                 u'syntax', u'highlight', u'command', u'line', u'histori',
                 u'line', u'help', u'filenam', u'complet', u'block',
                 u'oper', u'fold', u'unicod', u'support', u'packag',
                 u'contain', u'version', u'vim', u'compil', u'standard',
                 u'set', u'featur', u'packag', u'doe', u'provid', u'gui',
                 u'version', u'vim', u'vim', u'packag', u'need']
    vim_terms_result = self.ml_data.get_pkg_terms(self.cache, 'vim')

    for term in vim_terms:
        # assertIn names the missing term and the container on failure,
        # which the bare assertTrue(term in ...) form did not.
        self.assertIn(term, vim_terms_result)

Expand Down
153 changes: 0 additions & 153 deletions data/stopwords

This file was deleted.

0 comments on commit 45784ae

Please sign in to comment.