diff --git a/skipthoughts.py b/skipthoughts.py
index 1a6011d..de2fdb1 100644
--- a/skipthoughts.py
+++ b/skipthoughts.py
@@ -6,7 +6,10 @@
 import theano
 import theano.tensor as tensor
 
-import cPickle as pkl
+try:
+    import _pickle as pkl
+except ImportError:
+    import pickle as pkl
 import numpy
 import copy
 import nltk
@@ -20,20 +23,25 @@
 #-----------------------------------------------------------------------------#
 # Specify model and table locations here
 #-----------------------------------------------------------------------------#
-path_to_models = '/u/rkiros/public_html/models/'
-path_to_tables = '/u/rkiros/public_html/models/'
+path_to_skipthoughts = './skipthoughts'
+path_to_models = os.path.join(path_to_skipthoughts, 'models')
+path_to_tables = os.path.join(path_to_skipthoughts, 'models')
 #-----------------------------------------------------------------------------#
 
-path_to_umodel = path_to_models + 'uni_skip.npz'
-path_to_bmodel = path_to_models + 'bi_skip.npz'
+path_to_umodel = os.path.join(path_to_models, 'uni_skip.npz')
+path_to_bmodel = os.path.join(path_to_models, 'bi_skip.npz')
+path_to_utable = os.path.join(path_to_tables, 'utable.npy')
+path_to_btable = os.path.join(path_to_tables, 'btable.npy')
+
+path_to_dictionary = os.path.join(path_to_tables, 'dictionary.txt')
 
 
 def load_model():
     """
     Load the model with saved tables
     """
     # Load model options
-    print 'Loading model parameters...'
+    print('Loading model parameters...')
     with open('%s.pkl'%path_to_umodel, 'rb') as f:
         uoptions = pkl.load(f)
     with open('%s.pkl'%path_to_bmodel, 'rb') as f:
@@ -48,18 +56,18 @@ def load_model():
     btparams = init_tparams(bparams)
 
     # Extractor functions
-    print 'Compiling encoders...'
+    print('Compiling encoders...')
     embedding, x_mask, ctxw2v = build_encoder(utparams, uoptions)
     f_w2v = theano.function([embedding, x_mask], ctxw2v, name='f_w2v')
     embedding, x_mask, ctxw2v = build_encoder_bi(btparams, boptions)
     f_w2v2 = theano.function([embedding, x_mask], ctxw2v, name='f_w2v2')
 
     # Tables
-    print 'Loading tables...'
+    print('Loading tables...')
     utable, btable = load_tables()
 
     # Store everything we need in a dictionary
-    print 'Packing up...'
+    print('Packing up...')
     model = {}
     model['uoptions'] = uoptions
     model['boptions'] = boptions
@@ -76,9 +84,9 @@ def load_tables():
     Load the tables
     """
     words = []
-    utable = numpy.load(path_to_tables + 'utable.npy')
-    btable = numpy.load(path_to_tables + 'btable.npy')
-    f = open(path_to_tables + 'dictionary.txt', 'rb')
+    utable = numpy.load(path_to_utable, encoding='latin1')
+    btable = numpy.load(path_to_btable, encoding='latin1')
+    f = open(path_to_dictionary, 'rb')
     for line in f:
         words.append(line.decode('utf-8').strip())
     f.close()
@@ -125,8 +133,8 @@ def encode(model, X, use_norm=True, verbose=True, batch_size=128, use_eos=False)
     # Get features. This encodes by length, in order to avoid wasting computation
     for k in ds.keys():
         if verbose:
-            print k
-        numbatches = len(ds[k]) / batch_size + 1
+            print(k)
+        numbatches = len(ds[k]) // batch_size + 1
         for minibatch in range(numbatches):
             caps = ds[k][minibatch::numbatches]
 
@@ -194,10 +202,10 @@ def nn(model, text, vectors, query, k=5):
     scores = numpy.dot(qf, vectors.T).flatten()
     sorted_args = numpy.argsort(scores)[::-1]
     sentences = [text[a] for a in sorted_args[:k]]
-    print 'QUERY: ' + query
-    print 'NEAREST: '
+    print('QUERY: ' + query)
+    print('NEAREST: ')
     for i, s in enumerate(sentences):
-        print s, sorted_args[i]
+        print(s, sorted_args[i])
 
 
 def word_features(table):
@@ -221,10 +229,10 @@ def nn_words(table, wordvecs, query, k=10):
     scores = numpy.dot(qf, wordvecs.T).flatten()
     sorted_args = numpy.argsort(scores)[::-1]
     words = [keys[a] for a in sorted_args[:k]]
-    print 'QUERY: ' + query
-    print 'NEAREST: '
+    print('QUERY: ' + query)
+    print('NEAREST: ')
     for i, w in enumerate(words):
-        print w
+        print(w)
 
 
 def _p(pp, name):
@@ -239,7 +247,7 @@ def init_tparams(params):
     initialize Theano shared variables according to the initial parameters
     """
     tparams = OrderedDict()
-    for kk, pp in params.iteritems():
+    for kk, pp in params.items():
         tparams[kk] = theano.shared(params[kk], name=kk)
     return tparams
 
@@ -249,7 +257,7 @@ def load_params(path, params):
     load parameters
     """
     pp = numpy.load(path)
-    for kk, vv in params.iteritems():
+    for kk, vv in params.items():
         if kk not in pp:
             warnings.warn('%s is not in the archive'%kk)
             continue
@@ -436,5 +444,3 @@ def _step_slice(m_, x_, xx_, h_, U, Ux):
                                 strict=True)
     rval = [rval]
     return rval
-
-
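
For reference, a minimal usage sketch of the patched module (this example is not part of the patch itself): it assumes the pretrained files uni_skip.npz, bi_skip.npz, utable.npy, btable.npy and dictionary.txt have already been downloaded into ./skipthoughts/models, the directory the new path_to_* variables point at.

    # Hypothetical usage example, assuming the pretrained models and tables
    # sit in ./skipthoughts/models as expected by the patched paths.
    import skipthoughts

    model = skipthoughts.load_model()                # loads tables, compiles uni- and bi-skip encoders
    sentences = ['A cat sat on the mat.', 'A dog chased the ball.']
    vectors = skipthoughts.encode(model, sentences)  # one skip-thought vector per sentence
    print(vectors.shape)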