-
Notifications
You must be signed in to change notification settings - Fork 46
/
main.py
91 lines (65 loc) · 2.64 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# -*- coding: utf-8 -*-
"""main.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1eP_TTxxIGyv30xDs6dl8i5k1QIXyGvUa
"""
from model import GloVeModel
from dataloader import tokenized_dataLoader
import argparse
from numpy import save, load
from evaluate import are_Similar, get_ClosestWords, analogy
from collections import OrderedDict
def parse_args(argv=None):
    """Parse the command-line options for the GloVe training/evaluation script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``, which is the
            behaviour existing callers rely on.

    Returns:
        An ``OrderedDict`` with the keys ``'evaluation_words'`` (itself an
        ``OrderedDict`` holding ``'word1'``, ``'word2'`` and ``'word'``),
        ``'context_size'``, ``'embedding_size'``, ``'mode'`` and ``'epochs'``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--epochs', type=int, default=100,
                        help="number of training epochs")
    parser.add_argument('--embedding_size', type=int, default=100,
                        help="dimensionality of the word embeddings")
    parser.add_argument('--context_size', type=int, default=10,
                        help="context window size passed to the model")
    parser.add_argument('--mode', type=str, default="train",
                        help="what to do after training, e.g. are_Similar, "
                             "get_ClosestWords, analogy, plotEmbeddings, help")
    parser.add_argument('--word1', type=str, default="man",
                        help="first word for are_Similar / analogy")
    parser.add_argument('--word2', type=str, default="boy",
                        help="second word for are_Similar / analogy")
    parser.add_argument('--word', type=str, default="woman",
                        help="word for get_ClosestWords; third word for analogy")
    args = parser.parse_args(argv)

    evaluation_words = OrderedDict([
        ('word1', args.word1),
        ('word2', args.word2),
        ('word', args.word),
    ])
    # 'epochs' is appended after the pre-existing keys so that the order of
    # the original entries is unchanged; previously the parsed --epochs value
    # was dropped here and could never reach the training call.
    arguments = OrderedDict([
        ('evaluation_words', evaluation_words),
        ('context_size', args.context_size),
        ('embedding_size', args.embedding_size),
        ('mode', args.mode),
        ('epochs', args.epochs),
    ])
    return arguments
# Script body: read the CLI options, then either print usage or train a GloVe
# model on the tokenized corpus and run the requested evaluation.
arguments = parse_args()
context_size = arguments['context_size']
embedding_size = arguments['embedding_size']
evaluation_words = arguments['evaluation_words']
mode = arguments['mode']
# Use the parsed epoch count when parse_args() provides one; otherwise keep
# the historical default of 100 epochs.
num_epochs = arguments.get('epochs', 100)

if mode == "help":
    # Usage text needs no model, so print it without loading the corpus or
    # training first.
    print('$ python3 main.py --embedding_size 100 --context_size 10\n')
    print('$ python3 main.py --mode "are_Similar" --word1 "man" --word2 "boy"\n')
    print('$ python3 main.py --mode "get_ClosestWords" --word "man"\n')
    print('$ python3 main.py --mode "analogy" --word1 "man" --word2 "boy" --word "woman"\n')
    print('$ python3 main.py --mode "help"')
    # The former 'wordIsInVocab' / '--word_' example was dropped: the argument
    # parser defines no '--word_' option and no branch below handles that
    # mode. 'plotEmbeddings' is handled below, so it is listed instead.
    print('$ python3 main.py --mode "plotEmbeddings"')
else:
    # Every other mode (including the default "train") needs a trained model.
    corpus = tokenized_dataLoader()
    model = GloVeModel(embedding_size=embedding_size, context_size=context_size)
    model.fit_to_corpus(corpus)
    model.train(num_epochs=num_epochs)

    # Modes are mutually exclusive strings; an unrecognised mode only trains.
    if mode == "plotEmbeddings":
        model.generate_tsne()
    elif mode == "are_Similar":
        word1 = evaluation_words['word1']
        word2 = evaluation_words['word2']
        print(are_Similar(word1, word2, model))
    elif mode == "get_ClosestWords":
        word = evaluation_words['word']
        print(get_ClosestWords(word, model))
    elif mode == "analogy":
        word1 = evaluation_words['word1']
        word2 = evaluation_words['word2']
        word3 = evaluation_words['word']
        print(analogy(word1, word2, word3, model))