# pizza.py -- Pizza-of-the-day (POTD) model parsed from downloaded Instagram post data.
import datetime
from nltk.tokenize import word_tokenize
import constants
def log(m):
    '''
    Write a diagnostic message to stdout, prefixed with "(Pizza): ".

    m may be any object; it is converted with str() before printing.
    '''
    # Single-argument parenthesised form behaves the same under the
    # Python 2 print statement and the Python 3 print function.
    print('(Pizza): ' + str(m))
class PizzaException(Exception):
    '''
    Raised when a post cannot be turned into a Pizza
    (e.g. missing caption, no base, multiple bases, no ingredients).
    '''
class Pizza(object):
    '''
    A POTD for a given day, built from one downloaded Instagram post.

    Attributes populated by from_instagram():
        raw_data       -- the post object exactly as passed in
        raw_timestamp  -- the post's "created_time" value
        timestamp      -- datetime parsed from raw_timestamp
        description    -- caption text
        tokens         -- lower-cased caption tokens with "-" removed
        like_count     -- likes count, or None when the post has no "likes"
        comment_count  -- comments count, or None when the post has no "comments"
        base           -- the single entry of constants.BASES found in tokens
        ingredients    -- dict mapping ingredient name -> bool (present?)
    '''

    def __init__(self, pizza):
        '''
        Build a pizza from an instagram post object (see from_instagram).
        '''
        self.from_instagram(pizza)

    def __repr__(self):
        return self.base + " with " + ", ".join(self.ingredient_list())

    def _count_ingredients(self, ingredients):
        '''
        Given a list of ingredients, of which each may be multiple words,
        this function searches this pizza's description tokens for the
        ingredient and marks it True in the ingredient table.

        Matching is by prefix: every word of the ingredient must be a prefix
        of the corresponding consecutive token (so "tomato" matches
        "tomatoes"). Ingredients that are not found are left untouched.
        '''
        def token_match(words, window):
            # words and window always have equal length here, so zip()
            # never silently drops a word.
            return all(tok.startswith(word) for word, tok in zip(words, window))

        for ingredient in ingredients:
            words = ingredient.lower().split(" ")
            width = len(words)
            # Slide a window of `width` tokens across the description; the
            # range is empty when the ingredient is longer than the caption.
            last_start = len(self.tokens) - width
            if any(token_match(words, self.tokens[start:start + width])
                   for start in range(last_start + 1)):
                self.ingredients[ingredient] = True

    def _parse_base(self):
        '''
        Set self.base to the one entry of constants.BASES that appears
        verbatim among the tokens.

        Raises PizzaException when no base, or more than one base, is found.
        '''
        matches = [b for b in constants.BASES if b in self.tokens]
        if not matches:
            raise PizzaException("No base found")
        if len(matches) > 1:
            raise PizzaException("Multiple bases found")
        self.base = matches[0]

    def _parse_ingredients(self):
        '''
        Build self.ingredients (name -> bool) for every entry of
        constants.INGREDIENTS, then clear generic ingredients that were only
        matched as part of a more specific one.

        Raises PizzaException when no ingredient at all is found.
        '''
        self.ingredients = {i: False for i in constants.INGREDIENTS}
        self._count_ingredients(constants.INGREDIENTS)
        if not self.ingredient_list():
            raise PizzaException("No ingredients found")

        # specific cases w/ ambiguous ingredients:
        # (specific ingredient found, generic ingredient to switch off)
        overrides = (
            ("bacon marmalad", "bacon"),
            ("chicken sausage", "chicken"),
            ("sundried tomato", "tomato"),
            ("soppressata", "salami"),
        )
        for specific, generic in overrides:
            if self.ingredients.get(specific):
                self.ingredients[generic] = False

    def ingredient_list(self):
        '''
        Return the names of all ingredients currently marked present.
        '''
        # items() exists on both Python 2 and 3 (iteritems() is Python 2 only).
        return [name for name, present in self.ingredients.items() if present]

    def from_instagram(self, instagram_obj):
        '''
        Populate a pizza object from
        downloaded instagram data

        instagram_obj is a dict-like post; the keys read are "created_time",
        "caption" (with "text"), "likes" and "comments" (each with "count").

        Raises PizzaException when the timestamp or caption is missing, or
        when base / ingredient parsing fails. Missing likes or comments are
        logged and leave the corresponding count as None.
        '''
        self.raw_data = instagram_obj

        def attr(name):
            # Fetch a top-level field, logging (not failing) when it is absent.
            if name not in instagram_obj:
                log("Missing attr: " + name)
                return None
            return instagram_obj.get(name)

        self.raw_timestamp = attr("created_time")
        if self.raw_timestamp is None:
            # int(None) would otherwise surface as an unrelated TypeError.
            raise PizzaException("Missing created_time")
        # NOTE: fromtimestamp() without a tz yields naive local time.
        self.timestamp = datetime.datetime.fromtimestamp(int(self.raw_timestamp))

        caption = instagram_obj.get("caption")
        if not caption:
            raise PizzaException("Missing caption")
        self.description = caption.get("text")
        if self.description is None:
            # A caption object without text cannot be tokenized.
            raise PizzaException("Missing caption")

        tokens = [t.replace("-", "").lower()
                  for t in word_tokenize(self.description)]
        if str is bytes:
            # Python 2 only: keep the historical byte-string tokens so they
            # compare with the entries in constants (presumably plain str
            # literals -- not visible here). On Python 3, encoding would
            # produce bytes that never match str constants.
            tokens = [t.encode('utf-8') for t in tokens]
        self.tokens = tokens

        # attr() returns None for a missing field; fall back to an empty
        # dict so the count becomes None instead of raising AttributeError.
        self.like_count = (attr("likes") or {}).get("count")
        self.comment_count = (attr("comments") or {}).get("count")

        self._parse_base()
        self._parse_ingredients()

    def to_feature_vector(self):
        '''
        Exports the pizza's timestamp as a two-element feature vector:
        [day of week (0-6, Sunday is 0), month (1-12)], both as floats.
        The values are raw, not rescaled.
        '''
        return [
            # day of week as number (0-6); isoweekday() is 1-7 with Sunday 7
            float(self.timestamp.isoweekday() % 7),
            # month as number (1-12)
            float(self.timestamp.month),
        ]

    def ingred_feature_vector(self):
        '''
        Return one float per entry of constants.INGREDIENTS, in that order:
        1.0 when the ingredient is present on this pizza, else 0.0.
        '''
        # Build the lookup once instead of once per ingredient.
        present = set(self.ingredient_list())
        return [
            (1.0 if i in present else 0.0) for i in constants.INGREDIENTS
        ]