-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmap.py
executable file
·181 lines (164 loc) · 6.1 KB
/
map.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
#!/usr/bin/python
import argparse
import os
import socket
import threading
import time
# TODO: How should two instances of map run simultaneously? What would we pass in from the CLI in that case?
def main():
    """Read the setup-file path from the command line and build a Map.

    Constructing the Map parses the setup file and starts its server
    thread, so nothing else has to be done here.
    """
    cli_parser = argparse.ArgumentParser()
    cli_parser.add_argument("setup_file", type=str, help="configure with setup file")
    Map(cli_parser.parse_args().setup_file)
class Map(object):
def __init__(self, setup_file):
self.site_to_connection = {}
## Parse with setupfile
with open(setup_file, 'r') as f:
try:
ip_addr, port = f.readline().split()
self.site_to_connection["cli"] = (ip_addr, int(port))
ip_addr, port = f.readline().split()
self.site_to_connection["map"] = ("0.0.0.0", int(port))
except:
print("ERROR: Can't read setup file")
self.file_name = "file1"
self.file_size = 0
self.half = 0
self.server_thread = threading.Thread(target=self.start_server)
self.server_thread.start()
def parse_file(self):
self.file_size = self.get_size()
if self.file_size == 0:
self.write_file({}, self.half - 1)
print("Warning: Empty file")
elif self.half == 1:
self.handle_first_half()
elif self.half == 2:
self.handle_second_half()
else:
print("ERROR: Invalid half")
self.message_cli("DONE")
# os._exit(1)
def handle_first_half(self):
with open(self.file_name) as f:
try:
data = ""
char = ''
for i in range(self.file_size/2):
char = f.read(1)
if not char:
break
data += char
# Include the word we land on.
while char.isspace() == False:
nextChar = f.read(1)
if not nextChar or nextChar.isspace():
break
data += nextChar
# Generate lists by splitting the strings on spaces.
data = data.split()
# Convert to dict with counts.
first_half = self.create_dict(data)
# Write the file onto disk.
self.write_file(first_half, 0)
except:
print("ERROR: Can't read file")
def handle_second_half(self):
with open(self.file_name) as f:
try:
self.file_size = self.get_size()
if self.file_size == 1:
self.write_file({}, 1)
return
# Read up to half the file.
half = self.file_size/2
start = half
f.seek(half, 0)
if f.read(1).isspace() == False:
# Check the previous character.
f.seek(half - 1, 0)
char = f.read(1)
while char.isspace() == False:
char = f.read(1)
if not char:
self.write_file({}, 1)
return
if char.isspace() == False:
start += 1
# Seek to where we want to start reading.
f.seek(start, 0)
data = ""
char = ''
while True:
char = f.read(1)
if not char:
break
data += char
# Generate lists by splitting the strings on spaces.
data = data.split()
# Convert to dict with counts.
second_half = self.create_dict(data)
# Write the file onto disk.
self.write_file(second_half, 1)
except:
print("ERROR: Can't read file")
def write_file(self, input_dict, file_id):
written = str(input_dict)
wr_file_name = '{0}_I_{1}'.format(self.file_name, file_id)
f = open(wr_file_name, 'w')
f.write(written) # python will convert \n to os.linesep
f.close()
def get_size(self):
st = os.stat(self.file_name)
return st.st_size
def create_dict(self, data):
store = {}
for word in data:
if word not in store.keys():
store[word] = 1
else:
store[word] += 1
return store
def start_server(self):
BUFFER_SIZE = 1024
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
print("Binding to {0}".format(self.site_to_connection["map"]))
sock.bind(self.site_to_connection["map"])
sock.listen(1)
while True:
conn, addr = sock.accept()
while True:
try:
data = conn.recv(BUFFER_SIZE)
except:
continue
if not data: break
# print("\nReceived: {0}".format(data))
split = data.split()
## CLI Commands.
if split[0] == "MAP":
self.file_name = split[1]
self.half = int(split[2])
self.parse_file()
conn.send(data)
conn.close()
def message_cli(self, message):
BUFFER_SIZE = 1024
RETRY_TIME = 2
if True:
print("Sent: {0}".format(message)) # Alex Wu: Debugging purposes
while True:
try:
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.connect(self.site_to_connection["cli"])
sock.sendall(message)
sock.close()
print(self.site_to_connection["cli"])
# time.sleep(1)
break
except:
print("Attempt to connect to " + str(self.site_to_connection["cli"]) +
" failed. Retrying in " + str(RETRY_TIME) + " seconds.")
time.sleep(RETRY_TIME)
# Run the mapper only when this file is executed as a script.
if __name__ == "__main__":
    main()