Add a spawnpoint clustering script to Tools (PokemonGoF#990)
* Add a spawnpoint clustering script to Tools

* Made input file parser more flexible

Input spawnpoints can now contain 'spawnpoint_id', 'sid' or no ID at
all.

Output key format can be controlled by the new --long-keys flag; it
defaults to 'sid', 'lat' and 'lng' as opposed to 'spawnpoint_id',
'latitude' and 'longitude'.

This also makes --output-clusters output 'latitude' and 'longitude' keys
as opposed to a 'position' array.
gabrielsoldani authored and FrostTheFox committed Sep 7, 2016
1 parent 96cbe10 commit 301d762
Showing 3 changed files with 247 additions and 0 deletions.
8 changes: 8 additions & 0 deletions Tools/Spawnpoint-Clustering/README.md
@@ -0,0 +1,8 @@
## Usage

```
python ./cluster.py spawnpoints.json -os spawnpoints.compressed.json -r 70 -t 180
```

Clusters all spawnpoints in `spawnpoints.json` that are within 70 meters of each other and within 180 seconds of spawn time, and saves the output to `spawnpoints.compressed.json`.
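
For reference, a minimal input file might look like the following (values are illustrative). The parser accepts `spawnpoint_id` or `sid` for the ID (or no ID at all), and `latitude`/`longitude` or `lat`/`lng` for coordinates:

```
[
    {"spawnpoint_id": "47c2c6b7a81", "latitude": 40.761423, "longitude": -73.977622, "time": 1800},
    {"sid": "47c2c6b7a93", "lat": 40.761984, "lng": -73.977120, "time": 1860}
]
```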

197 changes: 197 additions & 0 deletions Tools/Spawnpoint-Clustering/cluster.py
@@ -0,0 +1,197 @@
import argparse
import json
import time
import random

import utils

class Spawnpoint(object):
    def __init__(self, data):
        # not needed but useful for debugging
        self.spawnpoint_id = data.get('spawnpoint_id') or data.get('sid')

        # accept either long ('latitude'/'longitude') or short ('lat'/'lng') keys
        try:
            self.position = (float(data['latitude']), float(data['longitude']))
        except KeyError:
            self.position = (float(data['lat']), float(data['lng']))

        self.time = data['time']

    def serialize(self):
        obj = dict()

        if self.spawnpoint_id is not None:
            obj['spawnpoint_id'] = self.spawnpoint_id
        obj['latitude'] = self.position[0]
        obj['longitude'] = self.position[1]
        obj['time'] = self.time

        return obj
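
A quick sketch of the key handling (hypothetical values): short input keys are accepted, but `serialize()` always emits the long names.

```
p = Spawnpoint({'sid': 'abc123', 'lat': 40.0, 'lng': -74.0, 'time': 1800})
print p.serialize()
# {'spawnpoint_id': 'abc123', 'latitude': 40.0, 'longitude': -74.0, 'time': 1800}
# (dict key order may vary)
```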

class Spawncluster(object):
    def __init__(self, spawnpoint):
        self._spawnpoints = [spawnpoint]
        self.centroid = spawnpoint.position
        self.min_time = spawnpoint.time
        self.max_time = spawnpoint.time

    def __getitem__(self, key):
        return self._spawnpoints[key]

    def __iter__(self):
        for x in self._spawnpoints:
            yield x

    def __contains__(self, item):
        return item in self._spawnpoints

    def __len__(self):
        return len(self._spawnpoints)

    def append(self, spawnpoint):
        # update centroid: with n existing points, the new point gets
        # weight 1/(n+1), i.e. a running mean along the great circle
        f = len(self._spawnpoints) / (len(self._spawnpoints) + 1.0)
        self.centroid = utils.intermediate_point(spawnpoint.position, self.centroid, f)

        self._spawnpoints.append(spawnpoint)

        if spawnpoint.time < self.min_time:
            self.min_time = spawnpoint.time

        if spawnpoint.time > self.max_time:
            self.max_time = spawnpoint.time

    def simulate_centroid(self, spawnpoint):
        # compute the centroid append() would produce, without mutating
        f = len(self._spawnpoints) / (len(self._spawnpoints) + 1.0)
        new_centroid = utils.intermediate_point(spawnpoint.position, self.centroid, f)

        return new_centroid
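
Why `f = n / (n + 1)` yields a running mean: `utils.intermediate_point(p, c, f)` returns the point a fraction `f` of the way from `p` to `c`, so each new point pulls the centroid `1/(n+1)` of the way toward itself. A one-dimensional sketch (the helper below is illustrative, not part of the tool):

```
# 1-D analogue of intermediate_point: a fraction f of the way from p to c
def intermediate_1d(p, c, f):
    return p + f * (c - p)

c, n = 10.0, 3  # centroid of 3 existing points
p = 18.0        # new point
print intermediate_1d(p, c, n / (n + 1.0))  # 12.0 == (3*10 + 18) / 4
```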

def cost(spawnpoint, cluster, time_threshold):
    distance = utils.distance(spawnpoint.position, cluster.centroid)

    min_time = min(cluster.min_time, spawnpoint.time)
    max_time = max(cluster.max_time, spawnpoint.time)

    # merging is forbidden if it would stretch the cluster's time window
    if max_time - min_time > time_threshold:
        return float('inf')

    return distance

def check_cluster(spawnpoint, cluster, radius, time_threshold):
    # discard infinite cost or too far away; a point more than 2 * radius
    # from the centroid cannot end up within radius of the new centroid
    if cost(spawnpoint, cluster, time_threshold) > 2 * radius:
        return False

    new_centroid = cluster.simulate_centroid(spawnpoint)

    # we'd be removing ourselves: the new point would fall outside the radius
    if utils.distance(spawnpoint.position, new_centroid) > radius:
        return False

    # we'd be pushing an existing member x outside the radius
    if any(utils.distance(x.position, new_centroid) > radius for x in cluster):
        return False

    return True

def cluster(spawnpoints, radius, time_threshold):
    clusters = []

    for p in spawnpoints:
        if len(clusters) == 0:
            clusters.append(Spawncluster(p))
        else:
            # greedily join the cheapest compatible cluster,
            # otherwise start a new one
            c = min(clusters, key=lambda x: cost(p, x, time_threshold))

            if check_cluster(p, c, radius, time_threshold):
                c.append(p)
            else:
                c = Spawncluster(p)
                clusters.append(c)

    return clusters
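
A mini-run with assumed coordinates (the first two points are roughly 33 m apart, the third about 55 km away; times are in the input's `time` units):

```
pts = [Spawnpoint({'sid': 'a', 'lat': 0.0, 'lng': 0.0, 'time': 0}),
       Spawnpoint({'sid': 'b', 'lat': 0.0003, 'lng': 0.0, 'time': 60}),
       Spawnpoint({'sid': 'c', 'lat': 0.5, 'lng': 0.0, 'time': 60})]
print len(cluster(pts, 70, 180))  # 2: 'a' and 'b' merge, 'c' stands alone
```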

def test(cluster, radius, time_threshold):
    assert cluster.max_time - cluster.min_time <= time_threshold

    for p in cluster:
        assert utils.distance(p.position, cluster.centroid) <= radius
        assert cluster.min_time <= p.time <= cluster.max_time

def main(args):
    radius = args.radius
    time_threshold = args.time_threshold

    with open(args.filename, 'r') as f:
        rows = json.loads(f.read())

    spawnpoints = [Spawnpoint(x) for x in rows]

    print 'Processing', len(spawnpoints), 'spawnpoints...'

    start_time = time.time()
    clusters = cluster(spawnpoints, radius, time_threshold)
    end_time = time.time()

    print 'Completed in {:.2f} seconds.'.format(end_time - start_time)
    print len(clusters), 'clusters found.'
    print '{:.2f}% compression achieved.'.format(100.0 * len(clusters) / len(spawnpoints))

    # sanity-check every cluster against the radius and time constraints
    try:
        for c in clusters:
            test(c, radius, time_threshold)
    except AssertionError:
        print 'error: something\'s seriously broken.'
        raise

    # clusters.sort(key=lambda x: len(x))

    if args.output_clusters:
        rows = []
        for c in clusters:
            row = dict()
            row['spawnpoints'] = [x.serialize() for x in c]
            row['latitude'] = c.centroid[0]
            row['longitude'] = c.centroid[1]
            row['min_time'] = c.min_time
            row['max_time'] = c.max_time
            rows.append(row)

        with open(args.output_clusters, 'w') as f:
            f.write(json.dumps(rows, indent=4, separators=(',', ': ')))

    if args.output_spawnpoints:
        rows = []
        for c in clusters:
            row = dict()
            # pick a random id from a clustered spawnpoint
            # we should probably not do this
            if args.long_keys:
                row['spawnpoint_id'] = random.choice(c).spawnpoint_id
                row['latitude'] = c.centroid[0]
                row['longitude'] = c.centroid[1]
            else:
                row['sid'] = random.choice(c).spawnpoint_id
                row['lat'] = c.centroid[0]
                row['lng'] = c.centroid[1]
            # pick the latest time so earlier spawnpoints have already spawned
            row['time'] = c.max_time
            rows.append(row)

        with open(args.output_spawnpoints, 'w') as f:
            f.write(json.dumps(rows, indent=4, separators=(',', ': ')))

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Cluster close spawnpoints.')
    parser.add_argument('filename', help='Your spawnpoints.json file.')
    parser.add_argument('-os', '--output-spawnpoints', help='The filename to write compressed spawnpoints to.')
    parser.add_argument('-oc', '--output-clusters', help='The filename to write cluster data to.')
    parser.add_argument('-r', '--radius', type=float, help='Maximum radius (in meters) where spawnpoints are considered close (defaults to 70).', default=70)
    parser.add_argument('-t', '--time-threshold', type=float, help='Maximum time threshold (in seconds) to consider when clustering (defaults to 180).', default=180)
    parser.add_argument('--long-keys', action='store_true', help='Uses prettier longer key names in the output spawnpoints.json.')

    args = parser.parse_args()

    main(args)
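
For example, to write both output files in one pass (filenames are illustrative; all flags are defined above):

```
python ./cluster.py spawnpoints.json -os spawnpoints.compressed.json -oc clusters.json --long-keys
```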
42 changes: 42 additions & 0 deletions Tools/Spawnpoint-Clustering/utils.py
@@ -0,0 +1,42 @@
from math import acos, atan2, cos, degrees, pi, radians, sin, sqrt

R = 6378137.0  # Earth's equatorial radius in meters (WGS 84)

def distance(pos1, pos2):
    # great-circle distance via the spherical law of cosines
    if pos1 == pos2:
        return 0.0

    lat1 = radians(pos1[0])
    lon1 = radians(pos1[1])
    lat2 = radians(pos2[0])
    lon2 = radians(pos2[1])

    return acos(sin(lat1)*sin(lat2) + cos(lat1)*cos(lat2) * cos(lon2-lon1)) * R
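
A quick sanity check: one degree of latitude along a meridian spans `R * pi / 180` meters on this sphere:

```
print distance((0.0, 0.0), (1.0, 0.0))  # ~111319.49 m, i.e. R * pi / 180
```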

def intermediate_point(pos1, pos2, f):
    # point a fraction f of the way along the great circle from pos1 to pos2
    if pos1 == pos2:
        return pos1

    lat1 = radians(pos1[0])
    lon1 = radians(pos1[1])
    lat2 = radians(pos2[0])
    lon2 = radians(pos2[1])

    delta = acos(sin(lat1)*sin(lat2) + cos(lat1)*cos(lat2) * cos(lon2-lon1))

    if delta == 0:  # too close
        return pos1 if f < 0.5 else pos2

    # the textbook formula divides by sin(delta); dividing by delta instead
    # only rescales (x, y, z), which atan2 ignores, so the result is the same
    a = sin((1 - f) * delta) / delta
    b = sin(f * delta) / delta
    x = a * cos(lat1) * cos(lon1) + b * cos(lat2) * cos(lon2)
    y = a * cos(lat1) * sin(lon1) + b * cos(lat2) * sin(lon2)
    z = a * sin(lat1) + b * sin(lat2)

    lat3 = atan2(z, sqrt(x**2 + y**2))
    lon3 = atan2(y, x)

    def normalize(pos):
        return ((pos[0]+540)%360)-180, ((pos[1]+540)%360)-180

    return normalize((degrees(lat3), degrees(lon3)))
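
For instance, the midpoint (`f = 0.5`) of two equatorial points lands halfway in longitude:

```
print intermediate_point((0.0, 0.0), (0.0, 10.0), 0.5)  # ~(0.0, 5.0)
```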
