Skip to content

Commit

Permalink
Merge pull request #24 from nsmith5/update-dataframes
Browse files Browse the repository at this point in the history
Update Dataframes

Former-commit-id: 5f078e9
  • Loading branch information
randyzwitch authored Mar 16, 2018
2 parents c6a52e6 + 1097576 commit 36dbd56
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 14 deletions.
5 changes: 3 additions & 2 deletions REQUIRE
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
julia 0.6
IPNets
DataFrames 0.9.0 0.11.0
DataFrames 0.11.0
CSV
ZipFile 0.2.4
Requests 0.2.4
GZip
GZip
26 changes: 16 additions & 10 deletions src/data.jl
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import Requests
import CSV
import GZip

# Path to directory with data, can define GEOIP_DATADIR to override
# the default (useful for testing with a smaller test set)
const DATADIR = haskey(ENV, "GEOIP_DATADIR") ?
const DATADIR = haskey(ENV, "GEOIP_DATADIR") ?
ENV["GEOIP_DATADIR"] :
joinpath(dirname(@__FILE__), "..", "data")

const MD5 = joinpath(DATADIR, ".md5")
const MD5 = joinpath(DATADIR, ".md5")
const CITYMD5URL = "http://geolite.maxmind.com/download/geoip/database/GeoLite2-City-CSV.zip.md5"
const CITYDLURL = "http://geolite.maxmind.com/download/geoip/database/GeoLite2-City-CSV.zip"

Expand Down Expand Up @@ -53,12 +55,12 @@ function dldata(md5::String)
dlcount = 0
for fn in archive.files
if contains(string(fn),BLOCKCSV)
gzopen(joinpath(DATADIR, BLOCKCSVGZ), "w") do f
GZip.open(joinpath(DATADIR, BLOCKCSVGZ), "w") do f
write(f, read(fn))
end
dlcount += 1
elseif contains(string(fn),CITYCSV)
gzopen(joinpath(DATADIR, CITYCSVGZ), "w") do f
GZip.open(joinpath(DATADIR, CITYCSVGZ), "w") do f
write(f, read(fn))
end
dlcount += 1
Expand All @@ -80,24 +82,28 @@ function update()
end

function load()
blockfile = joinpath(DATADIR, BLOCKCSVGZ)
blockfile = joinpath(DATADIR, BLOCKCSVGZ)
locfile = joinpath(DATADIR, CITYCSVGZ)

blocks = DataFrame()
locs = DataFrame()
try
blocks = readtable(blockfile)
locs = readtable(locfile)
blocks = GZip.open(blockfile, "r") do stream
CSV.read(stream, nullable=true, types=[String, Int, Int, String, Int, Int, String, Float64, Float64, Int])
end
locs = GZip.open(locfile, "r") do stream
CSV.read(stream, nullable=true, types=[Int, String, String, String, String, String, String, String, String, String, String, Int, String, Int])
end
catch
error("Geolocation data cannot be read. Data directory may be corrupt...")
end

# Clean up unneeded columns and map others to appropriate data structures
delete!(blocks, [:represented_country_geoname_id, :is_anonymous_proxy, :is_satellite_provider])

blocks[:v4net] = map(x -> IPNets.IPv4Net(x), blocks[:network])
delete!(blocks, :network)

blocks[:location] = map(Location, blocks[:longitude], blocks[:latitude])
delete!(blocks, [:longitude, :latitude])

Expand Down
4 changes: 2 additions & 2 deletions src/geoip-module.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ struct Location <: Point3D
datum::String

function Location(x,y,z=0, datum="WGS84")
if x === NA || y === NA
return NA
if x === missing || y === missing
return missing
else
return new(x,y,z,datum)
end
Expand Down

0 comments on commit 36dbd56

Please sign in to comment.