Skip to content

Commit

Permalink
IPNet fix and initial refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
Andrey Oskin committed Apr 27, 2021
1 parent 06f0970 commit e8f952b
Show file tree
Hide file tree
Showing 6 changed files with 28 additions and 350 deletions.
6 changes: 2 additions & 4 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,14 @@ version = "0.4.1"
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
GZip = "92fee26a-97fe-5a0c-ad85-20a5f3185b63"
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
IPNets = "66763231-799b-5fff-8662-389acfc33a85"
Sockets = "6462fe0b-24de-5631-8697-dd941f90decc"
ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea"

[compat]
CSV = "0.8"
DataFrames = "0.22, 1.0"
GZip = "0.5"
HTTP = "0.9"
ZipFile = "0.8, 0.9"
IPNets = "1"
julia = "1"

[extras]
Expand Down
6 changes: 1 addition & 5 deletions src/GeoIP.jl
Original file line number Diff line number Diff line change
@@ -1,15 +1,11 @@
module GeoIP

using DataFrames
using ZipFile
using GZip
using HTTP
using CSV
using IPNets
import Sockets: IPv4

include("ipnets.jl")
using .IPNets

export
# types
Location,
Expand Down
94 changes: 15 additions & 79 deletions src/data.jl
Original file line number Diff line number Diff line change
@@ -1,85 +1,21 @@
# Directory that holds the data files. Setting the GEOIP_DATADIR environment
# variable overrides the default (useful for testing with a smaller test set).
const DATADIR = get(
    ENV,
    "GEOIP_DATADIR",
    joinpath(dirname(@__FILE__), "..", "data"),
)

# Stored checksum file, followed by the MaxMind URLs for the checksum and the archive
const MD5 = joinpath(DATADIR, ".md5")
const CITYMD5URL = "http://geolite.maxmind.com/download/geoip/database/GeoLite2-City-CSV.zip.md5"
const CITYDLURL = "http://geolite.maxmind.com/download/geoip/database/GeoLite2-City-CSV.zip"

# Names of the CSV files looked for inside the downloaded archive
const BLOCKCSV = "GeoLite2-City-Blocks-IPv4.csv"
const CITYCSV = "GeoLite2-City-Locations-en.csv"

# Names of the gzip-compressed copies kept in DATADIR
const BLOCKCSVGZ = string(BLOCKCSV, ".gz")
const CITYCSVGZ = string(CITYCSV, ".gz")

# Module-level mutable state. `dataloaded` is reset to `false` by `update()` and set
# to `true` at the end of `load()`; `geodata` starts as an empty table and is replaced
# by `load()` with the joined data sorted on `:v4net`.
# NOTE(review): both are non-const globals, so reads of them inside functions are
# not type-stable.
dataloaded = false
geodata = DataFrame()

"""
    readmd5()

Return the first line of the checksum file `MD5`, with surrounding whitespace
stripped.

If the file does not exist, `update()` is called once and the read is retried.

# Throws
- `ErrorException` if the checksum file is still missing after the update attempt.
"""
function readmd5()
    if !isfile(MD5)
        @info "Failed to find checksum file, updating data..."
        update()
        # update() does not guarantee that the checksum file exists afterwards (an
        # incomplete download is only logged), so check once instead of recursing
        # without bound.
        isfile(MD5) || error("Checksum file $(MD5) is still missing after updating data")
    end
    return open(MD5) do f
        strip(readline(f))
    end
end

function getmd5()
try
r = HTTP.get(CITYMD5URL)
return string(r.data)
catch
@error "Failed to download checksum file from MaxMind, check network connectivity"
end
"""
    getdatadir(datadir)

Resolve the data directory: a non-empty `datadir` is returned unchanged; otherwise
the `GEOIP_DATADIR` environment variable is used when it is set, falling back to
the `data` directory next to this source tree.
"""
function getdatadir(datadir)
    if !isempty(datadir)
        return datadir
    end
    fallback = joinpath(dirname(@__FILE__), "..", "data")
    return get(ENV, "GEOIP_DATADIR", fallback)
end

"""
    updaterequired()

Return `true` when the checksum read by `readmd5()` differs from the one returned
by `getmd5()`.
"""
function updaterequired()
    stored = readmd5()
    fetched = getmd5()
    return stored != fetched
end

"""
    dldata(md5::String)

Download the archive at `CITYDLURL`, write the entries matching `BLOCKCSV` and
`CITYCSV` into `DATADIR` as gzip files (`BLOCKCSVGZ`, `CITYCSVGZ`), and store `md5`
in the checksum file once both have been written.

A failed download is logged and the original exception is rethrown. If the archive
does not yield exactly two files, an error is logged and the checksum file is left
untouched.
"""
function dldata(md5::String)
    r = try
        HTTP.get(CITYDLURL)
    catch
        @error "Failed to download file from MaxMind, check network connectivity"
        # Nothing usable was fetched: propagate the failure instead of carrying on
        # with `nothing` and failing later with an unrelated error.
        rethrow()
    end

    # HTTP.jl exposes the raw response bytes as `body`
    archive = ZipFile.Reader(IOBuffer(r.body))
    dlcount = 0
    try
        for fn in archive.files
            # Match on the entry name rather than on the printed form of the entry
            target = if occursin(BLOCKCSV, fn.name)
                BLOCKCSVGZ
            elseif occursin(CITYCSV, fn.name)
                CITYCSVGZ
            else
                continue
            end
            GZip.open(joinpath(DATADIR, target), "w") do f
                write(f, read(fn))
            end
            dlcount += 1
        end
    finally
        # Release the reader even when extraction fails part-way through
        close(archive)
    end

    if dlcount == 2
        open(MD5, "w") do f
            write(f, md5)
        end
    else
        @error "Problem with download: only $dlcount of 2 files downloaded"
    end
end

"""
    update()

Fetch the current checksum with `getmd5()`, hand it to `dldata` to download the
data, and mark the in-memory data as not loaded. Returns `false`.
"""
function update()
    global dataloaded
    checksum = getmd5()
    dldata(checksum)
    dataloaded = false
    return false
end
function load(; datadir = "",
blockcsvgz = "GeoLite2-City-Blocks-IPv4.csv.gz",
citycsvgz = "GeoLite2-City-Locations-en.csv.gz")
datadir = getdatadir(datadir)
blockfile = joinpath(datadir, blockcsvgz)
locfile = joinpath(datadir, citycsvgz)

function load(datadir = DATADIR)
blockfile = joinpath(datadir, BLOCKCSVGZ)
locfile = joinpath(datadir, CITYCSVGZ)
isfile(blockfile) || throw(ArgumentError("Unable to find blocks file in $(blockfile)"))
isfile(locfile) || throw(ArgumentError("Unable to find locations file in $(locfile)"))

local blocks
local locs
Expand All @@ -94,6 +30,7 @@ function load(datadir = DATADIR)
end
catch
@error "Geolocation data cannot be read. Data directory may be corrupt..."
rethrow()
end

# Clean up unneeded columns and map others to appropriate data structures
Expand All @@ -108,6 +45,5 @@ function load(datadir = DATADIR)

alldata = leftjoin(blocks, locs, on = :geoname_id)

global dataloaded = true
global geodata = sort(alldata, :v4net)
return sort!(alldata, :v4net)
end
21 changes: 5 additions & 16 deletions src/geoip-module.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,28 +24,17 @@ end
# Geolocation functions
########################################
"""
geolocate(ip, noupdate = true)
geolocate(geodata, ip)
Returns geolocation and other information determined by `ip`. If `noupdate` is `true`, no update check is performed and the current data is used for the location lookup.
"""
function geolocate(ip::IPv4; noupdate = true)
if !noupdate
if updaterequired()
update()
end
end

if !(dataloaded)
@info "Geolocation data not in memory. Loading..."
load()
end

function geolocate(geodata, ip::IPv4)
ipnet = IPv4Net(ip, 32)

# only iterate over rows that actually make sense - this filter is
# less expensive than iteration with in().
found = 0
for i in 1:size(geodata, 1) # iterate over rows
for i in axes(geodata, 1) # iterate over rows
if geodata[i, :v4net] > ipnet
found = i - 1
break
Expand All @@ -62,5 +51,5 @@ function geolocate(ip::IPv4; noupdate = true)
return retdict
end

# Convenience method: build an `IPv4` from its string form and delegate,
# forwarding `noupdate` unchanged.
function geolocate(ipstr::AbstractString; noupdate = true)
    return geolocate(IPv4(ipstr); noupdate = noupdate)
end

# Convenience method: build an `IPv4` from an integer and delegate,
# forwarding `noupdate` unchanged.
function geolocate(ipint::Integer; noupdate = true)
    return geolocate(IPv4(ipint); noupdate = noupdate)
end
# Convenience method: build an `IPv4` from its string form and look it up in `geodata`.
function geolocate(geodata, ipstr::AbstractString)
    return geolocate(geodata, IPv4(ipstr))
end

# Convenience method: build an `IPv4` from an integer and look it up in `geodata`.
function geolocate(geodata, ipint::Integer)
    return geolocate(geodata, IPv4(ipint))
end
Loading

0 comments on commit e8f952b

Please sign in to comment.