Skip to content

Commit

Permalink
Merge pull request #11 from Ostorlab/feature/ip_range_vistor
Browse files Browse the repository at this point in the history
A draft of ip range visitor to minimize requests to geo range.
  • Loading branch information
najibraihan authored Aug 29, 2022
2 parents e27e67a + 0fbda7b commit 545d699
Show file tree
Hide file tree
Showing 8 changed files with 180 additions and 59 deletions.
11 changes: 5 additions & 6 deletions agent/ip2geo.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,31 @@
"""Agent responsible for finding geolocation details of ip[v4-v6] addresses."""
from typing import Any, Dict
import logging
import ipaddress
import logging
from typing import Any, Dict

from agent import request_sender


GEO_LOCATION_FIELDS = [
'status', 'message', 'query', 'continent', 'continentCode', 'country',
'countryCode', 'region', 'regionName', 'city', 'zip', 'lat', 'lon', 'timezone',
'isp', 'org', 'asname', 'mobile', 'proxy', 'hosting'
]
GEO_LOCATION_API_ENDPOINT = 'http://ip-api.com/json/'

logger = logging.getLogger(__name__)


class Ip2GeoLocator:
"""Class responsible for detecting geolocation details of IP address."""
def __init__(self, endpoint: str) -> None:

def __init__(self, endpoint: str = GEO_LOCATION_API_ENDPOINT) -> None:
"""Instantiate the necessary attributes of the object
Args:
endpoint: to which the request will be sent.
"""
self._endpoint = endpoint


def _locate_ip(self, ip_address) -> Dict[str, Any]:
"""Get geolocation details of an IP address"""

Expand All @@ -34,7 +34,6 @@ def _locate_ip(self, ip_address) -> Dict[str, Any]:
response = request_sender.make_request('GET', path)
return response


def _parse_response(self, response: Dict[str, Any]) -> Dict[str, Any]:
"""Parse output of the geolocation request to the expected format of Ostorlab's geo-location proto message."""
if response.get('status') == 'success':
Expand Down
52 changes: 29 additions & 23 deletions agent/ip2geo_agent.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
"""Agent implementation for Ip2Geo : Detecting geolocation details of an IP address."""
import logging
import ipaddress

from rich import logging as rich_logging
from ostorlab.agent import agent
from ostorlab.agent import definitions as agent_definitions
from ostorlab.agent import message as m
from ostorlab.agent.mixins import agent_persist_mixin
from ostorlab.agent import definitions as agent_definitions
from ostorlab.runtimes import definitions as runtime_definitions

from agent import ip2geo

from rich import logging as rich_logging
from agent.utils.ip_range_visitor import IpRangeVisitor

logging.basicConfig(
format='%(message)s',
Expand All @@ -20,20 +19,20 @@
)
logger = logging.getLogger(__name__)


GEO_LOCATION_API_ENDPOINT = 'http://ip-api.com/json/'
STORAGE_NAME = 'agent_ipgeo_storage'
STORAGE_NAME = b'agent_ipgeo_storage'


class Ip2GeoAgent(agent.Agent, agent_persist_mixin.AgentPersistMixin):
"""Ip2Geo agent implementation."""

def __init__(self,
agent_definition: agent_definitions.AgentDefinition,
agent_settings: runtime_definitions.AgentSettings) -> None:
agent_definition: agent_definitions.AgentDefinition,
agent_settings: runtime_definitions.AgentSettings,
) -> None:

agent.Agent.__init__(self, agent_definition, agent_settings)
agent_persist_mixin.AgentPersistMixin.__init__(self, agent_settings)

self.ip_range_visitor = IpRangeVisitor()

def process(self, message: m.Message) -> None:
"""Process messages of type v3.asset.ip.v[4/6] and emits back the geolocation details.
Expand All @@ -42,19 +41,26 @@ def process(self, message: m.Message) -> None:
message: The received message.
"""
logger.info('processing message of selector : %s', message.selector)
ip_address = message.data['host']
if self.set_add(STORAGE_NAME, ip_address) is True:
ip_geo_locator = ip2geo.Ip2GeoLocator(GEO_LOCATION_API_ENDPOINT)
geolocation_details = ip_geo_locator.get_geolocation_details(ip_address)

if 'errors' in geolocation_details:
logger.info('skipping %s : %s', ip_address, geolocation_details['errors'])
else:
out_selector = f'{message.selector}.geolocation'
self.emit(selector=out_selector, data=geolocation_details)
else:
logger.info('%s has already been processed. skipping for now.', ip_address)

out_selector = f'{message.selector}.geolocation'
ip = message.data['host']
mask = message.data.get('mask', '32')
network = ipaddress.ip_network(f'{ip}/{mask}')

# classify ip range based on geolocation
for result in self.ip_range_visitor.dichotomy_ip_network_visit(
network, self.ip_range_visitor.is_first_last_ip_same_geolocation):

geolocation_details = result[0]
geolocation_network = result[2]
for ip in geolocation_network:
# check if ip not tested before
if self.add_ip_network(STORAGE_NAME, ipaddress.ip_network(ip)) is True:
# create geolocation details dict for each ip and emit it
geolocation_details['host'] = str(ip)
self.emit(selector=out_selector, data=geolocation_details)
else:
logger.info('%s has already been processed. skipping for now.', ip)


if __name__ == '__main__':
Expand Down
2 changes: 1 addition & 1 deletion agent/request_sender.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def make_request(method: str, path: str, data: Optional[Dict[str, str]] = None):
AuthenticationError if request is not successful.
"""
logger.info('request %s %s %s', method, path, data)
response = requests.request(method, path, data=json.dumps(data))
response = requests.request(method, path, data=json.dumps(data), timeout=10)
if response.status_code not in [200, 201, 204]:
logger.error('received %i %s', response.status_code, response.content)
raise AuthenticationError(response.reason)
Expand Down
Empty file added agent/utils/__init__.py
Empty file.
55 changes: 55 additions & 0 deletions agent/utils/ip_range_visitor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
""" Module Responsible for sending ip range geolocation"""
import ipaddress
from typing import Callable, Tuple, Any

from agent import ip2geo
from agent.ip2geo import logger


class Error(Exception):
    """Base class for the exceptions defined in this module."""


class IPGeoError(Error):
    """Error signalling that the geolocation of an IP address could not be obtained."""


class IpRangeVisitor:
"""Ip range visitor implementation."""

def dichotomy_ip_network_visit(self, ip_network: ipaddress.IPv4Network | ipaddress.IPv6Network,
accept: Callable[
[ipaddress.IPv4Network | ipaddress.IPv6Network], Tuple[bool, Any]]) -> Any:
"""get ip ranges based on geolocation"""

should_continue, result = accept(ip_network)
yield result
if should_continue is False:
return

subnets = list(ip_network.subnets())

if len(subnets) == 1:
# reached the last block.
return

for subnet in subnets:
yield from self.dichotomy_ip_network_visit(subnet, accept)

@staticmethod
def is_first_last_ip_same_geolocation(ip_network: ipaddress.IPv4Network | ipaddress.IPv6Network
) -> Tuple[bool, Any]:
"""Compare geolocation of network extremes"""

first, last = ip_network[0], ip_network[-1]
locator = ip2geo.Ip2GeoLocator()
first_location = locator.get_geolocation_details(str(first))
last_location = locator.get_geolocation_details(str(last))
try:
if first_location['latitude'] == last_location['latitude'] and \
first_location['longitude'] == last_location['longitude']:
return False, (first_location, last_location, ip_network)
else:
return True, (first_location, last_location, ip_network)
except IPGeoError as e:
logger.warning('Error happens in is_first_last_ip_same_geolocation process: %s', str(e))
36 changes: 7 additions & 29 deletions tests/agent_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,39 +23,17 @@ def testAgentIp2Geo_whenLocatesIpAddress_emitsBackFindings(ip2geo_agent, agent_m
status_code=200
)

msg = message.Message.from_data(selector='v3.asset.ip.v4', data={'host': '8.8.8.8', 'version':4})
msg = message.Message.from_data(selector='v3.asset.ip.v4', data={'host': '8.8.8.8', 'version': 4})
ip2geo_agent.process(msg)

assert len(agent_mock) == 1
assert agent_mock[0].selector =='v3.asset.ip.v4.geolocation'
assert agent_mock[0].data['longitude'] == pytest.approx(-73.5848)
assert agent_mock[0].selector == 'v3.asset.ip.v4.geolocation'
assert agent_mock[0].data['longitude'] == pytest.approx(-73.5848)
assert agent_mock[0].data['country_code'] == 'CA'


def testAgentIp2Geo_whenIpAddressIsInvalid_shouldSkip(ip2geo_agent,
agent_mock,
agent_persist_mock,
requests_mock):
"""Unittest for Ip2Geo Agent, when it receives an invalid ip address, the agent should skip it."""
del agent_persist_mock
matcher = re.compile('http://ip-api.com/json/')
requests_mock.get(
matcher,
json={
'query': '8.8.',
'status': 'fail',
'message': 'query is in wrong format.'
},
status_code=200
)

msg = message.Message.from_data(selector='v3.asset.ip.v4', data={'host': '8.8.', 'version':4})
ip2geo_agent.process(msg)

assert len(agent_mock) == 0


def testAgentIp2Geo_whenIpAddressHasAlreadyBeenProcessed_shouldSkip(ip2geo_agent,
mocker,
agent_mock,
agent_persist_mock,
requests_mock):
Expand All @@ -79,8 +57,8 @@ def testAgentIp2Geo_whenIpAddressHasAlreadyBeenProcessed_shouldSkip(ip2geo_agent
status_code=200
)

msg = message.Message.from_data(selector='v3.asset.ip.v4', data={'host': '8.8.', 'version':4})
ip2geo_agent.process(msg)
msg = message.Message.from_data(selector='v3.asset.ip.v4', data={'host': '8.8.8.0', 'mask': '24', 'version': 4})
ip2geo_agent.process(msg)

assert len(agent_mock) == 1
assert len(agent_mock) == 256
assert mocker is not None
Empty file added tests/utils/__init__.py
Empty file.
83 changes: 83 additions & 0 deletions tests/utils/test_ip_range_visitor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
"""Unittests for the IP range visitor."""

import ipaddress

from agent.utils.ip_range_visitor import IpRangeVisitor

ip_range_visitor = IpRangeVisitor()


def testVistor_withMatchingIPAndMaskRecieved_returnsLocations():
    """Visiting 8.8.8.0/22 yields the (first, last) locations of every network the dichotomy explores."""

    def at(host, place):
        """Build the geolocation details expected for ``host`` when it is located in ``place``."""
        return {'host': host, 'version': 4, 'continent': 'North America', 'continent_code': 'NA',
                'country': 'United States', 'country_code': 'US', **place}

    visited = ip_range_visitor.dichotomy_ip_network_visit(
        ipaddress.ip_network('8.8.8.0/22'), ip_range_visitor.is_first_last_ip_same_geolocation)
    # Keep only the two locations; the third item of each result is the network itself.
    results = [result[0:2] for result in visited]

    mountain_view = {'region': 'CA', 'region_name': 'California', 'city': 'Mountain View', 'zip': '94043',
                     'latitude': 37.4223, 'longitude': -122.085, 'timezone': 'America/Los_Angeles'}
    fort_lauderdale = {'region': 'FL', 'region_name': 'Florida', 'city': 'Fort Lauderdale', 'zip': '33309',
                       'latitude': 26.2018, 'longitude': -80.1699, 'timezone': 'America/New_York'}
    washington = {'region': 'DC', 'region_name': 'District of Columbia', 'city': 'Washington', 'zip': '20068',
                  'latitude': 38.9072, 'longitude': -77.0369, 'timezone': 'America/New_York'}
    monroe = {'region': 'LA', 'region_name': 'Louisiana', 'city': 'Monroe', 'zip': '71203',
              'latitude': 32.5896, 'longitude': -92.0669, 'timezone': 'America/Chicago'}

    expected = [
        (at('8.8.8.0', mountain_view), at('8.8.11.255', monroe)),  # 8.8.8.0/22
        (at('8.8.8.0', mountain_view), at('8.8.9.255', fort_lauderdale)),  # 8.8.8.0/23
        (at('8.8.8.0', mountain_view), at('8.8.8.255', mountain_view)),  # 8.8.8.0/24
        (at('8.8.9.0', fort_lauderdale), at('8.8.9.255', fort_lauderdale)),  # 8.8.9.0/24
        (at('8.8.10.0', washington), at('8.8.11.255', monroe)),  # 8.8.10.0/23
        (at('8.8.10.0', washington), at('8.8.10.255', washington)),  # 8.8.10.0/24
        (at('8.8.11.0', monroe), at('8.8.11.255', monroe)),  # 8.8.11.0/24
    ]
    assert results == expected


def testVistor_withMaskNotRecieved_returnsIfFirstIPGeolocationEqualLastIPGeolocation():
    """A /32 network holds a single address: it is visited exactly once, with equal first and last locations."""
    # NOTE(review): no HTTP mocking is set up in this module, so this presumably queries the
    # real geolocation endpoint; consider mocking the requests to keep the test hermetic.
    results = list(ip_range_visitor.dichotomy_ip_network_visit(
        ipaddress.ip_network('8.8.8.0/32'), ip_range_visitor.is_first_last_ip_same_geolocation))

    # Materialising the results first prevents the test from passing vacuously when the
    # visitor yields nothing at all.
    assert len(results) == 1
    first_location, last_location = results[0][0:2]
    assert first_location == last_location

0 comments on commit 545d699

Please sign in to comment.