From 6548236c65204a3f686e5b25ad1dd1919f3b8944 Mon Sep 17 00:00:00 2001 From: thefosk Date: Thu, 26 May 2016 18:29:28 -0700 Subject: [PATCH 1/7] Bot Detection plugin --- kong-0.8.3-0.rockspec | 8 ++- kong/plugins/bot-detection/cache.lua | 27 ++++++++ kong/plugins/bot-detection/handler.lua | 81 ++++++++++++++++++++++ kong/plugins/bot-detection/hooks.lua | 14 ++++ kong/plugins/bot-detection/rules.lua | 18 +++++ kong/plugins/bot-detection/schema.lua | 7 ++ kong/tools/database_cache.lua | 7 +- spec/plugins/bot-detection/access_spec.lua | 62 +++++++++++++++++ spec/plugins/bot-detection/hooks_spec.lua | 70 +++++++++++++++++++ 9 files changed, 292 insertions(+), 2 deletions(-) create mode 100644 kong/plugins/bot-detection/cache.lua create mode 100644 kong/plugins/bot-detection/handler.lua create mode 100644 kong/plugins/bot-detection/hooks.lua create mode 100644 kong/plugins/bot-detection/rules.lua create mode 100644 kong/plugins/bot-detection/schema.lua create mode 100644 spec/plugins/bot-detection/access_spec.lua create mode 100644 spec/plugins/bot-detection/hooks_spec.lua diff --git a/kong-0.8.3-0.rockspec b/kong-0.8.3-0.rockspec index e798dddc4d78..eceffeb424d8 100644 --- a/kong-0.8.3-0.rockspec +++ b/kong-0.8.3-0.rockspec @@ -256,7 +256,13 @@ build = { ["kong.plugins.statsd.handler"] = "kong/plugins/statsd/handler.lua", ["kong.plugins.statsd.schema"] = "kong/plugins/statsd/schema.lua", - ["kong.plugins.statsd.statsd_logger"] = "kong/plugins/statsd/statsd_logger.lua" + ["kong.plugins.statsd.statsd_logger"] = "kong/plugins/statsd/statsd_logger.lua", + + ["kong.plugins.bot-detection.handler"] = "kong/plugins/bot-detection/handler.lua", + ["kong.plugins.bot-detection.schema"] = "kong/plugins/bot-detection/schema.lua", + ["kong.plugins.bot-detection.rules"] = "kong/plugins/bot-detection/rules.lua", + ["kong.plugins.bot-detection.cache"] = "kong/plugins/bot-detection/cache.lua", + ["kong.plugins.bot-detection.hooks"] = "kong/plugins/bot-detection/hooks.lua" }, install = { 
bin = { "bin/kong" } diff --git a/kong/plugins/bot-detection/cache.lua b/kong/plugins/bot-detection/cache.lua new file mode 100644 index 000000000000..325700879088 --- /dev/null +++ b/kong/plugins/bot-detection/cache.lua @@ -0,0 +1,27 @@ +local cache = require "kong.tools.database_cache" + +local _M = {} + +local INDEX = "bot_detection_index" + +function _M.set(key, value) + cache.set(cache.bot_detection_key(key), value) + local index_keys = cache.get(INDEX) + if not index_keys then index_keys = {} end + index_keys[#index_keys+1] = key + cache.set(INDEX, index_keys) +end + +function _M.get(key) + return cache.get(cache.bot_detection_key(key)) +end + +function _M.reset() + local index_keys = cache.get(INDEX) + for _, key in ipairs(index_keys) do + cache.delete(cache.bot_detection_key(key)) + end + cache.delete(INDEX) +end + +return _M \ No newline at end of file diff --git a/kong/plugins/bot-detection/handler.lua b/kong/plugins/bot-detection/handler.lua new file mode 100644 index 000000000000..ce20207cb65f --- /dev/null +++ b/kong/plugins/bot-detection/handler.lua @@ -0,0 +1,81 @@ +local BasePlugin = require "kong.plugins.base_plugin" +local responses = require "kong.tools.responses" +local rules = require "kong.plugins.bot-detection.rules" +local stringy = require "stringy" +local bot_cache = require "kong.plugins.bot-detection.cache" + +local ipairs = ipairs +local get_headers = ngx.req.get_headers +local re_match = ngx.re.match + +local BotDetectionHandler = BasePlugin:extend() + +BotDetectionHandler.PRIORITY = 2500 + +local function get_user_agent() + local user_agent = get_headers()["user-agent"] + if type(user_agent) == "table" then + return nil, "Only one User-Agent header allowed" + end + return user_agent +end + +function BotDetectionHandler:new() + BotDetectionHandler.super.new(self, "bot-detection") +end + +function BotDetectionHandler:access(conf) + BotDetectionHandler.super.access(self) + + local user_agent, err = get_user_agent() + if err then + return 
responses.send_HTTP_BAD_REQUEST(err) + end + + if user_agent then + user_agent = stringy.strip(user_agent) + + -- Cache key, per API + local cache_key = ngx.ctx.api.id..":"..user_agent + + -- The cache already has the user_agents that should be blocked + -- So we avoid matching the regexes everytime + local cached_match = bot_cache.get(cache_key) + if cached_match ~= nil then + if cached_match then + return + else + return responses.send_HTTP_FORBIDDEN() + end + end + + if conf.whitelist then + for _, rule in ipairs(conf.whitelist) do + if re_match(user_agent, rule) then + bot_cache.set(cache_key, true) + return + end + end + end + + if conf.blacklist then + for _, rule in ipairs(conf.blacklist) do + if re_match(user_agent, rule) then + bot_cache.set(cache_key, false) + return responses.send_HTTP_FORBIDDEN() + end + end + end + + for _, rule in ipairs(rules.bots) do + if re_match(user_agent, rule) then + bot_cache.set(cache_key, false) + return responses.send_HTTP_FORBIDDEN() + end + end + + bot_cache.set(cache_key, true) + end +end + +return BotDetectionHandler \ No newline at end of file diff --git a/kong/plugins/bot-detection/hooks.lua b/kong/plugins/bot-detection/hooks.lua new file mode 100644 index 000000000000..cf3e3d85d8bb --- /dev/null +++ b/kong/plugins/bot-detection/hooks.lua @@ -0,0 +1,14 @@ +local events = require "kong.core.events" +local bot_cache = require "kong.plugins.bot-detection.cache" + +local function invalidate(message_t) + if message_t.collection == "plugins" and message_t.entity.name == "bot-detection" then + bot_cache.reset() + end +end + +return { + [events.TYPES.ENTITY_UPDATED] = function(message_t) + invalidate(message_t) + end +} \ No newline at end of file diff --git a/kong/plugins/bot-detection/rules.lua b/kong/plugins/bot-detection/rules.lua new file mode 100644 index 000000000000..1bc9b87885f2 --- /dev/null +++ b/kong/plugins/bot-detection/rules.lua @@ -0,0 +1,18 @@ +-- List taken from 
https://github.com/ua-parser/uap-core/blob/master/regexes.yaml + +return { + bots = { + [[(Pingdom.com_bot_version_)(\d+)\.(\d+)]], -- Pingdom + [[(facebookexternalhit)/(\d+)\.(\d+)]], -- Facebook + [[Google.*/\+/web/snippet]], -- Google Plus + [[(Twitterbot)/(\d+)\.(\d+)]], -- Twitter + [[/((?:Ant-)?Nutch|[A-z]+[Bb]ot|[A-z]+[Ss]pider|Axtaris|fetchurl|Isara|ShopSalad|Tailsweep)[ \-](\d+)(?:\.(\d+)(?:\.(\d+))?)?]], -- Bots Pattern '/name-0.0' + [[(008|Altresium|Argus|BaiduMobaider|BoardReader|DNSGroup|DataparkSearch|EDI|Goodzer|Grub|INGRID|Infohelfer|LinkedInBot|LOOQ|Nutch|PathDefender|Peew|PostPost|Steeler|Twitterbot|VSE|WebCrunch|WebZIP|Y!J-BR[A-Z]|YahooSeeker|envolk|sproose|wminer)/(\d+)(?:\.(\d+)(?:\.(\d+))?)?]], --Bots Pattern 'name/0.0' + [[(MSIE) (\d+)\.(\d+)([a-z]\d?)?;.* MSIECrawler]], --MSIECrawler + [[(Google-HTTP-Java-Client|Apache-HttpClient|http%20client|Python-urllib|HttpMonitor|TLSProber|WinHTTP|JNLP)(?:[ /](\d+)(?:\.(\d+)(?:\.(\d+))?)?)?]], -- Downloader ... + [[(1470\.net crawler|50\.nu|8bo Crawler Bot|Aboundex|Accoona-[A-z]+-Agent|AdsBot-Google(?:-[a-z]+)?|altavista|AppEngine-Google|archive.*?\.org_bot|archiver|Ask Jeeves|[Bb]ai[Dd]u[Ss]pider(?:-[A-Za-z]+)*|bingbot|BingPreview|blitzbot|BlogBridge|BoardReader(?: [A-Za-z]+)*|boitho.com-dc|BotSeer|\b\w*favicon\w*\b|\bYeti(?:-[a-z]+)?|Catchpoint bot|[Cc]harlotte|Checklinks|clumboot|Comodo HTTP\(S\) Crawler|Comodo-Webinspector-Crawler|ConveraCrawler|CRAWL-E|CrawlConvera|Daumoa(?:-feedfetcher)?|Feed Seeker Bot|findlinks|Flamingo_SearchEngine|FollowSite Bot|furlbot|Genieo|gigabot|GomezAgent|gonzo1|(?:[a-zA-Z]+-)?Googlebot(?:-[a-zA-Z]+)?|Google SketchUp|grub-client|gsa-crawler|heritrix|HiddenMarket|holmes|HooWWWer|htdig|ia_archiver|ICC-Crawler|Icarus6j|ichiro(?:/mobile)?|IconSurf|IlTrovatore(?:-Setaccio)?|InfuzApp|Innovazion Crawler|InternetArchive|IP2[a-z]+Bot|jbot\b|KaloogaBot|Kraken|Kurzor|larbin|LEIA|LesnikBot|Linguee Bot|LinkAider|LinkedInBot|Lite 
Bot|Llaut|lycos|Mail\.RU_Bot|masidani_bot|Mediapartners-Google|Microsoft .*? Bot|mogimogi|mozDex|MJ12bot|msnbot(?:-media *)?|msrbot|netresearch|Netvibes|NewsGator[^/]*|^NING|Nutch[^/]*|Nymesis|ObjectsSearch|Orbiter|OOZBOT|PagePeeker|PagesInventory|PaxleFramework|Peeplo Screenshot Bot|PlantyNet_WebRobot|Pompos|Read%20Later|Reaper|RedCarpet|Retreiver|Riddler|Rival IQ|scooter|Scrapy|Scrubby|searchsight|seekbot|semanticdiscovery|Simpy|SimplePie|SEOstats|SimpleRSS|SiteCon|Slurp|snappy|Speedy Spider|Squrl Java|TheUsefulbot|ThumbShotsBot|Thumbshots\.ru|TwitterBot|URL2PNG|Vagabondo|VoilaBot|^vortex|Votay bot|^voyager|WASALive.Bot|Web-sniffer|WebThumb|WeSEE:[A-z]+|WhatWeb|WIRE|WordPress|Wotbox|www\.almaden\.ibm\.com|Xenu(?:.s)? Link Sleuth|Xerka [A-z]+Bot|yacy(?:bot)?|Yahoo[a-z]*Seeker|Yahoo! Slurp|Yandex\w+|YodaoBot(?:-[A-z]+)?|YottaaMonitor|Yowedo|^Zao|^Zao-Crawler|ZeBot_www\.ze\.bz|ZooShot|ZyBorg)(?:[ /]v?(\d+)(?:\.(\d+)(?:\.(\d+))?)?)?]], -- Bots + [[(?:\/[A-Za-z0-9\.]+)? *([A-Za-z0-9 \-_\!\[\]:]*(?:[Aa]rchiver|[Ii]ndexer|[Ss]craper|[Bb]ot|[Ss]pider|[Cc]rawl[a-z]*))/(\d+)(?:\.(\d+)(?:\.(\d+))?)?]], -- Bots General matcher 'name/0.0' + [[(?:\/[A-Za-z0-9\.]+)? 
*([A-Za-z0-9 _\!\[\]:]*(?:[Aa]rchiver|[Ii]ndexer|[Ss]craper|[Bb]ot|[Ss]pider|[Cc]rawl[a-z]*)) (\d+)(?:\.(\d+)(?:\.(\d+))?)?]], -- Bots General matcher 'name 0.0' + [[((?:[A-z0-9]+|[A-z\-]+ ?)?(?: the )?(?:[Ss][Pp][Ii][Dd][Ee][Rr]|[Ss]crape|[A-Za-z0-9-]*(?:[^C][^Uu])[Bb]ot|[Cc][Rr][Aa][Ww][Ll])[A-z0-9]*)(?:(?:[ /]| v)(\d+)(?:\.(\d+)(?:\.(\d+))?)?)?]] -- Bots containing spider|scrape|bot(but not CUBOT)|Crawl + } +} diff --git a/kong/plugins/bot-detection/schema.lua b/kong/plugins/bot-detection/schema.lua new file mode 100644 index 000000000000..48edfed83552 --- /dev/null +++ b/kong/plugins/bot-detection/schema.lua @@ -0,0 +1,7 @@ +return { + no_consumer = true, + fields = { + whitelist = { type = "array" }, + blacklist = { type = "array" } + } +} \ No newline at end of file diff --git a/kong/tools/database_cache.lua b/kong/tools/database_cache.lua index 7e77d238e3da..8a04c183823e 100644 --- a/kong/tools/database_cache.lua +++ b/kong/tools/database_cache.lua @@ -17,7 +17,8 @@ local CACHE_KEYS = { AUTOJOIN_RETRIES = "autojoin_retries", TIMERS = "timers", ALL_APIS_BY_DIC = "ALL_APIS_BY_DIC", - LDAP_CREDENTIAL = "ldap_credentials" + LDAP_CREDENTIAL = "ldap_credentials", + BOT_DETECTION = "bot_detection" } local _M = {} @@ -115,6 +116,10 @@ function _M.ssl_data(api_id) return CACHE_KEYS.SSL..":"..api_id end +function _M.bot_detection_key(key) + return CACHE_KEYS.BOT_DETECTION..":"..key +end + function _M.all_apis_by_dict_key() return CACHE_KEYS.ALL_APIS_BY_DIC end diff --git a/spec/plugins/bot-detection/access_spec.lua b/spec/plugins/bot-detection/access_spec.lua new file mode 100644 index 000000000000..b57e693f13f5 --- /dev/null +++ b/spec/plugins/bot-detection/access_spec.lua @@ -0,0 +1,62 @@ +local spec_helper = require "spec.spec_helpers" +local http_client = require "kong.tools.http_client" + +local PROXY_URL = spec_helper.PROXY_URL +local STUB_GET_URL = PROXY_URL.."/request" + +local HELLOWORLD = "HelloWorld" +local FACEBOOK = "facebookexternalhit/1.1" + 
+describe("Logging Plugins", function() + + setup(function() + spec_helper.prepare_db() + spec_helper.insert_fixtures { + api = { + { request_host = "bot.com", upstream_url = "http://mockbin.com" }, + { request_host = "bot2.com", upstream_url = "http://mockbin.com" }, + { request_host = "bot3.com", upstream_url = "http://mockbin.com" } + }, + plugin = { + { name = "bot-detection", config = {}, __api = 1 }, + { name = "bot-detection", config = {blacklist = HELLOWORLD}, __api = 2 }, + { name = "bot-detection", config = {whitelist = FACEBOOK}, __api = 3 } + } + } + + spec_helper.start_kong() + end) + + teardown(function() + spec_helper.stop_kong() + end) + + it("should not block regular requests", function() + local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot.com" }) + assert.are.equal(200, status) + local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot.com", ["user-agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36" }) + assert.are.equal(200, status) + local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot.com", ["user-agent"] = HELLOWORLD }) + assert.are.equal(200, status) + local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot.com", ["user-agent"] = "curl/7.43.0" }) + assert.are.equal(200, status) + end) + + it("should block bots", function() + local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot.com", ["user-agent"] = "Googlebot/2.1 (+http://www.google.com/bot.html)" }) + assert.are.equal(403, status) + local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot.com", ["user-agent"] = FACEBOOK }) + assert.are.equal(403, status) + end) + + it("should block blacklisted user-agents", function() + local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot3.com", ["user-agent"] = HELLOWORLD }) + assert.are.equal(200, status) + end) + + it("should allow whitelisted user-agents", function() + local _, status 
= http_client.get(STUB_GET_URL, nil, { host = "bot3.com", ["user-agent"] = FACEBOOK }) + assert.are.equal(200, status) + end) + +end) diff --git a/spec/plugins/bot-detection/hooks_spec.lua b/spec/plugins/bot-detection/hooks_spec.lua new file mode 100644 index 000000000000..9047c9eccfb2 --- /dev/null +++ b/spec/plugins/bot-detection/hooks_spec.lua @@ -0,0 +1,70 @@ +local cjson = require "cjson" +local spec_helper = require "spec.spec_helpers" +local http_client = require "kong.tools.http_client" + +local PROXY_URL = spec_helper.PROXY_URL +local STUB_GET_URL = PROXY_URL.."/request" +local API_URL = spec_helper.API_URL + +describe("Hooks", function() + + local plugin_id + + setup(function() + spec_helper.prepare_db() + spec_helper.insert_fixtures { + api = { + { request_host = "bot.com", upstream_url = "http://mockbin.com" } + }, + plugin = { + { name = "bot-detection", config = {}, __api = 1 } + } + } + + spec_helper.start_kong() + + local response, status = http_client.get(API_URL.."/apis/bot.com/plugins/") + assert.equals(200, status) + plugin_id = cjson.decode(response).data[1].id + assert.truthy(plugin_id) + end) + + teardown(function() + spec_helper.stop_kong() + end) + + it("should block a newly entered user-agent", function() + local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot.com", ["user-agent"] = "helloworld" }) + assert.are.equal(200, status) + + -- Update the plugin + local _, status = http_client.patch(API_URL.."/apis/bot.com/plugins/"..plugin_id, {["config.blacklist"] = "helloworld"}) + assert.are.equal(200, status) + + repeat + local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot.com", ["user-agent"] = "helloworld" }) + os.execute("sleep 0.5") + until(status == 403) + + local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot.com", ["user-agent"] = "helloworld" }) + assert.are.equal(403, status) + end) + + it("should allow a newly entered user-agent", function() + local _, status = http_client.get(STUB_GET_URL, 
nil, { host = "bot.com", ["user-agent"] = "facebookexternalhit/1.1" }) + assert.are.equal(403, status) + + -- Update the plugin + local _, status = http_client.patch(API_URL.."/apis/bot.com/plugins/"..plugin_id, {["config.whitelist"] = "facebookexternalhit/1.1"}) + assert.are.equal(200, status) + + repeat + local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot.com", ["user-agent"] = "facebookexternalhit/1.1" }) + os.execute("sleep 0.5") + until(status == 200) + + local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot.com", ["user-agent"] = "facebookexternalhit/1.1" }) + assert.are.equal(200, status) + end) + +end) From f632d124a03581ff9ee1caa25552df8f1bb784cd Mon Sep 17 00:00:00 2001 From: Thijs Schreijer Date: Sun, 17 Jul 2016 01:04:58 -0700 Subject: [PATCH 2/7] document wait_until --- kong/constants.lua | 2 +- spec/helpers.lua | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/kong/constants.lua b/kong/constants.lua index 660ddf0c6d37..063fd01797f1 100644 --- a/kong/constants.lua +++ b/kong/constants.lua @@ -3,7 +3,7 @@ local plugins = { "file-log", "http-log", "key-auth", "hmac-auth", "basic-auth", "ip-restriction", "galileo", "request-transformer", "response-transformer", "request-size-limiting", "rate-limiting", "response-ratelimiting", "syslog", - "loggly", "datadog", "runscope", "ldap-auth", "statsd" + "loggly", "datadog", "runscope", "ldap-auth", "statsd", "bot-detection" } local plugin_map = {} diff --git a/spec/helpers.lua b/spec/helpers.lua index 6174ae6ba586..4e61a66bb064 100644 --- a/spec/helpers.lua +++ b/spec/helpers.lua @@ -55,6 +55,14 @@ local function lookup(t, k) return nil, ok end +--- Waits until a specific condition is met. +-- The check function will repeatedly be called (with a fixed interval), until the condition is met, or the +-- timeout value is exceeded. 
+-- @param f check function that should return `truthy` when the condition has been met +-- @param timeout maximum time to wait after which an error is thrown +-- @return nothing. It returns when the condition is met, or throws an error when it times out. +-- @usage -- wait 10 seconds for a file "myfilename" to appear +-- helpers.wait_until(function() return file_exist("myfilename") end, 10) local function wait_until(f, timeout) if type(f) ~= "function" then error("arg #1 must be a function", 2) From 56bdba38cb5da3e916d52ac742b2f8924aa2a4ea Mon Sep 17 00:00:00 2001 From: Thijs Schreijer Date: Sun, 17 Jul 2016 23:02:42 +0200 Subject: [PATCH 3/7] converted, fixed, and added tests --- kong/plugins/bot-detection/schema.lua | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/kong/plugins/bot-detection/schema.lua b/kong/plugins/bot-detection/schema.lua index 48edfed83552..e082119dcfcd 100644 --- a/kong/plugins/bot-detection/schema.lua +++ b/kong/plugins/bot-detection/schema.lua @@ -1,7 +1,21 @@ +local re_match = ngx.re.match + +local check_regex = function(value) + if value then + for _, rule in ipairs(value) do + local _, err = re_match("just a string to test", rule) + if err then + return false, "value '"..rule.."' is not a valid regex" + end + end + end + return true +end + return { no_consumer = true, fields = { - whitelist = { type = "array" }, - blacklist = { type = "array" } + whitelist = { type = "array", func = check_regex }, + blacklist = { type = "array", func = check_regex }, } } \ No newline at end of file From 8a4a777446d800ac11dba4af3735048a9b8e8c85 Mon Sep 17 00:00:00 2001 From: Thijs Schreijer Date: Sun, 17 Jul 2016 23:06:39 +0200 Subject: [PATCH 4/7] files added --- .../bot-detection/01-access_spec.lua | 150 ++++++++++++++++++ .../bot-detection/02-hooks_spec.lua | 118 ++++++++++++++ spec/03-plugins/bot-detection/03-api_spec.lua | 67 ++++++++ 3 files changed, 335 insertions(+) create mode 100644 
spec/03-plugins/bot-detection/01-access_spec.lua create mode 100644 spec/03-plugins/bot-detection/02-hooks_spec.lua create mode 100644 spec/03-plugins/bot-detection/03-api_spec.lua diff --git a/spec/03-plugins/bot-detection/01-access_spec.lua b/spec/03-plugins/bot-detection/01-access_spec.lua new file mode 100644 index 000000000000..f3be758344f2 --- /dev/null +++ b/spec/03-plugins/bot-detection/01-access_spec.lua @@ -0,0 +1,150 @@ +local helpers = require "spec.helpers" + +local HELLOWORLD = "HelloWorld" -- just a test value +local FACEBOOK = "facebookexternalhit/1.1" -- matches a known bot in `rules.lua` + +describe("Plugin: bot-detection", function() + + local client + + setup(function() + helpers.kill_all() + helpers.prepare_prefix() + + local api1 = assert(helpers.dao.apis:insert { + request_host = "bot.com", + upstream_url = "http://mockbin.com" + }) + local api2 = assert(helpers.dao.apis:insert { + request_host = "bot2.com", + upstream_url = "http://mockbin.com" + }) + local api3 = assert(helpers.dao.apis:insert { + request_host = "bot3.com", + upstream_url = "http://mockbin.com" + }) + + -- plugin 1 + assert(helpers.dao.plugins:insert { + api_id = api1.id, + name = "bot-detection", + config = {}, + }) + -- plugin 2 + assert(helpers.dao.plugins:insert { + api_id = api2.id, + name = "bot-detection", + config = { + blacklist = HELLOWORLD + }, + }) + -- plugin 3 + assert(helpers.dao.plugins:insert { + api_id = api3.id, + name = "bot-detection", + config = { + whitelist = FACEBOOK + }, + }) + + assert(helpers.start_kong()) + end) + + teardown(function() + helpers.stop_kong() + end) + + before_each(function() + client = assert(helpers.proxy_client()) + end) + + after_each(function() + if client then client:close() end + end) + + it("allows regular requests", function() + local res = assert( client:send { + method = "GET", + path = "/request", + headers = { host = "bot.com" } + }) + assert.response(res).has.status(200) + + local res = assert( client:send { + method 
= "GET", + path = "/request", + headers = { + host = "bot.com", + ["user-agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36" + } + }) + assert.response(res).has.status(200) + + local res = assert( client:send { + method = "GET", + path = "/request", + headers = { + host = "bot.com", + ["user-agent"] = HELLOWORLD + } + }) + assert.response(res).has.status(200) + + local res = assert( client:send { + method = "GET", + path = "/request", + headers = { + host = "bot.com", + ["user-agent"] = "curl/7.43.0" + } + }) + assert.response(res).has.status(200) + end) + + it("blocks bots", function() + local res = assert( client:send { + method = "GET", + path = "/request", + headers = { + host = "bot.com", + ["user-agent"] = "Googlebot/2.1 (+http://www.google.com/bot.html)" + }, + }) + assert.response(res).has.status(403) + + local res = assert( client:send { + method = "GET", + path = "/request", + headers = { + host = "bot.com", + ["user-agent"] = FACEBOOK, + } + }) + assert.response(res).has.status(403) + end) + + it("blocks blacklisted user-agents", function() + local res = assert( client:send { + method = "GET", + path = "/request", + headers = { + host = "bot2.com", + ["user-agent"] = HELLOWORLD, + } + }) + assert.response(res).has.status(403) + end) + + it("allows whitelisted user-agents", function() + local res = assert( client:send { + method = "GET", + path = "/request", + headers = { + host = "bot3.com", + ["user-agent"] = FACEBOOK + } + }) + assert.response(res).has.status(200) + end) + +end) diff --git a/spec/03-plugins/bot-detection/02-hooks_spec.lua b/spec/03-plugins/bot-detection/02-hooks_spec.lua new file mode 100644 index 000000000000..a28c65abd6b3 --- /dev/null +++ b/spec/03-plugins/bot-detection/02-hooks_spec.lua @@ -0,0 +1,118 @@ +local helpers = require "spec.helpers" + +describe("Plugin: bot-detection (hooks)", function() + + local plugin, proxy_client, admin_client + + 
setup(function() + helpers.kill_all() + helpers.prepare_prefix() + + local api1 = assert(helpers.dao.apis:insert { + request_host = "bot.com", + upstream_url = "http://mockbin.com" + }) + plugin = assert(helpers.dao.plugins:insert { + api_id = api1.id, + name = "bot-detection", + config = {}, + }) + + assert(helpers.start_kong()) + end) + + teardown(function() + helpers.stop_kong() + end) + + before_each(function() + proxy_client = assert(helpers.proxy_client()) + admin_client = assert(helpers.admin_client()) + end) + + after_each(function() + if proxy_client then proxy_client:close() end + if admin_client then admin_client:close() end + end) + + it("blocks a newly entered user-agent", function() + local res + res = assert( proxy_client:send { + method = "GET", + path = "/request", + headers = { + host = "bot.com", + ["user-agent"] = "helloworld" + } + }) + assert.response(res).has.status(200) + + -- Update the plugin + res = assert(admin_client:send { + method = "PATCH", + path = "/apis/bot.com/plugins/"..plugin.id, + body = { + ["config.blacklist"] = "helloworld" + }, + headers = { + ["content-type"] = "application/json", + } + }) + assert.response(res).has.status(200) + + local check_status = function() + local res = assert(proxy_client:send { + mehod = "GET", + path = "/request", + headers = { + host = "bot.com", + ["user-agent"] = "helloworld", + }, + }) + res:read_body() -- must call read_body to complete call, otherwise next iteration fails + return res.status == 403 + end + helpers.wait_until(check_status, 10) + end) + + it("allows a newly entered user-agent", function() + local res + res = assert(proxy_client:send { + method = "GET", + path = "/request", + headers = { + host = "bot.com", + ["user-agent"] = "facebookexternalhit/1.1" + } + }) + assert.response(res).has.status(403) + + -- Update the plugin + res = assert(admin_client:send { + method = "PATCH", + path = "/apis/bot.com/plugins/"..plugin.id, + body = { + ["config.whitelist"] = 
"facebookexternalhit/1.1" + }, + headers = { + ["content-type"] = "application/json", + } + }) + assert.response(res).has.status(200) + + local check_status = function() + local res = assert(proxy_client:send { + mehod = "GET", + path = "/request", + headers = { + host = "bot.com", + ["user-agent"] = "facebookexternalhit/1.1", + }, + }) + res:read_body() -- must call read_body to complete call, otherwise next iteration fails + return res.status == 200 + end + helpers.wait_until(check_status, 10) + end) + +end) diff --git a/spec/03-plugins/bot-detection/03-api_spec.lua b/spec/03-plugins/bot-detection/03-api_spec.lua new file mode 100644 index 000000000000..93d68b3c6cf8 --- /dev/null +++ b/spec/03-plugins/bot-detection/03-api_spec.lua @@ -0,0 +1,67 @@ +local helpers = require "spec.helpers" + +local BAD_REGEX = [[(https?:\/\/.*]] -- illegal regex, errors out + +describe("Plugin: bot-detection", function() + + local client + + setup(function() + helpers.kill_all() + helpers.prepare_prefix() + + assert(helpers.dao.apis:insert { + request_host = "bot1.com", + upstream_url = "http://mockbin.com" + }) + assert(helpers.dao.apis:insert { + request_host = "bot2.com", + upstream_url = "http://mockbin.com" + }) + + assert(helpers.start_kong()) + end) + + teardown(function() + helpers.stop_kong() + end) + + before_each(function() + client = assert(helpers.admin_client()) + end) + + after_each(function() + if client then client:close() end + end) + + it("fails when whitelisting a bad regex", function() + local res = assert(client:send { + method = "POST", + path = "/apis/bot1.com/plugins/", + body = { + name = "bot-detection", + ["config.whitelist"] = { BAD_REGEX }, + }, + headers = { + ["content-type"] = "application/json", + } + }) + assert.response(res).has.status(400) + end) + + it("fails when blacklisting a bad regex", function() + local res = assert(client:send { + method = "POST", + path = "/apis/bot2.com/plugins/", + body = { + name = "bot-detection", + 
["config.whitelist"] = { BAD_REGEX }, + }, + headers = { + ["content-type"] = "application/json", + } + }) + assert.response(res).has.status(400) + end) + +end) From 48f44e74bd0b123636f6c9b06ea1097135c1f2b1 Mon Sep 17 00:00:00 2001 From: Thijs Schreijer Date: Wed, 20 Jul 2016 23:45:11 +0200 Subject: [PATCH 5/7] updated rockspec to include new plugin --- kong-0.8.3-0.rockspec | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kong-0.8.3-0.rockspec b/kong-0.8.3-0.rockspec index eb2b5d51f277..b849eede0765 100644 --- a/kong-0.8.3-0.rockspec +++ b/kong-0.8.3-0.rockspec @@ -256,6 +256,12 @@ build = { ["kong.plugins.statsd.handler"] = "kong/plugins/statsd/handler.lua", ["kong.plugins.statsd.schema"] = "kong/plugins/statsd/schema.lua", - ["kong.plugins.statsd.statsd_logger"] = "kong/plugins/statsd/statsd_logger.lua" + ["kong.plugins.statsd.statsd_logger"] = "kong/plugins/statsd/statsd_logger.lua", + + ["kong.plugins.bot-detection.handler"] = "kong/plugins/bot-detection/handler.lua", + ["kong.plugins.bot-detection.schema"] = "kong/plugins/bot-detection/schema.lua", + ["kong.plugins.bot-detection.rules"] = "kong/plugins/bot-detection/rules.lua", + ["kong.plugins.bot-detection.cache"] = "kong/plugins/bot-detection/cache.lua", + ["kong.plugins.bot-detection.hooks"] = "kong/plugins/bot-detection/hooks.lua", } } From 4722ca0b274cf5322dc208682b20b5e441434ac6 Mon Sep 17 00:00:00 2001 From: Thijs Schreijer Date: Wed, 20 Jul 2016 23:53:09 +0200 Subject: [PATCH 6/7] removed old files --- spec/plugins/bot-detection/access_spec.lua | 62 ------------------- spec/plugins/bot-detection/hooks_spec.lua | 70 ---------------------- 2 files changed, 132 deletions(-) delete mode 100644 spec/plugins/bot-detection/access_spec.lua delete mode 100644 spec/plugins/bot-detection/hooks_spec.lua diff --git a/spec/plugins/bot-detection/access_spec.lua b/spec/plugins/bot-detection/access_spec.lua deleted file mode 100644 index b57e693f13f5..000000000000 --- 
a/spec/plugins/bot-detection/access_spec.lua +++ /dev/null @@ -1,62 +0,0 @@ -local spec_helper = require "spec.spec_helpers" -local http_client = require "kong.tools.http_client" - -local PROXY_URL = spec_helper.PROXY_URL -local STUB_GET_URL = PROXY_URL.."/request" - -local HELLOWORLD = "HelloWorld" -local FACEBOOK = "facebookexternalhit/1.1" - -describe("Logging Plugins", function() - - setup(function() - spec_helper.prepare_db() - spec_helper.insert_fixtures { - api = { - { request_host = "bot.com", upstream_url = "http://mockbin.com" }, - { request_host = "bot2.com", upstream_url = "http://mockbin.com" }, - { request_host = "bot3.com", upstream_url = "http://mockbin.com" } - }, - plugin = { - { name = "bot-detection", config = {}, __api = 1 }, - { name = "bot-detection", config = {blacklist = HELLOWORLD}, __api = 2 }, - { name = "bot-detection", config = {whitelist = FACEBOOK}, __api = 3 } - } - } - - spec_helper.start_kong() - end) - - teardown(function() - spec_helper.stop_kong() - end) - - it("should not block regular requests", function() - local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot.com" }) - assert.are.equal(200, status) - local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot.com", ["user-agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36" }) - assert.are.equal(200, status) - local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot.com", ["user-agent"] = HELLOWORLD }) - assert.are.equal(200, status) - local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot.com", ["user-agent"] = "curl/7.43.0" }) - assert.are.equal(200, status) - end) - - it("should block bots", function() - local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot.com", ["user-agent"] = "Googlebot/2.1 (+http://www.google.com/bot.html)" }) - assert.are.equal(403, status) - local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot.com", 
["user-agent"] = FACEBOOK }) - assert.are.equal(403, status) - end) - - it("should block blacklisted user-agents", function() - local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot3.com", ["user-agent"] = HELLOWORLD }) - assert.are.equal(200, status) - end) - - it("should allow whitelisted user-agents", function() - local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot3.com", ["user-agent"] = FACEBOOK }) - assert.are.equal(200, status) - end) - -end) diff --git a/spec/plugins/bot-detection/hooks_spec.lua b/spec/plugins/bot-detection/hooks_spec.lua deleted file mode 100644 index 9047c9eccfb2..000000000000 --- a/spec/plugins/bot-detection/hooks_spec.lua +++ /dev/null @@ -1,70 +0,0 @@ -local cjson = require "cjson" -local spec_helper = require "spec.spec_helpers" -local http_client = require "kong.tools.http_client" - -local PROXY_URL = spec_helper.PROXY_URL -local STUB_GET_URL = PROXY_URL.."/request" -local API_URL = spec_helper.API_URL - -describe("Hooks", function() - - local plugin_id - - setup(function() - spec_helper.prepare_db() - spec_helper.insert_fixtures { - api = { - { request_host = "bot.com", upstream_url = "http://mockbin.com" } - }, - plugin = { - { name = "bot-detection", config = {}, __api = 1 } - } - } - - spec_helper.start_kong() - - local response, status = http_client.get(API_URL.."/apis/bot.com/plugins/") - assert.equals(200, status) - plugin_id = cjson.decode(response).data[1].id - assert.truthy(plugin_id) - end) - - teardown(function() - spec_helper.stop_kong() - end) - - it("should block a newly entered user-agent", function() - local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot.com", ["user-agent"] = "helloworld" }) - assert.are.equal(200, status) - - -- Update the plugin - local _, status = http_client.patch(API_URL.."/apis/bot.com/plugins/"..plugin_id, {["config.blacklist"] = "helloworld"}) - assert.are.equal(200, status) - - repeat - local _, status = http_client.get(STUB_GET_URL, nil, { host = 
"bot.com", ["user-agent"] = "helloworld" }) - os.execute("sleep 0.5") - until(status == 403) - - local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot.com", ["user-agent"] = "helloworld" }) - assert.are.equal(403, status) - end) - - it("should allow a newly entered user-agent", function() - local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot.com", ["user-agent"] = "facebookexternalhit/1.1" }) - assert.are.equal(403, status) - - -- Update the plugin - local _, status = http_client.patch(API_URL.."/apis/bot.com/plugins/"..plugin_id, {["config.whitelist"] = "facebookexternalhit/1.1"}) - assert.are.equal(200, status) - - repeat - local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot.com", ["user-agent"] = "facebookexternalhit/1.1" }) - os.execute("sleep 0.5") - until(status == 200) - - local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot.com", ["user-agent"] = "facebookexternalhit/1.1" }) - assert.are.equal(200, status) - end) - -end) From 852cab788c36c172ce15ce5b0e5c41b8706197be Mon Sep 17 00:00:00 2001 From: Thijs Schreijer Date: Tue, 26 Jul 2016 08:45:54 +0200 Subject: [PATCH 7/7] fixed review comments --- kong/plugins/bot-detection/handler.lua | 14 ++++++-------- spec/03-plugins/bot-detection/01-access_spec.lua | 3 +-- spec/03-plugins/bot-detection/02-hooks_spec.lua | 1 - spec/03-plugins/bot-detection/03-api_spec.lua | 3 +-- 4 files changed, 8 insertions(+), 13 deletions(-) diff --git a/kong/plugins/bot-detection/handler.lua b/kong/plugins/bot-detection/handler.lua index ce20207cb65f..aec8e9599e97 100644 --- a/kong/plugins/bot-detection/handler.lua +++ b/kong/plugins/bot-detection/handler.lua @@ -1,8 +1,8 @@ local BasePlugin = require "kong.plugins.base_plugin" local responses = require "kong.tools.responses" local rules = require "kong.plugins.bot-detection.rules" -local stringy = require "stringy" local bot_cache = require "kong.plugins.bot-detection.cache" +local strip = require("kong.tools.utils").strip 
local ipairs = ipairs local get_headers = ngx.req.get_headers @@ -33,7 +33,7 @@ function BotDetectionHandler:access(conf) end if user_agent then - user_agent = stringy.strip(user_agent) + user_agent = strip(user_agent) -- Cache key, per API local cache_key = ngx.ctx.api.id..":"..user_agent @@ -41,12 +41,10 @@ function BotDetectionHandler:access(conf) -- The cache already has the user_agents that should be blocked -- So we avoid matching the regexes everytime local cached_match = bot_cache.get(cache_key) - if cached_match ~= nil then - if cached_match then - return - else - return responses.send_HTTP_FORBIDDEN() - end + if cached_match then + return + elseif cached_match == false then + return responses.send_HTTP_FORBIDDEN() end if conf.whitelist then diff --git a/spec/03-plugins/bot-detection/01-access_spec.lua b/spec/03-plugins/bot-detection/01-access_spec.lua index f3be758344f2..f6480dd0f414 100644 --- a/spec/03-plugins/bot-detection/01-access_spec.lua +++ b/spec/03-plugins/bot-detection/01-access_spec.lua @@ -3,12 +3,11 @@ local helpers = require "spec.helpers" local HELLOWORLD = "HelloWorld" -- just a test value local FACEBOOK = "facebookexternalhit/1.1" -- matches a known bot in `rules.lua` -describe("Plugin: bot-detection", function() +describe("Plugin: bot-detection (access)", function() local client setup(function() - helpers.kill_all() helpers.prepare_prefix() local api1 = assert(helpers.dao.apis:insert { diff --git a/spec/03-plugins/bot-detection/02-hooks_spec.lua b/spec/03-plugins/bot-detection/02-hooks_spec.lua index a28c65abd6b3..f80304b69f8c 100644 --- a/spec/03-plugins/bot-detection/02-hooks_spec.lua +++ b/spec/03-plugins/bot-detection/02-hooks_spec.lua @@ -5,7 +5,6 @@ describe("Plugin: bot-detection (hooks)", function() local plugin, proxy_client, admin_client setup(function() - helpers.kill_all() helpers.prepare_prefix() local api1 = assert(helpers.dao.apis:insert { diff --git a/spec/03-plugins/bot-detection/03-api_spec.lua 
b/spec/03-plugins/bot-detection/03-api_spec.lua index 93d68b3c6cf8..428742a5ab49 100644 --- a/spec/03-plugins/bot-detection/03-api_spec.lua +++ b/spec/03-plugins/bot-detection/03-api_spec.lua @@ -2,12 +2,11 @@ local helpers = require "spec.helpers" local BAD_REGEX = [[(https?:\/\/.*]] -- illegal regex, errors out -describe("Plugin: bot-detection", function() +describe("Plugin: bot-detection (API)", function() local client setup(function() - helpers.kill_all() helpers.prepare_prefix() assert(helpers.dao.apis:insert {