Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Log session ID instead of client IP during captcha flow #17956

Open
wants to merge 6 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/17956.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Don't log client IP during captcha verification (privacy issue fix). Contibuted by Louis Gregg.
8 changes: 7 additions & 1 deletion synapse/handlers/ui_auth/checkers.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,15 @@ async def check_auth(self, authdict: dict, clientip: str) -> Any:
raise LoginError(
400, "Captcha response is required", errcode=Codes.CAPTCHA_NEEDED
)
try:
_session = authdict["session"]
except KeyError:
# Client tried to provide captcha but didn't give the session ID:
# bad request.
raise LoginError(400, "Missing UIA session", Codes.MISSING_PARAM)

logger.info(
"Submitting recaptcha response %s with remoteip %s", user_response, clientip
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can certainly see that logging the client's IP in this line is redundant, because that should already be getting logged for the request's log line that Synapse logs for every request.

I could get behind just removing the client IP from this log line and even downgrading it to debug instead of info, since it seems a bit needless to just log the input from the client.
I don't personally see much benefit in logging the session for this flow on its own, so we could leave that off too.

However, I feel like I should mention the Synapse logs still contain more PII — User IDs, IP addresses on the request lines, etc — that I don't feel this PR really makes much difference on that particular front?

Copy link
Author

@louisgregg louisgregg Nov 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could get behind just removing the client IP from this log line and even downgrading it to debug instead of info, since it seems a bit needless to just log the input from the client.
I don't personally see much benefit in logging the session for this flow on its own, so we could leave that off too.

OK sure. I will modify my PR to avoid logging the session ID and change info to debug.

However, I feel like I should mention the Synapse logs still contain more PII — User IDs, IP addresses on the request lines, etc — that I don't feel this PR really makes much difference on that particular front?

Point taken about PII logging in other locations. To focus on client IPs specifically, I searched the codebase with the following regex and couldn't find locations where IPs are logged:
logger\.\w*\(\n*.*(?:ip|IP|Ip)
Are you aware of specific locations where the client IP ends up in the logs, either explicitly or as part of a larger data structure?

My overall goal in this PR is to minimize or eliminate logging of client IPs across the codebase.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have made the changes you suggested. Can you approve another run of the test suite please? Thanks

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The failing test, "Register with a recaptcha" in sytest, should now pass. Sytest, Complement and trial test suites are passing on my machine. Please approve another run of the test pipeline when you get a chance. Thanks!

"Submitting recaptcha response %s for session %s", user_response, _session
)

# TODO: get this from the homeserver rather than creating a new one for
Expand Down
126 changes: 126 additions & 0 deletions tests/handlers/test_checkers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
from typing import Dict
from unittest.mock import AsyncMock, Mock

from twisted.internet.defer import ensureDeferred
from twisted.test.proto_helpers import MemoryReactor
from twisted.trial import unittest
from twisted.web.client import PartialDownloadError

from synapse.api.errors import Codes, LoginError
from synapse.handlers.ui_auth.checkers import RecaptchaAuthChecker
from synapse.server import HomeServer
from synapse.util import json_encoder


class TestRecaptchaAuthChecker(unittest.TestCase):
def setUp(self: "TestRecaptchaAuthChecker") -> None:
self.hs = Mock(spec=HomeServer)
self.hs.config = Mock()
self.hs.config.captcha.recaptcha_private_key = "test_private_key"
self.hs.config.captcha.recaptcha_siteverify_api = (
"https://www.recaptcha.net/recaptcha/api/siteverify"
)
self.http_client = AsyncMock()
self.hs.get_proxied_http_client.return_value = self.http_client
self.recaptcha_checker = RecaptchaAuthChecker(self.hs)
self.reactor = MemoryReactor()

def test_is_enabled(self: "TestRecaptchaAuthChecker") -> None:
"""Test that the checker is enabled when a private key is configured."""
self.assertTrue(self.recaptcha_checker.is_enabled())

def test_is_disabled(self: "TestRecaptchaAuthChecker") -> None:
"""Test that the checker is disabled when no private key is configured."""
self.hs.config.captcha.recaptcha_private_key = None
recaptcha_checker = RecaptchaAuthChecker(self.hs)
self.assertFalse(recaptcha_checker.is_enabled())

def test_check_auth_success(self: "TestRecaptchaAuthChecker") -> None:
"""Test that authentication succeeds with a valid recaptcha response."""
_expected_response = {"success": True}
self.http_client.post_urlencoded_get_json = AsyncMock(
return_value=_expected_response
)

authdict = {"response": "captcha_solution", "session": "fake_session_id"}
clientip = "127.0.0.1"

d = ensureDeferred(self.recaptcha_checker.check_auth(authdict, clientip))

result = self.successResultOf(d)
self.assertTrue(result)

self.http_client.post_urlencoded_get_json.assert_called_once_with(
self.hs.config.captcha.recaptcha_siteverify_api,
args={
"secret": "test_private_key",
"response": "captcha_solution",
"remoteip": clientip,
},
)

def test_check_auth_failure(self: "TestRecaptchaAuthChecker") -> None:
"""Test that authentication fails with an invalid recaptcha response."""
_expected_response = {"success": False}
self.http_client.post_urlencoded_get_json = AsyncMock(
return_value=_expected_response
)

authdict = {"response": "invalid_response", "session": "fake_session_id"}
clientip = "127.0.0.1"

d = ensureDeferred(self.recaptcha_checker.check_auth(authdict, clientip))
f = self.failureResultOf(d, LoginError)
self.assertEqual(f.value.errcode, Codes.UNAUTHORIZED)

def test_check_missing_session(self: "TestRecaptchaAuthChecker") -> None:
"""Test that authentication fails when the session ID is missing."""

authdict = {"response": "invalid_response"}
clientip = "127.0.0.1"

d = ensureDeferred(self.recaptcha_checker.check_auth(authdict, clientip))
f = self.failureResultOf(d, LoginError)
self.assertEqual(f.value.errcode, Codes.MISSING_PARAM)

def test_check_auth_missing_response(self: "TestRecaptchaAuthChecker") -> None:
"""Test that authentication fails when the user captcha response is missing."""
authdict: Dict[str, str] = {}
clientip = "127.0.0.1"

d = ensureDeferred(self.recaptcha_checker.check_auth(authdict, clientip))
f = self.failureResultOf(d, LoginError)
self.assertEqual(f.value.errcode, Codes.CAPTCHA_NEEDED)

def test_check_auth_exception(self: "TestRecaptchaAuthChecker") -> None:
"""Test that authentication succeeds when a PartialDownloadError occurs during verification."""

partial_download_error = PartialDownloadError(500)
partial_download_error.response = json_encoder.encode({"success": True}).encode(
"utf-8"
)
self.http_client.post_urlencoded_get_json.side_effect = partial_download_error

authdict = {"response": "captcha_solution", "session": "fake_session_id"}
clientip = "127.0.0.1"

d = ensureDeferred(self.recaptcha_checker.check_auth(authdict, clientip))
result = self.successResultOf(d)
self.assertTrue(result)

def test_check_auth_logging(self: "TestRecaptchaAuthChecker") -> None:
"""Test that the client IP is not logged during authentication."""
with self.assertLogs(level="INFO") as cm:
authdict = {"response": "captcha_solution", "session": "fake_session_id"}
clientip = "127.0.0.1"

_expected_response = {"success": True}
self.http_client.post_urlencoded_get_json = AsyncMock(
return_value=_expected_response
)

d = ensureDeferred(self.recaptcha_checker.check_auth(authdict, clientip))
self.successResultOf(d)

logs = "\n".join(cm.output)
self.assertNotIn(clientip, logs)