Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhance Audit Log Data Beyond Privacy Requests #3331

Merged
merged 18 commits into from
May 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions .fides/db_dataset.yml
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,41 @@ dataset:
data_categories:
- system.operations
data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified
- name: audit_log_resource
data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified
fields:
- name: created_at
data_categories:
- system.operations
data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified
- name: extra_data
data_categories:
- system.operations
data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified
- name: fides_keys
data_categories:
- system.operations
data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified
- name: id
data_categories:
- system.operations
data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified
- name: request_path
data_categories:
- system.operations
data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified
- name: request_type
data_categories:
- system.operations
data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified
- name: updated_at
data_categories:
- system.operations
data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified
- name: user_id
data_categories:
- user.unique_id
data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified
- name: client
data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified
fields:
Expand Down
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ The types of changes are:

## [Unreleased](https://github.com/ethyca/fides/compare/2.14.0...main)

### Added

- Added optional logging of resource changes on the server [#3331](https://github.com/ethyca/fides/pull/3331)

## [2.14.0](https://github.com/ethyca/fides/compare/2.13.0...2.14.0)

### Added
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
"""add audit_log_resource table

Revision ID: 587c53fe3e99
Revises: 8a71872089e4
Create Date: 2023-05-18 19:21:33.496435

"""
import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision = "587c53fe3e99"
down_revision = "8a71872089e4"
branch_labels = None
depends_on = None


def upgrade():
    """Create the ``audit_log_resource`` table plus its ``id`` and ``user_id`` indexes."""
    table_name = "audit_log_resource"

    # created_at and updated_at are defined identically: timezone-aware,
    # nullable, and defaulted by the database to now().
    timestamp_columns = [
        sa.Column(
            column_name,
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=True,
        )
        for column_name in ("created_at", "updated_at")
    ]

    op.create_table(
        table_name,
        sa.Column("id", sa.String(length=255), nullable=False),
        *timestamp_columns,
        sa.Column("user_id", sa.String(), nullable=True),
        sa.Column("request_path", sa.String(), nullable=True),
        sa.Column("request_type", sa.String(), nullable=True),
        sa.Column("fides_keys", sa.ARRAY(sa.String()), nullable=True),
        sa.Column("extra_data", sa.JSON(), nullable=True),
        sa.PrimaryKeyConstraint("id"),
    )

    # Non-unique indexes, created in the same order as the generated migration.
    indexes = (
        ("ix_audit_log_resource_id", "id"),
        ("ix_audit_log_resource_user_id", "user_id"),
    )
    for index_name, indexed_column in indexes:
        op.create_index(
            op.f(index_name),
            table_name,
            [indexed_column],
            unique=False,
        )


def downgrade():
    """Drop the ``audit_log_resource`` indexes, then the table itself."""
    table_name = "audit_log_resource"

    # Indexes are removed in the reverse of the order upgrade() created them.
    for index_name in (
        "ix_audit_log_resource_user_id",
        "ix_audit_log_resource_id",
    ):
        op.drop_index(op.f(index_name), table_name=table_name)

    op.drop_table(table_name)
12 changes: 12 additions & 0 deletions src/fides/api/ctl/sql_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -579,3 +579,15 @@ class CustomField(Base):
)

UniqueConstraint("resource_type", "resource_id", "custom_field_definition_id")


class AuditLogResource(Base):
    """One row per audited request made against fides resources.

    Only the request-specific columns are declared here.
    NOTE(review): ``id``, ``created_at`` and ``updated_at`` exist in the
    migration for this table but not in this class - presumably they are
    inherited from ``Base``; confirm.
    """

    __tablename__ = "audit_log_resource"

    # Who made the request; indexed, and nullable for unauthenticated calls.
    user_id = Column(String, index=True, nullable=True)
    # Path of the endpoint that was called.
    request_path = Column(String, nullable=True)
    # HTTP method of the request.
    request_type = Column(String, nullable=True)
    # fides_key values found in the request body, if any.
    fides_keys = Column(ARRAY(String), nullable=True)
    # Free-form JSON payload for additional context.
    extra_data = Column(JSON, nullable=True)
22 changes: 22 additions & 0 deletions src/fides/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
match_route,
path_is_in_ui_directory,
)
from fides.api.middleware import handle_audit_log_resource
from fides.api.schemas.analytics import Event, ExtraData

# pylint: disable=wildcard-import, unused-wildcard-import
Expand All @@ -44,6 +45,8 @@
from fides.cli.utils import FIDES_ASCII_ART
from fides.core.config import CONFIG, check_required_webserver_config_values

IGNORED_AUDIT_LOG_RESOURCE_PATHS = {"/api/v1/login"}

VERSION = fides.__version__

app = create_fides_app()
Expand Down Expand Up @@ -239,3 +242,22 @@ def start_webserver(port: int = 8080) -> None:
server.config.log_level,
)
server.run()


@app.middleware("http")
async def action_to_audit_log(request: Request, call_next: Callable) -> Response:
    """Record an audit-log entry for non-GET requests, then continue the request.

    Auditing is skipped for GET requests, for paths listed in
    IGNORED_AUDIT_LOG_RESOURCE_PATHS, and whenever the
    ``enable_audit_log_resource_middleware`` security setting is off.
    """
    should_audit = (
        request.method != "GET"
        and request.scope["path"] not in IGNORED_AUDIT_LOG_RESOURCE_PATHS
        and CONFIG.security.enable_audit_log_resource_middleware
    )

    if should_audit:
        # Auditing is best-effort: a failure here must never block the
        # actual request, so it is only logged at debug level.
        try:
            await handle_audit_log_resource(request)
        except Exception as audit_error:
            logger.debug(audit_error)

    response = await call_next(request)
    return response
126 changes: 126 additions & 0 deletions src/fides/api/middleware.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
import json
from typing import Any, Dict, List

from fastapi import Request
from loguru import logger
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.orm import Session
from starlette.types import Message

from fides.api.api import deps
from fides.api.ctl.sql_models import AuditLogResource # type: ignore[attr-defined]
from fides.api.oauth.utils import extract_token_and_load_client


async def handle_audit_log_resource(request: Request) -> None:
    """
    Handles the lifecycle of recording audit log resource data.

    Attempts to track the WHO, WHEN, and beginning of WHAT for
    traceability purposes.

    WHO: User ID from the API request
    WHEN: Timestamps related to the request
    WHAT: The endpoint, request type, and (if applicable)
    fides_key(s) associated with the request

    The database session opened here is always closed before returning,
    whether or not the record was written. Exceptions from resolving the
    user or parsing the body propagate to the caller.
    """

    # details to be stored as a row on the server
    audit_log_resource_data: Dict[str, Any] = {
        "user_id": None,
        "request_path": request.scope["path"],
        "request_type": request.method,
        "fides_keys": None,
        "extra_data": None,
    }

    # This session is obtained outside of FastAPI's dependency system, so
    # nothing else will release it; close it ourselves once we are done.
    # NOTE(review): assumes get_api_session() returns a session owned by the
    # caller - confirm against deps.
    db: Session = deps.get_api_session()
    try:
        # get the user id associated with the request
        token = request.headers.get("authorization")
        if token:
            audit_log_resource_data["user_id"] = await get_client_user_id(db, token)

        # Access request body to check for fides_keys. The body is re-armed
        # on the request so it can still be read after this middleware.
        await set_body(request, await request.body())
        body = await get_body(request)
        audit_log_resource_data["fides_keys"] = await extract_data_from_body(body)

        # write record to server
        await write_audit_log_resource_record(db, audit_log_resource_data)
    finally:
        db.close()


async def write_audit_log_resource_record(
    db: Session, audit_log_resource_data: Dict[str, Any]
) -> None:
    """
    Persist one audit log resource row built from ``audit_log_resource_data``.

    Database errors are logged at debug level and otherwise ignored, so a
    failed write does not raise to the caller.
    """
    try:
        AuditLogResource.create(data=audit_log_resource_data, db=db)
    except SQLAlchemyError as db_error:
        logger.debug(db_error)


async def get_client_user_id(db: Session, auth_token: str) -> str:
    """
    Resolve the user id behind an ``Authorization`` header value.

    The ``"Bearer "`` marker is removed from the header value before the
    token is used to load the client. A client with no user id is reported
    as ``"root"``.
    """
    bare_token = auth_token.replace("Bearer ", "")
    _token_data, api_client = extract_token_and_load_client(bare_token, db)

    user_id = api_client.user_id
    return user_id if user_id else "root"


async def extract_data_from_body(body: bytes) -> List:
    """
    Collect the ``fides_key`` values found in a request body.

    The body is expected to be JSON: either a single object or a list of
    objects. Only top-level objects are inspected, and only truthy
    ``fides_key`` values are kept, in the order they appear.

    Returns an empty list when the body is empty, is not valid JSON
    (for example form data or a file upload), or holds no ``fides_key``.
    A non-JSON body is not an error here: the request should still be
    audited, just without any keys.
    """
    if not body:
        return []

    try:
        payload = json.loads(body)
    except ValueError:
        # Covers both json.JSONDecodeError and UnicodeDecodeError, which
        # are ValueError subclasses raised for unparsable bodies.
        return []

    # Treat a lone object as a one-item list so both shapes share one path.
    candidates = payload if isinstance(payload, list) else [payload]
    return [
        item["fides_key"]
        for item in candidates
        if isinstance(item, dict) and item.get("fides_key")
    ]


async def set_body(request: Request, body: bytes) -> None:
    """
    Re-arm ``request`` so that its receive channel yields ``body``.

    The request's private ``_receive`` callable is replaced with one that
    returns an ``http.request`` message carrying the given bytes. This
    works around Starlette's limitation with awaiting the request body
    inside middleware.

    Reference: https://github.com/tiangolo/fastapi/issues/394#issuecomment-883524819
    """

    async def replay_body() -> Message:
        # A new message is built on every call, each carrying the same bytes.
        return {"type": "http.request", "body": body}

    request._receive = replay_body  # pylint: disable=W0212


async def get_body(request: Request) -> bytes:
    """
    Read the request body and re-arm the request with those same bytes.

    Returns the raw body. The re-arming step is the workaround for
    Starlette's limitation with awaiting the request body inside
    middleware.

    Reference: https://github.com/tiangolo/fastapi/issues/394#issuecomment-883524819
    """
    raw_body = await request.body()
    await set_body(request, raw_body)
    return raw_body
4 changes: 4 additions & 0 deletions src/fides/core/config/security_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,10 @@ class SecuritySettings(FidesSettings):
default=False,
description="Enables or disables the ability to import connector templates with custom functions. When enabled, custom functions which will be loaded in a restricted environment to minimize security risks.",
)
enable_audit_log_resource_middleware: Optional[bool] = Field(
default=False,
description="Either enables the collection of audit log resource data or bypasses the middleware",
)

@validator("app_encryption_key")
@classmethod
Expand Down
Loading