Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Stats #168

Merged
merged 4 commits into from
Feb 18, 2025
Merged

Stats #168

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions bedhost/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,6 @@

# Example identifiers interpolated into the endpoint descriptions of the
# bed and bedset metadata routes.
EXAMPLE_BED = "bbad85f21962bb8d972444f7f9a3a932"
EXAMPLE_BEDSET = "gse218680"

# Interval, in days, between scheduled uploads of the in-memory usage counters
# (passed as `days=` to the background scheduler job registered in main.py).
USAGE_SAVE_DAYS = 7
70 changes: 70 additions & 0 deletions bedhost/helpers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import os
from functools import wraps

from typing import Literal
import datetime
from bbconf.bbagent import BedBaseAgent
from bbconf.models.base_models import UsageModel
from starlette.responses import FileResponse, JSONResponse, RedirectResponse

from . import _LOGGER
Expand Down Expand Up @@ -74,3 +78,69 @@ def drs_response(status_code, msg):
"""Helper function to make quick DRS responses"""
content = {"status_code": status_code, "msg": msg}
return JSONResponse(status_code=status_code, content=content)


def count_requests(
    usage_data: UsageModel,
    event: Literal["bed_search", "bedset_search", "bed_meta", "bedset_meta", "files"],
):
    """
    Decorator to count requests for different events

    Each call of the decorated coroutine increments, in the ``usage_data``
    counter dict named after ``event``, the entry keyed by the relevant
    keyword argument of the call (``query``, ``bed_id``, ``bedset_id`` or
    ``file_path``). The key is read from keyword arguments only.

    For the ``files`` event only paths that contain ``bed`` (or ``bigbed``,
    case-insensitively) are counted; a missing ``file_path`` is ignored.

    :param UsageModel usage_data: usage data model
    :param str event: event type
    :return: decorator that wraps an async function with the counting logic
    :raises ValueError: raised when the wrapped function is called (not at
        decoration time) if ``event`` is not one of the supported events
    """

    # event name -> keyword argument of the endpoint that holds the counted key.
    # The event name doubles as the name of the counter attribute on usage_data.
    key_arguments = {
        "bed_search": "query",
        "bedset_search": "query",
        "bed_meta": "bed_id",
        "bedset_meta": "bedset_id",
        "files": "file_path",
    }

    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            if event not in key_arguments:
                raise ValueError(f"Unknown event type: {event}")

            key = kwargs.get(key_arguments[event])

            if event == "files":
                # Guard against a missing path before doing substring checks.
                should_count = key is not None and (
                    "bed" in key or "bigbed" in key.lower()
                )
            else:
                should_count = True

            if should_count:
                # Look the dict up on every call: the periodic upload job
                # rebinds these attributes to fresh dicts after each upload.
                counters = getattr(usage_data, event)
                counters[key] = counters.get(key, 0) + 1

            return await func(*args, **kwargs)

        return wrapper

    return decorator


def init_model_usage():
    """
    Create an empty usage model for a new collection window.

    :return: UsageModel whose counters (bed_meta, bedset_meta, bed_search,
        bedset_search, files) are empty dicts and whose date_from is the
        current naive local time
    """
    counter_fields = ("bed_meta", "bedset_meta", "bed_search", "bedset_search", "files")
    # Every counter gets its own dict so the fields never share state.
    empty_counters = {field: {} for field in counter_fields}
    return UsageModel(date_from=datetime.datetime.now(), **empty_counters)
32 changes: 30 additions & 2 deletions bedhost/main.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import sys
import datetime

import markdown
import uvicorn
Expand All @@ -17,8 +18,9 @@
from . import _LOGGER
from ._version import __version__ as bedhost_version
from .cli import build_parser
from .const import PKG_NAME, STATIC_PATH
from .helpers import attach_routers, configure, drs_response
from .const import PKG_NAME, STATIC_PATH, USAGE_SAVE_DAYS
from .helpers import attach_routers, configure, drs_response, init_model_usage
from apscheduler.schedulers.background import BackgroundScheduler

tags_metadata = [
{
Expand Down Expand Up @@ -156,9 +158,35 @@ def main():
_LOGGER.info(f"Running {PKG_NAME} app...")
bbconf_file_path = os.environ.get("BEDBASE_CONFIG") or None
global bbagent

global usage_data
usage_data = init_model_usage()
bbagent = configure(
bbconf_file_path
) # configure before attaching routers to avoid circular imports

scheduler = BackgroundScheduler()

def upload_usage():
    """
    Upload usage data to the database and reset the usage data

    Registered below as an interval job on the background scheduler.
    Closes the current collection window by stamping ``date_to``, hands the
    model to ``bbagent.add_usage`` and then starts a new, empty window.
    If ``add_usage`` raises, the reset is not reached, so the collected
    counters are kept.
    """

    # Report through the package logger (as the rest of this module does)
    # instead of printing to stdout.
    _LOGGER.info("Running uploading of the usage")
    usage_data.date_to = datetime.datetime.now()
    bbagent.add_usage(usage_data)

    # Rebind every counter to a fresh dict and open a new window.
    for counter_name in (
        "bed_meta",
        "bedset_meta",
        "bed_search",
        "bedset_search",
        "files",
    ):
        setattr(usage_data, counter_name, {})
    usage_data.date_from = datetime.datetime.now()
    usage_data.date_to = None

scheduler.add_job(upload_usage, "interval", days=USAGE_SAVE_DAYS)
scheduler.start()

attach_routers(app)
else:
raise EnvironmentError(
Expand Down
30 changes: 25 additions & 5 deletions bedhost/routers/base_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
from platform import python_version

from bbconf import __version__ as bbconf_version
from bbconf.models.base_models import StatsReturn
from fastapi import APIRouter
from bbconf.models.base_models import StatsReturn, FileStats
from fastapi import APIRouter, Request
from geniml import __version__ as geniml_version

from .._version import __version__ as bedhost_version
Expand All @@ -20,17 +20,18 @@
ServiceInfoResponse,
Type,
)
from ..helpers import get_openapi_version
from ..main import app, bbagent
from ..helpers import get_openapi_version, count_requests
from ..main import app, bbagent, usage_data

router = APIRouter(prefix="/v1", tags=["base"])
from fastapi.responses import RedirectResponse

packages_versions = {}


@router.get(
"/stats",
summary="Get summary statistics for the DRS object store",
summary="Get summary statistics for BEDbase platform",
response_model=StatsReturn,
)
async def get_bedbase_db_stats():
Expand All @@ -40,6 +41,18 @@ async def get_bedbase_db_stats():
return bbagent.get_stats()


@router.get(
    "/detailed-stats",
    summary="Get detailed statistics for BEDbase platform, including number of files for each genome",
    response_model=FileStats,
)
async def get_detailed_stats():
    """
    Returns detailed statistics
    """
    # Thin pass-through: the agent produces the statistics object and the
    # route's response_model (FileStats) declares the response schema.
    detailed_stats = bbagent.get_detailed_stats()
    return detailed_stats


@router.get(
"/genomes",
summary="Get available genomes",
Expand Down Expand Up @@ -94,3 +107,10 @@ async def service_info():
text2vec=bbagent.config.config.path.text2vec,
),
)


@router.get("/files/{file_path:path}")
@count_requests(usage_data, event="files")
async def redirect_to_download(file_path: str, request: Request):
    # The count_requests decorator records the requested path; the handler
    # itself only redirects the client to the same path on the data host.
    # `request` is accepted but not used here.
    return RedirectResponse(url=f"https://data2.bedbase.org/{file_path}")
9 changes: 6 additions & 3 deletions bedhost/routers/bed_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,15 @@
QdrantSearchResult,
RefGenValidReturnModel,
)
from fastapi import APIRouter, File, HTTPException, Query, UploadFile
from fastapi import APIRouter, File, HTTPException, Query, UploadFile, Request
from fastapi.responses import PlainTextResponse
from gtars.tokenizers import RegionSet

from .. import _LOGGER
from ..const import EXAMPLE_BED
from ..data_models import CROM_NUMBERS, BaseListResponse, BedDigest
from ..main import bbagent
from ..main import bbagent, usage_data
from ..helpers import count_requests

router = APIRouter(prefix="/v1/bed", tags=["bed"])

Expand Down Expand Up @@ -88,6 +89,7 @@ async def list_beds(
response_model_by_alias=False,
description=f"Example\n " f"bed_id: {EXAMPLE_BED}",
)
@count_requests(usage_data, event="bed_meta")
async def get_bed_metadata(
bed_id: str = BedDigest,
full: Optional[bool] = Query(
Expand Down Expand Up @@ -352,7 +354,8 @@ def get_regions_for_bedfile(
response_model=BedListSearchResult,
response_model_by_alias=False,
)
async def text_to_bed_search(query, limit: int = 10, offset: int = 0):
@count_requests(usage_data, event="bed_search")
async def text_to_bed_search(query: str, limit: int = 10, offset: int = 0):
"""
Search for a BedFile by a text query.
Example: query="cancer"
Expand Down
11 changes: 9 additions & 2 deletions bedhost/routers/bedset_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@
from fastapi import APIRouter, HTTPException, Request, Response

from ..const import EXAMPLE_BEDSET, PKG_NAME
from ..main import bbagent
from ..main import bbagent, usage_data
from ..data_models import CreateBEDsetRequest
from ..utils import zip_pep
from ..helpers import count_requests

router = APIRouter(prefix="/v1/bedset", tags=["bedset"])

Expand All @@ -40,7 +41,12 @@ async def get_example_bedset_record():
tags=["search"],
response_model=BedSetListResult,
)
async def list_bedsets(query: str = None, limit: int = 1000, offset: int = 0):
@count_requests(usage_data, event="bedset_search")
async def list_bedsets(
query: str = None,
limit: int = 1000,
offset: int = 0,
):
"""
Returns a list of BEDset records in the database with optional filters and search.
"""
Expand All @@ -54,6 +60,7 @@ async def list_bedsets(query: str = None, limit: int = 1000, offset: int = 0):
description=f"Example\n bed_id: {EXAMPLE_BEDSET}",
response_model_by_alias=False,
)
@count_requests(usage_data, event="bedset_meta")
async def get_bedset_metadata(
bedset_id: str,
full: bool = True,
Expand Down
3 changes: 2 additions & 1 deletion deployment/config/api-dev.bedbase.org.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ access_methods:
http:
type: 'https'
description: HTTP compatible path
prefix: https://data2.bedbase.org/
# prefix: https://data2.bedbase.org/
prefix: https://api-dev.bedbase.org/v1/files/
s3:
type: 's3'
description: S3 compatible path
Expand Down
4 changes: 3 additions & 1 deletion requirements/requirements-all.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# bbconf @ git+https://github.com/databio/bbconf.git@dev#egg=bbconf
bbconf>=0.10.0
bbconf @ git+https://github.com/databio/bbconf.git@usage_stats#egg=bbconf
# bbconf>=0.10.0
fastapi>=0.103.0
logmuse>=0.2.7
markdown
Expand All @@ -10,3 +11,4 @@ yacman>=0.9.2
pephubclient>=0.4.1
psycopg[binary,pool]
python-multipart>=0.0.9
APScheduler>=3.11.0
34 changes: 34 additions & 0 deletions ui/src/pages/bed-splash.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,40 @@ export const BedSplash = () => {
return null;
}

if (k === 'global_sample_id') {
const parts = value.split(':');


if (parts[1].startsWith('gsm') || parts[0].startsWith('encode')) {
let link;
if (parts[1].startsWith('gsm')) {
link = (
<a href={`https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=${parts[1]}`}
target="_blank">
{value}
</a>
);
} else {
link = (
<a href={`https://www.encodeproject.org/experiments/${parts[1]}/`}
target="_blank">
{value}
</a>
);
}
return (
<tr key={k}>
<td style={{ maxWidth: '50px' }} className="fst-italic">
{snakeToTitleCase(k)}
</td>
<td style={{ maxWidth: '120px' }} className="truncate">
{link}
</td>
</tr>
);
}
}

return (
<tr key={k}>
<td style={{ maxWidth: '50px' }} className="fst-italic">
Expand Down