Skip to content

Commit

Permalink
make it async
Browse files Browse the repository at this point in the history
  • Loading branch information
maxzirps committed Dec 23, 2024
1 parent 484c348 commit 2d999e9
Show file tree
Hide file tree
Showing 6 changed files with 216 additions and 185 deletions.
104 changes: 0 additions & 104 deletions backend/src/api.py

This file was deleted.

24 changes: 24 additions & 0 deletions backend/src/api/SingletonAiohttp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# https://github.com/raphaelauv/fastAPI-aiohttp-example/blob/master/src/fastAPI_aiohttp/fastAPI.py

from typing import Optional
import aiohttp


class SingletonAiohttp:
    """Holder for one shared ``aiohttp.ClientSession``.

    The session is created lazily by :meth:`get_aiohttp_client` and reused by
    every later call. Because of that, ``base_url`` and ``token`` only take
    effect on the call that actually creates the session; later calls with
    different values receive the existing session unchanged.
    """

    # The shared session: ``None`` until first requested and again after
    # ``close_aiohttp_client`` has run.
    aiohttp_client: Optional[aiohttp.ClientSession] = None

    @classmethod
    def get_aiohttp_client(
        cls,
        base_url: Optional[str] = None,
        token: Optional[str] = None,
    ) -> aiohttp.ClientSession:
        """Return the shared session, creating it when there is none.

        Args:
            base_url: Base URL passed to ``aiohttp.ClientSession``.
            token: When truthy, sent as ``Authorization: Bearer <token>`` by
                the created session.

        Returns:
            The shared, open ``aiohttp.ClientSession``.
        """
        # A session that was closed directly (not via close_aiohttp_client)
        # is unusable, so replace it instead of handing it out again.
        if cls.aiohttp_client is None or cls.aiohttp_client.closed:
            headers = {}
            if token:
                headers["Authorization"] = f"Bearer {token}"
            cls.aiohttp_client = aiohttp.ClientSession(
                headers=headers,
                base_url=base_url,
                connector=aiohttp.TCPConnector(limit_per_host=5),
            )

        return cls.aiohttp_client

    @classmethod
    async def close_aiohttp_client(cls) -> None:
        """Close the shared session, if any, and forget it."""
        session = cls.aiohttp_client
        if session is None:
            return
        # Drop the reference before awaiting so that nothing can pick up the
        # session while it is being closed.
        cls.aiohttp_client = None
        await session.close()
106 changes: 106 additions & 0 deletions backend/src/api/genius.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
from fuzzywuzzy import fuzz
from api.SingletonAiohttp import SingletonAiohttp
from models import Song
from bs4 import BeautifulSoup
import urllib.parse
import logging
# Module-level logger, obtained under the name 'uvicorn.error'.
# NOTE(review): presumably chosen so messages share uvicorn's handlers and
# formatting when running under the server — confirm.
logger = logging.getLogger('uvicorn.error')

class API_Client():
    """Async client for the Genius API plus scraping of Genius lyrics pages.

    All requests go through the shared session from ``SingletonAiohttp``.
    """

    def __init__(self, token: str):
        """Set the API endpoint and obtain the shared HTTP session.

        Args:
            token: Genius API token, forwarded to the shared session.

        Raises:
            Exception: If ``token`` is empty or ``None``.
        """
        self.api_endpoint = "https://api.genius.com"
        if not token:
            raise Exception("No token specified. Change .env/.env.local")
        self.session = SingletonAiohttp.get_aiohttp_client(token=token)

    async def close(self):
        """Close the shared HTTP session."""
        await SingletonAiohttp.close_aiohttp_client()

    async def _read_json(self, response, error_message: str):
        """Decode the JSON body of ``response``.

        Raises:
            ValueError: ``(error_message, body_text)`` if the body is not
                valid JSON.
        """
        try:
            # content_type=None disables aiohttp's Content-Type check, so a
            # non-JSON body always surfaces as the ValueError handled here.
            return await response.json(content_type=None)
        except ValueError:
            raise ValueError(error_message, await response.text())

    async def _not_found_message(self, response) -> str:
        """Build the error text for a 404 response from its ``meta`` object.

        Raises:
            ValueError: If the 404 body is not valid JSON.
            Exception: If the body is JSON but carries no ``meta`` object.
        """
        data = await self._read_json(
            response, "Error 404: Resource not found (Non-JSON Response)"
        )
        if isinstance(data, dict) and "meta" in data:
            meta = data["meta"]
            return f"Error {meta['status']}: {meta['message']}"
        raise Exception("Error 404: Resource not found")

    async def _unexpected_status_error(self, response) -> Exception:
        """Log an unhandled status code and return the exception to raise."""
        logger.error("Request failed with status code %s", response.status)
        return Exception("Response:", await response.text())

    async def search(self, query: str) -> list[Song]:
        """Search Genius for ``query``.

        Returns:
            One ``Song`` per hit. An empty list when the API answers 404 with
            a ``meta`` error object (the error is logged).

        Raises:
            ValueError: If a 200 or 404 body is not valid JSON.
            Exception: On a 404 without ``meta`` or any other status code.
        """
        url = f"{self.api_endpoint}/search?{urllib.parse.urlencode({'q': query})}"
        async with self.session.get(url) as response:
            if response.status == 200:
                data = await self._read_json(response, "Response is not valid JSON:")
                hits = (data or {}).get("response", {}).get("hits", [])
                return [
                    Song(
                        id=hit["result"]["id"],
                        title=hit["result"]["title"],
                        artist=hit["result"]["primary_artist"]["name"],
                    )
                    for hit in hits
                ]
            if response.status == 404:
                # Best effort: report the API error and hand back "no hits"
                # so callers can still iterate the result.
                logger.error(await self._not_found_message(response))
                return []
            raise await self._unexpected_status_error(response)

    async def get_lyrics(self, song_id: int) -> str:
        """Fetch the song's page URL from the API and scrape its lyrics.

        Raises:
            ValueError: If the API body is not valid JSON or has no song URL.
            Exception: On a 404 or any other non-200 status code.
        """
        async with self.session.get(f"{self.api_endpoint}/songs/{song_id}") as response:
            if response.status == 200:
                data = await self._read_json(response, "Response is not valid JSON:")
                song_url = (data or {}).get("response", {}).get("song", {}).get("url", "")
                if not song_url:
                    raise ValueError("Response did not contain a song URL:", data)
            elif response.status == 404:
                # There is no meaningful lyrics value for a missing song, so
                # this is raised rather than returned.
                raise Exception(await self._not_found_message(response))
            else:
                raise await self._unexpected_status_error(response)
        # Scrape only after the API response has been released.
        return await self.scrape_lyrics(song_url)

    async def scrape_lyrics(self, song_url) -> str:
        """Download ``song_url`` and return the text of its lyrics containers.

        Returns an empty string when no lyrics container is found.
        """
        async with self.session.get(song_url) as response:
            soup = BeautifulSoup(await response.text(), "html.parser")

        # Older Genius pages keep the lyrics in a single <div class="lyrics">.
        lyrics_div = soup.find("div", class_="lyrics")
        if not lyrics_div:
            # Newer Genius pages use `data-lyrics-container`
            lyrics_div = soup.find_all("div", attrs={"data-lyrics-container": "true"})

        # NOTE(review): get_text() does not turn <br> tags into newlines, so
        # lines inside one container may come out fused — confirm whether
        # downstream consumers need line breaks.
        return "\n".join([line.get_text() for line in lyrics_div])

    def get_best_match(self, search_results, query: str):
        """Return the result whose "artist title" is most similar to ``query``.

        Similarity is ``fuzz.ratio`` on lower-cased strings. Returns ``None``
        when there are no results or none scores above zero; the first of
        several equally good results wins.
        """
        best_match = None
        highest_score = 0
        wanted = query.lower()
        for result in search_results or []:
            candidate = result.artist.lower() + " " + result.title.lower()
            score = fuzz.ratio(candidate, wanted)
            if score > highest_score:
                highest_score = score
                best_match = result
        return best_match
105 changes: 47 additions & 58 deletions backend/src/generate_data.py
Original file line number Diff line number Diff line change
@@ -1,70 +1,59 @@
import asyncio
import os

import api
from api.genius import API_Client
from env import load_env
from model import AttachmentStyleProbabilities, TextGenerationModel
import csv
import json
from fuzzywuzzy import fuzz
from tqdm import tqdm
from tqdm.asyncio import tqdm_asyncio

# Runs at import time, before main() reads TOKEN via os.getenv.
# NOTE(review): presumably populates os.environ from the .env files — confirm
# in env.load_env.
load_env()

async def add_id_to_song(api_client: API_Client, song: dict) -> dict:
    """Look ``song`` up on Genius and return it enriched with the match.

    The search query is "<artist> - <title>". When a best match exists, a new
    dict with ``query``, ``id``, ``artist`` and ``title`` (taken from the
    match) is returned, plus the lower-cased ``attachment_style`` if the input
    had one. When nothing matches, a notice is printed and the input dict is
    returned as-is.
    """
    query = "{} - {}".format(song['artist'], song['title'])
    match = api_client.get_best_match(await api_client.search(query), query)

    if match:
        enriched = {
            "query": query,
            "id": match.id,
            "artist": match.artist,
            "title": match.title
        }
        if "attachment_style" in song:
            enriched["attachment_style"] = song["attachment_style"].lower()
        return enriched

    print(f"Song not found: {song}")
    return song

def load_existing_data(path_to_data: str) -> list:
    """
    Load existing data from a CSV file.

    The CSV file must start with a header row containing at least the columns
    artist, title, attachment_style; any other columns are ignored. The file
    is read as UTF-8 so that non-ASCII artist and title names decode the same
    way on every platform.

    Returns:
        One dict per data row with the keys ``artist``, ``title`` and
        ``attachment_style``.

    Raises:
        KeyError: If a row lacks one of the required columns.
    """
    with open(path_to_data, mode='r', newline='', encoding='utf-8') as file:
        return [
            {
                "artist": row['artist'],
                "title": row['title'],
                "attachment_style": row['attachment_style'],
            }
            for row in csv.DictReader(file)
        ]

def generate_data(song_query: str) -> list[AttachmentStyleProbabilities]:
    """Placeholder: ignores ``song_query`` and returns an empty list."""
    return []


async def query_song(api_client: API_Client, song_query: str) -> dict:
    """Search Genius for ``song_query`` and describe the best match.

    Returns:
        A dict with the ``id``, ``artist`` and ``title`` of the best match, or
        an empty dict (after printing a notice) when nothing matches.
    """
    search_results = await api_client.search(song_query)
    found_song = api_client.get_best_match(search_results, song_query)
    if not found_song:
        print(f"Song not found: {song_query}")
        # Stop here: there is no match to read id/artist/title from.
        return {}
    return {
        "id": found_song.id,
        "artist": found_song.artist,
        "title": found_song.title,
    }

if __name__ == "__main__":
    # Synchronous pipeline: resolve every labelled song from the training CSV
    # to a Genius id and collect the matches in ``songs_with_ids``.
    # NOTE(review): ``search`` is called without ``await`` here, while
    # add_id_to_song awaits it — confirm that ``api.API_Client.search`` is
    # synchronous before running this block.
    token = os.getenv("TOKEN")
    api_client = api.API_Client(token)
    data = load_existing_data("./data/train.csv")
    songs_with_ids = []

    for song in tqdm(data, desc="Processing songs"):
        search_results = api_client.search(f"{song['artist']} - {song['title']}")

        # Reset for every song so that an empty search can never reuse the
        # match found for the previous song.
        best_match = None
        highest_score = 0
        for result in search_results or []:
            # Artist and title are scored separately and summed.
            artist_score = fuzz.ratio(result.artist.lower(), song['artist'].lower())
            title_score = fuzz.ratio(result.title.lower(), song['title'].lower())
            score = artist_score + title_score
            if score > highest_score:
                highest_score = score
                best_match = result

        if not best_match:
            # Report the unmatched song and move on.
            print(song)
            continue

        songs_with_ids.append({
            "id": best_match.id,
            "artist": best_match.artist,
            "title": best_match.title,
            "attachment_style": song['attachment_style'].lower(),
        })

async def main():
    """Resolve every song in ./data/songs.json to a Genius id.

    Reads the song list, looks all songs up concurrently through
    ``add_id_to_song`` (with a progress bar) and writes the results to
    ./data/songs_with_ids.json. The Genius client is closed even when a step
    fails.
    """
    token = os.getenv("TOKEN")
    genius = API_Client(token)
    try:
        with open("./data/songs.json", "r", encoding="utf-8") as json_file:
            songs = json.load(json_file)

        songs_with_ids = await tqdm_asyncio.gather(
            *[add_id_to_song(genius, song) for song in songs]
        )

        with open("./data/songs_with_ids.json", "w", encoding="utf-8") as json_file:
            json.dump(songs_with_ids, json_file, indent=4)

        # TODO: generation step, not wired up yet:
        #model = TextGenerationModel()
        #song_queries = []
        #output = [generate_data(song_query) for song_query in song_queries]
        #output += load_existing_data("../data/train.csv")
    finally:
        # Always release the shared HTTP session.
        await genius.close()


if __name__ == "__main__":
    asyncio.run(main())

14 changes: 10 additions & 4 deletions backend/src/main.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from collections import Counter
from typing import List, Optional
from urllib.request import Request
import api

from fastapi.concurrency import asynccontextmanager
import api.genius as genius
from src.env import load_env
from model import TextGenerationModel
from fastapi import FastAPI, Query, HTTPException
Expand Down Expand Up @@ -42,15 +44,19 @@ async def universal_exception_handler(request: Request, exc: Exception):
content={"message": "An unexpected error occurred"}
)

@asynccontextmanager
async def lifespan(app: FastAPI):
    """Set up the model and the Genius client, and close the client afterwards.

    Everything before ``yield`` runs on startup and fills the module globals
    ``model`` and ``api_client``; the ``finally`` part runs on shutdown.

    NOTE(review): FastAPI only runs this when it is passed to the application
    as ``FastAPI(lifespan=lifespan)``. The app construction is not visible
    here — confirm it is wired up.
    """
    global model
    global api_client
    model_id = os.getenv("MODEL_ID")
    logger.info(f"Using model {model_id}")
    model = TextGenerationModel(model_id)
    token = os.getenv("TOKEN")
    api_client = genius.API_Client(token)

    try:
        yield
    finally:
        # Close the HTTP session even when shutdown is caused by an error.
        await api_client.close()

class LyricsRequest(BaseModel):
lyrics: str
Expand Down
Loading

0 comments on commit 2d999e9

Please sign in to comment.