Skip to content

Commit

Permalink
Merge pull request #4 from ppirch/Tag-APi
Browse files Browse the repository at this point in the history
Implement Tag API and Word Vector API
  • Loading branch information
wannaphong authored Feb 9, 2021
2 parents 6554eb9 + d08db66 commit 7ce9d04
Show file tree
Hide file tree
Showing 4 changed files with 130 additions and 13 deletions.
4 changes: 3 additions & 1 deletion main.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
from fastapi import Depends, FastAPI, Header, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from routers import tag, tokenize
from routers import tag, tokenize, word_vector
import uvicorn
import pythainlp

Expand Down Expand Up @@ -29,6 +29,8 @@ def index():

app.include_router(tag.router, prefix="/tag", tags=["Tag"])
app.include_router(tokenize.router, prefix="/tokenize", tags=["Tokenize"])
app.include_router(word_vector.router,
prefix="/word-vector", tags=["Word Vector"])


if __name__ == "__main__":
Expand Down
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
fastapi
pythainlp==2.1.4
uvicorn
pytest
pytest
gensim
numpy
72 changes: 61 additions & 11 deletions routers/tag.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,65 @@
# -*- coding: utf-8 -*-
from fastapi import APIRouter
from pythainlp import tag
from fastapi import APIRouter, Query
from enum import Enum
from typing import List, Optional, Tuple
from pydantic import BaseModel
from pythainlp import tag, tokenize
from pythainlp.tag.named_entity import ThaiNameTagger

router = APIRouter()
ner = ThaiNameTagger()


@router.get("/tag/pos_tag", tags=["tag"])
def part_of_speech_tagging(q: str, engine: str = None, corpus: str = None):
    # POS-tag the raw Thai text `q`, defaulting to the perceptron engine
    # and the orchid corpus when not specified.
    # NOTE(review): only `from pythainlp import tag` is in scope here, so
    # `pythainlp.tokenize...` would raise NameError at request time — this
    # handler was removed in this commit in favor of the `/pos` route.
    words = pythainlp.tokenize.word_tokenize(q)
    if not engine:
        engine = "perceptron"
    if not corpus:
        corpus = "orchid"
    return tag.pos_tag(words, engine, corpus)
class PosTagEngine(str, Enum):
    """Tagging engines accepted by the `/pos` endpoint and forwarded to
    pythainlp's `tag.pos_tag` (str subclass so FastAPI validates/serializes
    it as a plain string query parameter)."""
    perceptron = "perceptron"
    unigram = "unigram"
    artagger = "artagger"


class CorpusEngine(str, Enum):
    """Training corpora accepted by the `/pos` endpoint; the choice
    determines the tagset returned by `tag.pos_tag`."""
    orchid = "orchid"
    orchid_ud = "orchid_ud"
    pud = "pud"


class PosTag(BaseModel):
    """One (token, part-of-speech) pair in a tagging response."""
    word: str  # the tokenized Thai word
    pos: str   # its part-of-speech label (tagset depends on the corpus)


class NERTag(BaseModel):
    """One token from the named-entity tagger with its POS and NER labels."""
    word: str  # the tokenized Thai word
    pos: str   # part-of-speech label
    ner: str   # named-entity label (as produced by ThaiNameTagger)


class PosTagResponse(BaseModel):
    """Response envelope for `/pos` and `/provinces`."""
    # Mutable default is safe here: pydantic copies field defaults per model
    # instance rather than sharing one list.
    pos_tags: List[PosTag] = []


class NERResponse(BaseModel):
    """Response envelope for `/ner`."""
    # Mutable default is safe here: pydantic copies field defaults per model
    # instance rather than sharing one list.
    ner_tags: List[NERTag] = []


@router.get('/pos', response_model=PosTagResponse)
def pos_tag(q: str = "", words: List[str] = Query(None), engine: PosTagEngine = "perceptron", corpus: CorpusEngine = "orchid"):
    """Part-of-speech tag Thai text.

    Accepts either raw text ``q`` (tokenized server-side) or a
    pre-tokenized ``words`` list; a non-empty ``q`` takes precedence.
    Returns ``{"pos_tags": [{"word": ..., "pos": ...}, ...]}``.
    """
    if q:  # raw text wins over any pre-tokenized input
        words = tokenize.word_tokenize(q)
    if not words:
        # Neither q nor words supplied: previously this passed None to
        # tag.pos_tag and produced a 500; return an empty result instead.
        return {"pos_tags": []}
    tags = tag.pos_tag(words, engine=engine, corpus=corpus)
    res = [{"word": word, "pos": pos} for word, pos in tags]
    return {"pos_tags": res}


@router.get('/provinces', response_model=PosTagResponse)
def tag_provinces(q: str = "", words: List[str] = Query(None)):
    """Tag Thai province names in text.

    Accepts either raw text ``q`` (tokenized server-side) or a
    pre-tokenized ``words`` list; a non-empty ``q`` takes precedence.
    Returns ``{"pos_tags": [{"word": ..., "pos": ...}, ...]}``.
    """
    if q:  # raw text wins over any pre-tokenized input
        words = tokenize.word_tokenize(q)
    if not words:
        # Neither q nor words supplied: previously this passed None to
        # tag.tag_provinces and produced a 500; return an empty result.
        return {"pos_tags": []}
    tags = tag.tag_provinces(words)
    res = [{"word": word, "pos": pos} for word, pos in tags]
    return {"pos_tags": res}


@router.get('/ner', response_model=NERResponse)
def get_ner(q: str = ""):
    """Named-entity tag the Thai text ``q`` with the module-level
    ThaiNameTagger and return ``{"ner_tags": [{word, pos, ner}, ...]}``.
    """
    tags = ner.get_ner(q)
    # Loop variables renamed: the original comprehension used `ner` as the
    # tuple element name, shadowing the module-level `ner` tagger instance
    # referenced two lines above.
    res = [{"word": w, "pos": p, "ner": n} for w, p, n in tags]
    return {"ner_tags": res}
63 changes: 63 additions & 0 deletions routers/word_vector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
from fastapi import APIRouter, Query, HTTPException
from pythainlp import word_vector
from enum import Enum
from typing import List, Optional
from pydantic import BaseModel

router = APIRouter()


class MostSimilarCosmulWord(BaseModel):
    """One (word, similarity score) result from most_similar_cosmul."""
    word: str    # candidate word from the word-vector model vocabulary
    score: float  # similarity score for this candidate


class DoesntMatchResponse(BaseModel):
    """Response envelope for `/doesnt-match`: the odd word out."""
    doesnt_match: str = ""


class MostSimilarCosmulResponse(BaseModel):
    """Response envelope for `/most-similar-cosmul`."""
    # Mutable default is safe: pydantic copies field defaults per instance.
    most_similar_cosmul: List[MostSimilarCosmulWord] = []


class SentenceVectorizerResponse(BaseModel):
    """Response envelope for `/sentence-vectorizer`: the sentence embedding
    as a nested list (from numpy `.tolist()` of the model output)."""
    sentence_vectorizer: List[List[float]] = []


class SimilarityResponse(BaseModel):
    """Response envelope for `/similarity`."""
    # -1 is a placeholder default, not a sentinel the endpoint ever returns;
    # failures raise HTTPException instead.
    similarity: float = -1


@router.get('/doesnt-match', response_model=DoesntMatchResponse)
def doesnt_match(words: List[str] = Query(None)):
    """Return the word from ``words`` that does not belong with the others,
    per pythainlp's ``word_vector.doesnt_match``.
    """
    if not words:
        # A missing/empty `words` parameter is a client error; previously
        # None was passed through and surfaced as a 500 server error.
        raise HTTPException(
            status_code=400, detail="query parameter 'words' is required")
    try:
        return {"doesnt_match": word_vector.doesnt_match(words)}
    except Exception as e:
        # Model/vocabulary failures (e.g. out-of-vocabulary words) are
        # reported to the client; strip quotes so the detail stays valid.
        raise HTTPException(status_code=500, detail=str(e).replace("\"", ""))


@router.get('/most-similar-cosmul', response_model=MostSimilarCosmulResponse)
def most_similar_cosmul(listPositive: List[str] = Query([]), listNegative: List[str] = Query([])):
    """Find words most similar to ``listPositive`` and dissimilar to
    ``listNegative`` using pythainlp's ``word_vector.most_similar_cosmul``;
    returns ``{"most_similar_cosmul": [{word, score}, ...]}``.
    """
    try:
        matches = word_vector.most_similar_cosmul(listPositive, listNegative)
        result = []
        for candidate, score in matches:
            result.append({"word": candidate, "score": score})
        return {"most_similar_cosmul": result}
    except Exception as e:
        # Surface model errors (e.g. out-of-vocabulary words) to the client.
        raise HTTPException(status_code=500, detail=str(e).replace("\"", ""))


@router.get('/sentence-vectorizer', response_model=SentenceVectorizerResponse)
def sentence_vectorizer(q: str = ""):
    """Embed the sentence ``q`` with pythainlp's
    ``word_vector.sentence_vectorizer`` (mean-pooled) and return the vector
    as nested lists.
    """
    try:
        embedding = word_vector.sentence_vectorizer(q, use_mean=True)
        return {"sentence_vectorizer": embedding.tolist()}
    except Exception as e:
        # Surface model errors to the client with quotes stripped.
        raise HTTPException(status_code=500, detail=str(e).replace("\"", ""))


@router.get('/similarity', response_model=SimilarityResponse)
def similarity(word1: str = "", word2: str = ""):
    """Return the similarity score between ``word1`` and ``word2`` from
    pythainlp's ``word_vector.similarity``.
    """
    try:
        score = word_vector.similarity(word1, word2)
        return {"similarity": score}
    except Exception as e:
        # Out-of-vocabulary or empty words raise inside the model; report
        # them to the client with quotes stripped.
        raise HTTPException(status_code=500, detail=str(e).replace("\"", ""))

0 comments on commit 7ce9d04

Please sign in to comment.