Skip to content

Commit

Permalink
Rs ps integration (#37)
Browse files Browse the repository at this point in the history
  • Loading branch information
ugolowic authored May 18, 2021
1 parent f721afd commit 53cace5
Show file tree
Hide file tree
Showing 80 changed files with 36,715 additions and 6,892 deletions.
38 changes: 22 additions & 16 deletions prediction-service/app/encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,41 +6,47 @@
class DataEncoder:
_gender = ['F', 'M']
_age = ['young', 'mid', 'old']
_categories = ['Boys', 'Girls', 'Men', 'Sports', 'Women']
_coupon_types = ['biy_all', 'boy_more', 'department', 'just_discount']
_departments = ['Boys', 'Girls', 'Men', 'Sports', 'Women']
_coupon_types = ['buy_all', 'buy_more', 'department', 'just_discount']

@classmethod
def encode(cls, input: PredictionInput) -> pandas.DataFrame:
rows = []
for coupon in input.coupons:
row = {
'customer_id': input.customer.customer_id,
'cust_credit': input.customer.credit,
'cust_mean_product_price': input.customer.mean_product_price,
'cust_unique_coupons_used': input.customer.unique_coupons_used,
'cust_mean_discount': input.customer.mean_discount_used,
'cust_unique_products_bought': input.customer.unique_items_bought,
'cust_total_products_bougth': input.customer.total_items_bought,
'cust_mean_buy_price': input.customer.mean_buy_price,
'cust_total_coupons': input.customer.total_coupons_used,
'cust_mean_discount': input.customer.mean_discount_received,
'cust_unique_products': input.customer.unique_products_bought,
'cust_unique_products_coupon': input.customer.unique_products_bought_with_coupons,
'cust_total_products': input.customer.total_items_bought,
'coupon_id': coupon.coupon_id,
'coupon_discount': coupon.coupon_discount,
'coupon_how_many': coupon.how_many_products,
'coupon_days_valid': coupon.days_valid,
'coupon_mean_prod_price': coupon.mean_item_selling_price
'coupon_discount': coupon.discount,
'coupon_how_many': coupon.how_many_products_required,
'coupon_mean_prod_price': coupon.product_mean_price,
'coupon_prods_avail': coupon.products_available
}
# row.update(cls._encode_category(coupon.item_category))
row.update(cls._encode_age(input.customer.age))
row.update(cls._encode_gender(input.customer.gender))
row.update(cls._encode_coupon_type(coupon.coupon_type))
row.update(cls._encode_department(coupon.department))
rows.append(row)
return pandas.DataFrame(rows)

@classmethod
def _encode_category(cls, category):
return {f'category_{c}': 1 if category == c else 0 for c in cls._categories}
def _encode_department(cls, department):
return {f'coupon_dpt_{d}': 1 if department == d else 0 for d in cls._departments}

@classmethod
def _encode_age(cls, age):
return {f'cust_age_{a}': 1 if age == a else 0 for a in cls._age}
if age < 30:
age_bracket = 'young'
elif age >= 30 and age < 50:
age_bracket = 'mid'
else:
age_bracket = 'old'
return {f'cust_age_{a}': 1 if age_bracket == a else 0 for a in cls._age}

@classmethod
def _encode_gender(cls, gender):
Expand Down
3 changes: 0 additions & 3 deletions prediction-service/app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@
from fastapi import Depends, FastAPI
from fastapi.responses import PlainTextResponse

# from .encoder import DataEncoder
# from .model import PredictionInput, PredictionOutput
# from .scorer import Scorer, get_scorer
from app.encoder import DataEncoder
from app.model import PredictionInput, PredictionOutput
from app.scorer import Scorer, get_scorer
Expand Down
29 changes: 15 additions & 14 deletions prediction-service/app/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,31 +5,32 @@

class Coupon(BaseModel):
coupon_id: int
mean_item_selling_price: float
coupon_discount: float
category: str # TODO this is not used (for now)
how_many_products: int
coupon_type: str
days_valid: int
coupon_type: str # TODO Enum
department: str # TODO Enum
discount: int
how_many_products_required: int
product_mean_price: float
products_available: int


class Customer(BaseModel):
customer_id: int
age: str # TODO Enum
credit: int
gender: str # TODO Enum
mean_product_price: float
unique_coupons_used: int
mean_discount_used: float
unique_items_bought: int
age: int
mean_buy_price: float
total_coupons_used: int
mean_discount_received: float
unique_products_bought: int
unique_products_bought_with_coupons: int
total_items_bought: int


class PredictionInput(BaseModel):
customer: Customer
coupons: List[Coupon]


class PredictionOutput(BaseModel):
coupon_id: int
customer_id: int
coupon_id: str
customer_id: str
prediction: confloat(ge=0, le=1)
Binary file not shown.
Binary file not shown.
Binary file removed prediction-service/app/model_store/scikit_classifier
Binary file not shown.
Binary file removed prediction-service/app/model_store/scikit_regressor
Binary file not shown.
3 changes: 1 addition & 2 deletions prediction-service/app/scorer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
import pickle

import pandas
Expand All @@ -19,6 +18,6 @@ def score(self, input_df: pandas.DataFrame) -> pandas.DataFrame:


def get_scorer():
model_path = 'app/model_store/pickled_model_gbm_no_balancing'
model_path = 'app/model_store/pickled_model_gbm_smote'
with open(model_path, 'rb') as f:
return Scorer(pickle.load(f))
Empty file.
46 changes: 46 additions & 0 deletions prediction-service/app/test/test_.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from fastapi.testclient import TestClient
import pytest

from app.main import app


@pytest.fixture(scope='session')
def client():
    """Provide a TestClient wrapping the application, shared by the whole test session."""
    test_client = TestClient(app)
    return test_client


def test_healthcheck(client):
    """GET /healthcheck must answer with status 200 and the body 'OK'."""
    reply = client.get('/healthcheck')
    assert (reply.status_code, reply.text) == (200, 'OK')


def test_score_coupon(client):
    """POST one customer with three coupons to /score and check the reply.

    The reply must mention every submitted coupon id, and its predictions
    must be listed in descending order.
    """
    customer = {
        'customer_id': '25',
        'gender': 'F',
        'age': 35,
        'mean_buy_price': 15.22,
        'total_coupons_used': 2009,
        'mean_discount_received': 12.17,
        'unique_products_bought': 2113,
        'unique_products_bought_with_coupons': 924,
        'total_items_bought': 5841,
    }
    coupons = [
        {
            'coupon_id': '116',
            'coupon_type': 'department',
            'department': 'Boys',
            'discount': 64,
            'how_many_products_required': 1,
            'product_mean_price': 11.53,
            'products_available': 609,
        },
        {
            'coupon_id': '203',
            'coupon_type': 'buy_all',
            'department': 'Boys',
            'discount': 65,
            'how_many_products_required': 4,
            'product_mean_price': 7.85,
            'products_available': 4,
        },
        {
            'coupon_id': '207',
            'coupon_type': 'buy_all',
            'department': 'Boys',
            'discount': 69,
            'how_many_products_required': 5,
            'product_mean_price': 66.62,
            'products_available': 5,
        },
    ]
    payload = {'customer': customer, 'coupons': coupons}

    reply = client.post('/score', json=payload)
    assert reply.ok, reply.text

    # Decode the reply once and reuse it for both checks.
    scored = reply.json()

    # Every coupon that was sent must come back in the reply.
    returned_ids = [item['coupon_id'] for item in scored]
    for coupon in payload['coupons']:
        assert coupon['coupon_id'] in returned_ids

    # Predictions must already be ordered from highest to lowest.
    predictions = [item['prediction'] for item in scored]
    assert predictions == sorted(predictions, reverse=True)
2 changes: 2 additions & 0 deletions prediction-service/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
fastapi==0.63.0
pandas==1.1.5
pytest==6.2.3
requests==2.25.1
scikit-learn==0.24.1
uvicorn[standard]==0.13.4
19 changes: 11 additions & 8 deletions recommendation-service/.environment.variables.sh
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
export ENTRY_EVENT_TOPIC_NAME='ENTRY_EVENTS'
export FOCUS_EVENT_TOPIC_NAME='FOCUS_EVENTS'
export COUPON_PREDICTION_TOPIC_NAME='PREDICTION_RESULTS'
export MQTT_HOST='localhost'
export MQTT_PORT='1883'

export GROUP_ID=grupa1
export ENTRY_EVENT_TOPIC_NAME='customer/enter'
export FOCUS_EVENT_TOPIC_NAME='customer/focus'
export COUPON_PREDICTION_TOPIC_NAME='customer/prediction'

export COUPON_SCORER_URL='http://127.0.0.1:8001/score'
export COUPON_SCORER_URL='http://127.0.0.1:8002/score'

export TESTING_NO_KAFKA=false
export TESTING_NO_MQTT=false
export TESTING_NO_POSTGRES=false
export TESTING_NO_SCORING_SERVICE=false

export DB_NAME='recommendation_cache'
export DB_NAME='cache_db'
export DB_USER='postgres'
export DB_PASSWORD='root'
export DB_HOST='127.0.0.1'
export DB_PASSWORD='postgres'
export DB_HOST='localhost'
export DB_PORT='5432'
85 changes: 50 additions & 35 deletions recommendation-service/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ JSON
"id": string # Customer ID
},
"coupon": {
"id": string, # Coupon ID
"id": int, # Coupon ID
"type": string, # Coupon type (one of: "buy_more", "buy_all", "just_discount", "department")
"department": string, # Department name (supported departments are: 'Boys', 'Girls', 'Men', 'Sports', 'Women')
"discount": float, # Coupon discount in percentage
Expand All @@ -119,17 +119,17 @@ JSON
"start_date": string, # Coupon valid from date
"end_date": string, # Coupon valid to date
"products": [{ # List of products covered by the coupon
"id": string, # Product id
"id": int, # Product id
"name": string, # Product name
"category": string, # Product category
"sizes": string, # Available sizes
"vendor": string, # Vendor
"description": string, # Item description
"buy_price": fload, # Regular item price
"buy_price": float, # Regular item price
"department": string # Product department
}]
},
"ts": int, # Timestamp (unix time)
"ts": int # Timestamp (unix time)
}
```

Expand Down Expand Up @@ -178,20 +178,22 @@ Dependencies of the project are contained in [requirements.txt](requirements.txt
available.

All the packages can be installed with:
`pip install -f requirements.txt`
`pip install -r requirements.txt`

## Service configuration

The service reads the following **environment variables**:

| Variable | Description | Default |
|------------------------|-----------------------------------------|--------------:|
| MQTT_HOST | comma-separated list of MQTT brokers | 127.0.0.1:1883|
| CLIENT_ID | optional identifier of a MQTT consumer | MQTTClient |
| MQTT_HOST | comma-separated list of MQTT brokers | - |
| MQTT_PORT | MQTT brokers' port | - |
| MQTT_USERNAME | MQTT user username | None |
| MQTT_PASSWORD | MQTT user password | None |
| MQTT_BROKER_CERT_FILE | path to MQTT ssl cert file | None |
| ENTRY_EVENT_TOPIC_NAME | topic for entry events | - |
| FOCUS_EVENT_TOPIC_NAME | topic for focus events | - |
| COUPON_PREDICTION_TOPIC_NAME | topic for sending prediction results | - |
| COUPON_SCORER_URL | URL of the scorer service | - |

(Parameters with `-` in "Default" column are required.)

Expand All @@ -206,7 +208,7 @@ there).

The code reads sensitive information (tokens, secrets) from environment variables. They need to be set accordingly in
advance.
`environment.variables.sh` can be used for that purpose. Then, in order to run the service the following commands can be
`.environment.variables.sh` can be used for that purpose. Then, in order to run the service the following commands can be
used:

```
Expand Down Expand Up @@ -304,47 +306,60 @@ This component uses PostgreSQL as a cache. It stores coupons and customer data.
DB tables:

```sql
CREATE TABLE coupon_categories (
id SERIAL,
coupon_id INT,
item_id INT,
category VARCHAR(50),
PRIMARY KEY (id)
);

CREATE TABLE coupon_info (
coupon_id INT,
mean_coupon_discount FLOAT,
mean_item_price FLOAT,
coupon_type VARCHAR(16),
department VARCHAR(10),
discount INT,
how_many_products_required INT,
start_date VARCHAR(10),
end_date VARCHAR(10),
product_mean_price REAL,
products_available INT,
PRIMARY KEY (coupon_id)
);

CREATE TABLE product_info (
product_id INT,
name VARCHAR(256),
category VARCHAR(50),
sizes VARCHAR(50),
vendor VARCHAR(50),
description VARCHAR(256),
buy_price REAL,
department VARCHAR(10),
PRIMARY KEY (product_id)
);

CREATE TABLE coupon_product (
coupon_id INT,
product_id INT,
FOREIGN KEY (coupon_id) REFERENCES coupon_info(coupon_id),
FOREIGN KEY (product_id) REFERENCES product_info(product_id)
);

CREATE TABLE customer_info (
customer_id INT,
age_range VARCHAR(6),
marital_status VARCHAR(10),
family_size INT,
no_of_children INT,
income_bracket INT,
customer_id INT,
gender VARCHAR(1),
mean_discount_used_by_cust FLOAT,
unique_items_bought_by_cust INT,
mean_selling_price_paid_by_cust FLOAT,
mean_quantity_bought_by_cust FLOAT,
total_discount_used_by_cust FLOAT,
total_coupons_used_by_cust INT,
total_price_paid_by_cust FLOAT,
total_quantity_bought_by_cust INT,
PRIMARY KEY (customer_id)
age INT,
mean_buy_price REAL,
total_coupons_used INT,
mean_discount_received REAL,
unique_products_bought INT,
unique_products_bought_with_coupons INT,
total_items_bought INT,
PRIMARY KEY (customer_id)
);
```

How to fill DB with data:

```sql
COPY coupon_categories(coupon_id, item_id, category) FROM '<<DATA_PATH>>/coupon_categories.csv' DELIMITER ',' CSV HEADER;
COPY coupon_info FROM '<<DATA_PATH>>/coupon_info.csv' DELIMITER ',' CSV HEADER;
COPY product_info FROM '<<DATA_PATH>>/products.csv' DELIMITER ',' CSV HEADER;
COPY coupon_product FROM '<<DATA_PATH>>/coupon_product.csv' DELIMITER ',' CSV HEADER;
COPY customer_info FROM '<<DATA_PATH>>/customer_info.csv' DELIMITER ',' CSV HEADER;
```

CSV files are available in the [../data-mining/coupon-based/csv_4_db/](../data-mining/coupon-based/csv_4_db/) path
CSV files are available in the [../training-with-artificial-data/data_0409_0/data4db/](../training-with-artificial-data/data_0409_0/data4db/) path
5 changes: 1 addition & 4 deletions recommendation-service/app/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1 @@
import logging

logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())
from app.config.log_config import logger
Loading

0 comments on commit 53cace5

Please sign in to comment.