# regression_2.py
import numpy as np
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score
DATA_PATH = 'data.csv'
TARGET_COLUMN = 'y'


def load_dataset(csv_path: str = DATA_PATH) -> pd.DataFrame:
    """Read the CSV at *csv_path* and replace every missing value with 0.

    ``pd.read_csv`` already returns a ``DataFrame``, so no further wrapping
    is needed.
    """
    # NOTE(review): fillna(0) is applied to the whole frame, so it also fills
    # missing target values and puts the integer 0 into text columns, where it
    # becomes its own category once encoded -- confirm this is intended.
    return pd.read_csv(csv_path).fillna(0)


def split_features_target(frame: pd.DataFrame, target: str = TARGET_COLUMN):
    """Return ``(features, target_series)`` with column *target* split off.

    Raises:
        KeyError: if *target* is not a column of *frame*.
    """
    return frame.drop(columns=[target]), frame[target]


def encode_object_columns(features: pd.DataFrame) -> pd.DataFrame:
    """One-hot encode every object-dtype column of *features*.

    Each object column is replaced by indicator columns named
    ``<column>_<value>``. The remaining columns keep their relative order
    and the indicator columns are appended after them. A frame without
    object columns is returned unchanged.
    """
    object_cols = [
        name for name, dtype in features.dtypes.items() if dtype == 'object'
    ]
    if not object_cols:
        return features
    # A single get_dummies call yields the same layout as dropping and
    # concatenating column by column, without copying the frame each time.
    return pd.get_dummies(features, columns=object_cols)


def main() -> None:
    """Fit a linear regression on the dataset and print its validation MSE."""
    frame = load_dataset(DATA_PATH)
    features, target = split_features_target(frame, TARGET_COLUMN)
    print(target.shape)

    features = encode_object_columns(features)
    print(features.shape)

    # Fixed seed keeps the 80/20 train/validation split reproducible.
    X_train, X_val, y_train, y_val = train_test_split(
        features, target, test_size=0.2, random_state=42
    )
    model = LinearRegression()
    model.fit(X_train, y_train)
    print("MSE of Validation: ", mean_squared_error(y_val, model.predict(X_val)))


if __name__ == "__main__":
    main()