Skip to content

Commit

Permalink
OHE on pred_bcat, tqdm
Browse files Browse the repository at this point in the history
  • Loading branch information
sungbeom0324 committed Dec 5, 2023
1 parent 87594d5 commit e16e27d
Showing 1 changed file with 20 additions and 10 deletions.
30 changes: 20 additions & 10 deletions dnn2.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,21 +197,31 @@ def gaussian(x, mu, sigma, A):
###################################################
# PreProcessing_2 #
###################################################
# Builds the feature matrix x_total and the label array y_total from
# df_total, then splits them into training and validation sets.

# NOTE(review): x_total is reassigned in the Case 1 block below before it is
# read, so this assignment has no lasting effect. It looks like the pre-change
# line of the diff -- confirm it is no longer present in the actual file.
x_total = np.array(df_total.filter(items = inputvars_2))
# Case 1 : one-hot encode pred_bcat
# Keep only the columns named in inputvars_2, replace the 'pred_bcat' column
# with indicator columns prefixed 'pred_bcat_', and append those indicators
# after the remaining features. drop_first=True omits the indicator for the
# first category level, so k levels produce k-1 columns.
_x_total = df_total.filter(items = inputvars_2)
_x_ohe = pd.get_dummies(_x_total['pred_bcat'], prefix='pred_bcat', drop_first=True)
_x_total = _x_total.drop('pred_bcat', axis=1)
_x_total = pd.concat([_x_total, _x_ohe], axis = 1)
x_total = np.array(_x_total)
# Labels come from the "category" column; filtering with a list keeps a
# frame, so y_total is presumably a 2-D single-column array -- TODO confirm.
y_total = np.array(df_total.filter(items = ["category"]))
# End of Case 1

# The bare string literal below is a disabled alternative (Case 2): it drops
# 'pred_bcat' entirely instead of encoding it. It is never executed.
'''
# Case 2 : No OHE, No pred_bcat #
_x_total = df_total.filter(items = inputvars_2)
_x_total = _x_total.drop('pred_bcat', axis=1)
x_total = np.array(_x_total)
y_total = np.array(df_total.filter(items = ["category"]))
# Case 2 : No OHE, No pred_bcat #
'''

# Dump the final arrays for a visual sanity check.
print("Final x = ", x_total)
print("Final y = ", y_total)

# Data Set Partitioning
ntotal = len(y_total)
# NOTE(review): train_len is not referenced in the visible lines; the 70/30
# split is done by train_test_split via test_size=0.3.
train_len = int(0.7*ntotal)
# NOTE(review): presumably scikit-learn's train_test_split. No random_state is
# passed, so the split would differ between runs -- confirm that is intended.
x_train, x_val, y_train, y_val = train_test_split(x_total, y_total, test_size=0.3)
# Disabled experiments for replacing None / NaN entries with 0:
#x_train = np.array([data if data is not None else 0 for data in x_train])
#x_val = np.array([data if data is not None else 0 for data in x_val])
#y_train = np.array([data if data is not None else 0 for data in y_train])
#y_val = np.array([data if data is not None else 0 for data in y_val])
#x_train = np.nan_to_num(x_train, nan=0)
#x_val = np.nan_to_num(x_val, nan=0)


###################################################
Expand Down Expand Up @@ -299,18 +309,18 @@ def gaussian(x, mu, sigma, A):
# Unpack the evaluation output: index 0 is used as the loss and index 1 as
# the accuracy (presumably the return value of a model evaluate call --
# test_results is defined outside the visible lines).
test_loss = test_results[0]
test_acc = test_results[1]
print(f"Test accuracy: {test_acc * 100:.2f}%")

###################################################
# Feature Importance #
###################################################
print("# FEATURE IMPORTANCE #")
# Despite its name, model_dir is the path of the saved model file itself.
model_dir = outdir + '/best_model.h5'
# NOTE(review): plot_feature_importance is called twice with different
# feature-name lists. The first call passes inputvars_2, which still names
# 'pred_bcat' rather than the one-hot columns that x_val was built from; the
# second passes the column labels of the encoded frame. The first call looks
# like the pre-change line of the diff -- confirm only the second remains.
plot_feature_importance(model_dir, x_val, inputvars_2, outdir)
plot_feature_importance(model_dir, x_val, _x_total.columns, outdir)

###################################################
# Time #
###################################################
# NOTE(review): ntrain is defined outside the visible lines and the factor 5
# is not explained here -- confirm what "full data" is meant to count.
print("Number of full data: ", ntrain*5)
# NOTE(review): colnames is assigned and printed twice; the first pair reports
# every column of df_total, the second only the model input columns. The first
# looks like the pre-change line of the diff -- confirm.
colnames = df_total.columns; print("Columns :",colnames)
colnames = _x_total.columns; print("Columns :",colnames)
# start_time and end_time are set outside the visible lines.
execution_time = end_time - start_time
print(f"execution time: {execution_time} second")
print("---Done---")

0 comments on commit e16e27d

Please sign in to comment.