From cdc0e5a316e7d8b92730bab82ecd3b506c818ebe Mon Sep 17 00:00:00 2001 From: Prakruti Singh Date: Wed, 21 Apr 2021 15:27:44 +0530 Subject: [PATCH 01/12] new scoring metrics added --- gramex/handlers/mlhandler.py | 58 ++++++++++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 3 deletions(-) diff --git a/gramex/handlers/mlhandler.py b/gramex/handlers/mlhandler.py index b0c2f39e4..467d6400a 100644 --- a/gramex/handlers/mlhandler.py +++ b/gramex/handlers/mlhandler.py @@ -20,6 +20,9 @@ from slugify import slugify from tornado.gen import coroutine from tornado.web import HTTPError +from sklearn.metrics import accuracy_score,f1_score,recall_score,roc_auc_score +from sklearn.metrics import r2_score, mean_squared_error + op = os.path MLCLASS_MODULES = [ @@ -40,6 +43,8 @@ 'nums': [], 'cats': [], 'target_col': None, + 'score_pref': None, + 'multiclass': None } ACTIONS = ['predict', 'score', 'append', 'train', 'retrain'] DEFAULT_TEMPLATE = op.join(op.dirname(__file__), '..', 'apps', 'mlhandler', 'template.html') @@ -103,12 +108,17 @@ def setup(cls, data=None, model={}, config_dir='', **kwargs): cls.set_opt('class', model.get('class')) cls.set_opt('params', model.get('params', {})) + target_col = cls.get_opt('target_col') + cls.score_pref = cls.get_opt('score_pref') + cls.multiclass = cls.get_opt('multiclass') if op.exists(cls.model_path): # If the pkl exists, load it cls.model = joblib.load(cls.model_path) elif data is not None: mclass = cls.get_opt('class', model.get('class', False)) - params = cls.get_opt('params', {}) + params = cls.get_opt('params', {}) + score_pref = cls.get_opt('score_pref') + multiclass = cls.get_opt('multiclass') data = cls._filtercols(data) data = cls._filterrows(data) cls.model = cls._assemble_pipeline(data, mclass=mclass, params=params) @@ -148,7 +158,7 @@ def set_opt(cls, key, value): transform[key] = value cls.config_store.dump('transform', transform) cls.config_store.update['transform'] = transform - elif key in ('class', 'params'): + elif key in ('class', 'params', 'score_pref'): model = cls.config_store.load('model', {}) model[key] = value if key == 'class': @@ -261,6 +271,46 @@ def _transform(self, data, **kwargs): data = self._filterrows(data, **kwargs) return data + def _chooseScore(self, data, target): + try: + print( 'What score are we reading here? -----> ', self.score_pref) + print('What class category are we reading here? -----> ', self.multiclass) + predict = self.model.predict(data) + print('PREDICT ----->', predict) + print('TARGET ------>', target) + + #FOR CLASSIFICATION + if(self.score_pref == 'f1-score'): + return f1_score(target, predict, average= 'weighted') # can change average value + + elif(self.score_pref == 'accuracy'): + return accuracy_score(target, predict) + + + elif(self.score_pref == 'recall'): + return recall_score(target, predict, average=None) + + elif(self.score_pref == 'roc_auc_score'): + result = roc_auc_score(target, predict) + if(self.multiclass == 'True'): + return roc_auc_score(target, predict, multi_class='ovr') + return result + + #FOR REGRESSION + #elif(self.score_pref == 'mean_squared_error'): + # return mean_squared_error(target, predict) + + elif(self.score_pref == 'r2'): + return r2_score(target, predict) + + else: + return self.model.score(data,target) + + except ValueError: + print('ValueError occured. Please check if the scoring metric is appropriate for the given data!') + + + def _predict(self, data=None, score_col=''): if data is None: data = self._parse_data(False) @@ -268,7 +318,9 @@ def _predict(self, data=None, score_col=''): self.model = cache.open(self.model_path, joblib.load) try: target = data.pop(score_col) - return self.model.score(data, target) + s = self._chooseScore(data, target) + print('s: ',s) + return s except KeyError: # Set data in the same order as the transformer requests try: From e737117495f846cdb4d046da08c5dfabee237604 Mon Sep 17 00:00:00 2001 From: Prakruti Singh Date: Fri, 23 Apr 2021 14:00:00 +0530 Subject: [PATCH 02/12] new changes --- gramex/handlers/mlhandler.py | 60 +++++------------------------------- 1 file changed, 7 insertions(+), 53 deletions(-) diff --git a/gramex/handlers/mlhandler.py b/gramex/handlers/mlhandler.py index 467d6400a..ffdb6ee12 100644 --- a/gramex/handlers/mlhandler.py +++ b/gramex/handlers/mlhandler.py @@ -20,9 +20,7 @@ from slugify import slugify from tornado.gen import coroutine from tornado.web import HTTPError -from sklearn.metrics import accuracy_score,f1_score,recall_score,roc_auc_score -from sklearn.metrics import r2_score, mean_squared_error - +from sklearn.metrics import get_scorer op = os.path MLCLASS_MODULES = [ @@ -42,9 +40,7 @@ 'pipeline': True, 'nums': [], 'cats': [], - 'target_col': None, - 'score_pref': None, - 'multiclass': None + 'target_col': None } ACTIONS = ['predict', 'score', 'append', 'train', 'retrain'] DEFAULT_TEMPLATE = op.join(op.dirname(__file__), '..', 'apps', 'mlhandler', 'template.html') @@ -109,16 +105,12 @@ def setup(cls, data=None, model={}, config_dir='', **kwargs): cls.set_opt('class', model.get('class')) cls.set_opt('params', model.get('params', {})) target_col = cls.get_opt('target_col') - cls.score_pref = cls.get_opt('score_pref') - cls.multiclass = cls.get_opt('multiclass') if op.exists(cls.model_path): # If the pkl exists, load it cls.model = joblib.load(cls.model_path) elif data is not None: mclass = cls.get_opt('class', model.get('class', False)) params = cls.get_opt('params', {}) - score_pref = cls.get_opt('score_pref') - multiclass = cls.get_opt('multiclass') data = cls._filtercols(data) data = cls._filterrows(data) cls.model = cls._assemble_pipeline(data, mclass=mclass, params=params) @@ -158,7 +150,7 @@ def set_opt(cls, key, value): transform[key] = value cls.config_store.dump('transform', transform) cls.config_store.update['transform'] = transform - elif key in ('class', 'params', 'score_pref'): + elif key in ('class', 'params'): model = cls.config_store.load('model', {}) model[key] = value if key == 'class': @@ -271,45 +263,6 @@ def _transform(self, data, **kwargs): data = self._filterrows(data, **kwargs) return data - def _chooseScore(self, data, target): - try: - print( 'What score are we reading here? -----> ', self.score_pref) - print('What class category are we reading here? -----> ', self.multiclass) - predict = self.model.predict(data) - print('PREDICT ----->', predict) - print('TARGET ------>', target) - - #FOR CLASSIFICATION - if(self.score_pref == 'f1-score'): - return f1_score(target, predict, average= 'weighted') # can change average value - - elif(self.score_pref == 'accuracy'): - return accuracy_score(target, predict) - - - elif(self.score_pref == 'recall'): - return recall_score(target, predict, average=None) - - elif(self.score_pref == 'roc_auc_score'): - result = roc_auc_score(target, predict) - if(self.multiclass == 'True'): - return roc_auc_score(target, predict, multi_class='ovr') - return result - - #FOR REGRESSION - #elif(self.score_pref == 'mean_squared_error'): - # return mean_squared_error(target, predict) - - elif(self.score_pref == 'r2'): - return r2_score(target, predict) - - else: - return self.model.score(data,target) - - except ValueError: - print('ValueError occured. Please check if the scoring metric is appropriate for the given data!') - - def _predict(self, data=None, score_col=''): if data is None: @@ -318,9 +271,10 @@ def _predict(self, data=None, score_col=''): self.model = cache.open(self.model_path, joblib.load) try: target = data.pop(score_col) - s = self._chooseScore(data, target) - print('s: ',s) - return s + metric = self.get_argument('_metric') + print('Metric: ',metric) + scorer = get_scorer(metric) + return scorer(self.model, data, target) except KeyError: # Set data in the same order as the transformer requests try: From 64e6c22dd9c81375dc34784e9b365436023b23d8 Mon Sep 17 00:00:00 2001 From: Prakruti Singh Date: Fri, 23 Apr 2021 22:42:03 +0530 Subject: [PATCH 03/12] made the requested changes --- env/pyvenv.cfg | 3 +++ env/share/man/man1/ipython.1.gz | Bin 0 -> 1039 bytes gramex/handlers/mlhandler.py | 11 ++++++----- 3 files changed, 9 insertions(+), 5 deletions(-) create mode 100644 env/pyvenv.cfg create mode 100644 env/share/man/man1/ipython.1.gz diff --git a/env/pyvenv.cfg b/env/pyvenv.cfg new file mode 100644 index 000000000..7e6d1db00 --- /dev/null +++ b/env/pyvenv.cfg @@ -0,0 +1,3 @@ +home = /Users/prakruti/opt/anaconda3/bin +include-system-site-packages = false +version = 3.8.5 diff --git a/env/share/man/man1/ipython.1.gz b/env/share/man/man1/ipython.1.gz new file mode 100644 index 0000000000000000000000000000000000000000..1134f76bd8431b34437b298432d09333285a1cba GIT binary patch literal 1039 zcmV+q1n~PGiwFp5uhCrs18H!1bZBpGE-?U&Rbg-2HW2;JUvUTqIPGM49yV+UiUMxp zASxEev%CVqJfxI#ve-(bKvMDa?{_36+iCM*h5^aEyLa#2-O=&x0{++M4*Lj}_lxWF z3Z`$TAhjw>_r_v^xE0!1Xu0MMTGSD&7Wc~tOr^TIfDD1xHRQZC-aExy$>Q8SU0!dK zht+w+eENF?4e#moFay(N26HQv^a#pUs80X}xb`{J0gEdzh!iO=e|r=8=ll(ew|9`N zAGdc8E11K@$FA;S{%Zt3|1_Upc*u2)+z?3drK3`8B@7hYQt}EVK2tW0&%y|~C!2?F z`5t2z8fsDvg4d+sxmG5EQn{ECHOd^Xyi+WzQO`mcp4A9d-%b;O=pjCcLs}yxjwFYx3zM=HOn(5I> zc&IIq2_e(SzXk`>!aMo?%t8J*2W(o~d)L6hsWpf?H7(|%6!}rgQ$u|Pd#CeLI0z}y zyOF~%B~T$Rjug}M%uJ)lb^n^eOx`P(}=yMru8vR7fj(5HvZw zv#O!_%Yn48inO~Fl8F0WsLs+WEn3k_-iVy3>Jzzn3a zm?`x&fD+K9%J5!SqHOPH$|U`z1@ zmb|MiBe);#9`2X3c&ts7a7ymdk-E;E$|#8cqw46wmtH1EBs2sIc;^!t=BX0+jZy4SV4;q9r;R}uYa zS*}l3H&^2zutJ?0KBR=4Za<5YFkDy&3zN4`Av$jR$p9P*bzIqr+ibvi8Ym4iEdC<< zCOzsvf*-W77A0i8FJZLY(7#o!+J;sEw#XZJS6SPdtJ!Q%I&@h~t(Xmj$LIT){RePY JU6%?7001;H`=I~; literal 0 HcmV?d00001 diff --git a/gramex/handlers/mlhandler.py b/gramex/handlers/mlhandler.py index ffdb6ee12..6b6b4ae4a 100644 --- a/gramex/handlers/mlhandler.py +++ b/gramex/handlers/mlhandler.py @@ -63,6 +63,7 @@ class MLHandler(FormHandler): @classmethod def setup(cls, data=None, model={}, config_dir='', **kwargs): + print('hellos') cls.slug = slugify(cls.name) # Create the config store directory if not config_dir: @@ -104,8 +105,6 @@ def setup(cls, data=None, model={}, config_dir='', **kwargs): cls.set_opt('class', model.get('class')) cls.set_opt('params', model.get('params', {})) - target_col = cls.get_opt('target_col') - if op.exists(cls.model_path): # If the pkl exists, load it cls.model = joblib.load(cls.model_path) elif data is not None: @@ -272,9 +271,11 @@ def _predict(self, data=None, score_col=''): try: target = data.pop(score_col) metric = self.get_argument('_metric') - print('Metric: ',metric) - scorer = get_scorer(metric) - return scorer(self.model, data, target) + if metric not None: + scorer = get_scorer(metric) + return scorer(self.model, data, target) + else: + return self.model.score(data, target) except KeyError: # Set data in the same order as the transformer requests try: From babc4bfe8a0320aceb63c890ceed028a9fb148eb Mon Sep 17 00:00:00 2001 From: Prakruti Singh Date: Sat, 24 Apr 2021 13:22:23 +0530 Subject: [PATCH 04/12] removed unnecessary files and print statements + fixed _predict() --- env/pyvenv.cfg | 3 --- env/share/man/man1/ipython.1.gz | Bin 1039 -> 0 bytes gramex/handlers/mlhandler.py | 5 ++--- 3 files changed, 2 insertions(+), 6 deletions(-) delete mode 100644 env/pyvenv.cfg delete mode 100644 env/share/man/man1/ipython.1.gz diff --git a/env/pyvenv.cfg b/env/pyvenv.cfg deleted file mode 100644 index 7e6d1db00..000000000 --- a/env/pyvenv.cfg +++ /dev/null @@ -1,3 +0,0 @@ -home = /Users/prakruti/opt/anaconda3/bin -include-system-site-packages = false -version = 3.8.5 diff --git a/env/share/man/man1/ipython.1.gz b/env/share/man/man1/ipython.1.gz deleted file mode 100644 index 1134f76bd8431b34437b298432d09333285a1cba..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1039 zcmV+q1n~PGiwFp5uhCrs18H!1bZBpGE-?U&Rbg-2HW2;JUvUTqIPGM49yV+UiUMxp zASxEev%CVqJfxI#ve-(bKvMDa?{_36+iCM*h5^aEyLa#2-O=&x0{++M4*Lj}_lxWF z3Z`$TAhjw>_r_v^xE0!1Xu0MMTGSD&7Wc~tOr^TIfDD1xHRQZC-aExy$>Q8SU0!dK zht+w+eENF?4e#moFay(N26HQv^a#pUs80X}xb`{J0gEdzh!iO=e|r=8=ll(ew|9`N zAGdc8E11K@$FA;S{%Zt3|1_Upc*u2)+z?3drK3`8B@7hYQt}EVK2tW0&%y|~C!2?F z`5t2z8fsDvg4d+sxmG5EQn{ECHOd^Xyi+WzQO`mcp4A9d-%b;O=pjCcLs}yxjwFYx3zM=HOn(5I> zc&IIq2_e(SzXk`>!aMo?%t8J*2W(o~d)L6hsWpf?H7(|%6!}rgQ$u|Pd#CeLI0z}y zyOF~%B~T$Rjug}M%uJ)lb^n^eOx`P(}=yMru8vR7fj(5HvZw zv#O!_%Yn48inO~Fl8F0WsLs+WEn3k_-iVy3>Jzzn3a zm?`x&fD+K9%J5!SqHOPH$|U`z1@ zmb|MiBe);#9`2X3c&ts7a7ymdk-E;E$|#8cqw46wmtH1EBs2sIc;^!t=BX0+jZy4SV4;q9r;R}uYa zS*}l3H&^2zutJ?0KBR=4Za<5YFkDy&3zN4`Av$jR$p9P*bzIqr+ibvi8Ym4iEdC<< zCOzsvf*-W77A0i8FJZLY(7#o!+J;sEw#XZJS6SPdtJ!Q%I&@h~t(Xmj$LIT){RePY JU6%?7001;H`=I~; diff --git a/gramex/handlers/mlhandler.py b/gramex/handlers/mlhandler.py index 6b6b4ae4a..9481728b6 100644 --- a/gramex/handlers/mlhandler.py +++ b/gramex/handlers/mlhandler.py @@ -63,7 +63,6 @@ class MLHandler(FormHandler): @classmethod def setup(cls, data=None, model={}, config_dir='', **kwargs): - print('hellos') cls.slug = slugify(cls.name) # Create the config store directory if not config_dir: @@ -270,8 +269,8 @@ def _predict(self, data=None, score_col=''): self.model = cache.open(self.model_path, joblib.load) try: target = data.pop(score_col) - metric = self.get_argument('_metric') - if metric not None: + metric = self.get_argument('_metric', False) + if metric: scorer = get_scorer(metric) return scorer(self.model, data, target) else: From b1683b8d0b80998f9004f6cc88eae392537c1e58 Mon Sep 17 00:00:00 2001 From: Prakruti Singh Date: Mon, 26 Apr 2021 13:09:13 +0530 Subject: [PATCH 05/12] removed empty lines and spaces + fixed if..else indentation --- gramex/handlers/mlhandler.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/gramex/handlers/mlhandler.py b/gramex/handlers/mlhandler.py index 9481728b6..541eae1ee 100644 --- a/gramex/handlers/mlhandler.py +++ b/gramex/handlers/mlhandler.py @@ -108,7 +108,7 @@ def setup(cls, data=None, model={}, config_dir='', **kwargs): cls.model = joblib.load(cls.model_path) elif data is not None: mclass = cls.get_opt('class', model.get('class', False)) - params = cls.get_opt('params', {}) + params = cls.get_opt('params', {}) data = cls._filtercols(data) data = cls._filterrows(data) cls.model = cls._assemble_pipeline(data, mclass=mclass, params=params) @@ -261,7 +261,6 @@ def _transform(self, data, **kwargs): data = self._filterrows(data, **kwargs) return data - def _predict(self, data=None, score_col=''): if data is None: data = self._parse_data(False) @@ -271,10 +270,11 @@ def _predict(self, data=None, score_col=''): target = data.pop(score_col) metric = self.get_argument('_metric', False) if metric: - scorer = get_scorer(metric) - return scorer(self.model, data, target) - else: - return self.model.score(data, target) + scorer = get_scorer(metric) + return scorer(self.model, data, target) + + else: + return self.model.score(data, target) except KeyError: # Set data in the same order as the transformer requests try: From ddd5d73e5898ebb84b8edd293460758d28a4927e Mon Sep 17 00:00:00 2001 From: Prakruti Singh Date: Wed, 28 Apr 2021 14:52:35 +0530 Subject: [PATCH 06/12] new changes --- gramex/handlers/mlhandler.py | 4 +--- tests/test_mlhandler.py | 5 +++++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/gramex/handlers/mlhandler.py b/gramex/handlers/mlhandler.py index 541eae1ee..ac35a2c9a 100644 --- a/gramex/handlers/mlhandler.py +++ b/gramex/handlers/mlhandler.py @@ -272,9 +272,7 @@ def _predict(self, data=None, score_col=''): if metric: scorer = get_scorer(metric) return scorer(self.model, data, target) - - else: - return self.model.score(data, target) + return self.model.score(data, target) except KeyError: # Set data in the same order as the transformer requests try: diff --git a/tests/test_mlhandler.py b/tests/test_mlhandler.py index fcc63d959..4008fc946 100644 --- a/tests/test_mlhandler.py +++ b/tests/test_mlhandler.py @@ -266,6 +266,11 @@ def test_get_bulk_score(self): data=self.df.to_json(orient='records'), headers={'Content-Type': 'application/json'}) self.assertGreaterEqual(resp.json()['score'], self.ACC_TOL) + resp = self.get( + '/mlhandler?_action=score&_metric=f1_weighted', method='post', + data=self.df.to_json(orient='records'), + headers={'Content-Type': 'application/json'}) + self.assertGreaterEqual(resp.json()['score'], self.ACC_TOL) def test_get_cache(self): df = pd.DataFrame.from_records(self.get('/mlhandler?_cache=true').json()) From 4b672cddece15050f104b35e160e91bfa2f28721 Mon Sep 17 00:00:00 2001 From: Prakruti Singh Date: Thu, 13 May 2021 17:22:22 +0530 Subject: [PATCH 07/12] added Cross Validation results --- gramex/handlers/mlhandler.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/gramex/handlers/mlhandler.py b/gramex/handlers/mlhandler.py index ac35a2c9a..74417d871 100644 --- a/gramex/handlers/mlhandler.py +++ b/gramex/handlers/mlhandler.py @@ -21,6 +21,7 @@ from tornado.gen import coroutine from tornado.web import HTTPError from sklearn.metrics import get_scorer +from sklearn.model_selection import cross_val_predict, cross_val_score op = os.path MLCLASS_MODULES = [ @@ -44,8 +45,7 @@ } ACTIONS = ['predict', 'score', 'append', 'train', 'retrain'] DEFAULT_TEMPLATE = op.join(op.dirname(__file__), '..', 'apps', 'mlhandler', 'template.html') -search_modelclass = lambda x: locate(x, MLCLASS_MODULES) # NOQA: E731 - +search_modelclass = lambda x: locate(x, MLCLASS_MODULES) # NOQA: E731\ def _fit(model, x, y, path=None, name=None): app_log.info('Starting training...') @@ -112,14 +112,26 @@ def setup(cls, data=None, model={}, config_dir='', **kwargs): data = cls._filtercols(data) data = cls._filterrows(data) cls.model = cls._assemble_pipeline(data, mclass=mclass, params=params) - # train the model target = data[target_col] train = data[[c for c in data if c != target_col]] + #ADD HERE + mod = cls.modelFunction() + CVscore = cross_val_score(mod, train, target) + CV = sum(CVscore)/len(CVscore) + print('CV score: ', CV) gramex.service.threadpool.submit( _fit, cls.model, train, target, cls.model_path, cls.name) cls.config_store.flush() + @classmethod + def modelFunction(cls, mclass = ''): + model_kwargs = cls.config_store.load('model', {}) + mclass = model_kwargs.get('class', False) + if mclass: + model = search_modelclass(mclass)(**model_kwargs.get('params', {})) + return model + @classmethod def load_data(cls, default=pd.DataFrame()): try: From 1063207611f55ce79f53ac363f90f75bf6c70813 Mon Sep 17 00:00:00 2001 From: Prakruti Singh Date: Thu, 13 May 2021 17:36:12 +0530 Subject: [PATCH 08/12] cross validation --- gramex/handlers/mlhandler.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/gramex/handlers/mlhandler.py b/gramex/handlers/mlhandler.py index 74417d871..9997057fe 100644 --- a/gramex/handlers/mlhandler.py +++ b/gramex/handlers/mlhandler.py @@ -22,6 +22,7 @@ from tornado.web import HTTPError from sklearn.metrics import get_scorer from sklearn.model_selection import cross_val_predict, cross_val_score +from sklearn.model_selection import cross_val_predict, cross_val_score op = os.path MLCLASS_MODULES = [ @@ -45,7 +46,8 @@ } ACTIONS = ['predict', 'score', 'append', 'train', 'retrain'] DEFAULT_TEMPLATE = op.join(op.dirname(__file__), '..', 'apps', 'mlhandler', 'template.html') -search_modelclass = lambda x: locate(x, MLCLASS_MODULES) # NOQA: E731\ +search_modelclass = lambda x: locate(x, MLCLASS_MODULES) # NOQA: E731 + def _fit(model, x, y, path=None, name=None): app_log.info('Starting training...') @@ -115,7 +117,7 @@ def setup(cls, data=None, model={}, config_dir='', **kwargs): # train the model target = data[target_col] train = data[[c for c in data if c != target_col]] - #ADD HERE + # cross validation mod = cls.modelFunction() CVscore = cross_val_score(mod, train, target) CV = sum(CVscore)/len(CVscore) From 09b49f1ea82b971ceba5169e6a1e81d572639ff1 Mon Sep 17 00:00:00 2001 From: Prakruti Singh Date: Tue, 18 May 2021 12:52:53 +0530 Subject: [PATCH 09/12] requested changes made --- gramex/handlers/mlhandler.py | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/gramex/handlers/mlhandler.py b/gramex/handlers/mlhandler.py index 9997057fe..379ddf2bf 100644 --- a/gramex/handlers/mlhandler.py +++ b/gramex/handlers/mlhandler.py @@ -23,6 +23,7 @@ from sklearn.metrics import get_scorer from sklearn.model_selection import cross_val_predict, cross_val_score from sklearn.model_selection import cross_val_predict, cross_val_score +from ast import literal_eval op = os.path MLCLASS_MODULES = [ @@ -42,7 +43,9 @@ 'pipeline': True, 'nums': [], 'cats': [], - 'target_col': None + 'target_col': None, + 'CV': True, + 'CVargs': [] } ACTIONS = ['predict', 'score', 'append', 'train', 'retrain'] DEFAULT_TEMPLATE = op.join(op.dirname(__file__), '..', 'apps', 'mlhandler', 'template.html') @@ -118,10 +121,9 @@ def setup(cls, data=None, model={}, config_dir='', **kwargs): target = data[target_col] train = data[[c for c in data if c != target_col]] # cross validation - mod = cls.modelFunction() - CVscore = cross_val_score(mod, train, target) - CV = sum(CVscore)/len(CVscore) - print('CV score: ', CV) + print('yayyy we are here') + cls.CrossValidation(train,target) + print('should have printed') gramex.service.threadpool.submit( _fit, cls.model, train, target, cls.model_path, cls.name) cls.config_store.flush() @@ -133,7 +135,20 @@ def modelFunction(cls, mclass = ''): if mclass: model = search_modelclass(mclass)(**model_kwargs.get('params', {})) return model - + + @classmethod + def CrossValidation(cls,train,target): + mod = cls.modelFunction() + CV = cls.get_opt('CV') #can edit to make CV true/false etc. + if CV: + CVargs = cls.get_opt('CVargs') + if CVargs: + CVscore = cross_val_score(mod, X=train, y=target, **literal_eval(json.dumps(CVargs))) + else: + CVscore = cross_val_score(mod, train, target) + CV = sum(CVscore)/len(CVscore) + print('CV score: ', CV) + @classmethod def load_data(cls, default=pd.DataFrame()): try: @@ -365,6 +380,8 @@ def _train(self, data=None): target = data[target_col] train = data[[c for c in data if c != target_col]] self.model = self._assemble_pipeline(data, force=True) + print('IN TRAIN') + self.CrossValidation(train,target) _fit(self.model, train, target, self.model_path) return {'score': self.model.score(train, target)} @@ -375,6 +392,8 @@ def _score(self): self._check_model_path() data = self._parse_data(False) target_col = self.get_argument('target_col', self.get_opt('target_col')) + print('IN _SCORE') + #self.CrossValidation(data,target_col) self.set_opt('target_col', target_col) return {'score': self._predict(data, target_col)} From 5f2417327bfa40638e09aa1fd5efdb186b5f2aee Mon Sep 17 00:00:00 2001 From: Prakruti Singh Date: Thu, 20 May 2021 16:16:25 +0530 Subject: [PATCH 10/12] single input for CV --- gramex/handlers/mlhandler.py | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/gramex/handlers/mlhandler.py b/gramex/handlers/mlhandler.py index 379ddf2bf..45fb3d8be 100644 --- a/gramex/handlers/mlhandler.py +++ b/gramex/handlers/mlhandler.py @@ -22,7 +22,6 @@ from tornado.web import HTTPError from sklearn.metrics import get_scorer from sklearn.model_selection import cross_val_predict, cross_val_score -from sklearn.model_selection import cross_val_predict, cross_val_score from ast import literal_eval op = os.path @@ -45,7 +44,6 @@ 'cats': [], 'target_col': None, 'CV': True, - 'CVargs': [] } ACTIONS = ['predict', 'score', 'append', 'train', 'retrain'] DEFAULT_TEMPLATE = op.join(op.dirname(__file__), '..', 'apps', 'mlhandler', 'template.html') @@ -121,9 +119,7 @@ def setup(cls, data=None, model={}, config_dir='', **kwargs): target = data[target_col] train = data[[c for c in data if c != target_col]] # cross validation - print('yayyy we are here') cls.CrossValidation(train,target) - print('should have printed') gramex.service.threadpool.submit( _fit, cls.model, train, target, cls.model_path, cls.name) cls.config_store.flush() @@ -139,15 +135,11 @@ def modelFunction(cls, mclass = ''): @classmethod def CrossValidation(cls,train,target): mod = cls.modelFunction() - CV = cls.get_opt('CV') #can edit to make CV true/false etc. + CV = cls.get_opt('CV') if CV: - CVargs = cls.get_opt('CVargs') - if CVargs: - CVscore = cross_val_score(mod, X=train, y=target, **literal_eval(json.dumps(CVargs))) - else: - CVscore = cross_val_score(mod, train, target) - CV = sum(CVscore)/len(CVscore) - print('CV score: ', CV) + CVscore = cross_val_score(mod, X=train, y=target, **literal_eval(json.dumps(CV))) + CVavg = sum(CVscore)/len(CVscore) + print('Cross Validation Score : ',CVavg) @classmethod def load_data(cls, default=pd.DataFrame()): @@ -380,7 +372,6 @@ def _train(self, data=None): target = data[target_col] train = data[[c for c in data if c != target_col]] self.model = self._assemble_pipeline(data, force=True) - print('IN TRAIN') self.CrossValidation(train,target) _fit(self.model, train, target, self.model_path) return {'score': self.model.score(train, target)} @@ -392,8 +383,6 @@ def _score(self): self._check_model_path() data = self._parse_data(False) target_col = self.get_argument('target_col', self.get_opt('target_col')) - print('IN _SCORE') - #self.CrossValidation(data,target_col) self.set_opt('target_col', target_col) return {'score': self._predict(data, target_col)} From eaa58dcb06477e9e9d8f4bbd536b97ac20c7bcc9 Mon Sep 17 00:00:00 2001 From: Prakruti Singh Date: Thu, 27 May 2021 21:35:04 +0530 Subject: [PATCH 11/12] final suggested changes done --- gramex/handlers/mlhandler.py | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/gramex/handlers/mlhandler.py b/gramex/handlers/mlhandler.py index 45fb3d8be..1fbe1d47a 100644 --- a/gramex/handlers/mlhandler.py +++ b/gramex/handlers/mlhandler.py @@ -22,7 +22,6 @@ from tornado.web import HTTPError from sklearn.metrics import get_scorer from sklearn.model_selection import cross_val_predict, cross_val_score -from ast import literal_eval op = os.path MLCLASS_MODULES = [ @@ -43,7 +42,7 @@ 'nums': [], 'cats': [], 'target_col': None, - 'CV': True, + 'cv': True, } ACTIONS = ['predict', 'score', 'append', 'train', 'retrain'] DEFAULT_TEMPLATE = op.join(op.dirname(__file__), '..', 'apps', 'mlhandler', 'template.html') @@ -119,25 +118,16 @@ def setup(cls, data=None, model={}, config_dir='', **kwargs): target = data[target_col] train = data[[c for c in data if c != target_col]] # cross validation - cls.CrossValidation(train,target) + cls.cross_validation(train,target) gramex.service.threadpool.submit( _fit, cls.model, train, target, cls.model_path, cls.name) cls.config_store.flush() - - @classmethod - def modelFunction(cls, mclass = ''): - model_kwargs = cls.config_store.load('model', {}) - mclass = model_kwargs.get('class', False) - if mclass: - model = search_modelclass(mclass)(**model_kwargs.get('params', {})) - return model - + @classmethod - def CrossValidation(cls,train,target): - mod = cls.modelFunction() - CV = cls.get_opt('CV') - if CV: - CVscore = cross_val_score(mod, X=train, y=target, **literal_eval(json.dumps(CV))) + def cross_validation(cls,train,target): + cv = cls.get_opt('cv',True) + if cv: + CVscore = cross_val_score(cls.model.steps[-1][1], X=train, y=target, cv=cv) CVavg = sum(CVscore)/len(CVscore) print('Cross Validation Score : ',CVavg) @@ -372,7 +362,7 @@ def _train(self, data=None): target = data[target_col] train = data[[c for c in data if c != target_col]] self.model = self._assemble_pipeline(data, force=True) - self.CrossValidation(train,target) + self.cross_validation(train,target) _fit(self.model, train, target, self.model_path) return {'score': self.model.score(train, target)} From 5f631c108f7834e65febc80a7ef5c0ee7c1f9be8 Mon Sep 17 00:00:00 2001 From: Prakruti Singh Date: Thu, 3 Jun 2021 11:26:41 +0530 Subject: [PATCH 12/12] resolved conflicts --- .gitignore | 1 + gramex/handlers/mlhandler.py | 7 ------- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index 1dc7b5b9e..2dac4bd7b 100644 --- a/.gitignore +++ b/.gitignore @@ -155,3 +155,4 @@ docs/_build # License copied to conda build_dir pkg/conda/LICENSE +env/* \ No newline at end of file diff --git a/gramex/handlers/mlhandler.py b/gramex/handlers/mlhandler.py index 4c6f6fdfb..1fbe1d47a 100644 --- a/gramex/handlers/mlhandler.py +++ b/gramex/handlers/mlhandler.py @@ -21,10 +21,7 @@ from tornado.gen import coroutine from tornado.web import HTTPError from sklearn.metrics import get_scorer -<<<<<<< HEAD from sklearn.model_selection import cross_val_predict, cross_val_score -======= ->>>>>>> 01cef31e4559be82862ae6b0977358ac661b6a70 op = os.path MLCLASS_MODULES = [ @@ -44,12 +41,8 @@ 'pipeline': True, 'nums': [], 'cats': [], -<<<<<<< HEAD 'target_col': None, 'cv': True, -======= - 'target_col': None ->>>>>>> 01cef31e4559be82862ae6b0977358ac661b6a70 } ACTIONS = ['predict', 'score', 'append', 'train', 'retrain'] DEFAULT_TEMPLATE = op.join(op.dirname(__file__), '..', 'apps', 'mlhandler', 'template.html')