javedsha · raj-adroit · Apr 30, 2019
diff --git a/Text+Classification+using+python,+scikit+and+nltk.ipynb b/Text+Classification+using+python,+scikit+and+nltk.ipynb
@@ -3,9 +3,7 @@
   {
    "cell_type": "code",
    "execution_count": 1,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "#Loading the data set - training data.\n",
@@ -15,10 +13,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {
-    "collapsed": false
-   },
+   "execution_count": 2,
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -45,7 +41,7 @@
        " 'talk.religion.misc']"
       ]
      },
-     "execution_count": 4,
+     "execution_count": 2,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -57,10 +53,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {
-    "collapsed": false
-   },
+   "execution_count": 3,
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -78,18 +72,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {
-    "collapsed": false
-   },
+   "execution_count": 4,
+   "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
        "(11314, 130107)"
       ]
      },
-     "execution_count": 6,
+     "execution_count": 4,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -104,18 +96,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {
-    "collapsed": false
-   },
+   "execution_count": 5,
+   "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
        "(11314, 130107)"
       ]
      },
-     "execution_count": 7,
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -130,10 +120,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {
-    "collapsed": true
-   },
+   "execution_count": 6,
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Machine Learning\n",
@@ -144,10 +132,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {
-    "collapsed": false
-   },
+   "execution_count": 7,
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Building a pipeline: We can write less code and do all of the above, by building a pipeline as follows:\n",
@@ -162,18 +148,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {
-    "collapsed": false
-   },
+   "execution_count": 8,
+   "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
        "0.7738980350504514"
       ]
      },
-     "execution_count": 15,
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -188,26 +172,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {
-    "collapsed": false
-   },
+   "execution_count": 12,
+   "metadata": {},
    "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "C:\\Users\\javedsha\\AppData\\Local\\Continuum\\Anaconda2\\lib\\site-packages\\sklearn\\linear_model\\stochastic_gradient.py:73: DeprecationWarning: n_iter parameter is deprecated in 0.19 and will be removed in 0.21. Use max_iter and tol instead.\n",
-      "  DeprecationWarning)\n"
-     ]
-    },
     {
      "data": {
       "text/plain": [
-       "0.82381837493361654"
+       "0.8238183749336165"
       ]
      },
-     "execution_count": 16,
+     "execution_count": 12,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -217,7 +191,7 @@
     "\n",
     "from sklearn.linear_model import SGDClassifier\n",
     "text_clf_svm = Pipeline([('vect', CountVectorizer()), ('tfidf', TfidfTransformer()),\n",
-    "                         ('clf-svm', SGDClassifier(loss='hinge', penalty='l2',alpha=1e-3, n_iter=5, random_state=42))])\n",
+    "                         ('clf-svm', SGDClassifier(loss='hinge', penalty='l2',alpha=1e-3, max_iter=5, tol=0.21, random_state=42))])\n",
     "\n",
     "text_clf_svm = text_clf_svm.fit(twenty_train.data, twenty_train.target)\n",
     "predicted_svm = text_clf_svm.predict(twenty_test.data)\n",
@@ -259,9 +233,7 @@
   {
    "cell_type": "code",
    "execution_count": 23,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -287,9 +259,7 @@
   {
    "cell_type": "code",
    "execution_count": 24,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -333,9 +303,7 @@
   {
    "cell_type": "code",
    "execution_count": 26,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -407,7 +375,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython2",
-   "version": "2.7.13"
+   "version": "2.7.16"
   }
  },
  "nbformat": 4,

diff --git a/Text+Classification+using+python,+scikit+and+nltk.py b/Text+Classification+using+python,+scikit+and+nltk.py
@@ -72,7 +72,7 @@
 
 from sklearn.linear_model import SGDClassifier
 text_clf_svm = Pipeline([('vect', CountVectorizer()), ('tfidf', TfidfTransformer()),
-                         ('clf-svm', SGDClassifier(loss='hinge', penalty='l2',alpha=1e-3, n_iter=5, random_state=42))])
+                         ('clf-svm', SGDClassifier(loss='hinge', penalty='l2',alpha=1e-3, max_iter=5, tol=0.21, random_state=42))])
 
 text_clf_svm = text_clf_svm.fit(twenty_train.data, twenty_train.target)
 predicted_svm = text_clf_svm.predict(twenty_test.data)