diff --git a/Riya_Agrahari.ipynb b/Riya_Agrahari.ipynb new file mode 100644 index 0000000..0909934 --- /dev/null +++ b/Riya_Agrahari.ipynb @@ -0,0 +1,1288 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "MWuMU9_NxGPR" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import sklearn" + ] + }, + { + "cell_type": "code", + "source": [ + "# `load_boston` was removed in scikit-learn 1.2: use it when the installed version still\n", + "# ships it, otherwise rebuild an equivalent Bunch from the original StatLib file.\n", + "try:\n", + "    from sklearn.datasets import load_boston\n", + "    df = load_boston()\n", + "except ImportError:\n", + "    from sklearn.utils import Bunch\n", + "    raw_df = pd.read_csv(\"http://lib.stat.cmu.edu/datasets/boston\", sep=r\"\\s+\", skiprows=22, header=None)\n", + "    df = Bunch(\n", + "        # Each record spans two physical rows of the raw file, so stitch them back together.\n", + "        data=np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]]),\n", + "        target=raw_df.values[1::2, 2],\n", + "        feature_names=np.array([\"CRIM\", \"ZN\", \"INDUS\", \"CHAS\", \"NOX\", \"RM\", \"AGE\", \"DIS\", \"RAD\", \"TAX\", \"PTRATIO\", \"B\", \"LSTAT\"]),\n", + "        DESCR=\"Boston house prices dataset (StatLib copy): 506 instances, 13 features, target MEDV.\",\n", + "        filename=\"boston_house_prices.csv\",\n", + "        data_module=\"sklearn.datasets.data\",\n", + "    )" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "T2M8Ialmx7cN", + "outputId": "1a56c068-21ef-4335-f540-cea63954e856" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/sklearn/utils/deprecation.py:87: FutureWarning: Function load_boston is deprecated; `load_boston` is deprecated in 1.0 and will be removed in 1.2.\n", + "\n", + " The Boston housing prices dataset has an ethical problem. 
You can refer to\n", + " the documentation of this function for further details.\n", + "\n", + " The scikit-learn maintainers therefore strongly discourage the use of this\n", + " dataset unless the purpose of the code is to study and educate about\n", + " ethical issues in data science and machine learning.\n", + "\n", + " In this special case, you can fetch the dataset from the original\n", + " source::\n", + "\n", + " import pandas as pd\n", + " import numpy as np\n", + "\n", + "\n", + " data_url = \"http://lib.stat.cmu.edu/datasets/boston\"\n", + " raw_df = pd.read_csv(data_url, sep=\"\\s+\", skiprows=22, header=None)\n", + " data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])\n", + " target = raw_df.values[1::2, 2]\n", + "\n", + " Alternative datasets include the California housing dataset (i.e.\n", + " :func:`~sklearn.datasets.fetch_california_housing`) and the Ames housing\n", + " dataset. You can load the datasets as follows::\n", + "\n", + " from sklearn.datasets import fetch_california_housing\n", + " housing = fetch_california_housing()\n", + "\n", + " for the California housing dataset and::\n", + "\n", + " from sklearn.datasets import fetch_openml\n", + " housing = fetch_openml(name=\"house_prices\", as_frame=True)\n", + "\n", + " for the Ames housing dataset.\n", + " \n", + " warnings.warn(msg, category=FutureWarning)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "df.keys()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4Wz5kUKjypxx", + "outputId": "1d41e145-fd55-4cc2-dba9-bcc6c8f84413" + }, + "execution_count": 4, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "dict_keys(['data', 'target', 'feature_names', 'DESCR', 'filename', 'data_module'])" + ] + }, + "metadata": {}, + "execution_count": 4 + } + ] + }, + { + "cell_type": "code", + "source": [ + "print(df.DESCR)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + 
}, + "id": "ZtK71kP6zpz7", + "outputId": "9d1f63b5-c20e-4ea8-8bb3-4021e0a5359d" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + ".. _boston_dataset:\n", + "\n", + "Boston house prices dataset\n", + "---------------------------\n", + "\n", + "**Data Set Characteristics:** \n", + "\n", + " :Number of Instances: 506 \n", + "\n", + " :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.\n", + "\n", + " :Attribute Information (in order):\n", + " - CRIM per capita crime rate by town\n", + " - ZN proportion of residential land zoned for lots over 25,000 sq.ft.\n", + " - INDUS proportion of non-retail business acres per town\n", + " - CHAS Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)\n", + " - NOX nitric oxides concentration (parts per 10 million)\n", + " - RM average number of rooms per dwelling\n", + " - AGE proportion of owner-occupied units built prior to 1940\n", + " - DIS weighted distances to five Boston employment centres\n", + " - RAD index of accessibility to radial highways\n", + " - TAX full-value property-tax rate per $10,000\n", + " - PTRATIO pupil-teacher ratio by town\n", + " - B 1000(Bk - 0.63)^2 where Bk is the proportion of black people by town\n", + " - LSTAT % lower status of the population\n", + " - MEDV Median value of owner-occupied homes in $1000's\n", + "\n", + " :Missing Attribute Values: None\n", + "\n", + " :Creator: Harrison, D. and Rubinfeld, D.L.\n", + "\n", + "This is a copy of UCI ML housing dataset.\n", + "https://archive.ics.uci.edu/ml/machine-learning-databases/housing/\n", + "\n", + "\n", + "This dataset was taken from the StatLib library which is maintained at Carnegie Mellon University.\n", + "\n", + "The Boston house-price data of Harrison, D. and Rubinfeld, D.L. 'Hedonic\n", + "prices and the demand for clean air', J. Environ. Economics & Management,\n", + "vol.5, 81-102, 1978. 
Used in Belsley, Kuh & Welsch, 'Regression diagnostics\n", + "...', Wiley, 1980. N.B. Various transformations are used in the table on\n", + "pages 244-261 of the latter.\n", + "\n", + "The Boston house-price data has been used in many machine learning papers that address regression\n", + "problems. \n", + " \n", + ".. topic:: References\n", + "\n", + " - Belsley, Kuh & Welsch, 'Regression diagnostics: Identifying Influential Data and Sources of Collinearity', Wiley, 1980. 244-261.\n", + " - Quinlan,R. (1993). Combining Instance-Based and Model-Based Learning. In Proceedings on the Tenth International Conference of Machine Learning, 236-243, University of Massachusetts, Amherst. Morgan Kaufmann.\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "print(df.filename)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "UxLYtQmEz6iy", + "outputId": "a6b6f3ce-ad31-4468-f6b4-7ca812004a3d" + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "boston_house_prices.csv\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "print(df.target)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "EPQ5yW9m0NP6", + "outputId": "e8ebe20b-b097-4474-d516-f129f57a2322" + }, + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[24. 21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9 15. 18.9 21.7 20.4\n", + " 18.2 19.9 23.1 17.5 20.2 18.2 13.6 19.6 15.2 14.5 15.6 13.9 16.6 14.8\n", + " 18.4 21. 12.7 14.5 13.2 13.1 13.5 18.9 20. 21. 24.7 30.8 34.9 26.6\n", + " 25.3 24.7 21.2 19.3 20. 16.6 14.4 19.4 19.7 20.5 25. 23.4 18.9 35.4\n", + " 24.7 31.6 23.3 19.6 18.7 16. 22.2 25. 33. 23.5 19.4 22. 17.4 20.9\n", + " 24.2 21.7 22.8 23.4 24.1 21.4 20. 20.8 21.2 20.3 28. 23.9 24.8 22.9\n", + " 23.9 26.6 22.5 22.2 23.6 28.7 22.6 22. 22.9 25. 
20.6 28.4 21.4 38.7\n", + " 43.8 33.2 27.5 26.5 18.6 19.3 20.1 19.5 19.5 20.4 19.8 19.4 21.7 22.8\n", + " 18.8 18.7 18.5 18.3 21.2 19.2 20.4 19.3 22. 20.3 20.5 17.3 18.8 21.4\n", + " 15.7 16.2 18. 14.3 19.2 19.6 23. 18.4 15.6 18.1 17.4 17.1 13.3 17.8\n", + " 14. 14.4 13.4 15.6 11.8 13.8 15.6 14.6 17.8 15.4 21.5 19.6 15.3 19.4\n", + " 17. 15.6 13.1 41.3 24.3 23.3 27. 50. 50. 50. 22.7 25. 50. 23.8\n", + " 23.8 22.3 17.4 19.1 23.1 23.6 22.6 29.4 23.2 24.6 29.9 37.2 39.8 36.2\n", + " 37.9 32.5 26.4 29.6 50. 32. 29.8 34.9 37. 30.5 36.4 31.1 29.1 50.\n", + " 33.3 30.3 34.6 34.9 32.9 24.1 42.3 48.5 50. 22.6 24.4 22.5 24.4 20.\n", + " 21.7 19.3 22.4 28.1 23.7 25. 23.3 28.7 21.5 23. 26.7 21.7 27.5 30.1\n", + " 44.8 50. 37.6 31.6 46.7 31.5 24.3 31.7 41.7 48.3 29. 24. 25.1 31.5\n", + " 23.7 23.3 22. 20.1 22.2 23.7 17.6 18.5 24.3 20.5 24.5 26.2 24.4 24.8\n", + " 29.6 42.8 21.9 20.9 44. 50. 36. 30.1 33.8 43.1 48.8 31. 36.5 22.8\n", + " 30.7 50. 43.5 20.7 21.1 25.2 24.4 35.2 32.4 32. 33.2 33.1 29.1 35.1\n", + " 45.4 35.4 46. 50. 32.2 22. 20.1 23.2 22.3 24.8 28.5 37.3 27.9 23.9\n", + " 21.7 28.6 27.1 20.3 22.5 29. 24.8 22. 26.4 33.1 36.1 28.4 33.4 28.2\n", + " 22.8 20.3 16.1 22.1 19.4 21.6 23.8 16.2 17.8 19.8 23.1 21. 23.8 23.1\n", + " 20.4 18.5 25. 24.6 23. 22.2 19.3 22.6 19.8 17.1 19.4 22.2 20.7 21.1\n", + " 19.5 18.5 20.6 19. 18.7 32.7 16.5 23.9 31.2 17.5 17.2 23.1 24.5 26.6\n", + " 22.9 24.1 18.6 30.1 18.2 20.6 17.8 21.7 22.7 22.6 25. 19.9 20.8 16.8\n", + " 21.9 27.5 21.9 23.1 50. 50. 50. 50. 50. 13.8 13.8 15. 13.9 13.3\n", + " 13.1 10.2 10.4 10.9 11.3 12.3 8.8 7.2 10.5 7.4 10.2 11.5 15.1 23.2\n", + " 9.7 13.8 12.7 13.1 12.5 8.5 5. 6.3 5.6 7.2 12.1 8.3 8.5 5.\n", + " 11.9 27.9 17.2 27.5 15. 17.2 17.9 16.3 7. 7.2 7.5 10.4 8.8 8.4\n", + " 16.7 14.2 20.8 13.4 11.7 8.3 10.2 10.9 11. 9.5 14.5 14.1 16.1 14.3\n", + " 11.7 13.4 9.6 8.7 8.4 12.8 10.5 17.1 18.4 15.4 10.8 11.8 14.9 12.6\n", + " 14.1 13. 13.4 15.2 16.1 17.8 14.9 14.1 12.7 13.5 14.9 20. 
16.4 17.7\n", + " 19.5 20.2 21.4 19.9 19. 19.1 19.1 20.1 19.9 19.6 23.2 29.8 13.8 13.3\n", + " 16.7 12. 14.6 21.4 23. 23.7 25. 21.8 20.6 21.2 19.1 20.6 15.2 7.\n", + " 8.1 13.6 20.1 21.8 24.5 23.1 19.7 18.3 21.2 17.5 16.8 22.4 20.6 23.9\n", + " 22. 11.9]\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "boston= pd.DataFrame(df.data, columns = df.feature_names)\n", + "boston.head()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 270 + }, + "id": "po5gzj1z0cYq", + "outputId": "4b56e29a-8fdf-4f77-fa9d-5528dd7a2451" + }, + "execution_count": 11, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX \\\n", + "0 0.00632 18.0 2.31 0.0 0.538 6.575 65.2 4.0900 1.0 296.0 \n", + "1 0.02731 0.0 7.07 0.0 0.469 6.421 78.9 4.9671 2.0 242.0 \n", + "2 0.02729 0.0 7.07 0.0 0.469 7.185 61.1 4.9671 2.0 242.0 \n", + "3 0.03237 0.0 2.18 0.0 0.458 6.998 45.8 6.0622 3.0 222.0 \n", + "4 0.06905 0.0 2.18 0.0 0.458 7.147 54.2 6.0622 3.0 222.0 \n", + "\n", + " PTRATIO B LSTAT \n", + "0 15.3 396.90 4.98 \n", + "1 17.8 396.90 9.14 \n", + "2 17.8 392.83 4.03 \n", + "3 18.7 394.63 2.94 \n", + "4 18.7 396.90 5.33 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CRIMZNINDUSCHASNOXRMAGEDISRADTAXPTRATIOBLSTAT
00.0063218.02.310.00.5386.57565.24.09001.0296.015.3396.904.98
10.027310.07.070.00.4696.42178.94.96712.0242.017.8396.909.14
20.027290.07.070.00.4697.18561.14.96712.0242.017.8392.834.03
30.032370.02.180.00.4586.99845.86.06223.0222.018.7394.632.94
40.069050.02.180.00.4587.14754.26.06223.0222.018.7396.905.33
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "code", + "source": [ + "boston['MEDV']=df.target\n", + "boston.head()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 270 + }, + "id": "fs9Y3jHf1OoE", + "outputId": "1e1699fa-cf8d-460a-dc60-61df98bc658e" + }, + "execution_count": 13, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX \\\n", + "0 0.00632 18.0 2.31 0.0 0.538 6.575 65.2 4.0900 1.0 296.0 \n", + "1 0.02731 0.0 7.07 0.0 0.469 6.421 78.9 4.9671 2.0 242.0 \n", + "2 0.02729 0.0 7.07 0.0 0.469 7.185 61.1 4.9671 2.0 242.0 \n", + "3 0.03237 0.0 2.18 0.0 0.458 6.998 45.8 6.0622 3.0 222.0 \n", + "4 0.06905 0.0 2.18 0.0 0.458 7.147 54.2 6.0622 3.0 222.0 \n", + "\n", + " PTRATIO B LSTAT MEDV \n", + "0 15.3 396.90 4.98 24.0 \n", + "1 17.8 396.90 9.14 21.6 \n", + "2 17.8 392.83 4.03 34.7 \n", + "3 18.7 394.63 2.94 33.4 \n", + "4 18.7 396.90 5.33 36.2 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CRIMZNINDUSCHASNOXRMAGEDISRADTAXPTRATIOBLSTATMEDV
00.0063218.02.310.00.5386.57565.24.09001.0296.015.3396.904.9824.0
10.027310.07.070.00.4696.42178.94.96712.0242.017.8396.909.1421.6
20.027290.07.070.00.4697.18561.14.96712.0242.017.8392.834.0334.7
30.032370.02.180.00.4586.99845.86.06223.0222.018.7394.632.9433.4
40.069050.02.180.00.4587.14754.26.06223.0222.018.7396.905.3336.2
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ] + }, + "metadata": {}, + "execution_count": 13 + } + ] + }, + { + "cell_type": "code", + "source": [ + "boston.isnull()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 488 + }, + "id": "e9kvXOra2Tjl", + "outputId": "44d091e4-25e6-4dd3-83fc-f75a3bd16f4e" + }, + "execution_count": 14, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX \\\n", + "0 False False False False False False False False False False \n", + "1 False False False False False False False False False False \n", + "2 False False False False False False False False False False \n", + "3 False False False False False False False False False False \n", + "4 False False False False False False False False False False \n", + ".. ... ... ... ... ... ... ... ... ... ... \n", + "501 False False False False False False False False False False \n", + "502 False False False False False False False False False False \n", + "503 False False False False False False False False False False \n", + "504 False False False False False False False False False False \n", + "505 False False False False False False False False False False \n", + "\n", + " PTRATIO B LSTAT MEDV \n", + "0 False False False False \n", + "1 False False False False \n", + "2 False False False False \n", + "3 False False False False \n", + "4 False False False False \n", + ".. ... ... ... ... \n", + "501 False False False False \n", + "502 False False False False \n", + "503 False False False False \n", + "504 False False False False \n", + "505 False False False False \n", + "\n", + "[506 rows x 14 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CRIMZNINDUSCHASNOXRMAGEDISRADTAXPTRATIOBLSTATMEDV
0FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
1FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
2FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
3FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
4FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
.............................................
501FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
502FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
503FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
504FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
505FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
\n", + "

506 rows × 14 columns

\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ] + }, + "metadata": {}, + "execution_count": 14 + } + ] + }, + { + "cell_type": "code", + "source": [ + "boston.isnull().sum()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "fxs_L1_H2qOk", + "outputId": "1292dad1-b35c-43cb-b1c5-46c24a180dc0" + }, + "execution_count": 15, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "CRIM 0\n", + "ZN 0\n", + "INDUS 0\n", + "CHAS 0\n", + "NOX 0\n", + "RM 0\n", + "AGE 0\n", + "DIS 0\n", + "RAD 0\n", + "TAX 0\n", + "PTRATIO 0\n", + "B 0\n", + "LSTAT 0\n", + "MEDV 0\n", + "dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ] + }, + { + "cell_type": "code", + "source": [ + "from sklearn.model_selection import train_test_split\n", + "# Features are every column except the target; MEDV is the value being predicted.\n", + "X = boston.drop(columns='MEDV')\n", + "Y = boston['MEDV']\n", + "# Hold out 15% of the rows; the fixed random_state keeps the split reproducible.\n", + "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.15, random_state=5)\n", + "for split in (X_train, X_test, Y_train, Y_test):\n", + "    print(split.shape)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "fvRAOtIt21Oq", + "outputId": "e4c53baa-77e0-436a-dcd5-f944e36e0018" + }, + "execution_count": 18, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(430, 13)\n", + "(76, 13)\n", + "(430,)\n", + "(76,)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.metrics import mean_squared_error" + ], + "metadata": { + "id": "5gUqttGU5Bhe" + }, + "execution_count": 22, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "lin_model = LinearRegression()\n", + "lin_model.fit(X_train, Y_train)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "U1Tsfl995ypK", + "outputId": "7a44db4c-4408-4d2c-ada6-0af156ec78dc" + }, + "execution_count": 23, + "outputs": [ + { + "output_type": "execute_result", + "data": { + 
"text/plain": [ + "LinearRegression()" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Score the fit on the rows the model was trained on.\n", + "y_train_predict = lin_model.predict(X_train)\n", + "train_rmse = np.sqrt(mean_squared_error(Y_train, y_train_predict))\n", + "print(\"The model performance for training set\")\n", + "print('RMSE is {}'.format(train_rmse))\n", + "print(\"\\n\")\n", + "\n", + "# Score the held-out rows under a separate name so the training RMSE is not overwritten.\n", + "y_test_predict = lin_model.predict(X_test)\n", + "test_rmse = np.sqrt(mean_squared_error(Y_test, y_test_predict))\n", + "print(\"The model performance for testing set\")\n", + "print('RMSE is {}'.format(test_rmse))\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "A9KNyTeS6dOG", + "outputId": "54828583-4676-4aed-aa55-68d84fa28dd7" + }, + "execution_count": 27, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "The model performance for training set\n", + "RMSE is 4.710901797319796\n", + "\n", + "\n", + "The model performance for testing set\n", + "RMSE is 4.687543527902972\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "uHz8EHFK8sEf" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file