From 0e5d5aa1d51c8effd1693512b68967120d35fa76 Mon Sep 17 00:00:00 2001 From: gsingh58 <gurmail-singh@wisc.edu> Date: Thu, 11 Apr 2024 06:48:36 -0500 Subject: [PATCH] lec19 and lec20 updated --- .../19-regression-1/19-regression1.ipynb | 223 ++-- .../19-regression-1/19-regression1_001.ipynb | 3 +- .../19-regression-1/19-regression1_002.ipynb | 3 +- .../20-regression-2/20-regression2.ipynb | 1009 ++++++++++++++--- ...on1_001.ipynb => 20-regression2_001.ipynb} | 299 ++++- ...on1_002.ipynb => 20-regression2_002.ipynb} | 299 ++++- 6 files changed, 1558 insertions(+), 278 deletions(-) rename lecture_material/20-regression-2/{20-regression1_001.ipynb => 20-regression2_001.ipynb} (98%) rename lecture_material/20-regression-2/{20-regression1_002.ipynb => 20-regression2_002.ipynb} (98%) diff --git a/lecture_material/19-regression-1/19-regression1.ipynb b/lecture_material/19-regression-1/19-regression1.ipynb index 80f8f5b..84fbd59 100644 --- a/lecture_material/19-regression-1/19-regression1.ipynb +++ b/lecture_material/19-regression-1/19-regression1.ipynb @@ -1953,124 +1953,124 @@ " </thead>\n", " <tbody>\n", " <tr>\n", - " <th>1203</th>\n", + " <th>1002</th>\n", " <td>2022-04-07 19:00:00+00:00</td>\n", - " <td>55079070100</td>\n", + " <td>55117000900</td>\n", " <td>None</td>\n", - " <td>962</td>\n", - " <td>0</td>\n", - " <td>0.86</td>\n", + " <td>1620</td>\n", " <td>0</td>\n", + " <td>0.29</td>\n", + " <td>15</td>\n", " <td>0</td>\n", " <td>0.0</td>\n", " <td>0</td>\n", " <td>...</td>\n", - " <td>432</td>\n", - " <td>518</td>\n", + " <td>736</td>\n", + " <td>809</td>\n", " <td>2022-04-06 06:00:00+00:00</td>\n", - " <td>4312</td>\n", - " <td>174.0</td>\n", - " <td>2957</td>\n", + " <td>5217</td>\n", + " <td>306.0</td>\n", + " <td>2755</td>\n", " <td>Census tract</td>\n", - " <td>3.024857e+06</td>\n", - " <td>10099.546360</td>\n", - " <td>POLYGON ((-87.90633 43.13312, -87.90820 43.133...</td>\n", + " <td>1.721632e+07</td>\n", + " <td>20245.615434</td>\n", + " <td>POLYGON 
((-87.72310 43.73206, -87.72307 43.732...</td>\n", " </tr>\n", " <tr>\n", - " <th>1191</th>\n", + " <th>550</th>\n", " <td>2022-04-07 19:00:00+00:00</td>\n", - " <td>55079185700</td>\n", + " <td>55093960100</td>\n", " <td>None</td>\n", - " <td>595</td>\n", + " <td>697</td>\n", " <td>0</td>\n", - " <td>0.00</td>\n", - " <td>2</td>\n", + " <td>0.14</td>\n", + " <td>4</td>\n", " <td>0</td>\n", " <td>0.0</td>\n", " <td>0</td>\n", " <td>...</td>\n", - " <td>298</td>\n", - " <td>266</td>\n", + " <td>460</td>\n", + " <td>215</td>\n", " <td>2022-04-06 06:00:00+00:00</td>\n", - " <td>1877</td>\n", - " <td>383.0</td>\n", - " <td>2945</td>\n", + " <td>2895</td>\n", + " <td>154.0</td>\n", + " <td>2238</td>\n", " <td>Census tract</td>\n", - " <td>1.254436e+06</td>\n", - " <td>4482.212537</td>\n", - " <td>POLYGON ((-87.91698 43.07116, -87.91726 43.071...</td>\n", + " <td>3.401782e+08</td>\n", + " <td>79266.666247</td>\n", + " <td>POLYGON ((-92.13636 44.85786, -92.14036 44.857...</td>\n", " </tr>\n", " <tr>\n", - " <th>389</th>\n", + " <th>777</th>\n", " <td>2022-04-07 19:00:00+00:00</td>\n", - " <td>55079101200</td>\n", + " <td>55017010100</td>\n", " <td>None</td>\n", - " <td>915</td>\n", - " <td>0</td>\n", - " <td>0.29</td>\n", - " <td>33</td>\n", + " <td>822</td>\n", + " <td>1</td>\n", + " <td>0.43</td>\n", + " <td>2</td>\n", " <td>0</td>\n", " <td>0.0</td>\n", " <td>0</td>\n", " <td>...</td>\n", - " <td>379</td>\n", - " <td>469</td>\n", + " <td>581</td>\n", + " <td>221</td>\n", " <td>2022-04-06 06:00:00+00:00</td>\n", - " <td>3184</td>\n", - " <td>202.0</td>\n", - " <td>1363</td>\n", + " <td>2992</td>\n", + " <td>153.0</td>\n", + " <td>2529</td>\n", " <td>Census tract</td>\n", - " <td>2.511556e+06</td>\n", - " <td>6796.635429</td>\n", - " <td>POLYGON ((-88.01756 42.98824, -88.01756 42.988...</td>\n", + " <td>3.245227e+07</td>\n", + " <td>25883.820359</td>\n", + " <td>POLYGON ((-91.43747 44.85741, -91.43761 44.861...</td>\n", " </tr>\n", " <tr>\n", - " <th>448</th>\n", + " 
<th>669</th>\n", " <td>2022-04-07 19:00:00+00:00</td>\n", - " <td>55079009500</td>\n", + " <td>55059001900</td>\n", " <td>None</td>\n", - " <td>547</td>\n", - " <td>3</td>\n", - " <td>0.43</td>\n", + " <td>549</td>\n", " <td>1</td>\n", + " <td>0.14</td>\n", + " <td>7</td>\n", " <td>0</td>\n", " <td>0.0</td>\n", " <td>0</td>\n", " <td>...</td>\n", - " <td>277</td>\n", - " <td>254</td>\n", + " <td>155</td>\n", + " <td>374</td>\n", " <td>2022-04-06 06:00:00+00:00</td>\n", - " <td>2113</td>\n", - " <td>150.0</td>\n", - " <td>1874</td>\n", + " <td>2717</td>\n", + " <td>377.0</td>\n", + " <td>2392</td>\n", " <td>Census tract</td>\n", - " <td>9.560618e+05</td>\n", - " <td>4005.196264</td>\n", - " <td>POLYGON ((-87.97244 43.05711, -87.97267 43.057...</td>\n", + " <td>2.687141e+06</td>\n", + " <td>9958.692789</td>\n", + " <td>POLYGON ((-87.81235 42.58064, -87.81474 42.580...</td>\n", " </tr>\n", " <tr>\n", - " <th>1012</th>\n", + " <th>1331</th>\n", " <td>2022-04-07 19:00:00+00:00</td>\n", - " <td>55027960300</td>\n", + " <td>55079080100</td>\n", " <td>None</td>\n", - " <td>3879</td>\n", + " <td>547</td>\n", " <td>0</td>\n", - " <td>0.57</td>\n", - " <td>29</td>\n", + " <td>1.14</td>\n", + " <td>2</td>\n", " <td>0</td>\n", " <td>0.0</td>\n", " <td>0</td>\n", " <td>...</td>\n", - " <td>1363</td>\n", - " <td>2386</td>\n", + " <td>231</td>\n", + " <td>307</td>\n", " <td>2022-04-06 06:00:00+00:00</td>\n", - " <td>7928</td>\n", - " <td>40.0</td>\n", - " <td>2766</td>\n", + " <td>2700</td>\n", + " <td>152.0</td>\n", + " <td>3191</td>\n", " <td>Census tract</td>\n", - " <td>2.580188e+07</td>\n", - " <td>27162.474970</td>\n", - " <td>POLYGON ((-88.77561 43.59656, -88.77558 43.596...</td>\n", + " <td>1.869090e+06</td>\n", + " <td>6262.002770</td>\n", + " <td>POLYGON ((-87.89722 43.10022, -87.89845 43.100...</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", @@ -2079,46 +2079,46 @@ ], "text/plain": [ " RptDt GEOID GEOName POS_CUM_CP POS_NEW_CP \\\n", - "1203 2022-04-07 19:00:00+00:00 
55079070100 None 962 0 \n", - "1191 2022-04-07 19:00:00+00:00 55079185700 None 595 0 \n", - "389 2022-04-07 19:00:00+00:00 55079101200 None 915 0 \n", - "448 2022-04-07 19:00:00+00:00 55079009500 None 547 3 \n", - "1012 2022-04-07 19:00:00+00:00 55027960300 None 3879 0 \n", + "1002 2022-04-07 19:00:00+00:00 55117000900 None 1620 0 \n", + "550 2022-04-07 19:00:00+00:00 55093960100 None 697 0 \n", + "777 2022-04-07 19:00:00+00:00 55017010100 None 822 1 \n", + "669 2022-04-07 19:00:00+00:00 55059001900 None 549 1 \n", + "1331 2022-04-07 19:00:00+00:00 55079080100 None 547 0 \n", "\n", " POS_7DAYAVG_CP DTH_CUM_CP DTH_NEW_CP DTH_7DAYAVG_CP \\\n", - "1203 0.86 0 0 0.0 \n", - "1191 0.00 2 0 0.0 \n", - "389 0.29 33 0 0.0 \n", - "448 0.43 1 0 0.0 \n", - "1012 0.57 29 0 0.0 \n", + "1002 0.29 15 0 0.0 \n", + "550 0.14 4 0 0.0 \n", + "777 0.43 2 0 0.0 \n", + "669 0.14 7 0 0.0 \n", + "1331 1.14 2 0 0.0 \n", "\n", " DTH_OVER_30DAYS_CP ... HOSP_NO_CP HOSP_UNK_CP \\\n", - "1203 0 ... 432 518 \n", - "1191 0 ... 298 266 \n", - "389 0 ... 379 469 \n", - "448 0 ... 277 254 \n", - "1012 0 ... 1363 2386 \n", + "1002 0 ... 736 809 \n", + "550 0 ... 460 215 \n", + "777 0 ... 581 221 \n", + "669 0 ... 155 374 \n", + "1331 0 ... 
231 307 \n", "\n", " Date POP POP_MOE OBJECTID GEO \\\n", - "1203 2022-04-06 06:00:00+00:00 4312 174.0 2957 Census tract \n", - "1191 2022-04-06 06:00:00+00:00 1877 383.0 2945 Census tract \n", - "389 2022-04-06 06:00:00+00:00 3184 202.0 1363 Census tract \n", - "448 2022-04-06 06:00:00+00:00 2113 150.0 1874 Census tract \n", - "1012 2022-04-06 06:00:00+00:00 7928 40.0 2766 Census tract \n", + "1002 2022-04-06 06:00:00+00:00 5217 306.0 2755 Census tract \n", + "550 2022-04-06 06:00:00+00:00 2895 154.0 2238 Census tract \n", + "777 2022-04-06 06:00:00+00:00 2992 153.0 2529 Census tract \n", + "669 2022-04-06 06:00:00+00:00 2717 377.0 2392 Census tract \n", + "1331 2022-04-06 06:00:00+00:00 2700 152.0 3191 Census tract \n", "\n", " ShapeSTArea ShapeSTLength \\\n", - "1203 3.024857e+06 10099.546360 \n", - "1191 1.254436e+06 4482.212537 \n", - "389 2.511556e+06 6796.635429 \n", - "448 9.560618e+05 4005.196264 \n", - "1012 2.580188e+07 27162.474970 \n", + "1002 1.721632e+07 20245.615434 \n", + "550 3.401782e+08 79266.666247 \n", + "777 3.245227e+07 25883.820359 \n", + "669 2.687141e+06 9958.692789 \n", + "1331 1.869090e+06 6262.002770 \n", "\n", " geometry \n", - "1203 POLYGON ((-87.90633 43.13312, -87.90820 43.133... \n", - "1191 POLYGON ((-87.91698 43.07116, -87.91726 43.071... \n", - "389 POLYGON ((-88.01756 42.98824, -88.01756 42.988... \n", - "448 POLYGON ((-87.97244 43.05711, -87.97267 43.057... \n", - "1012 POLYGON ((-88.77561 43.59656, -88.77558 43.596... \n", + "1002 POLYGON ((-87.72310 43.73206, -87.72307 43.732... \n", + "550 POLYGON ((-92.13636 44.85786, -92.14036 44.857... \n", + "777 POLYGON ((-91.43747 44.85741, -91.43761 44.861... \n", + "669 POLYGON ((-87.81235 42.58064, -87.81474 42.580... \n", + "1331 POLYGON ((-87.89722 43.10022, -87.89845 43.100... 
\n", "\n", "[5 rows x 90 columns]" ] @@ -2143,7 +2143,7 @@ { "data": { "text/plain": [ - "0.16858602389122834" + "0.15656136953145716" ] }, "execution_count": 42, @@ -2195,7 +2195,7 @@ { "data": { "text/plain": [ - "array([0.22428886, 0.09466345, 0.25987694, 0.12149025, 0.2110579 ])" + "array([0.18224501, 0.17362946, 0.15636298, 0.17604786, 0.16674181])" ] }, "execution_count": 43, @@ -2220,7 +2220,7 @@ { "data": { "text/plain": [ - "0.1822754791451171" + "0.1710054227127618" ] }, "execution_count": 44, @@ -2265,7 +2265,7 @@ { "data": { "text/plain": [ - "0.1822754791451171" + "0.1710054227127618" ] }, "execution_count": 46, @@ -2286,7 +2286,7 @@ { "data": { "text/plain": [ - "0.21847807530968777" + "0.22423438769788911" ] }, "execution_count": 47, @@ -2324,7 +2324,7 @@ }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "<Figure size 300x300 with 1 Axes>" ] @@ -2356,7 +2356,7 @@ { "data": { "text/plain": [ - "0.06322381180319962" + "0.008845559484288846" ] }, "execution_count": 49, @@ -2377,7 +2377,7 @@ { "data": { "text/plain": [ - "0.06703675246344569" + "0.029473832647323676" ] }, "execution_count": 50, @@ -2398,8 +2398,8 @@ { "data": { "text/plain": [ - "model1 0.063224\n", - "model2 0.067037\n", + "model1 0.008846\n", + "model2 0.029474\n", "dtype: float64" ] }, @@ -2432,7 +2432,7 @@ }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "<Figure size 300x300 with 1 Axes>" ] @@ -2471,7 +2471,7 @@ { "data": { "text/plain": [ - "0.675922025742224" + "0.6354908636406671" ] }, "execution_count": 53, @@ -2507,9 +2507,8 @@ { "data": { "text/plain": [ - "array([-3.07843136e-02, 2.30136853e-04, -9.79624336e-04, 1.38921261e-02,\n", - " 1.65612818e-02, -1.14307223e-02, 6.32603501e-03, 2.87701547e-02,\n", - " 1.77580170e-01, 2.86019994e-01])" + "array([-0.0246031 , -0.0023333 , 0.00082983, 0.00863443, 0.02272604,\n", + " -0.01537279, 0.01147649, 0.02750062, 0.16175569, 0.30770274])" ] }, "execution_count": 54, @@ -2539,7 +2538,7 @@ }, { 
"data": { - "image/png": "", + "image/png": "", "text/plain": [ "<Figure size 300x200 with 1 Axes>" ] diff --git a/lecture_material/19-regression-1/19-regression1_001.ipynb b/lecture_material/19-regression-1/19-regression1_001.ipynb index 2edd0e8..1606fe4 100644 --- a/lecture_material/19-regression-1/19-regression1_001.ipynb +++ b/lecture_material/19-regression-1/19-regression1_001.ipynb @@ -754,7 +754,8 @@ "\n", "- requires `from sklearn.model_selection import cross_val_score`\n", "- do many different train/test splits of the values, fitting and scoring the model across each combination\n", - "- documentation: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.cross_val_score.html" + "- cross validation documentation: https://scikit-learn.org/stable/modules/cross_validation.html\n", + "- function documentation: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.cross_val_score.html" ] }, { diff --git a/lecture_material/19-regression-1/19-regression1_002.ipynb b/lecture_material/19-regression-1/19-regression1_002.ipynb index 2edd0e8..1606fe4 100644 --- a/lecture_material/19-regression-1/19-regression1_002.ipynb +++ b/lecture_material/19-regression-1/19-regression1_002.ipynb @@ -754,7 +754,8 @@ "\n", "- requires `from sklearn.model_selection import cross_val_score`\n", "- do many different train/test splits of the values, fitting and scoring the model across each combination\n", - "- documentation: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.cross_val_score.html" + "- cross validation documentation: https://scikit-learn.org/stable/modules/cross_validation.html\n", + "- function documentation: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.cross_val_score.html" ] }, { diff --git a/lecture_material/20-regression-2/20-regression2.ipynb b/lecture_material/20-regression-2/20-regression2.ipynb index 46516ba..fbfe4df 100644 --- 
a/lecture_material/20-regression-2/20-regression2.ipynb +++ b/lecture_material/20-regression-2/20-regression2.ipynb @@ -44,6 +44,29 @@ "df = gpd.read_file(dataset_file)" ] }, + { + "cell_type": "markdown", + "id": "64736014-bca3-4bc5-8f99-d08c5dc14bad", + "metadata": {}, + "source": [ + "### How well does our model fit the data?\n", + "- explained variance score\n", + "- R^2 (\"r squared\")" + ] + }, + { + "cell_type": "markdown", + "id": "2b452eac-c695-4e65-be5d-c4938e848944", + "metadata": {}, + "source": [ + "#### `sklearn.metrics.explained_variance_score(y_true, y_pred)`\n", + "- requires `import sklearn`\n", + "- calculates the explained variance score given:\n", + " - y_true: actual death values in our example\n", + " - y_pred: prediction of deaths in our example\n", + "- documentation: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.explained_variance_score.html" + ] + }, { "cell_type": "markdown", "id": "e3e73632", @@ -64,6 +87,328 @@ "df = df[df[\"GEOID\"].str.fullmatch(r\"\\d+\")]" ] }, + { + "cell_type": "code", + "execution_count": 4, + "id": "4ec84d65-7b80-41af-8a35-579c15ac07ad", + "metadata": {}, + "outputs": [], + "source": [ + "xcols = [\"POP\"]\n", + "ycol = \"DTH_CUM_CP\"" + ] + }, + { + "cell_type": "markdown", + "id": "5a1a80c2-afa9-4a80-83e7-3471f31fce47", + "metadata": {}, + "source": [ + "### Let's use `LinearRegression` model.\n", + "\n", + "- `from sklearn.linear_model import LinearRegression`" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "3656144c-d475-4f85-8e32-949dd0b32f7b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<style>#sk-container-id-1 {color: black;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 
label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 
0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>LinearRegression()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. 
<br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LinearRegression</label><div class=\"sk-toggleable__content\"><pre>LinearRegression()</pre></div></div></div></div></div>" + ], + "text/plain": [ + "LinearRegression()" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = LinearRegression()\n", + "model.fit(df[xcols], df[ycol])" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "f3dda9d9-9331-4ffa-a0da-e6769730b85e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([10.66799181, 8.62647161, 12.86546147, ..., 9.7606495 ,\n", + " 15.96419749, 8.59001589])" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Let's now make predictions for the known data\n", + "predictions = model.predict(df[xcols])\n", + "predictions" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "0c99727d-bcf7-4e4a-a719-8c808ec063f6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.19407871463171344" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sklearn.metrics.explained_variance_score(df[ycol], predictions)" + ] + }, + { + "cell_type": "markdown", + "id": "2dc5c8e8-e6d0-42a1-9a1e-cb1eb2405cad", + "metadata": {}, + "source": [ + "#### Explained variance score\n", + "\n", + "- `explained_variance_score = (known_var - explained_variance) / known_var`\n", + " - where `known_var = y_true.var()` and `explained_variance = (y_true - y_pred).var()`" + ] + }, + { + 
"cell_type": "markdown", + "id": "19c29c12-4310-448a-87cc-0e8e227fcc3f", + "metadata": {}, + "source": [ + "What is the variation in known deaths?" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "1eb001ad-c942-4d54-a042-a042da4321c3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "64.45856160704695" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Compute variance of \"DTH_CUM_CP\" column\n", + "known_var = df[ycol].var()\n", + "known_var" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "23c1a630-53b3-4df1-928c-ea4b58401fc6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "51.94852682334216" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# explained_variance\n", + "explained_variance = (df[ycol] - predictions).var() \n", + "explained_variance" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "7faed709-1853-4a1a-9dd0-31c2543e11e6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.19407871463171342" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# explained_variance score\n", + "explained_variance_score = (known_var - explained_variance) / known_var\n", + "explained_variance_score" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "5256c668-af84-47f6-a85e-ca2dd62996b8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.19407871463171344" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# For comparison here is the explained variance score from sklearn\n", + "sklearn.metrics.explained_variance_score(df[ycol], predictions)" + ] + }, + { + "cell_type": "markdown", + "id": "8218a426-4270-4fcb-94af-0434396aa6d5", + "metadata": {}, + "source": [ + "#### 
`sklearn.metrics.r2_score(y_true, y_pred)`\n", + "\n", + "- requires `import sklearn`\n", + "- calculates the R^2 score (coefficient of determination) given:\n", + " - y_true: actual death values in our example\n", + " - y_pred: prediction of deaths in our example\n", + "- documentation: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.r2_score.html " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "8771e0f6-7ef5-46b2-bf2b-02c72643d106", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.19407871463171344" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sklearn.metrics.r2_score(df[ycol], predictions)" + ] + }, + { + "cell_type": "markdown", + "id": "6d33a3f8-fb48-4da5-833b-88f317af8bb9", + "metadata": {}, + "source": [ + "#### R^2 score (aka coefficient of determination) approximation\n", + "\n", + "- `r2_score = (known_var - r2_val) / known_var`\n", + " - where `known_var = y_true.var()` and `r2_val = ((y_true - y_pred) ** 2).mean()`" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "1a8f1cda-2958-4733-8e6b-e2c60e0e8adf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "51.911207479359874" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# r2_val\n", + "r2_val = ((df[ycol] - predictions) ** 2).mean()\n", + "r2_val" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "17e8027f-afd4-437a-95e5-9d72c90aaaec", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.1946576810723516" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r2_score = (known_var - r2_val) / known_var\n", + "r2_score # there might be minor rounding off differences" + ] + }, + { + "cell_type": "markdown", + "id": "4ff1ab9d-47fb-499a-b556-743f1234dc5c", + "metadata": {}, + "source": [ + "####
`model.score(X, y)`\n", + "- invokes `predict` method for calculating predictions (`y`) based on features (`X`) and compares the predictions with true values of y" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "1cb9dc46-a90d-4add-a7e4-aacd1424cfbf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.19407871463171344" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.score(df[xcols], df[ycol]) " + ] + }, { "cell_type": "markdown", "id": "1768f9a9", @@ -74,47 +419,403 @@ "- Split data into train and test" ] }, + { + "cell_type": "code", + "execution_count": 16, + "id": "f3ff44f1-f7e6-44e6-863d-5e594b11c65f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "696" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Split the data into two equal parts\n", + "len(df) // 2" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "4860cfb4-b699-423d-98e4-1e1bf1e5b6af", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(696, 696)" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Manual way of splitting train and test data\n", + "train, test = df.iloc[:len(df)//2], df.iloc[len(df)//2:]\n", + "len(train), len(test)" + ] + }, { "cell_type": "markdown", "id": "3a781391", "metadata": {}, "source": [ - "#### `train_test_split(<dataframe>, test_size=<val>)`\n", - "\n", - "- requires `from sklearn.model_selection import train_test_split`\n", - "- shuffles the data and then splits based on 75%-25% split between train and test\n", - " - produces new train and test data every single time\n", - "- `test_size` parameter can take two kind of values:\n", - " - actual number of rows that we want in test data\n", - " - fractional number representing the ratio of train versus test data\n", - " - default 
value is `0.25`\n", - "- documentation: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html" + "#### `train_test_split(<dataframe>, test_size=<val>)`\n", + "\n", + "- requires `from sklearn.model_selection import train_test_split`\n", + "- shuffles the data and then splits based on 75%-25% split between train and test\n", + " - produces new train and test data every single time\n", + "- `test_size` parameter can take two kinds of values:\n", + " - actual number of rows that we want in test data\n", + " - fractional number (between 0.0 and 1.0) representing the proportion of the rows to put in test data\n", + " - default value is `0.25`\n", + "- documentation: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "577b5b14-05aa-47f6-bd05-ad72f6aaeee0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(['POP'], 'DTH_CUM_CP')" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "xcols, ycol" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "213d6462-feda-471c-b36c-2ebcd20a3cd3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(1044, 348)" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train, test = train_test_split(df)\n", + "len(train), len(test)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "fc00271a-ebe5-4db7-b6a2-0fae2853a704", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(1272, 120)" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Test size using row count\n", + "train, test = train_test_split(df, test_size=120)\n", + "len(train), len(test)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id":
"c69b74b1-31b6-4c0a-b2d0-80b48603a18a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(696, 696)" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Test size using fraction\n", + "train, test = train_test_split(df, test_size=0.5)\n", + "len(train), len(test)" ] }, { "cell_type": "code", - "execution_count": 4, - "id": "5c3de0c2", + "execution_count": 22, + "id": "2a54abb7-3ffb-40ca-bab1-7743cef019f2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>RptDt</th>\n", + " <th>GEOID</th>\n", + " <th>GEOName</th>\n", + " <th>POS_CUM_CP</th>\n", + " <th>POS_NEW_CP</th>\n", + " <th>POS_7DAYAVG_CP</th>\n", + " <th>DTH_CUM_CP</th>\n", + " <th>DTH_NEW_CP</th>\n", + " <th>DTH_7DAYAVG_CP</th>\n", + " <th>DTH_OVER_30DAYS_CP</th>\n", + " <th>...</th>\n", + " <th>HOSP_NO_CP</th>\n", + " <th>HOSP_UNK_CP</th>\n", + " <th>Date</th>\n", + " <th>POP</th>\n", + " <th>POP_MOE</th>\n", + " <th>OBJECTID</th>\n", + " <th>GEO</th>\n", + " <th>ShapeSTArea</th>\n", + " <th>ShapeSTLength</th>\n", + " <th>geometry</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>1345</th>\n", + " <td>2022-04-07 19:00:00+00:00</td>\n", + " <td>55079090600</td>\n", + " <td>None</td>\n", + " <td>1233</td>\n", + " <td>2</td>\n", + " <td>0.43</td>\n", + " <td>19</td>\n", + " <td>0</td>\n", + " <td>0.0</td>\n", + " <td>0</td>\n", + " <td>...</td>\n", + " <td>375</td>\n", + " <td>808</td>\n", + " <td>2022-04-06 06:00:00+00:00</td>\n", + " <td>4605</td>\n", 
+ " <td>321.0</td>\n", + " <td>3275</td>\n", + " <td>Census tract</td>\n", + " <td>5.748098e+06</td>\n", + " <td>12281.856069</td>\n", + " <td>POLYGON ((-88.03158 43.03837, -88.03172 43.038...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>201</th>\n", + " <td>2022-04-07 19:00:00+00:00</td>\n", + " <td>55139000800</td>\n", + " <td>None</td>\n", + " <td>985</td>\n", + " <td>0</td>\n", + " <td>0.29</td>\n", + " <td>6</td>\n", + " <td>0</td>\n", + " <td>0.0</td>\n", + " <td>0</td>\n", + " <td>...</td>\n", + " <td>439</td>\n", + " <td>519</td>\n", + " <td>2022-04-06 06:00:00+00:00</td>\n", + " <td>3111</td>\n", + " <td>219.0</td>\n", + " <td>1153</td>\n", + " <td>Census tract</td>\n", + " <td>2.682861e+06</td>\n", + " <td>7374.033238</td>\n", + " <td>POLYGON ((-88.54256 44.03214, -88.54257 44.033...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>652</th>\n", + " <td>2022-04-07 19:00:00+00:00</td>\n", + " <td>55063010203</td>\n", + " <td>None</td>\n", + " <td>526</td>\n", + " <td>0</td>\n", + " <td>0.14</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0.0</td>\n", + " <td>0</td>\n", + " <td>...</td>\n", + " <td>251</td>\n", + " <td>261</td>\n", + " <td>2022-04-06 06:00:00+00:00</td>\n", + " <td>2173</td>\n", + " <td>248.0</td>\n", + " <td>2340</td>\n", + " <td>Census tract</td>\n", + " <td>2.292166e+07</td>\n", + " <td>37445.149081</td>\n", + " <td>POLYGON ((-91.25735 43.91612, -91.25784 43.916...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>889</th>\n", + " <td>2022-04-07 19:00:00+00:00</td>\n", + " <td>55137960400</td>\n", + " <td>None</td>\n", + " <td>662</td>\n", + " <td>0</td>\n", + " <td>0.00</td>\n", + " <td>12</td>\n", + " <td>0</td>\n", + " <td>0.0</td>\n", + " <td>0</td>\n", + " <td>...</td>\n", + " <td>438</td>\n", + " <td>193</td>\n", + " <td>2022-04-06 06:00:00+00:00</td>\n", + " <td>3307</td>\n", + " <td>153.0</td>\n", + " <td>2641</td>\n", + " <td>Census tract</td>\n", + " <td>6.855749e+08</td>\n", + " <td>110225.598212</td>\n", + " <td>POLYGON ((-89.38503 
44.15584, -89.38705 44.155...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1248</th>\n", + " <td>2022-04-07 19:00:00+00:00</td>\n", + " <td>55079012400</td>\n", + " <td>None</td>\n", + " <td>605</td>\n", + " <td>0</td>\n", + " <td>0.00</td>\n", + " <td>4</td>\n", + " <td>0</td>\n", + " <td>0.0</td>\n", + " <td>0</td>\n", + " <td>...</td>\n", + " <td>297</td>\n", + " <td>283</td>\n", + " <td>2022-04-06 06:00:00+00:00</td>\n", + " <td>2456</td>\n", + " <td>270.0</td>\n", + " <td>3002</td>\n", + " <td>Census tract</td>\n", + " <td>2.465252e+06</td>\n", + " <td>7607.108441</td>\n", + " <td>POLYGON ((-87.97882 43.04910, -87.97905 43.049...</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>5 rows × 90 columns</p>\n", + "</div>" + ], + "text/plain": [ + " RptDt GEOID GEOName POS_CUM_CP POS_NEW_CP \\\n", + "1345 2022-04-07 19:00:00+00:00 55079090600 None 1233 2 \n", + "201 2022-04-07 19:00:00+00:00 55139000800 None 985 0 \n", + "652 2022-04-07 19:00:00+00:00 55063010203 None 526 0 \n", + "889 2022-04-07 19:00:00+00:00 55137960400 None 662 0 \n", + "1248 2022-04-07 19:00:00+00:00 55079012400 None 605 0 \n", + "\n", + " POS_7DAYAVG_CP DTH_CUM_CP DTH_NEW_CP DTH_7DAYAVG_CP \\\n", + "1345 0.43 19 0 0.0 \n", + "201 0.29 6 0 0.0 \n", + "652 0.14 1 0 0.0 \n", + "889 0.00 12 0 0.0 \n", + "1248 0.00 4 0 0.0 \n", + "\n", + " DTH_OVER_30DAYS_CP ... HOSP_NO_CP HOSP_UNK_CP \\\n", + "1345 0 ... 375 808 \n", + "201 0 ... 439 519 \n", + "652 0 ... 251 261 \n", + "889 0 ... 438 193 \n", + "1248 0 ... 
297 283 \n", + "\n", + " Date POP POP_MOE OBJECTID GEO \\\n", + "1345 2022-04-06 06:00:00+00:00 4605 321.0 3275 Census tract \n", + "201 2022-04-06 06:00:00+00:00 3111 219.0 1153 Census tract \n", + "652 2022-04-06 06:00:00+00:00 2173 248.0 2340 Census tract \n", + "889 2022-04-06 06:00:00+00:00 3307 153.0 2641 Census tract \n", + "1248 2022-04-06 06:00:00+00:00 2456 270.0 3002 Census tract \n", + "\n", + " ShapeSTArea ShapeSTLength \\\n", + "1345 5.748098e+06 12281.856069 \n", + "201 2.682861e+06 7374.033238 \n", + "652 2.292166e+07 37445.149081 \n", + "889 6.855749e+08 110225.598212 \n", + "1248 2.465252e+06 7607.108441 \n", + "\n", + " geometry \n", + "1345 POLYGON ((-88.03158 43.03837, -88.03172 43.038... \n", + "201 POLYGON ((-88.54256 44.03214, -88.54257 44.033... \n", + "652 POLYGON ((-91.25735 43.91612, -91.25784 43.916... \n", + "889 POLYGON ((-89.38503 44.15584, -89.38705 44.155... \n", + "1248 POLYGON ((-87.97882 43.04910, -87.97905 43.049... \n", + "\n", + "[5 rows x 90 columns]" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "xcols = [\"POP\"]\n", - "ycol = \"DTH_CUM_CP\"" + "# Running this cell twice will give you two different train datasets\n", + "train, test = train_test_split(df)\n", + "train.head()" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 23, "id": "0fe05a2e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0.1504426295511987" + "0.21891170867219" ] }, - "execution_count": 5, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -157,17 +858,17 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 24, "id": "bfa17fce", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([0.10684052, 0.33530335, 0.3062585 , 0.11900059, 0.17557615])" + "array([0.21248762, 0.16242777, 0.13919094, 0.28071399, 0.23917723])" ] }, - "execution_count": 6, + "execution_count": 24, "metadata": {}, 
"output_type": "execute_result" } @@ -182,17 +883,17 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 25, "id": "284f776f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0.20859582270735122" + "0.20679950928518975" ] }, - "execution_count": 7, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -214,7 +915,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 26, "id": "ffd9791b", "metadata": {}, "outputs": [], @@ -227,17 +928,17 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 27, "id": "60f0bf73", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0.20859582270735122" + "0.20679950928518975" ] }, - "execution_count": 9, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -248,17 +949,17 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 28, "id": "e3070bf6", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0.24634023216945558" + "0.2465092965565187" ] }, - "execution_count": 10, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -277,7 +978,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 29, "id": "dfedd8d4", "metadata": {}, "outputs": [ @@ -287,13 +988,13 @@ "<Axes: >" ] }, - "execution_count": 11, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "<Figure size 300x300 with 1 Axes>" ] @@ -318,17 +1019,17 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 30, "id": "5123c3a9", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0.09493633774546946" + "0.05115426004890505" ] }, - "execution_count": 12, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -339,17 +1040,17 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 31, "id": "230b9dc9", "metadata": {}, 
"outputs": [ { "data": { "text/plain": [ - "0.09217633613807436" + "0.05964084644394735" ] }, - "execution_count": 13, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -360,19 +1061,19 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 32, "id": "484c7af9", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "model1 0.094936\n", - "model2 0.092176\n", + "model1 0.051154\n", + "model2 0.059641\n", "dtype: float64" ] }, - "execution_count": 14, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -385,7 +1086,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 33, "id": "233cd91d", "metadata": {}, "outputs": [ @@ -395,13 +1096,13 @@ "<Axes: >" ] }, - "execution_count": 15, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "<Figure size 300x300 with 1 Axes>" ] @@ -435,7 +1136,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 34, "id": "d3016079", "metadata": {}, "outputs": [ @@ -467,7 +1168,7 @@ " dtype='object')" ] }, - "execution_count": 16, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -478,17 +1179,17 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 35, "id": "2538534d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0.6687962834257137" + "0.6969249590929576" ] }, - "execution_count": 17, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -514,19 +1215,19 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 36, "id": "68e3d21a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([-2.52154412e-02, 2.46452751e-04, -1.20827425e-03, 1.33674297e-02,\n", - " 1.20108729e-02, -5.77172447e-03, 4.75485875e-03, 2.54843687e-02,\n", - " 1.69087610e-01, 2.86734406e-01])" + "array([-2.75210584e-02, -8.60005368e-04, 
9.33529282e-06, 1.21603948e-02,\n", + " 1.62902754e-02, -1.32283956e-02, 1.58114831e-02, 1.88835471e-02,\n", + " 1.58450769e-01, 3.14462893e-01])" ] }, - "execution_count": 18, + "execution_count": 36, "metadata": {}, "output_type": "execute_result" } @@ -537,27 +1238,27 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 37, "id": "98fa5442", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0 -0.025215\n", - "1 0.000246\n", - "2 -0.001208\n", - "3 0.013367\n", - "4 0.012011\n", - "5 -0.005772\n", - "6 0.004755\n", - "7 0.025484\n", - "8 0.169088\n", - "9 0.286734\n", + "0 -0.027521\n", + "1 -0.000860\n", + "2 0.000009\n", + "3 0.012160\n", + "4 0.016290\n", + "5 -0.013228\n", + "6 0.015811\n", + "7 0.018884\n", + "8 0.158451\n", + "9 0.314463\n", "dtype: float64" ] }, - "execution_count": 19, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } @@ -568,7 +1269,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 38, "id": "44bd5b07", "metadata": {}, "outputs": [ @@ -578,13 +1279,13 @@ "<Axes: >" ] }, - "execution_count": 20, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "<Figure size 300x200 with 1 Axes>" ] @@ -624,7 +1325,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 39, "id": "a3109957", "metadata": { "scrolled": true @@ -756,7 +1457,7 @@ "4 06/04/2018 9:00 AM OhioStreetBeach201806040900 " ] }, - "execution_count": 21, + "execution_count": 39, "metadata": {}, "output_type": "execute_result" } @@ -768,7 +1469,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 40, "id": "18d71eb8", "metadata": {}, "outputs": [ @@ -781,7 +1482,7 @@ " dtype='object')" ] }, - "execution_count": 22, + "execution_count": 40, "metadata": {}, "output_type": "execute_result" } @@ -800,7 +1501,7 @@ }, { "cell_type": "code", - "execution_count": 23, + 
"execution_count": 41, "id": "149aa67c", "metadata": {}, "outputs": [ @@ -810,7 +1511,7 @@ "<Axes: xlabel='Wave Period', ylabel='Wave Height'>" ] }, - "execution_count": 23, + "execution_count": 41, "metadata": {}, "output_type": "execute_result" }, @@ -848,7 +1549,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 42, "id": "3f045b08", "metadata": {}, "outputs": [ @@ -863,7 +1564,7 @@ " 'Rainbow Beach']" ] }, - "execution_count": 24, + "execution_count": 42, "metadata": {}, "output_type": "execute_result" } @@ -875,7 +1576,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 43, "id": "bc626c9d", "metadata": {}, "outputs": [ @@ -935,7 +1636,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 44, "id": "a454b7ec", "metadata": {}, "outputs": [ @@ -1065,7 +1766,7 @@ "30642 07/29/2017 7:00 PM OhioStreetBeach201707291900 " ] }, - "execution_count": 26, + "execution_count": 44, "metadata": {}, "output_type": "execute_result" } @@ -1094,7 +1795,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 45, "id": "199a5ca6", "metadata": {}, "outputs": [ @@ -1105,7 +1806,7 @@ " 0.0045598 , 0.00501976, 0.00087213, 0.00324963, 0.00139934])" ] }, - "execution_count": 27, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } @@ -1121,7 +1822,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 46, "id": "15bec5d5", "metadata": {}, "outputs": [ @@ -1131,7 +1832,7 @@ "0.0028549034281171947" ] }, - "execution_count": 28, + "execution_count": 46, "metadata": {}, "output_type": "execute_result" } @@ -1150,7 +1851,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 47, "id": "306e21bd", "metadata": {}, "outputs": [ @@ -1160,7 +1861,7 @@ "0.0016835509327158373" ] }, - "execution_count": 29, + "execution_count": 47, "metadata": {}, "output_type": "execute_result" } @@ -1211,7 +1912,7 @@ }, { "cell_type": "code", - 
"execution_count": 30, + "execution_count": 48, "id": "5b6f8a12", "metadata": {}, "outputs": [ @@ -1273,7 +1974,7 @@ "30642 3.0" ] }, - "execution_count": 30, + "execution_count": 48, "metadata": {}, "output_type": "execute_result" } @@ -1297,7 +1998,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 49, "id": "54f8d249", "metadata": {}, "outputs": [ @@ -1377,7 +2078,7 @@ "30642 3.0 9.0 27.0 1.732051" ] }, - "execution_count": 31, + "execution_count": 49, "metadata": {}, "output_type": "execute_result" } @@ -1406,7 +2107,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 50, "id": "e6c43483", "metadata": {}, "outputs": [], @@ -1416,20 +2117,20 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 51, "id": "6f81a519", "metadata": {}, "outputs": [ { "data": { "text/html": [ - "<style>#sk-container-id-1 {color: black;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 
div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 
div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>PolynomialFeatures(degree=4, include_bias=False)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. 
<br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">PolynomialFeatures</label><div class=\"sk-toggleable__content\"><pre>PolynomialFeatures(degree=4, include_bias=False)</pre></div></div></div></div></div>" + "<style>#sk-container-id-2 {color: black;}#sk-container-id-2 pre{padding: 0;}#sk-container-id-2 div.sk-toggleable {background-color: white;}#sk-container-id-2 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-2 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-2 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-2 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-2 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 input.sk-hidden--visually {border: 
0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-2 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-2 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-2 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-2 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-2 div.sk-item {position: relative;z-index: 1;}#sk-container-id-2 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-2 div.sk-item::before, #sk-container-id-2 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-2 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-2 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-2 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-2 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-2 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-2 div.sk-label label {font-family: monospace;font-weight: 
bold;display: inline-block;line-height: 1.2em;}#sk-container-id-2 div.sk-label-container {text-align: center;}#sk-container-id-2 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-2 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>PolynomialFeatures(degree=4, include_bias=False)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" checked><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">PolynomialFeatures</label><div class=\"sk-toggleable__content\"><pre>PolynomialFeatures(degree=4, include_bias=False)</pre></div></div></div></div></div>" ], "text/plain": [ "PolynomialFeatures(degree=4, include_bias=False)" ] }, - "execution_count": 33, + "execution_count": 51, "metadata": {}, "output_type": "execute_result" } @@ -1441,7 +2142,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 52, "id": "8d6d2e1c", "metadata": {}, "outputs": [ @@ -1457,7 +2158,7 @@ " [7.000e+00, 4.900e+01, 3.430e+02, 2.401e+03]])" ] }, - "execution_count": 34, + "execution_count": 52, "metadata": {}, "output_type": "execute_result" } @@ -1469,7 +2170,7 @@ }, { "cell_type": "code", - "execution_count": 
35, + "execution_count": 53, "id": "ecfc2493", "metadata": {}, "outputs": [ @@ -1600,7 +2301,7 @@ "[29871 rows x 4 columns]" ] }, - "execution_count": 35, + "execution_count": 53, "metadata": {}, "output_type": "execute_result" } @@ -1620,7 +2321,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 54, "id": "1ab1eaa3", "metadata": {}, "outputs": [ @@ -1631,7 +2332,7 @@ " dtype=object)" ] }, - "execution_count": 36, + "execution_count": 54, "metadata": {}, "output_type": "execute_result" } @@ -1642,7 +2343,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 55, "id": "2d62b3d3", "metadata": {}, "outputs": [ @@ -1773,7 +2474,7 @@ "[29871 rows x 4 columns]" ] }, - "execution_count": 37, + "execution_count": 55, "metadata": {}, "output_type": "execute_result" } @@ -1785,7 +2486,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 56, "id": "4beebe68", "metadata": {}, "outputs": [ @@ -1865,7 +2566,7 @@ "4 3.0 9.0 27.0 81.0" ] }, - "execution_count": 38, + "execution_count": 56, "metadata": {}, "output_type": "execute_result" } @@ -1887,7 +2588,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 57, "id": "91c3a781", "metadata": {}, "outputs": [ @@ -1967,7 +2668,7 @@ "4 3.0 9.0 27.0 81.0" ] }, - "execution_count": 39, + "execution_count": 57, "metadata": {}, "output_type": "execute_result" } @@ -1992,23 +2693,23 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 58, "id": "bb509930", "metadata": {}, "outputs": [ { "data": { "text/html": [ - "<style>#sk-container-id-2 {color: black;}#sk-container-id-2 pre{padding: 0;}#sk-container-id-2 div.sk-toggleable {background-color: white;}#sk-container-id-2 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-2 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 
0.25em;color: #696969;}#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-2 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-2 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-2 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-2 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-2 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-2 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-2 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-2 div.sk-item 
{position: relative;z-index: 1;}#sk-container-id-2 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-2 div.sk-item::before, #sk-container-id-2 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-2 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-2 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-2 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-2 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-2 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-2 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-2 div.sk-label-container {text-align: center;}#sk-container-id-2 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-2 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[('pf', PolynomialFeatures(include_bias=False)),\n", - " ('lr', LinearRegression())])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. 
<br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" ><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[('pf', PolynomialFeatures(include_bias=False)),\n", - " ('lr', LinearRegression())])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" ><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">PolynomialFeatures</label><div class=\"sk-toggleable__content\"><pre>PolynomialFeatures(include_bias=False)</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" ><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LinearRegression</label><div class=\"sk-toggleable__content\"><pre>LinearRegression()</pre></div></div></div></div></div></div></div>" + "<style>#sk-container-id-3 {color: black;}#sk-container-id-3 pre{padding: 0;}#sk-container-id-3 div.sk-toggleable {background-color: white;}#sk-container-id-3 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-3 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-3 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-3 
div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-3 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-3 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-3 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-3 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-3 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-3 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-3 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-3 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-3 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-3 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-3 div.sk-item {position: relative;z-index: 1;}#sk-container-id-3 div.sk-parallel {display: flex;align-items: stretch;justify-content: 
center;background-color: white;position: relative;}#sk-container-id-3 div.sk-item::before, #sk-container-id-3 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-3 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-3 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-3 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-3 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-3 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-3 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-3 div.sk-label-container {text-align: center;}#sk-container-id-3 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-3 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-3\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[('pf', PolynomialFeatures(include_bias=False)),\n", + " ('lr', LinearRegression())])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. 
<br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" ><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[('pf', PolynomialFeatures(include_bias=False)),\n", + " ('lr', LinearRegression())])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" ><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">PolynomialFeatures</label><div class=\"sk-toggleable__content\"><pre>PolynomialFeatures(include_bias=False)</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-5\" type=\"checkbox\" ><label for=\"sk-estimator-id-5\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LinearRegression</label><div class=\"sk-toggleable__content\"><pre>LinearRegression()</pre></div></div></div></div></div></div></div>" ], "text/plain": [ "Pipeline(steps=[('pf', PolynomialFeatures(include_bias=False)),\n", " ('lr', LinearRegression())])" ] }, - "execution_count": 40, + "execution_count": 58, "metadata": {}, "output_type": "execute_result" } @@ -2023,7 +2724,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 59, "id": "ccf76663", "metadata": {}, "outputs": [ @@ -2033,7 +2734,7 @@ "0.048992293142027354" ] }, - "execution_count": 41, + "execution_count": 59, "metadata": {}, "output_type": "execute_result" } @@ -2061,7 +2762,7 @@ 
}, { "cell_type": "code", - "execution_count": 42, + "execution_count": 60, "id": "76a31355", "metadata": {}, "outputs": [ @@ -2073,7 +2774,7 @@ " dtype=object)" ] }, - "execution_count": 42, + "execution_count": 60, "metadata": {}, "output_type": "execute_result" } @@ -2108,7 +2809,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 61, "id": "9c01ad37-ea34-4c35-9cf8-74552a54eec6", "metadata": {}, "outputs": [], @@ -2118,7 +2819,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 62, "id": "ef069c7b", "metadata": {}, "outputs": [ @@ -2129,7 +2830,7 @@ "\twith 29871 stored elements in Compressed Sparse Row format>" ] }, - "execution_count": 44, + "execution_count": 62, "metadata": {}, "output_type": "execute_result" } @@ -2141,7 +2842,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 63, "id": "5cb24e10", "metadata": {}, "outputs": [ @@ -2152,7 +2853,7 @@ "\twith 29871 stored elements in Compressed Sparse Row format>" ] }, - "execution_count": 45, + "execution_count": 63, "metadata": {}, "output_type": "execute_result" } @@ -2163,7 +2864,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 64, "id": "d5303636", "metadata": {}, "outputs": [ @@ -2344,7 +3045,7 @@ "[29871 rows x 6 columns]" ] }, - "execution_count": 46, + "execution_count": 64, "metadata": {}, "output_type": "execute_result" } @@ -2357,20 +3058,20 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 65, "id": "625a8edc", "metadata": {}, "outputs": [ { "data": { "text/html": [ - "<style>#sk-container-id-3 {color: black;}#sk-container-id-3 pre{padding: 0;}#sk-container-id-3 div.sk-toggleable {background-color: white;}#sk-container-id-3 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-3 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 
0.25em;color: #696969;}#sk-container-id-3 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-3 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-3 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-3 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-3 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-3 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-3 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-3 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-3 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-3 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-3 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-3 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-3 div.sk-item 
{position: relative;z-index: 1;}#sk-container-id-3 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-3 div.sk-item::before, #sk-container-id-3 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-3 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-3 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-3 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-3 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-3 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-3 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-3 div.sk-label-container {text-align: center;}#sk-container-id-3 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-3 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-3\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[('oh', OneHotEncoder()), ('lr', LinearRegression())])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. 
<br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-5\" type=\"checkbox\" ><label for=\"sk-estimator-id-5\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[('oh', OneHotEncoder()), ('lr', LinearRegression())])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-6\" type=\"checkbox\" ><label for=\"sk-estimator-id-6\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">OneHotEncoder</label><div class=\"sk-toggleable__content\"><pre>OneHotEncoder()</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-7\" type=\"checkbox\" ><label for=\"sk-estimator-id-7\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LinearRegression</label><div class=\"sk-toggleable__content\"><pre>LinearRegression()</pre></div></div></div></div></div></div></div>" + "<style>#sk-container-id-4 {color: black;}#sk-container-id-4 pre{padding: 0;}#sk-container-id-4 div.sk-toggleable {background-color: white;}#sk-container-id-4 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-4 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-4 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-4 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: 
black;}#sk-container-id-4 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-4 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-4 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-4 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-4 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-4 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-4 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-4 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-4 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-4 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-4 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-4 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-4 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-4 div.sk-item {position: relative;z-index: 1;}#sk-container-id-4 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-4 
div.sk-item::before, #sk-container-id-4 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-4 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-4 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-4 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-4 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-4 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-4 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-4 div.sk-label-container {text-align: center;}#sk-container-id-4 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-4 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-4\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[('oh', OneHotEncoder()), ('lr', LinearRegression())])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. 
<br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-6\" type=\"checkbox\" ><label for=\"sk-estimator-id-6\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[('oh', OneHotEncoder()), ('lr', LinearRegression())])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-7\" type=\"checkbox\" ><label for=\"sk-estimator-id-7\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">OneHotEncoder</label><div class=\"sk-toggleable__content\"><pre>OneHotEncoder()</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-8\" type=\"checkbox\" ><label for=\"sk-estimator-id-8\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LinearRegression</label><div class=\"sk-toggleable__content\"><pre>LinearRegression()</pre></div></div></div></div></div></div></div>" ], "text/plain": [ "Pipeline(steps=[('oh', OneHotEncoder()), ('lr', LinearRegression())])" ] }, - "execution_count": 47, + "execution_count": 65, "metadata": {}, "output_type": "execute_result" } @@ -2385,7 +3086,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 66, "id": "5494c1fa", "metadata": {}, "outputs": [ @@ -2395,7 +3096,7 @@ "0.04564310407563701" ] }, - "execution_count": 48, + "execution_count": 66, "metadata": {}, "output_type": "execute_result" } @@ -2450,20 +3151,20 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 67, "id": 
"40378fb3", "metadata": {}, "outputs": [ { "data": { "text/html": [ - "<style>#sk-container-id-4 {color: black;}#sk-container-id-4 pre{padding: 0;}#sk-container-id-4 div.sk-toggleable {background-color: white;}#sk-container-id-4 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-4 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-4 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-4 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-4 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-4 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-4 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-4 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-4 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-4 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-4 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-4 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-4 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-4 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 
1;}#sk-container-id-4 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-4 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-4 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-4 div.sk-item {position: relative;z-index: 1;}#sk-container-id-4 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-4 div.sk-item::before, #sk-container-id-4 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-4 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-4 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-4 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-4 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-4 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-4 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-4 div.sk-label-container {text-align: center;}#sk-container-id-4 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-4 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-4\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>ColumnTransformer(transformers=[('polynomialfeatures', PolynomialFeatures(),\n", + "<style>#sk-container-id-5 {color: black;}#sk-container-id-5 pre{padding: 0;}#sk-container-id-5 div.sk-toggleable {background-color: white;}#sk-container-id-5 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-5 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-5 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-5 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-5 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-5 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-5 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-5 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-5 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-5 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-5 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-5 div.sk-estimator {font-family: 
monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-5 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-5 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-5 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-5 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-5 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-5 div.sk-item {position: relative;z-index: 1;}#sk-container-id-5 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-5 div.sk-item::before, #sk-container-id-5 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-5 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-5 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-5 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-5 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-5 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-5 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-5 div.sk-label-container {text-align: center;}#sk-container-id-5 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: 
none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-5 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-5\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>ColumnTransformer(transformers=[('polynomialfeatures', PolynomialFeatures(),\n", " ['Wave Period']),\n", " ('onehotencoder', OneHotEncoder(),\n", - " ['Beach Name'])])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-8\" type=\"checkbox\" ><label for=\"sk-estimator-id-8\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">ColumnTransformer</label><div class=\"sk-toggleable__content\"><pre>ColumnTransformer(transformers=[('polynomialfeatures', PolynomialFeatures(),\n", + " ['Beach Name'])])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. 
<br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-9\" type=\"checkbox\" ><label for=\"sk-estimator-id-9\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">ColumnTransformer</label><div class=\"sk-toggleable__content\"><pre>ColumnTransformer(transformers=[('polynomialfeatures', PolynomialFeatures(),\n", " ['Wave Period']),\n", " ('onehotencoder', OneHotEncoder(),\n", - " ['Beach Name'])])</pre></div></div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-9\" type=\"checkbox\" ><label for=\"sk-estimator-id-9\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">polynomialfeatures</label><div class=\"sk-toggleable__content\"><pre>['Wave Period']</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-10\" type=\"checkbox\" ><label for=\"sk-estimator-id-10\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">PolynomialFeatures</label><div class=\"sk-toggleable__content\"><pre>PolynomialFeatures()</pre></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-11\" type=\"checkbox\" ><label for=\"sk-estimator-id-11\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">onehotencoder</label><div class=\"sk-toggleable__content\"><pre>['Beach 
Name']</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-12\" type=\"checkbox\" ><label for=\"sk-estimator-id-12\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">OneHotEncoder</label><div class=\"sk-toggleable__content\"><pre>OneHotEncoder()</pre></div></div></div></div></div></div></div></div></div></div>" + " ['Beach Name'])])</pre></div></div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-10\" type=\"checkbox\" ><label for=\"sk-estimator-id-10\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">polynomialfeatures</label><div class=\"sk-toggleable__content\"><pre>['Wave Period']</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-11\" type=\"checkbox\" ><label for=\"sk-estimator-id-11\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">PolynomialFeatures</label><div class=\"sk-toggleable__content\"><pre>PolynomialFeatures()</pre></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-12\" type=\"checkbox\" ><label for=\"sk-estimator-id-12\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">onehotencoder</label><div class=\"sk-toggleable__content\"><pre>['Beach Name']</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-13\" 
type=\"checkbox\" ><label for=\"sk-estimator-id-13\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">OneHotEncoder</label><div class=\"sk-toggleable__content\"><pre>OneHotEncoder()</pre></div></div></div></div></div></div></div></div></div></div>" ], "text/plain": [ "ColumnTransformer(transformers=[('polynomialfeatures', PolynomialFeatures(),\n", @@ -2472,7 +3173,7 @@ " ['Beach Name'])])" ] }, - "execution_count": 49, + "execution_count": 67, "metadata": {}, "output_type": "execute_result" } @@ -2487,31 +3188,31 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 68, "id": "2d6e0ba2", "metadata": {}, "outputs": [ { "data": { "text/html": [ - "<style>#sk-container-id-5 {color: black;}#sk-container-id-5 pre{padding: 0;}#sk-container-id-5 div.sk-toggleable {background-color: white;}#sk-container-id-5 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-5 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-5 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-5 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-5 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-5 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-5 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-5 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-5 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-5 div.sk-label 
input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-5 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-5 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-5 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-5 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-5 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-5 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-5 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-5 div.sk-item {position: relative;z-index: 1;}#sk-container-id-5 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-5 div.sk-item::before, #sk-container-id-5 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-5 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-5 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-5 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-5 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-5 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 
0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-5 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-5 div.sk-label-container {text-align: center;}#sk-container-id-5 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-5 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-5\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[('transformers',\n", + "<style>#sk-container-id-6 {color: black;}#sk-container-id-6 pre{padding: 0;}#sk-container-id-6 div.sk-toggleable {background-color: white;}#sk-container-id-6 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-6 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-6 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-6 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-6 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-6 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-6 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-6 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: 
\"▾\";}#sk-container-id-6 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-6 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-6 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-6 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-6 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-6 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-6 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-6 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-6 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-6 div.sk-item {position: relative;z-index: 1;}#sk-container-id-6 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-6 div.sk-item::before, #sk-container-id-6 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-6 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-6 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-6 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 
50%;}#sk-container-id-6 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-6 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-6 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-6 div.sk-label-container {text-align: center;}#sk-container-id-6 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-6 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-6\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[('transformers',\n", " ColumnTransformer(transformers=[('polynomialfeatures',\n", " PolynomialFeatures(),\n", " ['Wave Period']),\n", " ('onehotencoder',\n", " OneHotEncoder(),\n", " ['Beach Name'])])),\n", - " ('lr', LinearRegression())])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. 
<br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-13\" type=\"checkbox\" ><label for=\"sk-estimator-id-13\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[('transformers',\n", + " ('lr', LinearRegression())])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-14\" type=\"checkbox\" ><label for=\"sk-estimator-id-14\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[('transformers',\n", " ColumnTransformer(transformers=[('polynomialfeatures',\n", " PolynomialFeatures(),\n", " ['Wave Period']),\n", " ('onehotencoder',\n", " OneHotEncoder(),\n", " ['Beach Name'])])),\n", - " ('lr', LinearRegression())])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-14\" type=\"checkbox\" ><label for=\"sk-estimator-id-14\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">transformers: ColumnTransformer</label><div class=\"sk-toggleable__content\"><pre>ColumnTransformer(transformers=[('polynomialfeatures', 
PolynomialFeatures(),\n", + " ('lr', LinearRegression())])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-15\" type=\"checkbox\" ><label for=\"sk-estimator-id-15\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">transformers: ColumnTransformer</label><div class=\"sk-toggleable__content\"><pre>ColumnTransformer(transformers=[('polynomialfeatures', PolynomialFeatures(),\n", " ['Wave Period']),\n", " ('onehotencoder', OneHotEncoder(),\n", - " ['Beach Name'])])</pre></div></div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-15\" type=\"checkbox\" ><label for=\"sk-estimator-id-15\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">polynomialfeatures</label><div class=\"sk-toggleable__content\"><pre>['Wave Period']</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-16\" type=\"checkbox\" ><label for=\"sk-estimator-id-16\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">PolynomialFeatures</label><div class=\"sk-toggleable__content\"><pre>PolynomialFeatures()</pre></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-17\" type=\"checkbox\" ><label for=\"sk-estimator-id-17\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">onehotencoder</label><div class=\"sk-toggleable__content\"><pre>['Beach Name']</pre></div></div></div><div 
class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-18\" type=\"checkbox\" ><label for=\"sk-estimator-id-18\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">OneHotEncoder</label><div class=\"sk-toggleable__content\"><pre>OneHotEncoder()</pre></div></div></div></div></div></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-19\" type=\"checkbox\" ><label for=\"sk-estimator-id-19\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LinearRegression</label><div class=\"sk-toggleable__content\"><pre>LinearRegression()</pre></div></div></div></div></div></div></div>" + " ['Beach Name'])])</pre></div></div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-16\" type=\"checkbox\" ><label for=\"sk-estimator-id-16\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">polynomialfeatures</label><div class=\"sk-toggleable__content\"><pre>['Wave Period']</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-17\" type=\"checkbox\" ><label for=\"sk-estimator-id-17\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">PolynomialFeatures</label><div class=\"sk-toggleable__content\"><pre>PolynomialFeatures()</pre></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-18\" type=\"checkbox\" ><label for=\"sk-estimator-id-18\" 
class=\"sk-toggleable__label sk-toggleable__label-arrow\">onehotencoder</label><div class=\"sk-toggleable__content\"><pre>['Beach Name']</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-19\" type=\"checkbox\" ><label for=\"sk-estimator-id-19\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">OneHotEncoder</label><div class=\"sk-toggleable__content\"><pre>OneHotEncoder()</pre></div></div></div></div></div></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-20\" type=\"checkbox\" ><label for=\"sk-estimator-id-20\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LinearRegression</label><div class=\"sk-toggleable__content\"><pre>LinearRegression()</pre></div></div></div></div></div></div></div>" ], "text/plain": [ "Pipeline(steps=[('transformers',\n", @@ -2524,7 +3225,7 @@ " ('lr', LinearRegression())])" ] }, - "execution_count": 50, + "execution_count": 68, "metadata": {}, "output_type": "execute_result" } @@ -2539,7 +3240,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 69, "id": "4248010b", "metadata": {}, "outputs": [], @@ -2549,7 +3250,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 70, "id": "57b9ffc8", "metadata": {}, "outputs": [ @@ -2559,7 +3260,7 @@ "0.08853953873940563" ] }, - "execution_count": 52, + "execution_count": 70, "metadata": {}, "output_type": "execute_result" } @@ -2587,7 +3288,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 71, "id": "e7e58157", "metadata": {}, "outputs": [ @@ -2597,7 +3298,7 @@ "0.08646767678269851" ] }, - "execution_count": 53, + "execution_count": 71, "metadata": {}, "output_type": "execute_result" } @@ -2617,7 +3318,7 @@ }, { "cell_type": "code", - "execution_count": 
54, + "execution_count": 72, "id": "50c7e102", "metadata": {}, "outputs": [ @@ -2634,7 +3335,7 @@ " 'onehotencoder__Beach Name_Rainbow Beach'], dtype=object)" ] }, - "execution_count": 54, + "execution_count": 72, "metadata": {}, "output_type": "execute_result" } @@ -2645,7 +3346,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 73, "id": "28ccbf92", "metadata": {}, "outputs": [ @@ -2656,7 +3357,7 @@ " 0.02296935, -0.01070481, 0.00846144, 0.02463752])" ] }, - "execution_count": 55, + "execution_count": 73, "metadata": {}, "output_type": "execute_result" } @@ -2667,7 +3368,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 74, "id": "03863a66-3a16-4147-9a56-793f91a33ce2", "metadata": {}, "outputs": [ @@ -2677,7 +3378,7 @@ "<Axes: >" ] }, - "execution_count": 56, + "execution_count": 74, "metadata": {}, "output_type": "execute_result" }, @@ -2699,7 +3400,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 75, "id": "acd00dc3", "metadata": {}, "outputs": [ @@ -2709,7 +3410,7 @@ "<Axes: >" ] }, - "execution_count": 57, + "execution_count": 75, "metadata": {}, "output_type": "execute_result" }, diff --git a/lecture_material/20-regression-2/20-regression1_001.ipynb b/lecture_material/20-regression-2/20-regression2_001.ipynb similarity index 98% rename from lecture_material/20-regression-2/20-regression1_001.ipynb rename to lecture_material/20-regression-2/20-regression2_001.ipynb index 2a7959a..6d7f4e3 100644 --- a/lecture_material/20-regression-2/20-regression1_001.ipynb +++ b/lecture_material/20-regression-2/20-regression2_001.ipynb @@ -41,12 +41,35 @@ "source": [ "# Read the \"covid.geojson\" file\n", "dataset_file = \"covid.geojson\"\n", - "df = " + "df = gpd.read_file(dataset_file)" ] }, { "cell_type": "markdown", - "id": "e3e73632", + "id": "b37d6f28-fabd-4418-8c85-8e7cd24331f5", + "metadata": {}, + "source": [ + "### How well does our model fit the data?\n", + "- explained variance 
score\n", + "- R^2 (\"r squared\")" + ] + }, + { + "cell_type": "markdown", + "id": "a2bf3bc0-03b8-4c41-93a9-bef8a76bf3de", + "metadata": {}, + "source": [ + "#### `sklearn.metrics.explained_variance_score(y_true, y_pred)`\n", + "- requires `import sklearn`\n", + "- calculates the explained variance score given:\n", + " - y_true: actual death values in our example\n", + " - y_pred: prediction of deaths in our example\n", + "- documentation: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.explained_variance_score.html" + ] + }, + { + "cell_type": "markdown", + "id": "c7fbd149-0c50-46ec-bfb1-f33e9fece60e", "metadata": {}, "source": [ "### Predicting \"DTH_CUM_CP\"" @@ -55,7 +78,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0c57bc47", + "id": "cebb790b-9374-4d02-afe0-40e1efc8f176", "metadata": {}, "outputs": [], "source": [ @@ -64,6 +87,203 @@ "df = df[df[\"GEOID\"].str.fullmatch(r\"\\d+\")]" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "f4a4dd37-d7d3-41ca-8319-1442b56b2a24", + "metadata": {}, + "outputs": [], + "source": [ + "xcols = [\"POP\"]\n", + "ycol = \"DTH_CUM_CP\"" + ] + }, + { + "cell_type": "markdown", + "id": "87c4b295-39ca-445c-865d-4829d77f55ce", + "metadata": {}, + "source": [ + "### Let's use `LinearRegression` model.\n", + "\n", + "- `from sklearn.linear_model import LinearRegression`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "78c2c891-caed-4359-bc91-a97de21db812", + "metadata": {}, + "outputs": [], + "source": [ + "model = LinearRegression()\n", + "model.fit(df[xcols], df[ycol])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7787a889-c693-40b3-8e66-9a266bb0c683", + "metadata": {}, + "outputs": [], + "source": [ + "# Let's now make predictions for the known data\n", + "predictions = model.predict(df[xcols])\n", + "predictions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7952f67c-57b3-4ca8-bb42-b6287f012157", + 
"metadata": {}, + "outputs": [], + "source": [ + "sklearn.metrics.explained_variance_score(df[ycol], predictions)" + ] + }, + { + "cell_type": "markdown", + "id": "39caf7a5-c782-4c02-8c3e-8385eddb108f", + "metadata": {}, + "source": [ + "#### Explained variance score\n", + "\n", + "- `explained_variance_score = (known_var - explained_variance) / known_var`\n", + " - where `known_var = y_true.var()` and `explained_variance = (y_true - y_pred).var()`" + ] + }, + { + "cell_type": "markdown", + "id": "908b8af9-6ba1-4e92-9569-36168b253fae", + "metadata": {}, + "source": [ + "What is the variation in known deaths?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5fc126c4-de9a-4a35-abe6-8d5820227f6f", + "metadata": {}, + "outputs": [], + "source": [ + "# Compute variance of \"DTH_CUM_CP\" column\n", + "known_var = df[ycol].var()\n", + "known_var" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a2f489d5-1325-4870-b6f8-d2b7949f12cf", + "metadata": {}, + "outputs": [], + "source": [ + "# explained_variance\n", + "explained_variance = (df[ycol] - predictions).var() \n", + "explained_variance" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e049b2bd-35e8-4a75-9d6d-d70c2b998541", + "metadata": {}, + "outputs": [], + "source": [ + "# explained_variance score\n", + "explained_variance_score = (known_var - explained_variance) / known_var\n", + "explained_variance_score" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ced64de4-d1dc-464e-8855-3341a8f3b15a", + "metadata": {}, + "outputs": [], + "source": [ + "# For comparison here is the explained variance score from sklearn\n", + "sklearn.metrics.explained_variance_score(df[ycol], predictions)" + ] + }, + { + "cell_type": "markdown", + "id": "e85ad41f-8d4b-471f-b689-333346d7d660", + "metadata": {}, + "source": [ + "#### `sklearn.metrics.r2_score(y_true, y_pred)`\n", + "\n", + "- requires `import sklearn`\n", + "- calculates the R^2 score 
(coefficient of determination)
given:\n", + " - y_true: actual death values in our example\n", + " - y_pred: prediction of deaths in our example\n", + "- documentation: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.r2_score.html " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "557a7646-7843-4956-8d72-9104aa7113de", + "metadata": {}, + "outputs": [], + "source": [ + "sklearn.metrics.r2_score(df[ycol], predictions)" + ] + }, + { + "cell_type": "markdown", + "id": "3ceeca6f-e7d4-47fa-b53e-8c3bbba41594", + "metadata": {}, + "source": [ + "#### R^2 score (aka coefficient of determination) approximation\n", + "\n", + "- `r2_score = (known_var - r2_val) / known_var`\n", + " - where `known_var = y_true.var()` and `r2_val = ((y_true - y_pred) ** 2).mean()`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c1ea5c04-3dd7-412c-9cbf-32f4a69bdb20", + "metadata": {}, + "outputs": [], + "source": [ + "# r2_val\n", + "r2_val = ((df[ycol] - predictions) ** 2).mean()\n", + "r2_val" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e3e83156-8e02-4216-9738-bed6b0052608", + "metadata": {}, + "outputs": [], + "source": [ + "r2_score = (known_var - r2_val) / known_var\n", + "r2_score # there might be minor rounding off differences" + ] + }, + { + "cell_type": "markdown", + "id": "d1795241-d9d7-42c5-8bf4-614ec7d972d7", + "metadata": {}, + "source": [ + "#### `model.score(X, y)`\n", + "- invokes `predict` method for calculating predictions (`y`) based on features (`X`) and compares the predictions with true values of y" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a50b84d0-5c1b-4f84-9f41-d6157057d645", + "metadata": {}, + "outputs": [], + "source": [ + "model.score(df[xcols], df[ycol]) " + ] + }, { "cell_type": "markdown", "id": "1768f9a9", @@ -74,6 +294,29 @@ "- Split data into train and test" ] }, + { + "cell_type": "code", + "execution_count": null, + "id":
"80cef390-831c-443a-9149-9141c8d34ef2", + "metadata": {}, + "outputs": [], + "source": [ + "# Split the data into two equal parts\n", + "len(df) // 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "68c16194-1c7b-4f3a-81a3-4b0352f36872", + "metadata": {}, + "outputs": [], + "source": [ + "# Manual way of splitting train and test data\n", + "train, test = df.iloc[:len(df)//2], df.iloc[len(df)//2:]\n", + "len(train), len(test)" + ] + }, { "cell_type": "markdown", "id": "3a781391", @@ -98,8 +341,54 @@ "metadata": {}, "outputs": [], "source": [ - "xcols = [\"POP\"]\n", - "ycol = \"DTH_CUM_CP\"" + "xcols, ycol" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c52c86c4-8afe-4d49-97a5-e2fcb18242c5", + "metadata": {}, + "outputs": [], + "source": [ + "train, test = train_test_split(df)\n", + "len(train), len(test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3ccdf1a-0958-402c-80e1-2eed12abdf88", + "metadata": {}, + "outputs": [], + "source": [ + "# Test size using row count\n", + "train, test = train_test_split(df, test_size=120)\n", + "len(train), len(test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "adeea246-f80a-46c2-b62b-9e95bef5d02f", + "metadata": {}, + "outputs": [], + "source": [ + "# Test size using fraction\n", + "train, test = train_test_split(df, test_size=0.5)\n", + "len(train), len(test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ba4c6310-0f35-44ac-9250-79b6eb04f7cc", + "metadata": {}, + "outputs": [], + "source": [ + "# Running this cell twice will give you two different train datasets\n", + "train, test = train_test_split(df)\n", + "train.head()" ] }, { diff --git a/lecture_material/20-regression-2/20-regression1_002.ipynb b/lecture_material/20-regression-2/20-regression2_002.ipynb similarity index 98% rename from lecture_material/20-regression-2/20-regression1_002.ipynb rename to 
lecture_material/20-regression-2/20-regression2_002.ipynb index 2a7959a..6d7f4e3 100644 --- a/lecture_material/20-regression-2/20-regression1_002.ipynb +++ b/lecture_material/20-regression-2/20-regression2_002.ipynb @@ -41,12 +41,35 @@ "source": [ "# Read the \"covid.geojson\" file\n", "dataset_file = \"covid.geojson\"\n", - "df = " + "df = gpd.read_file(dataset_file)" ] }, { "cell_type": "markdown", - "id": "e3e73632", + "id": "b37d6f28-fabd-4418-8c85-8e7cd24331f5", + "metadata": {}, + "source": [ + "### How well does our model fit the data?\n", + "- explained variance score\n", + "- R^2 (\"r squared\")" + ] + }, + { + "cell_type": "markdown", + "id": "a2bf3bc0-03b8-4c41-93a9-bef8a76bf3de", + "metadata": {}, + "source": [ + "#### `sklearn.metrics.explained_variance_score(y_true, y_pred)`\n", + "- requires `import sklearn`\n", + "- calculates the explained variance score given:\n", + " - y_true: actual death values in our example\n", + " - y_pred: prediction of deaths in our example\n", + "- documentation: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.explained_variance_score.html" + ] + }, + { + "cell_type": "markdown", + "id": "c7fbd149-0c50-46ec-bfb1-f33e9fece60e", "metadata": {}, "source": [ "### Predicting \"DTH_CUM_CP\"" @@ -55,7 +78,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0c57bc47", + "id": "cebb790b-9374-4d02-afe0-40e1efc8f176", "metadata": {}, "outputs": [], "source": [ @@ -64,6 +87,203 @@ "df = df[df[\"GEOID\"].str.fullmatch(r\"\\d+\")]" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "f4a4dd37-d7d3-41ca-8319-1442b56b2a24", + "metadata": {}, + "outputs": [], + "source": [ + "xcols = [\"POP\"]\n", + "ycol = \"DTH_CUM_CP\"" + ] + }, + { + "cell_type": "markdown", + "id": "87c4b295-39ca-445c-865d-4829d77f55ce", + "metadata": {}, + "source": [ + "### Let's use `LinearRegression` model.\n", + "\n", + "- `from sklearn.linear_model import LinearRegression`" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "78c2c891-caed-4359-bc91-a97de21db812", + "metadata": {}, + "outputs": [], + "source": [ + "model = LinearRegression()\n", + "model.fit(df[xcols], df[ycol])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7787a889-c693-40b3-8e66-9a266bb0c683", + "metadata": {}, + "outputs": [], + "source": [ + "# Let's now make predictions for the known data\n", + "predictions = model.predict(df[xcols])\n", + "predictions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7952f67c-57b3-4ca8-bb42-b6287f012157", + "metadata": {}, + "outputs": [], + "source": [ + "sklearn.metrics.explained_variance_score(df[ycol], predictions)" + ] + }, + { + "cell_type": "markdown", + "id": "39caf7a5-c782-4c02-8c3e-8385eddb108f", + "metadata": {}, + "source": [ + "#### Explained variance score\n", + "\n", + "- `explained_variance_score = (known_var - explained_variance) / known_var`\n", + " - where `known_var = y_true.var()` and `explained_variance = (y_true - y_pred).var()`" + ] + }, + { + "cell_type": "markdown", + "id": "908b8af9-6ba1-4e92-9569-36168b253fae", + "metadata": {}, + "source": [ + "What is the variation in known deaths?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5fc126c4-de9a-4a35-abe6-8d5820227f6f", + "metadata": {}, + "outputs": [], + "source": [ + "# Compute variance of \"DTH_CUM_CP\" column\n", + "known_var = df[ycol].var()\n", + "known_var" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a2f489d5-1325-4870-b6f8-d2b7949f12cf", + "metadata": {}, + "outputs": [], + "source": [ + "# explained_variance\n", + "explained_variance = (df[ycol] - predictions).var() \n", + "explained_variance" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e049b2bd-35e8-4a75-9d6d-d70c2b998541", + "metadata": {}, + "outputs": [], + "source": [ + "# explained_variance score\n", + "explained_variance_score = (known_var - explained_variance) / known_var\n", + "explained_variance_score" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ced64de4-d1dc-464e-8855-3341a8f3b15a", + "metadata": {}, + "outputs": [], + "source": [ + "# For comparison here is the explained variance score from sklearn\n", + "sklearn.metrics.explained_variance_score(df[ycol], predictions)" + ] + }, + { + "cell_type": "markdown", + "id": "e85ad41f-8d4b-471f-b689-333346d7d660", + "metadata": {}, + "source": [ + "#### `sklearn.metrics.r2_score(y_true, y_pred)`\n", + "\n", + "- requires `import sklearn`\n", + "- calculates the R^2 score (coefficient of determination) given:\n", + " - y_true: actual death values in our example\n", + " - y_pred: prediction of deaths in our example\n", + "- documentation: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.r2_score.html " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "557a7646-7843-4956-8d72-9104aa7113de", + "metadata": {}, + "outputs": [], + "source": [ + "sklearn.metrics.r2_score(df[ycol], predictions)" + ] + }, + { + "cell_type": "markdown", + "id": "3ceeca6f-e7d4-47fa-b53e-8c3bbba41594", + "metadata": {}, + "source": [ + "#### R^2 score (aka coefficient of
determination) approximation\n", + "\n", + "- `r2_score = (known_var - r2_val) / known_var`\n", + " - where `known_var = y_true.var()` and `r2_val = ((y_true - y_pred) ** 2).mean()`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c1ea5c04-3dd7-412c-9cbf-32f4a69bdb20", + "metadata": {}, + "outputs": [], + "source": [ + "# r2_val\n", + "r2_val = ((df[ycol] - predictions) ** 2).mean()\n", + "r2_val" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e3e83156-8e02-4216-9738-bed6b0052608", + "metadata": {}, + "outputs": [], + "source": [ + "r2_score = (known_var - r2_val) / known_var\n", + "r2_score # there might be minor rounding off differences" + ] + }, + { + "cell_type": "markdown", + "id": "d1795241-d9d7-42c5-8bf4-614ec7d972d7", + "metadata": {}, + "source": [ + "#### `model.score(X, y)`\n", + "- invokes `predict` method for calculating predictions (`y`) based on features (`X`) and compares the predictions with true values of y" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a50b84d0-5c1b-4f84-9f41-d6157057d645", + "metadata": {}, + "outputs": [], + "source": [ + "model.score(df[xcols], df[ycol]) " + ] + }, { "cell_type": "markdown", "id": "1768f9a9", @@ -74,6 +294,29 @@ "- Split data into train and test" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "80cef390-831c-443a-9149-9141c8d34ef2", + "metadata": {}, + "outputs": [], + "source": [ + "# Split the data into two equal parts\n", + "len(df) // 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "68c16194-1c7b-4f3a-81a3-4b0352f36872", + "metadata": {}, + "outputs": [], + "source": [ + "# Manual way of splitting train and test data\n", + "train, test = df.iloc[:len(df)//2], df.iloc[len(df)//2:]\n", + "len(train), len(test)" + ] + }, { "cell_type": "markdown", "id": "3a781391", @@ -98,8 +341,54 @@ "metadata": {}, "outputs": [], "source": [ - "xcols = [\"POP\"]\n", - "ycol = \"DTH_CUM_CP\"" + "xcols, 
ycol" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c52c86c4-8afe-4d49-97a5-e2fcb18242c5", + "metadata": {}, + "outputs": [], + "source": [ + "train, test = train_test_split(df)\n", + "len(train), len(test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3ccdf1a-0958-402c-80e1-2eed12abdf88", + "metadata": {}, + "outputs": [], + "source": [ + "# Test size using row count\n", + "train, test = train_test_split(df, test_size=120)\n", + "len(train), len(test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "adeea246-f80a-46c2-b62b-9e95bef5d02f", + "metadata": {}, + "outputs": [], + "source": [ + "# Test size using fraction\n", + "train, test = train_test_split(df, test_size=0.5)\n", + "len(train), len(test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ba4c6310-0f35-44ac-9250-79b6eb04f7cc", + "metadata": {}, + "outputs": [], + "source": [ + "# Running this cell twice will give you two different train datasets\n", + "train, test = train_test_split(df)\n", + "train.head()" ] }, { -- GitLab