diff --git a/sum23/lecture_materials/20_Databases/lec_20_database3_notes.ipynb b/sum23/lecture_materials/20_Databases/lec_20_database3_notes.ipynb index fdcc3ea64e8b40eb2d4b3fe684864160fe0d8dab..71ac70831a00e41e1a0c38d11c33ba3744240551 100644 --- a/sum23/lecture_materials/20_Databases/lec_20_database3_notes.ipynb +++ b/sum23/lecture_materials/20_Databases/lec_20_database3_notes.ipynb @@ -27,7 +27,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -38,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -48,7 +48,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -63,7 +63,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -135,7 +135,7 @@ "2 CREATE TABLE \"species\" (\\n\"code\" TEXT,\\n \"spe... " ] }, - "execution_count": 6, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -146,7 +146,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -158,7 +158,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -249,7 +249,7 @@ "4 E 50 4 m 12 99" ] }, - "execution_count": 8, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -268,7 +268,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -317,7 +317,7 @@ "1 p pine" ] }, - "execution_count": 9, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -330,7 +330,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -431,7 +431,7 @@ "5 2018 blue Pacer public 1109 1" ] }, - "execution_count": 10, + "execution_count": 11, 
"metadata": {}, "output_type": "execute_result" } @@ -457,21 +457,135 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>tree</th>\n", + " <th>x</th>\n", + " <th>y</th>\n", + " <th>species</th>\n", + " <th>diameter</th>\n", + " <th>priority</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>B</td>\n", + " <td>20</td>\n", + " <td>4</td>\n", + " <td>m</td>\n", + " <td>10</td>\n", + " <td>100</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>E</td>\n", + " <td>50</td>\n", + " <td>4</td>\n", + " <td>m</td>\n", + " <td>12</td>\n", + " <td>99</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " tree x y species diameter priority\n", + "1 B 20 4 m 10 100\n", + "4 E 50 4 m 12 99" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#trees[trees[\"priority\"] > 90] " + "trees[trees[\"priority\"] > 90] " ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + 
" <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>x</th>\n", + " <th>y</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>20</td>\n", + " <td>4</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>50</td>\n", + " <td>4</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " x y\n", + "1 20 4\n", + "4 50 4" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# DataFrame with Boolean Indexing # show only the columns in this list\n", - "#trees[trees[\"priority\"] > 90] [[\"x\", \"y\"]] " + "trees[trees[\"priority\"] > 90] [[\"x\", \"y\"]] " ] }, { @@ -483,7 +597,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -574,7 +688,7 @@ "4 E 50 4 m 12 99" ] }, - "execution_count": 15, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -585,7 +699,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -634,19 +748,18 @@ "1 50 4" ] }, - "execution_count": 17, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# your answer here\n", - "query = \"\"\"\n", - " SELECT x, y\n", + "qry(\"\"\"\n", + " SELECT x,y\n", " FROM trees\n", " WHERE priority > 90\n", - "\"\"\"\n", - "qry(query)" + "\"\"\")" ] }, { @@ -659,16 +772,109 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", 
+ " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>tree</th>\n", + " <th>x</th>\n", + " <th>y</th>\n", + " <th>species</th>\n", + " <th>diameter</th>\n", + " <th>priority</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>A</td>\n", + " <td>10</td>\n", + " <td>4</td>\n", + " <td>m</td>\n", + " <td>8</td>\n", + " <td>71</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>B</td>\n", + " <td>20</td>\n", + " <td>4</td>\n", + " <td>m</td>\n", + " <td>10</td>\n", + " <td>100</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>C</td>\n", + " <td>30</td>\n", + " <td>4</td>\n", + " <td>p</td>\n", + " <td>6</td>\n", + " <td>30</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>D</td>\n", + " <td>40</td>\n", + " <td>4</td>\n", + " <td>p</td>\n", + " <td>8</td>\n", + " <td>40</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>E</td>\n", + " <td>50</td>\n", + " <td>4</td>\n", + " <td>m</td>\n", + " <td>12</td>\n", + " <td>99</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " tree x y species diameter priority\n", + "0 A 10 4 m 8 71\n", + "1 B 20 4 m 10 100\n", + "2 C 30 4 p 6 30\n", + "3 D 40 4 p 8 40\n", + "4 E 50 4 m 12 99" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "trees" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -719,7 +925,7 @@ "2 54" ] }, - "execution_count": 18, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -732,12 +938,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 2b. *Convert* the query into an equivalent *pandas* statement." + "### 2b. *Convert* the query into an equivalent *pandas* statement." 
] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -751,7 +957,7 @@ "Name: x, dtype: int64" ] }, - "execution_count": 19, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -763,7 +969,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -775,7 +981,7 @@ "Name: x, dtype: int64" ] }, - "execution_count": 20, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -787,7 +993,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -799,19 +1005,19 @@ "Name: y, dtype: int64" ] }, - "execution_count": 21, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Do the same for y \n", - "trees[\"y\"] [ trees[\"species\"] == 'm']" + "trees[\"y\"] [trees[\"species\"] == 'm']" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -823,7 +1029,7 @@ "dtype: int64" ] }, - "execution_count": 22, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -836,7 +1042,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -887,7 +1093,7 @@ "4 54" ] }, - "execution_count": 23, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -909,67 +1115,16 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr 
style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>code</th>\n", - " <th>species</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>m</td>\n", - " <td>maple</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>p</td>\n", - " <td>pine</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " code species\n", - "0 m maple\n", - "1 p pine" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "species" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -978,7 +1133,7 @@ "'m'" ] }, - "execution_count": 25, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -989,7 +1144,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -1000,7 +1155,7 @@ "Name: code, dtype: object" ] }, - "execution_count": 29, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -1012,7 +1167,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 26, "metadata": {}, "outputs": [ { @@ -1022,7 +1177,7 @@ "Name: code, dtype: object" ] }, - "execution_count": 30, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -1034,7 +1189,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 27, "metadata": {}, "outputs": [ { @@ -1043,7 +1198,7 @@ "'m'" ] }, - "execution_count": 31, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -1055,7 +1210,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 28, "metadata": {}, "outputs": [ { @@ -1064,7 +1219,7 @@ "'m'" ] }, - "execution_count": 32, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -1076,7 +1231,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 
29, "metadata": {}, "outputs": [ { @@ -1128,24 +1283,6 @@ " <td>100</td>\n", " </tr>\n", " <tr>\n", - " <th>2</th>\n", - " <td>C</td>\n", - " <td>30</td>\n", - " <td>4</td>\n", - " <td>p</td>\n", - " <td>6</td>\n", - " <td>30</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>D</td>\n", - " <td>40</td>\n", - " <td>4</td>\n", - " <td>p</td>\n", - " <td>8</td>\n", - " <td>40</td>\n", - " </tr>\n", - " <tr>\n", " <th>4</th>\n", " <td>E</td>\n", " <td>50</td>\n", @@ -1162,24 +1299,22 @@ " tree x y species diameter priority\n", "0 A 10 4 m 8 71\n", "1 B 20 4 m 10 100\n", - "2 C 30 4 p 6 30\n", - "3 D 40 4 p 8 40\n", "4 E 50 4 m 12 99" ] }, - "execution_count": 33, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# DataFrame with Boolean Indexing\n", - "trees" + "trees [trees[\"species\"] == cd]" ] }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 30, "metadata": {}, "outputs": [ { @@ -1191,7 +1326,7 @@ "Name: tree, dtype: object" ] }, - "execution_count": 34, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -1217,7 +1352,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 31, "metadata": {}, "outputs": [ { @@ -1258,7 +1393,7 @@ "0 m" ] }, - "execution_count": 35, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -1269,7 +1404,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 32, "metadata": {}, "outputs": [ { @@ -1279,7 +1414,7 @@ "Name: code, dtype: object" ] }, - "execution_count": 36, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -1291,7 +1426,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 33, "metadata": {}, "outputs": [ { @@ -1300,21 +1435,21 @@ "'m'" ] }, - "execution_count": 37, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# DataFrame with column selection get the value at 
iloc 0\n", - "cd = qry(\"select code from species where species = 'maple' \") ['code'] .iloc[0]\n", + "cd = qry(\"select code from species where species = 'maple' \") ['code'] .iloc[0]\n", "cd\n", "\n" ] }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 34, "metadata": {}, "outputs": [ { @@ -1365,7 +1500,7 @@ "2 E" ] }, - "execution_count": 38, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -1377,7 +1512,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 35, "metadata": {}, "outputs": [ { @@ -1428,7 +1563,7 @@ "2 E" ] }, - "execution_count": 39, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -1448,7 +1583,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 38, "metadata": {}, "outputs": [ { @@ -1509,7 +1644,7 @@ "4 p" ] }, - "execution_count": 40, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" } @@ -1525,40 +1660,6 @@ "### 4.b *Convert* the query code to *Pandas*" ] }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "1 m\n", - "4 m\n", - "0 m\n", - "3 p\n", - "2 p\n", - "Name: species, dtype: object" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# DataFrame soted by priority # with column selection\n", - "trees.sort_values(\"priority\", ascending = False) ['species']" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "----\n", - "### 5a. 
*Predict* the output of the following code" - ] - }, { "cell_type": "code", "execution_count": 43, @@ -1585,71 +1686,41 @@ " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", - " <th>tree</th>\n", - " <th>x</th>\n", - " <th>y</th>\n", " <th>species</th>\n", - " <th>diameter</th>\n", - " <th>priority</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", - " <th>0</th>\n", - " <td>A</td>\n", - " <td>10</td>\n", - " <td>4</td>\n", + " <th>1</th>\n", " <td>m</td>\n", - " <td>8</td>\n", - " <td>71</td>\n", " </tr>\n", " <tr>\n", - " <th>1</th>\n", - " <td>B</td>\n", - " <td>20</td>\n", - " <td>4</td>\n", + " <th>4</th>\n", " <td>m</td>\n", - " <td>10</td>\n", - " <td>100</td>\n", " </tr>\n", " <tr>\n", - " <th>2</th>\n", - " <td>C</td>\n", - " <td>30</td>\n", - " <td>4</td>\n", - " <td>p</td>\n", - " <td>6</td>\n", - " <td>30</td>\n", + " <th>0</th>\n", + " <td>m</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", - " <td>D</td>\n", - " <td>40</td>\n", - " <td>4</td>\n", " <td>p</td>\n", - " <td>8</td>\n", - " <td>40</td>\n", " </tr>\n", " <tr>\n", - " <th>4</th>\n", - " <td>E</td>\n", - " <td>50</td>\n", - " <td>4</td>\n", - " <td>m</td>\n", - " <td>12</td>\n", - " <td>99</td>\n", + " <th>2</th>\n", + " <td>p</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ - " tree x y species diameter priority\n", - "0 A 10 4 m 8 71\n", - "1 B 20 4 m 10 100\n", - "2 C 30 4 p 6 30\n", - "3 D 40 4 p 8 40\n", - "4 E 50 4 m 12 99" + " species\n", + "1 m\n", + "4 m\n", + "0 m\n", + "3 p\n", + "2 p" ] }, "execution_count": 43, @@ -1657,6 +1728,24 @@ "output_type": "execute_result" } ], + "source": [ + "# DataFrame sorted by priority # with column selection\n", + "trees[['species','priority']].sort_values(by= 'priority', ascending=False)[['species']]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "----\n", + "### 5a. 
*Predict* the output of the following code" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "trees" ] @@ -1916,7 +2005,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 51, "metadata": {}, "outputs": [ { @@ -1927,19 +2016,19 @@ "Name: 1, dtype: object" ] }, - "execution_count": 50, + "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# DataFrame # grab the first row #slicing by certain indices\n", - "trees.sort_values(\"priority\", ascending=False) .iloc[0] [[\"tree\", \"priority\"]]" + "# DataFrame # grab the first row #slicing by certain indices\n", + "trees.sort_values(\"priority\", ascending=False).iloc[0][[\"tree\",\"priority\"]]" ] }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 52, "metadata": {}, "outputs": [ { @@ -1948,14 +2037,14 @@ "['B', 100]" ] }, - "execution_count": 51, + "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# list gets the values only\n", - "list(trees.sort_values(\"priority\", ascending=False) .iloc[0] [[\"tree\", \"priority\"]])" + "list(trees.sort_values(\"priority\", ascending=False).iloc[0][[\"tree\",\"priority\"]])" ] }, { @@ -1968,7 +2057,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -2042,7 +2131,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 54, "metadata": {}, "outputs": [ { @@ -2051,7 +2140,7 @@ "5" ] }, - "execution_count": 55, + "execution_count": 54, "metadata": {}, "output_type": "execute_result" } @@ -2064,7 +2153,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 55, "metadata": {}, "outputs": [ { @@ -2075,7 +2164,7 @@ "Name: species, dtype: int64" ] }, - "execution_count": 57, + "execution_count": 55, "metadata": {}, "output_type": "execute_result" } @@ -2087,7 +2176,7 @@ }, { "cell_type": 
"code", - "execution_count": 58, + "execution_count": 56, "metadata": {}, "outputs": [ { @@ -2096,19 +2185,19 @@ "2" ] }, - "execution_count": 58, + "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "c2 = len(trees['species'].value_counts())\n", + "c2 = len( trees['species'].value_counts() )\n", "c2" ] }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 57, "metadata": {}, "outputs": [ { @@ -2117,7 +2206,7 @@ "[5, 2]" ] }, - "execution_count": 59, + "execution_count": 57, "metadata": {}, "output_type": "execute_result" } @@ -2129,7 +2218,7 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 58, "metadata": {}, "outputs": [ { @@ -2172,7 +2261,7 @@ "0 5 2" ] }, - "execution_count": 60, + "execution_count": 58, "metadata": {}, "output_type": "execute_result" } @@ -2205,7 +2294,109 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>tree</th>\n", + " <th>x</th>\n", + " <th>y</th>\n", + " <th>species</th>\n", + " <th>diameter</th>\n", + " <th>priority</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>A</td>\n", + " <td>10</td>\n", + " <td>4</td>\n", + " <td>m</td>\n", + " <td>8</td>\n", + " <td>71</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>B</td>\n", + " <td>20</td>\n", + " <td>4</td>\n", + " <td>m</td>\n", + " <td>10</td>\n", + " <td>100</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>C</td>\n", + " 
<td>30</td>\n", + " <td>4</td>\n", + " <td>p</td>\n", + " <td>6</td>\n", + " <td>30</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>D</td>\n", + " <td>40</td>\n", + " <td>4</td>\n", + " <td>p</td>\n", + " <td>8</td>\n", + " <td>40</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>E</td>\n", + " <td>50</td>\n", + " <td>4</td>\n", + " <td>m</td>\n", + " <td>12</td>\n", + " <td>99</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " tree x y species diameter priority\n", + "0 A 10 4 m 8 71\n", + "1 B 20 4 m 10 100\n", + "2 C 30 4 p 6 30\n", + "3 D 40 4 p 8 40\n", + "4 E 50 4 m 12 99" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trees" + ] + }, + { + "cell_type": "code", + "execution_count": 60, "metadata": {}, "outputs": [ { @@ -2257,7 +2448,7 @@ "1 p 2 7.0" ] }, - "execution_count": 61, + "execution_count": 60, "metadata": {}, "output_type": "execute_result" } @@ -2278,7 +2469,7 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 61, "metadata": {}, "outputs": [ { @@ -2287,7 +2478,7 @@ "['m', 'p']" ] }, - "execution_count": 62, + "execution_count": 61, "metadata": {}, "output_type": "execute_result" } @@ -2300,14 +2491,14 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 62, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/var/folders/07/v8h5vw9j6v71xlj9rgbrh5h40000gn/T/ipykernel_17964/363807400.py:1: FutureWarning: The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n", + "/var/folders/07/v8h5vw9j6v71xlj9rgbrh5h40000gn/T/ipykernel_22914/363807400.py:1: FutureWarning: The default value of numeric_only in DataFrameGroupBy.mean is deprecated. 
In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n", " trees.groupby(\"species\").mean()\n" ] }, @@ -2371,7 +2562,7 @@ "p 35.000000 4.0 7.0 35.0" ] }, - "execution_count": 63, + "execution_count": 62, "metadata": {}, "output_type": "execute_result" } @@ -2382,14 +2573,14 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 63, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/var/folders/07/v8h5vw9j6v71xlj9rgbrh5h40000gn/T/ipykernel_17964/3042628405.py:2: FutureWarning: The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n", + "/var/folders/07/v8h5vw9j6v71xlj9rgbrh5h40000gn/T/ipykernel_22914/3042628405.py:2: FutureWarning: The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. 
Either specify numeric_only or select only columns which should be valid for the function.\n", " size_list = list(trees.groupby(\"species\").mean()[\"diameter\"])\n" ] }, @@ -2399,7 +2590,7 @@ "[10.0, 7.0]" ] }, - "execution_count": 64, + "execution_count": 63, "metadata": {}, "output_type": "execute_result" } @@ -2412,7 +2603,7 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": 64, "metadata": {}, "outputs": [ { @@ -2421,7 +2612,7 @@ "[3, 2]" ] }, - "execution_count": 65, + "execution_count": 64, "metadata": {}, "output_type": "execute_result" } @@ -2434,7 +2625,7 @@ }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 65, "metadata": {}, "outputs": [ { @@ -2486,7 +2677,7 @@ "1 p 2 7.0" ] }, - "execution_count": 66, + "execution_count": 65, "metadata": {}, "output_type": "execute_result" } @@ -2498,6 +2689,15 @@ " \"size\": size_list})" ] }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [], + "source": [ + "conn.close()" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/sum23/lecture_materials/21_Advanced_Pandas/lec_21_pandas3_data_transformation_notes.ipynb b/sum23/lecture_materials/21_Advanced_Pandas/lec_21_pandas3_data_transformation_notes.ipynb index 2a9fa0eef8be35cf2cffa26b0621ba420fbe4a31..aa9b274768af69ea352e49760cd763a9e6c0dc6b 100644 --- a/sum23/lecture_materials/21_Advanced_Pandas/lec_21_pandas3_data_transformation_notes.ipynb +++ b/sum23/lecture_materials/21_Advanced_Pandas/lec_21_pandas3_data_transformation_notes.ipynb @@ -179,7 +179,7 @@ } ], "source": [ - "df = qry(\"SELECT * from sqlite_master\")\n", + "df = qry(\"SELECT * FROM sqlite_master\")\n", "df" ] }, @@ -192,7 +192,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -233,7 +233,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -565,13 +565,13 
@@ "[35877 rows x 17 columns]" ] }, - "execution_count": 7, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df = qry(\"SELECT * from spotify\")\n", + "df = qry(\"SELECT * FROM spotify\")\n", "df" ] }, @@ -588,7 +588,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -924,7 +924,7 @@ "[35877 rows x 16 columns]" ] }, - "execution_count": 9, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -955,7 +955,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -1265,7 +1265,7 @@ "6xEnbXM1us9fDJy2LC0lru -5.431 0.0895 0.0797 " ] }, - "execution_count": 10, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -1289,7 +1289,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1316,7 +1316,7 @@ "Name: song_name, Length: 35877, dtype: bool" ] }, - "execution_count": 12, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -1336,7 +1336,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1353,7 +1353,7 @@ "Name: song_name, dtype: int64" ] }, - "execution_count": 13, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -1374,7 +1374,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 15, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1559,7 +1559,7 @@ " <tr>\n", " <th>46bXU7Sgj7104ZoXxzz9tM</th>\n", " <td>Euphoric Hardstyle</td>\n", - " <td>This song does not have a name</td>\n", + " <td>This song doesn't have a name</td>\n", " <td>hardstyle</td>\n", " <td>269208</td>\n", " <td>4</td>\n", @@ -1578,7 +1578,7 @@ " <tr>\n", " <th>0he2ViGMUO3ajKTxLOfWVT</th>\n", " <td>Greatest Hardstyle 
Playlist</td>\n", - " <td>This song does not have a name</td>\n", + " <td>This song doesn't have a name</td>\n", " <td>hardstyle</td>\n", " <td>210112</td>\n", " <td>0</td>\n", @@ -1597,7 +1597,7 @@ " <tr>\n", " <th>72DAt9Lbpy9EUS29OzQLob</th>\n", " <td>Best of Hardstyle 2020</td>\n", - " <td>This song does not have a name</td>\n", + " <td>This song doesn't have a name</td>\n", " <td>hardstyle</td>\n", " <td>234823</td>\n", " <td>8</td>\n", @@ -1616,7 +1616,7 @@ " <tr>\n", " <th>6HXgExFVuE1c3cq9QjFCcU</th>\n", " <td>Euphoric Hardstyle</td>\n", - " <td>This song does not have a name</td>\n", + " <td>This song doesn't have a name</td>\n", " <td>hardstyle</td>\n", " <td>323200</td>\n", " <td>6</td>\n", @@ -1635,7 +1635,7 @@ " <tr>\n", " <th>6MAAMZImxcvYhRnxDLTufD</th>\n", " <td>Best of Hardstyle 2020</td>\n", - " <td>This song does not have a name</td>\n", + " <td>This song doesn't have a name</td>\n", " <td>hardstyle</td>\n", " <td>162161</td>\n", " <td>9</td>\n", @@ -1671,77 +1671,77 @@ "6HXgExFVuE1c3cq9QjFCcU Euphoric Hardstyle \n", "6MAAMZImxcvYhRnxDLTufD Best of Hardstyle 2020 \n", "\n", - " song_name genre \\\n", - "id \n", - "7pgJBLVz5VmnL7uGHmRj6p Pathology Dark Trap \n", - "0vSWgAlfpye0WCGeNmuNhy Symbiote Dark Trap \n", - "7EL7ifncK2PWFYThJjzR25 BRAINFOOD Dark Trap \n", - "1umsRbM7L4ju7rn9aU8Ju6 Sacrifice Dark Trap \n", - "4SKqOHKYU5pgHr5UiVKiQN Backpack Dark Trap \n", - "... ... ... 
\n", - "46bXU7Sgj7104ZoXxzz9tM This song does not have a name hardstyle \n", - "0he2ViGMUO3ajKTxLOfWVT This song does not have a name hardstyle \n", - "72DAt9Lbpy9EUS29OzQLob This song does not have a name hardstyle \n", - "6HXgExFVuE1c3cq9QjFCcU This song does not have a name hardstyle \n", - "6MAAMZImxcvYhRnxDLTufD This song does not have a name hardstyle \n", + " song_name genre duration_ms \\\n", + "id \n", + "7pgJBLVz5VmnL7uGHmRj6p Pathology Dark Trap 224427 \n", + "0vSWgAlfpye0WCGeNmuNhy Symbiote Dark Trap 98821 \n", + "7EL7ifncK2PWFYThJjzR25 BRAINFOOD Dark Trap 101172 \n", + "1umsRbM7L4ju7rn9aU8Ju6 Sacrifice Dark Trap 96062 \n", + "4SKqOHKYU5pgHr5UiVKiQN Backpack Dark Trap 135079 \n", + "... ... ... ... \n", + "46bXU7Sgj7104ZoXxzz9tM This song doesn't have a name hardstyle 269208 \n", + "0he2ViGMUO3ajKTxLOfWVT This song doesn't have a name hardstyle 210112 \n", + "72DAt9Lbpy9EUS29OzQLob This song doesn't have a name hardstyle 234823 \n", + "6HXgExFVuE1c3cq9QjFCcU This song doesn't have a name hardstyle 323200 \n", + "6MAAMZImxcvYhRnxDLTufD This song doesn't have a name hardstyle 162161 \n", "\n", - " duration_ms key mode time_signature tempo \\\n", - "id \n", - "7pgJBLVz5VmnL7uGHmRj6p 224427 8 1 4 115.080 \n", - "0vSWgAlfpye0WCGeNmuNhy 98821 5 1 4 218.050 \n", - "7EL7ifncK2PWFYThJjzR25 101172 8 1 4 189.938 \n", - "1umsRbM7L4ju7rn9aU8Ju6 96062 10 0 4 139.990 \n", - "4SKqOHKYU5pgHr5UiVKiQN 135079 5 1 4 128.014 \n", - "... ... ... ... ... ... 
\n", - "46bXU7Sgj7104ZoXxzz9tM 269208 4 1 4 150.013 \n", - "0he2ViGMUO3ajKTxLOfWVT 210112 0 0 4 149.928 \n", - "72DAt9Lbpy9EUS29OzQLob 234823 8 1 4 154.935 \n", - "6HXgExFVuE1c3cq9QjFCcU 323200 6 0 4 150.042 \n", - "6MAAMZImxcvYhRnxDLTufD 162161 9 1 4 155.047 \n", + " key mode time_signature tempo acousticness \\\n", + "id \n", + "7pgJBLVz5VmnL7uGHmRj6p 8 1 4 115.080 0.401000 \n", + "0vSWgAlfpye0WCGeNmuNhy 5 1 4 218.050 0.013800 \n", + "7EL7ifncK2PWFYThJjzR25 8 1 4 189.938 0.187000 \n", + "1umsRbM7L4ju7rn9aU8Ju6 10 0 4 139.990 0.145000 \n", + "4SKqOHKYU5pgHr5UiVKiQN 5 1 4 128.014 0.007700 \n", + "... ... ... ... ... ... \n", + "46bXU7Sgj7104ZoXxzz9tM 4 1 4 150.013 0.031500 \n", + "0he2ViGMUO3ajKTxLOfWVT 0 0 4 149.928 0.022500 \n", + "72DAt9Lbpy9EUS29OzQLob 8 1 4 154.935 0.026000 \n", + "6HXgExFVuE1c3cq9QjFCcU 6 0 4 150.042 0.000551 \n", + "6MAAMZImxcvYhRnxDLTufD 9 1 4 155.047 0.001890 \n", "\n", - " acousticness danceability energy instrumentalness \\\n", - "id \n", - "7pgJBLVz5VmnL7uGHmRj6p 0.401000 0.719 0.493 0.000000 \n", - "0vSWgAlfpye0WCGeNmuNhy 0.013800 0.850 0.893 0.000004 \n", - "7EL7ifncK2PWFYThJjzR25 0.187000 0.864 0.365 0.000000 \n", - "1umsRbM7L4ju7rn9aU8Ju6 0.145000 0.767 0.576 0.000003 \n", - "4SKqOHKYU5pgHr5UiVKiQN 0.007700 0.765 0.726 0.000000 \n", - "... ... ... ... ... \n", - "46bXU7Sgj7104ZoXxzz9tM 0.031500 0.528 0.693 0.000345 \n", - "0he2ViGMUO3ajKTxLOfWVT 0.022500 0.517 0.768 0.000018 \n", - "72DAt9Lbpy9EUS29OzQLob 0.026000 0.361 0.821 0.000242 \n", - "6HXgExFVuE1c3cq9QjFCcU 0.000551 0.477 0.921 0.029600 \n", - "6MAAMZImxcvYhRnxDLTufD 0.001890 0.529 0.945 0.000055 \n", + " danceability energy instrumentalness liveness \\\n", + "id \n", + "7pgJBLVz5VmnL7uGHmRj6p 0.719 0.493 0.000000 0.1180 \n", + "0vSWgAlfpye0WCGeNmuNhy 0.850 0.893 0.000004 0.3720 \n", + "7EL7ifncK2PWFYThJjzR25 0.864 0.365 0.000000 0.1160 \n", + "1umsRbM7L4ju7rn9aU8Ju6 0.767 0.576 0.000003 0.0968 \n", + "4SKqOHKYU5pgHr5UiVKiQN 0.765 0.726 0.000000 0.6190 \n", + "... ... ... 
... ... \n", + "46bXU7Sgj7104ZoXxzz9tM 0.528 0.693 0.000345 0.1210 \n", + "0he2ViGMUO3ajKTxLOfWVT 0.517 0.768 0.000018 0.2050 \n", + "72DAt9Lbpy9EUS29OzQLob 0.361 0.821 0.000242 0.3850 \n", + "6HXgExFVuE1c3cq9QjFCcU 0.477 0.921 0.029600 0.0575 \n", + "6MAAMZImxcvYhRnxDLTufD 0.529 0.945 0.000055 0.4140 \n", "\n", - " liveness loudness speechiness valence \n", - "id \n", - "7pgJBLVz5VmnL7uGHmRj6p 0.1180 -7.230 0.0794 0.1240 \n", - "0vSWgAlfpye0WCGeNmuNhy 0.3720 -4.783 0.0623 0.0391 \n", - "7EL7ifncK2PWFYThJjzR25 0.1160 -10.219 0.0655 0.0478 \n", - "1umsRbM7L4ju7rn9aU8Ju6 0.0968 -9.683 0.2560 0.1870 \n", - "4SKqOHKYU5pgHr5UiVKiQN 0.6190 -5.580 0.1910 0.2700 \n", - "... ... ... ... ... \n", - "46bXU7Sgj7104ZoXxzz9tM 0.1210 -5.148 0.0304 0.3940 \n", - "0he2ViGMUO3ajKTxLOfWVT 0.2050 -7.922 0.0479 0.3830 \n", - "72DAt9Lbpy9EUS29OzQLob 0.3850 -3.102 0.0505 0.1240 \n", - "6HXgExFVuE1c3cq9QjFCcU 0.0575 -4.777 0.0392 0.4880 \n", - "6MAAMZImxcvYhRnxDLTufD 0.4140 -5.862 0.0615 0.1340 \n", + " loudness speechiness valence \n", + "id \n", + "7pgJBLVz5VmnL7uGHmRj6p -7.230 0.0794 0.1240 \n", + "0vSWgAlfpye0WCGeNmuNhy -4.783 0.0623 0.0391 \n", + "7EL7ifncK2PWFYThJjzR25 -10.219 0.0655 0.0478 \n", + "1umsRbM7L4ju7rn9aU8Ju6 -9.683 0.2560 0.1870 \n", + "4SKqOHKYU5pgHr5UiVKiQN -5.580 0.1910 0.2700 \n", + "... ... ... ... 
\n", + "46bXU7Sgj7104ZoXxzz9tM -5.148 0.0304 0.3940 \n", + "0he2ViGMUO3ajKTxLOfWVT -7.922 0.0479 0.3830 \n", + "72DAt9Lbpy9EUS29OzQLob -3.102 0.0505 0.1240 \n", + "6HXgExFVuE1c3cq9QjFCcU -4.777 0.0392 0.4880 \n", + "6MAAMZImxcvYhRnxDLTufD -5.862 0.0615 0.1340 \n", "\n", "[35877 rows x 16 columns]" ] }, - "execution_count": 16, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# use .fillna to replace missing values\n", - "df[\"song_name\"].fillna(\"This song does not have a name\")\n", + "df[\"song_name\"].fillna(\"This song doesn't have a name\")\n", "\n", "# to replace the original DataFrame's column, you need to explicitly update that object instance\n", "# TODO: uncomment the below lines and update the code\n", - "df[\"song_name\"] = df[\"song_name\"].fillna(\"This song does not have a name\")\n", + "df[\"song_name\"] = df[\"song_name\"].fillna(\"This song doesn't have a name\")\n", "df" ] }, @@ -1756,7 +1756,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 16, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1828,7 +1828,7 @@ " <tr>\n", " <th>5LzAV6KfjN8VhWCedeygfY</th>\n", " <td>Dirtybird Players</td>\n", - " <td>This song does not have a name</td>\n", + " <td>This song doesn't have a name</td>\n", " <td>techhouse</td>\n", " <td>197499</td>\n", " <td>7</td>\n", @@ -1847,7 +1847,7 @@ " <tr>\n", " <th>3TsCb6ueD678XBJDiRrvhr</th>\n", " <td>tech house</td>\n", - " <td>This song does not have a name</td>\n", + " <td>This song doesn't have a name</td>\n", " <td>techhouse</td>\n", " <td>206000</td>\n", " <td>10</td>\n", @@ -1866,7 +1866,7 @@ " <tr>\n", " <th>6Y0Fy2buEis7bEOlG0QET1</th>\n", " <td>Tech House Bangerz</td>\n", - " <td>This song does not have a name</td>\n", + " <td>This song doesn't have a name</td>\n", " <td>techhouse</td>\n", " <td>199839</td>\n", " <td>4</td>\n", @@ -1885,7 +1885,7 @@ " <tr>\n", " <th>4EJI2XGViSQp6WscLKgYDD</th>\n", " <td>tech house</td>\n", - " 
<td>This song does not have a name</td>\n", + " <td>This song doesn't have a name</td>\n", " <td>techhouse</td>\n", " <td>173861</td>\n", " <td>8</td>\n", @@ -1904,7 +1904,7 @@ " <tr>\n", " <th>4x6VzOQTLIrkkCWcDPh5Y0</th>\n", " <td>blanc | Tech House</td>\n", - " <td>This song does not have a name</td>\n", + " <td>This song doesn't have a name</td>\n", " <td>techhouse</td>\n", " <td>394960</td>\n", " <td>8</td>\n", @@ -1942,7 +1942,7 @@ " <tr>\n", " <th>46bXU7Sgj7104ZoXxzz9tM</th>\n", " <td>Euphoric Hardstyle</td>\n", - " <td>This song does not have a name</td>\n", + " <td>This song doesn't have a name</td>\n", " <td>hardstyle</td>\n", " <td>269208</td>\n", " <td>4</td>\n", @@ -1961,7 +1961,7 @@ " <tr>\n", " <th>0he2ViGMUO3ajKTxLOfWVT</th>\n", " <td>Greatest Hardstyle Playlist</td>\n", - " <td>This song does not have a name</td>\n", + " <td>This song doesn't have a name</td>\n", " <td>hardstyle</td>\n", " <td>210112</td>\n", " <td>0</td>\n", @@ -1980,7 +1980,7 @@ " <tr>\n", " <th>72DAt9Lbpy9EUS29OzQLob</th>\n", " <td>Best of Hardstyle 2020</td>\n", - " <td>This song does not have a name</td>\n", + " <td>This song doesn't have a name</td>\n", " <td>hardstyle</td>\n", " <td>234823</td>\n", " <td>8</td>\n", @@ -1999,7 +1999,7 @@ " <tr>\n", " <th>6HXgExFVuE1c3cq9QjFCcU</th>\n", " <td>Euphoric Hardstyle</td>\n", - " <td>This song does not have a name</td>\n", + " <td>This song doesn't have a name</td>\n", " <td>hardstyle</td>\n", " <td>323200</td>\n", " <td>6</td>\n", @@ -2018,7 +2018,7 @@ " <tr>\n", " <th>6MAAMZImxcvYhRnxDLTufD</th>\n", " <td>Best of Hardstyle 2020</td>\n", - " <td>This song does not have a name</td>\n", + " <td>This song doesn't have a name</td>\n", " <td>hardstyle</td>\n", " <td>162161</td>\n", " <td>9</td>\n", @@ -2054,66 +2054,66 @@ "6HXgExFVuE1c3cq9QjFCcU Euphoric Hardstyle \n", "6MAAMZImxcvYhRnxDLTufD Best of Hardstyle 2020 \n", "\n", - " song_name genre \\\n", - "id \n", - "5LzAV6KfjN8VhWCedeygfY This song does not have a name techhouse \n", - 
"3TsCb6ueD678XBJDiRrvhr This song does not have a name techhouse \n", - "6Y0Fy2buEis7bEOlG0QET1 This song does not have a name techhouse \n", - "4EJI2XGViSQp6WscLKgYDD This song does not have a name techhouse \n", - "4x6VzOQTLIrkkCWcDPh5Y0 This song does not have a name techhouse \n", - "... ... ... \n", - "46bXU7Sgj7104ZoXxzz9tM This song does not have a name hardstyle \n", - "0he2ViGMUO3ajKTxLOfWVT This song does not have a name hardstyle \n", - "72DAt9Lbpy9EUS29OzQLob This song does not have a name hardstyle \n", - "6HXgExFVuE1c3cq9QjFCcU This song does not have a name hardstyle \n", - "6MAAMZImxcvYhRnxDLTufD This song does not have a name hardstyle \n", + " song_name genre duration_ms \\\n", + "id \n", + "5LzAV6KfjN8VhWCedeygfY This song doesn't have a name techhouse 197499 \n", + "3TsCb6ueD678XBJDiRrvhr This song doesn't have a name techhouse 206000 \n", + "6Y0Fy2buEis7bEOlG0QET1 This song doesn't have a name techhouse 199839 \n", + "4EJI2XGViSQp6WscLKgYDD This song doesn't have a name techhouse 173861 \n", + "4x6VzOQTLIrkkCWcDPh5Y0 This song doesn't have a name techhouse 394960 \n", + "... ... ... ... \n", + "46bXU7Sgj7104ZoXxzz9tM This song doesn't have a name hardstyle 269208 \n", + "0he2ViGMUO3ajKTxLOfWVT This song doesn't have a name hardstyle 210112 \n", + "72DAt9Lbpy9EUS29OzQLob This song doesn't have a name hardstyle 234823 \n", + "6HXgExFVuE1c3cq9QjFCcU This song doesn't have a name hardstyle 323200 \n", + "6MAAMZImxcvYhRnxDLTufD This song doesn't have a name hardstyle 162161 \n", "\n", - " duration_ms key mode time_signature tempo \\\n", - "id \n", - "5LzAV6KfjN8VhWCedeygfY 197499 7 1 4 127.997 \n", - "3TsCb6ueD678XBJDiRrvhr 206000 10 1 4 124.994 \n", - "6Y0Fy2buEis7bEOlG0QET1 199839 4 0 4 124.006 \n", - "4EJI2XGViSQp6WscLKgYDD 173861 8 1 4 125.031 \n", - "4x6VzOQTLIrkkCWcDPh5Y0 394960 8 0 4 127.029 \n", - "... ... ... ... ... ... 
\n", - "46bXU7Sgj7104ZoXxzz9tM 269208 4 1 4 150.013 \n", - "0he2ViGMUO3ajKTxLOfWVT 210112 0 0 4 149.928 \n", - "72DAt9Lbpy9EUS29OzQLob 234823 8 1 4 154.935 \n", - "6HXgExFVuE1c3cq9QjFCcU 323200 6 0 4 150.042 \n", - "6MAAMZImxcvYhRnxDLTufD 162161 9 1 4 155.047 \n", + " key mode time_signature tempo acousticness \\\n", + "id \n", + "5LzAV6KfjN8VhWCedeygfY 7 1 4 127.997 0.000957 \n", + "3TsCb6ueD678XBJDiRrvhr 10 1 4 124.994 0.062300 \n", + "6Y0Fy2buEis7bEOlG0QET1 4 0 4 124.006 0.019100 \n", + "4EJI2XGViSQp6WscLKgYDD 8 1 4 125.031 0.053000 \n", + "4x6VzOQTLIrkkCWcDPh5Y0 8 0 4 127.029 0.000301 \n", + "... ... ... ... ... ... \n", + "46bXU7Sgj7104ZoXxzz9tM 4 1 4 150.013 0.031500 \n", + "0he2ViGMUO3ajKTxLOfWVT 0 0 4 149.928 0.022500 \n", + "72DAt9Lbpy9EUS29OzQLob 8 1 4 154.935 0.026000 \n", + "6HXgExFVuE1c3cq9QjFCcU 6 0 4 150.042 0.000551 \n", + "6MAAMZImxcvYhRnxDLTufD 9 1 4 155.047 0.001890 \n", "\n", - " acousticness danceability energy instrumentalness \\\n", - "id \n", - "5LzAV6KfjN8VhWCedeygfY 0.000957 0.806 0.950 0.920000 \n", - "3TsCb6ueD678XBJDiRrvhr 0.062300 0.729 0.978 0.908000 \n", - "6Y0Fy2buEis7bEOlG0QET1 0.019100 0.724 0.792 0.812000 \n", - "4EJI2XGViSQp6WscLKgYDD 0.053000 0.700 0.898 0.418000 \n", - "4x6VzOQTLIrkkCWcDPh5Y0 0.000301 0.803 0.919 0.926000 \n", - "... ... ... ... ... \n", - "46bXU7Sgj7104ZoXxzz9tM 0.031500 0.528 0.693 0.000345 \n", - "0he2ViGMUO3ajKTxLOfWVT 0.022500 0.517 0.768 0.000018 \n", - "72DAt9Lbpy9EUS29OzQLob 0.026000 0.361 0.821 0.000242 \n", - "6HXgExFVuE1c3cq9QjFCcU 0.000551 0.477 0.921 0.029600 \n", - "6MAAMZImxcvYhRnxDLTufD 0.001890 0.529 0.945 0.000055 \n", + " danceability energy instrumentalness liveness \\\n", + "id \n", + "5LzAV6KfjN8VhWCedeygfY 0.806 0.950 0.920000 0.1130 \n", + "3TsCb6ueD678XBJDiRrvhr 0.729 0.978 0.908000 0.0353 \n", + "6Y0Fy2buEis7bEOlG0QET1 0.724 0.792 0.812000 0.1080 \n", + "4EJI2XGViSQp6WscLKgYDD 0.700 0.898 0.418000 0.5740 \n", + "4x6VzOQTLIrkkCWcDPh5Y0 0.803 0.919 0.926000 0.1020 \n", + "... ... ... 
... ... \n", + "46bXU7Sgj7104ZoXxzz9tM 0.528 0.693 0.000345 0.1210 \n", + "0he2ViGMUO3ajKTxLOfWVT 0.517 0.768 0.000018 0.2050 \n", + "72DAt9Lbpy9EUS29OzQLob 0.361 0.821 0.000242 0.3850 \n", + "6HXgExFVuE1c3cq9QjFCcU 0.477 0.921 0.029600 0.0575 \n", + "6MAAMZImxcvYhRnxDLTufD 0.529 0.945 0.000055 0.4140 \n", "\n", - " liveness loudness speechiness valence \n", - "id \n", - "5LzAV6KfjN8VhWCedeygfY 0.1130 -6.782 0.0811 0.580 \n", - "3TsCb6ueD678XBJDiRrvhr 0.0353 -6.645 0.0420 0.778 \n", - "6Y0Fy2buEis7bEOlG0QET1 0.1080 -8.555 0.0405 0.346 \n", - "4EJI2XGViSQp6WscLKgYDD 0.5740 -6.099 0.2570 0.791 \n", - "4x6VzOQTLIrkkCWcDPh5Y0 0.1020 -8.667 0.0702 0.754 \n", - "... ... ... ... ... \n", - "46bXU7Sgj7104ZoXxzz9tM 0.1210 -5.148 0.0304 0.394 \n", - "0he2ViGMUO3ajKTxLOfWVT 0.2050 -7.922 0.0479 0.383 \n", - "72DAt9Lbpy9EUS29OzQLob 0.3850 -3.102 0.0505 0.124 \n", - "6HXgExFVuE1c3cq9QjFCcU 0.0575 -4.777 0.0392 0.488 \n", - "6MAAMZImxcvYhRnxDLTufD 0.4140 -5.862 0.0615 0.134 \n", + " loudness speechiness valence \n", + "id \n", + "5LzAV6KfjN8VhWCedeygfY -6.782 0.0811 0.580 \n", + "3TsCb6ueD678XBJDiRrvhr -6.645 0.0420 0.778 \n", + "6Y0Fy2buEis7bEOlG0QET1 -8.555 0.0405 0.346 \n", + "4EJI2XGViSQp6WscLKgYDD -6.099 0.2570 0.791 \n", + "4x6VzOQTLIrkkCWcDPh5Y0 -8.667 0.0702 0.754 \n", + "... ... ... ... 
\n", + "46bXU7Sgj7104ZoXxzz9tM -5.148 0.0304 0.394 \n", + "0he2ViGMUO3ajKTxLOfWVT -7.922 0.0479 0.383 \n", + "72DAt9Lbpy9EUS29OzQLob -3.102 0.0505 0.124 \n", + "6HXgExFVuE1c3cq9QjFCcU -4.777 0.0392 0.488 \n", + "6MAAMZImxcvYhRnxDLTufD -5.862 0.0615 0.134 \n", "\n", "[17529 rows x 16 columns]" ] }, - "execution_count": 17, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -2123,212 +2123,6 @@ "df.dropna()" ] }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>title</th>\n", - " <th>song_name</th>\n", - " <th>genre</th>\n", - " <th>duration_ms</th>\n", - " <th>key</th>\n", - " <th>mode</th>\n", - " <th>time_signature</th>\n", - " <th>tempo</th>\n", - " <th>acousticness</th>\n", - " <th>danceability</th>\n", - " <th>energy</th>\n", - " <th>instrumentalness</th>\n", - " <th>liveness</th>\n", - " <th>loudness</th>\n", - " <th>speechiness</th>\n", - " <th>valence</th>\n", - " </tr>\n", - " <tr>\n", - " <th>id</th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>7pgJBLVz5VmnL7uGHmRj6p</th>\n", - " <td>NaN</td>\n", - " <td>Pathology</td>\n", - " <td>Dark Trap</td>\n", - " <td>224427</td>\n", - " <td>8</td>\n", - " <td>1</td>\n", - " <td>4</td>\n", - " 
<td>115.080</td>\n", - " <td>0.4010</td>\n", - " <td>0.719</td>\n", - " <td>0.493</td>\n", - " <td>0.000000</td>\n", - " <td>0.1180</td>\n", - " <td>-7.230</td>\n", - " <td>0.0794</td>\n", - " <td>0.1240</td>\n", - " </tr>\n", - " <tr>\n", - " <th>0vSWgAlfpye0WCGeNmuNhy</th>\n", - " <td>NaN</td>\n", - " <td>Symbiote</td>\n", - " <td>Dark Trap</td>\n", - " <td>98821</td>\n", - " <td>5</td>\n", - " <td>1</td>\n", - " <td>4</td>\n", - " <td>218.050</td>\n", - " <td>0.0138</td>\n", - " <td>0.850</td>\n", - " <td>0.893</td>\n", - " <td>0.000004</td>\n", - " <td>0.3720</td>\n", - " <td>-4.783</td>\n", - " <td>0.0623</td>\n", - " <td>0.0391</td>\n", - " </tr>\n", - " <tr>\n", - " <th>7EL7ifncK2PWFYThJjzR25</th>\n", - " <td>NaN</td>\n", - " <td>BRAINFOOD</td>\n", - " <td>Dark Trap</td>\n", - " <td>101172</td>\n", - " <td>8</td>\n", - " <td>1</td>\n", - " <td>4</td>\n", - " <td>189.938</td>\n", - " <td>0.1870</td>\n", - " <td>0.864</td>\n", - " <td>0.365</td>\n", - " <td>0.000000</td>\n", - " <td>0.1160</td>\n", - " <td>-10.219</td>\n", - " <td>0.0655</td>\n", - " <td>0.0478</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1umsRbM7L4ju7rn9aU8Ju6</th>\n", - " <td>NaN</td>\n", - " <td>Sacrifice</td>\n", - " <td>Dark Trap</td>\n", - " <td>96062</td>\n", - " <td>10</td>\n", - " <td>0</td>\n", - " <td>4</td>\n", - " <td>139.990</td>\n", - " <td>0.1450</td>\n", - " <td>0.767</td>\n", - " <td>0.576</td>\n", - " <td>0.000003</td>\n", - " <td>0.0968</td>\n", - " <td>-9.683</td>\n", - " <td>0.2560</td>\n", - " <td>0.1870</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4SKqOHKYU5pgHr5UiVKiQN</th>\n", - " <td>NaN</td>\n", - " <td>Backpack</td>\n", - " <td>Dark Trap</td>\n", - " <td>135079</td>\n", - " <td>5</td>\n", - " <td>1</td>\n", - " <td>4</td>\n", - " <td>128.014</td>\n", - " <td>0.0077</td>\n", - " <td>0.765</td>\n", - " <td>0.726</td>\n", - " <td>0.000000</td>\n", - " <td>0.6190</td>\n", - " <td>-5.580</td>\n", - " <td>0.1910</td>\n", - " <td>0.2700</td>\n", - " </tr>\n", - " 
</tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " title song_name genre duration_ms key mode \\\n", - "id \n", - "7pgJBLVz5VmnL7uGHmRj6p NaN Pathology Dark Trap 224427 8 1 \n", - "0vSWgAlfpye0WCGeNmuNhy NaN Symbiote Dark Trap 98821 5 1 \n", - "7EL7ifncK2PWFYThJjzR25 NaN BRAINFOOD Dark Trap 101172 8 1 \n", - "1umsRbM7L4ju7rn9aU8Ju6 NaN Sacrifice Dark Trap 96062 10 0 \n", - "4SKqOHKYU5pgHr5UiVKiQN NaN Backpack Dark Trap 135079 5 1 \n", - "\n", - " time_signature tempo acousticness danceability \\\n", - "id \n", - "7pgJBLVz5VmnL7uGHmRj6p 4 115.080 0.4010 0.719 \n", - "0vSWgAlfpye0WCGeNmuNhy 4 218.050 0.0138 0.850 \n", - "7EL7ifncK2PWFYThJjzR25 4 189.938 0.1870 0.864 \n", - "1umsRbM7L4ju7rn9aU8Ju6 4 139.990 0.1450 0.767 \n", - "4SKqOHKYU5pgHr5UiVKiQN 4 128.014 0.0077 0.765 \n", - "\n", - " energy instrumentalness liveness loudness \\\n", - "id \n", - "7pgJBLVz5VmnL7uGHmRj6p 0.493 0.000000 0.1180 -7.230 \n", - "0vSWgAlfpye0WCGeNmuNhy 0.893 0.000004 0.3720 -4.783 \n", - "7EL7ifncK2PWFYThJjzR25 0.365 0.000000 0.1160 -10.219 \n", - "1umsRbM7L4ju7rn9aU8Ju6 0.576 0.000003 0.0968 -9.683 \n", - "4SKqOHKYU5pgHr5UiVKiQN 0.726 0.000000 0.6190 -5.580 \n", - "\n", - " speechiness valence \n", - "id \n", - "7pgJBLVz5VmnL7uGHmRj6p 0.0794 0.1240 \n", - "0vSWgAlfpye0WCGeNmuNhy 0.0623 0.0391 \n", - "7EL7ifncK2PWFYThJjzR25 0.0655 0.0478 \n", - "1umsRbM7L4ju7rn9aU8Ju6 0.2560 0.1870 \n", - "4SKqOHKYU5pgHr5UiVKiQN 0.1910 0.2700 " - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.head()" - ] - }, { "cell_type": "markdown", "metadata": { @@ -2347,7 +2141,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -2360,7 +2154,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -2381,7 +2175,7 @@ "Name: mode, Length: 35877, dtype: object" ] }, - "execution_count": 20, + 
"execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -2401,7 +2195,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 19, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -2428,13 +2222,13 @@ "Name: mode, Length: 35877, dtype: object" ] }, - "execution_count": 21, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df[\"mode\"].apply(lambda x: \"major\" if x == 1 else \"minor\")" + "df[\"mode\"].apply( lambda m: \"major\" if m == 1 else \"minor\" )" ] }, { @@ -2447,7 +2241,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -2634,7 +2428,7 @@ " <tr>\n", " <th>46bXU7Sgj7104ZoXxzz9tM</th>\n", " <td>Euphoric Hardstyle</td>\n", - " <td>This song does not have a name</td>\n", + " <td>This song doesn't have a name</td>\n", " <td>hardstyle</td>\n", " <td>269208</td>\n", " <td>4</td>\n", @@ -2654,7 +2448,7 @@ " <tr>\n", " <th>0he2ViGMUO3ajKTxLOfWVT</th>\n", " <td>Greatest Hardstyle Playlist</td>\n", - " <td>This song does not have a name</td>\n", + " <td>This song doesn't have a name</td>\n", " <td>hardstyle</td>\n", " <td>210112</td>\n", " <td>0</td>\n", @@ -2674,7 +2468,7 @@ " <tr>\n", " <th>72DAt9Lbpy9EUS29OzQLob</th>\n", " <td>Best of Hardstyle 2020</td>\n", - " <td>This song does not have a name</td>\n", + " <td>This song doesn't have a name</td>\n", " <td>hardstyle</td>\n", " <td>234823</td>\n", " <td>8</td>\n", @@ -2694,7 +2488,7 @@ " <tr>\n", " <th>6HXgExFVuE1c3cq9QjFCcU</th>\n", " <td>Euphoric Hardstyle</td>\n", - " <td>This song does not have a name</td>\n", + " <td>This song doesn't have a name</td>\n", " <td>hardstyle</td>\n", " <td>323200</td>\n", " <td>6</td>\n", @@ -2714,7 +2508,7 @@ " <tr>\n", " <th>6MAAMZImxcvYhRnxDLTufD</th>\n", " <td>Best of Hardstyle 2020</td>\n", - " <td>This song does not have a name</td>\n", + " <td>This song doesn't have a name</td>\n", " <td>hardstyle</td>\n", " 
<td>162161</td>\n", " <td>9</td>\n", @@ -2751,66 +2545,66 @@ "6HXgExFVuE1c3cq9QjFCcU Euphoric Hardstyle \n", "6MAAMZImxcvYhRnxDLTufD Best of Hardstyle 2020 \n", "\n", - " song_name genre \\\n", - "id \n", - "7pgJBLVz5VmnL7uGHmRj6p Pathology Dark Trap \n", - "0vSWgAlfpye0WCGeNmuNhy Symbiote Dark Trap \n", - "7EL7ifncK2PWFYThJjzR25 BRAINFOOD Dark Trap \n", - "1umsRbM7L4ju7rn9aU8Ju6 Sacrifice Dark Trap \n", - "4SKqOHKYU5pgHr5UiVKiQN Backpack Dark Trap \n", - "... ... ... \n", - "46bXU7Sgj7104ZoXxzz9tM This song does not have a name hardstyle \n", - "0he2ViGMUO3ajKTxLOfWVT This song does not have a name hardstyle \n", - "72DAt9Lbpy9EUS29OzQLob This song does not have a name hardstyle \n", - "6HXgExFVuE1c3cq9QjFCcU This song does not have a name hardstyle \n", - "6MAAMZImxcvYhRnxDLTufD This song does not have a name hardstyle \n", + " song_name genre duration_ms \\\n", + "id \n", + "7pgJBLVz5VmnL7uGHmRj6p Pathology Dark Trap 224427 \n", + "0vSWgAlfpye0WCGeNmuNhy Symbiote Dark Trap 98821 \n", + "7EL7ifncK2PWFYThJjzR25 BRAINFOOD Dark Trap 101172 \n", + "1umsRbM7L4ju7rn9aU8Ju6 Sacrifice Dark Trap 96062 \n", + "4SKqOHKYU5pgHr5UiVKiQN Backpack Dark Trap 135079 \n", + "... ... ... ... \n", + "46bXU7Sgj7104ZoXxzz9tM This song doesn't have a name hardstyle 269208 \n", + "0he2ViGMUO3ajKTxLOfWVT This song doesn't have a name hardstyle 210112 \n", + "72DAt9Lbpy9EUS29OzQLob This song doesn't have a name hardstyle 234823 \n", + "6HXgExFVuE1c3cq9QjFCcU This song doesn't have a name hardstyle 323200 \n", + "6MAAMZImxcvYhRnxDLTufD This song doesn't have a name hardstyle 162161 \n", "\n", - " duration_ms key mode time_signature tempo \\\n", - "id \n", - "7pgJBLVz5VmnL7uGHmRj6p 224427 8 1 4 115.080 \n", - "0vSWgAlfpye0WCGeNmuNhy 98821 5 1 4 218.050 \n", - "7EL7ifncK2PWFYThJjzR25 101172 8 1 4 189.938 \n", - "1umsRbM7L4ju7rn9aU8Ju6 96062 10 0 4 139.990 \n", - "4SKqOHKYU5pgHr5UiVKiQN 135079 5 1 4 128.014 \n", - "... ... ... ... ... ... 
\n", - "46bXU7Sgj7104ZoXxzz9tM 269208 4 1 4 150.013 \n", - "0he2ViGMUO3ajKTxLOfWVT 210112 0 0 4 149.928 \n", - "72DAt9Lbpy9EUS29OzQLob 234823 8 1 4 154.935 \n", - "6HXgExFVuE1c3cq9QjFCcU 323200 6 0 4 150.042 \n", - "6MAAMZImxcvYhRnxDLTufD 162161 9 1 4 155.047 \n", + " key mode time_signature tempo acousticness \\\n", + "id \n", + "7pgJBLVz5VmnL7uGHmRj6p 8 1 4 115.080 0.401000 \n", + "0vSWgAlfpye0WCGeNmuNhy 5 1 4 218.050 0.013800 \n", + "7EL7ifncK2PWFYThJjzR25 8 1 4 189.938 0.187000 \n", + "1umsRbM7L4ju7rn9aU8Ju6 10 0 4 139.990 0.145000 \n", + "4SKqOHKYU5pgHr5UiVKiQN 5 1 4 128.014 0.007700 \n", + "... ... ... ... ... ... \n", + "46bXU7Sgj7104ZoXxzz9tM 4 1 4 150.013 0.031500 \n", + "0he2ViGMUO3ajKTxLOfWVT 0 0 4 149.928 0.022500 \n", + "72DAt9Lbpy9EUS29OzQLob 8 1 4 154.935 0.026000 \n", + "6HXgExFVuE1c3cq9QjFCcU 6 0 4 150.042 0.000551 \n", + "6MAAMZImxcvYhRnxDLTufD 9 1 4 155.047 0.001890 \n", "\n", - " acousticness danceability energy instrumentalness \\\n", - "id \n", - "7pgJBLVz5VmnL7uGHmRj6p 0.401000 0.719 0.493 0.000000 \n", - "0vSWgAlfpye0WCGeNmuNhy 0.013800 0.850 0.893 0.000004 \n", - "7EL7ifncK2PWFYThJjzR25 0.187000 0.864 0.365 0.000000 \n", - "1umsRbM7L4ju7rn9aU8Ju6 0.145000 0.767 0.576 0.000003 \n", - "4SKqOHKYU5pgHr5UiVKiQN 0.007700 0.765 0.726 0.000000 \n", - "... ... ... ... ... \n", - "46bXU7Sgj7104ZoXxzz9tM 0.031500 0.528 0.693 0.000345 \n", - "0he2ViGMUO3ajKTxLOfWVT 0.022500 0.517 0.768 0.000018 \n", - "72DAt9Lbpy9EUS29OzQLob 0.026000 0.361 0.821 0.000242 \n", - "6HXgExFVuE1c3cq9QjFCcU 0.000551 0.477 0.921 0.029600 \n", - "6MAAMZImxcvYhRnxDLTufD 0.001890 0.529 0.945 0.000055 \n", + " danceability energy instrumentalness liveness \\\n", + "id \n", + "7pgJBLVz5VmnL7uGHmRj6p 0.719 0.493 0.000000 0.1180 \n", + "0vSWgAlfpye0WCGeNmuNhy 0.850 0.893 0.000004 0.3720 \n", + "7EL7ifncK2PWFYThJjzR25 0.864 0.365 0.000000 0.1160 \n", + "1umsRbM7L4ju7rn9aU8Ju6 0.767 0.576 0.000003 0.0968 \n", + "4SKqOHKYU5pgHr5UiVKiQN 0.765 0.726 0.000000 0.6190 \n", + "... ... ... 
... ... \n", + "46bXU7Sgj7104ZoXxzz9tM 0.528 0.693 0.000345 0.1210 \n", + "0he2ViGMUO3ajKTxLOfWVT 0.517 0.768 0.000018 0.2050 \n", + "72DAt9Lbpy9EUS29OzQLob 0.361 0.821 0.000242 0.3850 \n", + "6HXgExFVuE1c3cq9QjFCcU 0.477 0.921 0.029600 0.0575 \n", + "6MAAMZImxcvYhRnxDLTufD 0.529 0.945 0.000055 0.4140 \n", "\n", - " liveness loudness speechiness valence modified_mode \n", - "id \n", - "7pgJBLVz5VmnL7uGHmRj6p 0.1180 -7.230 0.0794 0.1240 major \n", - "0vSWgAlfpye0WCGeNmuNhy 0.3720 -4.783 0.0623 0.0391 major \n", - "7EL7ifncK2PWFYThJjzR25 0.1160 -10.219 0.0655 0.0478 major \n", - "1umsRbM7L4ju7rn9aU8Ju6 0.0968 -9.683 0.2560 0.1870 minor \n", - "4SKqOHKYU5pgHr5UiVKiQN 0.6190 -5.580 0.1910 0.2700 major \n", - "... ... ... ... ... ... \n", - "46bXU7Sgj7104ZoXxzz9tM 0.1210 -5.148 0.0304 0.3940 major \n", - "0he2ViGMUO3ajKTxLOfWVT 0.2050 -7.922 0.0479 0.3830 minor \n", - "72DAt9Lbpy9EUS29OzQLob 0.3850 -3.102 0.0505 0.1240 major \n", - "6HXgExFVuE1c3cq9QjFCcU 0.0575 -4.777 0.0392 0.4880 minor \n", - "6MAAMZImxcvYhRnxDLTufD 0.4140 -5.862 0.0615 0.1340 major \n", + " loudness speechiness valence modified_mode \n", + "id \n", + "7pgJBLVz5VmnL7uGHmRj6p -7.230 0.0794 0.1240 major \n", + "0vSWgAlfpye0WCGeNmuNhy -4.783 0.0623 0.0391 major \n", + "7EL7ifncK2PWFYThJjzR25 -10.219 0.0655 0.0478 major \n", + "1umsRbM7L4ju7rn9aU8Ju6 -9.683 0.2560 0.1870 minor \n", + "4SKqOHKYU5pgHr5UiVKiQN -5.580 0.1910 0.2700 major \n", + "... ... ... ... ... 
\n", + "46bXU7Sgj7104ZoXxzz9tM -5.148 0.0304 0.3940 major \n", + "0he2ViGMUO3ajKTxLOfWVT -7.922 0.0479 0.3830 minor \n", + "72DAt9Lbpy9EUS29OzQLob -3.102 0.0505 0.1240 major \n", + "6HXgExFVuE1c3cq9QjFCcU -4.777 0.0392 0.4880 minor \n", + "6MAAMZImxcvYhRnxDLTufD -5.862 0.0615 0.1340 major \n", "\n", "[35877 rows x 17 columns]" ] }, - "execution_count": 22, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -2829,7 +2623,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 21, "metadata": { "id": "ZoiyUleiyhMg" }, @@ -3156,7 +2950,7 @@ "[35877 rows x 17 columns]" ] }, - "execution_count": 23, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -3175,7 +2969,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -3281,7 +3075,7 @@ "[35877 rows x 2 columns]" ] }, - "execution_count": 24, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -3303,7 +3097,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 24, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -3312,6 +3106,41 @@ "id": "trRMgGMysdkb", "outputId": "d02098c3-7722-4505-c599-5897bb8ace19" }, + "outputs": [ + { + "data": { + "text/plain": [ + "<pandas.core.groupby.generic.DataFrameGroupBy object at 0x137459e40>" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[[\"genre\", \"duration_ms\"]].groupby(\"genre\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### What is the average duration for each genre ordered based on decreasing order of averages?\n", + "#### v1: using `df` (`pandas`) to answer the question" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, "outputs": [ { "data": 
{ @@ -3426,26 +3255,20 @@ "trap 225149.277731" ] }, - "execution_count": 27, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "# mean is our aggregation function\n", + "# it's not sorted yet\n", "df[[\"genre\", \"duration_ms\"]].groupby(\"genre\").mean()" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### What is the average duration for each genre ordered based on decreasing order of averages?\n", - "#### v1: using `df` (`pandas`) to answer the question" - ] - }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 28, "metadata": {}, "outputs": [ { @@ -3561,23 +3384,143 @@ "Trap Metal 145940.519467" ] }, - "execution_count": 29, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df[[\"genre\", \"duration_ms\"]].groupby('genre').mean() \\\n", - " .sort_values(by=\"duration_ms\", ascending=False)" + "# let's sort it\n", + "df[[\"genre\", \"duration_ms\"]].groupby(\"genre\").mean()\\\n", + " .sort_values(by=\"duration_ms\" ,ascending=False)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>duration_ms</th>\n", + " </tr>\n", + " <tr>\n", + " <th>genre</th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>Dark Trap</th>\n", + " <td>3590</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Emo</th>\n", + " <td>1622</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Hiphop</th>\n", + " 
<td>3027</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Pop</th>\n", + " <td>453</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Rap</th>\n", + " <td>1546</td>\n", + " </tr>\n", + " <tr>\n", + " <th>RnB</th>\n", + " <td>1905</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Trap Metal</th>\n", + " <td>1875</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Underground Rap</th>\n", + " <td>4330</td>\n", + " </tr>\n", + " <tr>\n", + " <th>dnb</th>\n", + " <td>2507</td>\n", + " </tr>\n", + " <tr>\n", + " <th>hardstyle</th>\n", + " <td>2351</td>\n", + " </tr>\n", + " <tr>\n", + " <th>psytrance</th>\n", + " <td>2650</td>\n", + " </tr>\n", + " <tr>\n", + " <th>techhouse</th>\n", + " <td>2209</td>\n", + " </tr>\n", + " <tr>\n", + " <th>techno</th>\n", + " <td>2646</td>\n", + " </tr>\n", + " <tr>\n", + " <th>trance</th>\n", + " <td>2804</td>\n", + " </tr>\n", + " <tr>\n", + " <th>trap</th>\n", + " <td>2362</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " duration_ms\n", + "genre \n", + "Dark Trap 3590\n", + "Emo 1622\n", + "Hiphop 3027\n", + "Pop 453\n", + "Rap 1546\n", + "RnB 1905\n", + "Trap Metal 1875\n", + "Underground Rap 4330\n", + "dnb 2507\n", + "hardstyle 2351\n", + "psytrance 2650\n", + "techhouse 2209\n", + "techno 2646\n", + "trance 2804\n", + "trap 2362" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "df[[\"genre\", \"duration_ms\"]]" + "# another aggregation method is .count()\n", + "df[[\"genre\", \"duration_ms\"]].groupby(\"genre\").count()" ] }, { @@ -3589,7 +3532,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 27, "metadata": {}, "outputs": [ { @@ -3613,7 +3556,7 @@ "Name: genre, dtype: int64" ] }, - "execution_count": 30, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -3632,7 +3575,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 33, "metadata": { "colab": { "base_uri": 
"https://localhost:8080/", @@ -3755,7 +3698,7 @@ "Trap Metal 145940.519467" ] }, - "execution_count": 32, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -3763,7 +3706,7 @@ "source": [ "# SQL equivalent query of the above Pandas query\n", "avg_duration_per_genre = qry(\"\"\"\n", - " SELECT genre, AVG(duration_ms) duration_avg\n", + " SELECT genre, AVG(duration_ms) AS duration_avg\n", " FROM spotify\n", " GROUP BY genre\n", " ORDER BY duration_avg DESC\n", @@ -3786,7 +3729,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 34, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -3878,7 +3821,7 @@ " 5 0.220177" ] }, - "execution_count": 33, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -3886,13 +3829,13 @@ "source": [ "# use a list to indicate all the columns you want to groupby \n", "groupby_cols = ['mode', 'time_signature']\n", - "cols_to_keep = [\"mode\", \"time_signature\", \"speechiness\"]\n", - "df[cols_to_keep].groupby(groupby_cols).mean()" + "df_cols = ['mode', 'time_signature', 'speechiness']\n", + "df[df_cols].groupby(groupby_cols).mean()\n" ] }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 35, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -3993,7 +3936,7 @@ "7 1 5 0.220177" ] }, - "execution_count": 34, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -4001,15 +3944,15 @@ "source": [ "# SQL equivalent query of the above Pandas query\n", "qry(\"\"\"\n", - " SELECT mode, time_signature, AVG(speechiness) as avg_speechiness\n", + " SELECT mode, time_signature, AVG(speechiness) AS avg_speechiness\n", " FROM spotify\n", " GROUP BY mode, time_signature\n", - "\"\"\")\n" + " \"\"\")" ] }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 36, "metadata": {}, "outputs": [], "source": [