diff --git a/s24/Louis_Lecture_Notes/39_Final_Exam_Practice/final_exam_practice.ipynb b/s24/Louis_Lecture_Notes/39_Final_Exam_Practice/final_exam_practice.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..73c8f499c673fce88ab5b2b167ba2d948822b1ad --- /dev/null +++ b/s24/Louis_Lecture_Notes/39_Final_Exam_Practice/final_exam_practice.ipynb @@ -0,0 +1,1718 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "227ea98e-6427-4bbf-bc65-7ac92d137b6c", + "metadata": {}, + "source": [ + "## Fall 2023" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "9604c67f-4d2d-46da-b9f5-af6ef234a036", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "import pandas as pd\n", + "\n", + "\n", + "employee_data = {\n", + "\"Employee_ID\": [101, 102, 103, 104, 105],\n", + "\"Name\": [\"Alice\", \"Bob\", \"Charlie\", \"David\", \"Emma\"],\n", + "\"Department\": [\"HR\", \"Engineering\", \"Finance\", \"Engineering\", \"HR\"],\n", + "\"Salary\": [60000, 80000, 75000, 90000, 65000],\n", + "\"Years_Worked\": [2, 5, 3, 7, 1]\n", + "}\n", + "employees = pd.DataFrame(employee_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "56ac506c-3796-4f6f-bf72-9b4e5927b257", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "90000\n", + "90000\n" + ] + } + ], + "source": [ + "# Q1\n", + "print(employees[\"Salary\"].max())\n", + "\n", + "print(employees.loc[employees['Salary'].idxmax(),\"Salary\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "5d5ec5ac-8b9a-4f8d-a1b4-2eb4fd2f15f5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 HR\n", + "1 Engineering\n", + "2 Finance\n", + "3 Engineering\n", + "4 HR\n", + "Name: Department, dtype: object\n", + "0 Business\n", + "1 Tech\n", + "2 Business\n", + "3 Tech\n", + "4 Business\n", + "Name: Department, dtype: object\n" + ] + } + ], + "source": [ + "# Q2\n", + 
"print(employees['Department'])\n", + "employees[\"Department\"] = employees[\"Department\"].apply(lambda val:\n", + "\"Tech\" if val in [\"Engineering\"] else \"Business\")\n", + "print(employees['Department'])" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "383ca6cd-878b-4be8-928e-99153484aae7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Movie_ID</th>\n", + " <th>Title</th>\n", + " <th>Genre</th>\n", + " <th>Rating</th>\n", + " <th>Year</th>\n", + " <th>Director</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1</td>\n", + " <td>The Matrix</td>\n", + " <td>Action</td>\n", + " <td>4.5</td>\n", + " <td>1999</td>\n", + " <td>Wachowskis</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>2</td>\n", + " <td>Inception</td>\n", + " <td>Sci-Fi</td>\n", + " <td>4.8</td>\n", + " <td>2010</td>\n", + " <td>Christopher Nolan</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>3</td>\n", + " <td>Pulp Fiction</td>\n", + " <td>Crime</td>\n", + " <td>4.2</td>\n", + " <td>1994</td>\n", + " <td>Quentin Tarantino</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>4</td>\n", + " <td>The Shawshank Redemption</td>\n", + " <td>Drama</td>\n", + " <td>4.9</td>\n", + " <td>1994</td>\n", + " <td>Frank Darabont</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>5</td>\n", + " <td>The Dark Knight</td>\n", + " <td>Action</td>\n", + " <td>4.7</td>\n", + " <td>2008</td>\n", + " <td>Christopher Nolan</td>\n", + " </tr>\n", + " </tbody>\n", + 
"</table>\n", + "</div>" + ], + "text/plain": [ + " Movie_ID Title Genre Rating Year Director\n", + "0 1 The Matrix Action 4.5 1999 Wachowskis\n", + "1 2 Inception Sci-Fi 4.8 2010 Christopher Nolan\n", + "2 3 Pulp Fiction Crime 4.2 1994 Quentin Tarantino\n", + "3 4 The Shawshank Redemption Drama 4.9 1994 Frank Darabont\n", + "4 5 The Dark Knight Action 4.7 2008 Christopher Nolan" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "movies = pd.DataFrame({\n", + "\"Movie_ID\": [1, 2, 3, 4, 5],\n", + "\"Title\": [\"The Matrix\", \"Inception\", \"Pulp Fiction\",\n", + "\"The Shawshank Redemption\", \"The Dark Knight\"],\n", + "\"Genre\": [\"Action\", \"Sci-Fi\", \"Crime\", \"Drama\", \"Action\"],\n", + "\"Rating\": [4.5, 4.8, 4.2, 4.9, 4.7],\n", + "\"Year\": [1999, 2010, 1994, 1994, 2008],\n", + "\"Director\": [\"Wachowskis\", \"Christopher Nolan\", \"Quentin Tarantino\",\n", + "\"Frank Darabont\", \"Christopher Nolan\"]\n", + "})\n", + "movies" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "a7db0595-5a8a-48e7-bfe7-75ebac4b20e3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pandas.core.series.Series" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Q3\n", + "\n", + "type(movies[\"Genre\"].value_counts())" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "4b47b4cb-d397-48f4-8fae-05429c50aa73", + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "'Pulp Fiction'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[14], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Q4\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m 
movies\u001b[38;5;241m.\u001b[39mloc[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPulp Fiction\u001b[39m\u001b[38;5;124m\"\u001b[39m][\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDirector\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n", + "File \u001b[0;32m~/anaconda3/lib/python3.11/site-packages/pandas/core/indexing.py:1153\u001b[0m, in \u001b[0;36m_LocationIndexer.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1150\u001b[0m axis \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maxis \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 1152\u001b[0m maybe_callable \u001b[38;5;241m=\u001b[39m com\u001b[38;5;241m.\u001b[39mapply_if_callable(key, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobj)\n\u001b[0;32m-> 1153\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_getitem_axis(maybe_callable, axis\u001b[38;5;241m=\u001b[39maxis)\n", + "File \u001b[0;32m~/anaconda3/lib/python3.11/site-packages/pandas/core/indexing.py:1393\u001b[0m, in \u001b[0;36m_LocIndexer._getitem_axis\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m 1391\u001b[0m \u001b[38;5;66;03m# fall thru to straight lookup\u001b[39;00m\n\u001b[1;32m 1392\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validate_key(key, axis)\n\u001b[0;32m-> 1393\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_label(key, axis\u001b[38;5;241m=\u001b[39maxis)\n", + "File \u001b[0;32m~/anaconda3/lib/python3.11/site-packages/pandas/core/indexing.py:1343\u001b[0m, in \u001b[0;36m_LocIndexer._get_label\u001b[0;34m(self, label, axis)\u001b[0m\n\u001b[1;32m 1341\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_get_label\u001b[39m(\u001b[38;5;28mself\u001b[39m, label, axis: AxisInt):\n\u001b[1;32m 1342\u001b[0m \u001b[38;5;66;03m# GH#5567 this will fail if the label is not present in the 
axis.\u001b[39;00m\n\u001b[0;32m-> 1343\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobj\u001b[38;5;241m.\u001b[39mxs(label, axis\u001b[38;5;241m=\u001b[39maxis)\n", + "File \u001b[0;32m~/anaconda3/lib/python3.11/site-packages/pandas/core/generic.py:4236\u001b[0m, in \u001b[0;36mNDFrame.xs\u001b[0;34m(self, key, axis, level, drop_level)\u001b[0m\n\u001b[1;32m 4234\u001b[0m new_index \u001b[38;5;241m=\u001b[39m index[loc]\n\u001b[1;32m 4235\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 4236\u001b[0m loc \u001b[38;5;241m=\u001b[39m index\u001b[38;5;241m.\u001b[39mget_loc(key)\n\u001b[1;32m 4238\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(loc, np\u001b[38;5;241m.\u001b[39mndarray):\n\u001b[1;32m 4239\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m loc\u001b[38;5;241m.\u001b[39mdtype \u001b[38;5;241m==\u001b[39m np\u001b[38;5;241m.\u001b[39mbool_:\n", + "File \u001b[0;32m~/anaconda3/lib/python3.11/site-packages/pandas/core/indexes/range.py:418\u001b[0m, in \u001b[0;36mRangeIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 416\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n\u001b[1;32m 417\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(key, Hashable):\n\u001b[0;32m--> 418\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key)\n\u001b[1;32m 419\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_indexing_error(key)\n\u001b[1;32m 420\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key)\n", + "\u001b[0;31mKeyError\u001b[0m: 'Pulp Fiction'" + ] + } + ], + "source": [ + "# Q4\n", + "movies.loc[\"Pulp Fiction\"][\"Director\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": 
"d1da11ec-3615-42fd-b7da-f2f4d69709d3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>year</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1994.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>2004.0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " year\n", + "0 1994.0\n", + "1 NaN\n", + "2 2004.0" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame([\n", + "{\"year\":1994.0},\n", + "{\"year\":None},\n", + "{\"year\":2004.0}\n", + "])\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "cdd11f49-929a-4966-a82d-679691c2e95b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>year</th>\n", + " <th>decade</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1994.0</td>\n", + " <td>1990.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>NaN</td>\n", + " 
<td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>2004.0</td>\n", + " <td>2000.0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " year decade\n", + "0 1994.0 1990.0\n", + "1 NaN NaN\n", + "2 2004.0 2000.0" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Q5\n", + "df[\"decade\"] = df[\"year\"] // 10 * 10\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "9088d5ba-e391-4c75-abf9-2cc150884368", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1994\n", + "2 2004\n", + "Name: year, dtype: int64" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Q6\n", + "df[\"year\"].dropna().astype(int)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "7a178e67-3971-4d5d-b88a-4e1710c8882a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>year</th>\n", + " <th>decade</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1994.0</td>\n", + " <td>1990.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>2004.0</td>\n", + " <td>2000.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>2008.0</td>\n", + " <td>2000.0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " year decade\n", + "0 1994.0 
# Q9
import requests


def fetch_data(api_url):
    """Report whether a GET request to `api_url` came back with status 200.

    Returns:
        "Success"  if the response status code is 200,
        "Not 200"  for any other status code,
        None       if the request could not be completed at all.
    """
    try:
        response = requests.get(api_url)
    except requests.RequestException:
        # requests.get() does not raise for 4xx/5xx responses; what it can
        # raise are transport failures (connection errors, timeouts, ...).
        # RequestException is the common base class of all of them and of
        # HTTPError, so the previously handled case is still covered.
        return None
    if response.status_code == 200:
        return "Success"
    return "Not 200"


result = fetch_data("https://example.com/abc123")
print(result)


# Q10
def save_html(url):
    """Download `url` and store the response body in web_page.html (UTF-8).

    Prints a warning and writes nothing when the page cannot be fetched,
    either because the server answered with an error status
    (raise_for_status) or because the request itself failed.
    """
    try:
        response = requests.get(url)
        response.raise_for_status()
    except requests.RequestException:
        print("WARNING! Could not fetch page")
        return
    # The context manager closes the file even if write() raises.
    with open("web_page.html", "w", encoding="utf-8") as file:
        file.write(response.text)  # SOLUTION LINE
<th>industry</th>\n", + " <th>headquarters</th>\n", + " <th>established</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>2</td>\n", + " <td>Toyota</td>\n", + " <td>Automotive</td>\n", + " <td>Japan</td>\n", + " <td>1937</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>6</td>\n", + " <td>Volkswagen</td>\n", + " <td>Automotive</td>\n", + " <td>Germany</td>\n", + " <td>1937</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " index name industry headquarters established\n", + "0 2 Toyota Automotive Japan 1937\n", + "1 6 Volkswagen Automotive Germany 1937" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Q13\n", + "\n", + "import sqlite3\n", + "import pandas as pd\n", + "\n", + "# companies=pd.DataFrame({\n", + "# 'name':['Walmart','Samsung Electronics','Toyota','Amazon','Mercedes Benz','Apple','Volkswagen'],\n", + "# 'industry':['Retail','Electronics','Automotive','Retail','Automotive','Electronics','Automotive'],\n", + "# 'headquarters':['USA','South Korea','Japan','USA','Germany','USA','Germany'],\n", + "# 'established':[1962,1969,1937,1994,1926,1976,1937]\n", + "# })\n", + "conn = sqlite3.connect(\"companies.db\")\n", + "# companies.to_sql(\"companies\",conn)\n", + "\n", + "\n", + "\n", + "companies = pd.read_sql(\"SELECT * FROM companies\", conn)\n", + "\n", + "\n", + "query = \"SELECT * FROM companies WHERE established = 1937\" ### SOLUTION\n", + "pd.read_sql(query, conn)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "bd78ea9b-55db-4e55-a548-8b7b3ff80330", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: 
right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>index</th>\n", + " <th>name</th>\n", + " <th>industry</th>\n", + " <th>headquarters</th>\n", + " <th>established</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>3</td>\n", + " <td>Amazon</td>\n", + " <td>Retail</td>\n", + " <td>USA</td>\n", + " <td>1994</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>5</td>\n", + " <td>Apple</td>\n", + " <td>Electronics</td>\n", + " <td>USA</td>\n", + " <td>1976</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>0</td>\n", + " <td>Walmart</td>\n", + " <td>Retail</td>\n", + " <td>USA</td>\n", + " <td>1962</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " index name industry headquarters established\n", + "0 3 Amazon Retail USA 1994\n", + "1 5 Apple Electronics USA 1976\n", + "2 0 Walmart Retail USA 1962" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Q14\n", + "pd.read_sql(\"\"\"\n", + "SELECT * FROM companies WHERE headquarters = \"USA\" ORDER BY name ASC\n", + "\"\"\",conn)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "f34d5232-d7d2-4b67-9a8c-f58374c2e4fa", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>headquarters</th>\n", + " <th>num_companies</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " 
<th>0</th>\n", + " <td>USA</td>\n", + " <td>3</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " headquarters num_companies\n", + "0 USA 3" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Q15\n", + "query = \"\"\"\n", + "SELECT headquarters, COUNT(*) AS num_companies\n", + "FROM companies\n", + "GROUP BY headquarters\n", + "HAVING num_companies > 2\n", + "ORDER BY num_companies DESC\n", + "\"\"\"\n", + "output = pd.read_sql(query, conn)\n", + "output" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "ade68c8c-b236-4017-903f-056fcd81f88b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>index</th>\n", + " <th>name</th>\n", + " <th>industry</th>\n", + " <th>headquarters</th>\n", + " <th>established</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0</td>\n", + " <td>Walmart</td>\n", + " <td>Retail</td>\n", + " <td>USA</td>\n", + " <td>1962</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " index name industry headquarters established\n", + "0 0 Walmart Retail USA 1962" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Q16\n", + "pd.read_sql(\"\"\"\n", + "SELECT * FROM companies WHERE headquarters = \"USA\" AND industry = \"Retail\" ORDER BY established ASC Limit 1\n", + "\"\"\",conn)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": 
"96ddf943-dec1-426a-b480-1d280dfdfd59", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " established num_companies\n", + "0 1937 2\n" + ] + }, + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Q17\n", + "question_df = pd.read_sql(\"\"\"\n", + "SELECT established, COUNT(*) AS num_companies\n", + "FROM companies\n", + "GROUP BY established\n", + "HAVING num_companies > 1\n", + "\"\"\", conn)\n", + "print(question_df)\n", + "len(question_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "4d9f7b22-bff3-433e-9c33-115fb507dfe3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>index</th>\n", + " <th>name</th>\n", + " <th>industry</th>\n", + " <th>headquarters</th>\n", + " <th>established</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1</td>\n", + " <td>Samsung Electronics</td>\n", + " <td>Electronics</td>\n", + " <td>South Korea</td>\n", + " <td>1969</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>2</td>\n", + " <td>Toyota</td>\n", + " <td>Automotive</td>\n", + " <td>Japan</td>\n", + " <td>1937</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>4</td>\n", + " <td>Mercedes Benz</td>\n", + " <td>Automotive</td>\n", + " <td>Germany</td>\n", + " <td>1926</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>6</td>\n", + " <td>Volkswagen</td>\n", + " <td>Automotive</td>\n", + " 
<td>Germany</td>\n", + " <td>1937</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " index name industry headquarters established\n", + "0 1 Samsung Electronics Electronics South Korea 1969\n", + "1 2 Toyota Automotive Japan 1937\n", + "2 4 Mercedes Benz Automotive Germany 1926\n", + "3 6 Volkswagen Automotive Germany 1937" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Q18 (the != is what is being tested)\n", + "non_usa = pd.read_sql(\"\"\"\n", + "SELECT *\n", + "FROM companies\n", + "WHERE headquarters != \"USA\"\n", + "\"\"\", conn)\n", + "non_usa" + ] + }, + { + "cell_type": "markdown", + "id": "b731e73f-dc21-441b-9591-a5df1f313c05", + "metadata": {}, + "source": [ + "#### Q19\n", + "**Question asking about differences between `.loc` and `.iloc`**\n", + "\n", + "Answer: loc is primarily used with row and column labels, while iloc is used with\n", + "integer positions." + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "9f559dae-aba9-455d-b4ab-ca0845c9eb44", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<Axes: xlabel='A', ylabel='C'>" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 640x480 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Q20 - Answer: C A scatter plot with dots that are all the same color\n", + "\n", + "df = pd.DataFrame({\"A\":[1,2,3,4], \"B\":[2,7,5,8], \"C\":[3,6,9,12]})\n", + "df.plot.scatter(x=\"A\", y=\"C\")" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "b50e53cf-5998-4914-95c0-d9bd813bd825", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<Axes: ylabel='A'>" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", 
# Q23
import os


def _collect_paths(directory, found):
    """Append the path of every non-hidden file under `directory` to `found`."""
    for entry in os.listdir(directory):
        if entry.startswith("."):
            # Hidden files are skipped and hidden directories are not entered.
            continue
        path = os.path.join(directory, entry)
        if os.path.isfile(path):
            found.append(path)
        elif os.path.isdir(path):
            _collect_paths(path, found)  # SOLUTION: recurse into sub-directory


def get_all_paths_in(directory):
    """Return the paths of all non-hidden files below `directory`.

    Sub-directories are searched recursively. Entries whose name starts
    with "." are ignored. The result is sorted in reverse (descending)
    order; sorting happens once, after the whole tree has been collected.
    """
    paths = []
    _collect_paths(directory, paths)
    return sorted(paths, reverse=True)


# Q24
stars_dict = {
    "11 UMi": {"Stellar Mass": 2.78, "Stellar Age": 1.560},
    "14 And": {"Stellar Mass": 1.78, "Stellar Age": 4.500},
    "CD Cet": {"Stellar Mass": 0.16, "Stellar Age": 3.000},
    "mu2 Sco": {"Stellar Mass": 9.1, "Stellar Age": 0.020},
    "HD 96127": {"Stellar Mass": 12.94, "Stellar Age": 4.067}
}

star_classes = {
    "Red Giant": [],
    "White Dwarf": [],
    "Neutron Star": []
}

# SOLUTION: < < <= < >=
for star, star_info in stars_dict.items():
    mass = star_info["Stellar Mass"]
    if mass is None:  # identity check is the correct test for a missing value
        continue
    if 0.5 < mass < 8:
        star_classes["Red Giant"].append(star)
    elif 8 <= mass < 12:
        star_classes["White Dwarf"].append(star)
    elif mass >= 12:
        star_classes["Neutron Star"].append(star)
    # Stars with mass <= 0.5 (e.g. "CD Cet") fall through and stay unclassified.

star_classes

# Q25
print(stars_dict[star_classes["Neutron Star"][0]]["Stellar Age"])

# Q26
import csv
import json


def process_csv(filename):
    """Read the CSV file `filename` and return its rows as a list of lists.

    The file is opened with newline="" as the csv module requires, so
    line breaks embedded in quoted fields are preserved, and as UTF-8 to
    match read_json.
    """
    with open(filename, newline="", encoding="utf-8") as file:
        return list(csv.reader(file))


def read_json(path):
    """Parse the UTF-8 JSON file at `path` and return the resulting object."""
    with open(path, encoding="utf-8") as f:
        return json.load(f)


def get_planets_data(planet_file, mapping_file):
    """Load a planets CSV together with its JSON mapping.

    Returns a tuple (header_row, data_rows, mapping) where header_row is
    the first CSV row, data_rows are all remaining rows, and mapping is
    the parsed content of `mapping_file`.
    """
    mapping_dict = read_json(mapping_file)
    planets_csv = process_csv(planet_file)
    planets_header = planets_csv[0]
    planets_rows = planets_csv[1:]
    return (planets_header, planets_rows, mapping_dict)

# Root
# |
# |-- data
# | |-- planets1.csv
# | |--
mappings\n", + "# | |-- mapping1.json\n", + "# |\n", + "# |-- other_folder\n", + "\n", + "#SOLUTION\n", + "# get_planets_data(os.path.join(\"data\", \"planets1.csv\"),\n", + "# os.path.join(\"data\", \"mappings\", \"mapping1.json\")" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "352ffe18-be4b-4b34-a9ca-2ea46701ec48", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Column 1', 'Column 2']" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Q27\n", + "data = \"\"\"\n", + "<table>\n", + "<thead>\n", + "<tr>\n", + "<th>Column 1</th>\n", + "<th>Column 2</th>\n", + "</tr>\n", + "</thead>\n", + "<tbody>\n", + "<tr>\n", + "<td>Row 1, Cell 1</td>\n", + "<td>Row 1, Cell 2</td>\n", + "</tr>\n", + "<tr>\n", + "<td>Row 2, Cell 1</td>\n", + "<td>Row 2, Cell 2</td>\n", + "</tr>\n", + "</tbody>\n", + "</table>\n", + "\"\"\"\n", + "\n", + "f=open(\"2023.html\",'w')\n", + "f.write(data)\n", + "f.close()\n", + "\n", + "from bs4 import BeautifulSoup\n", + "import pandas as pd\n", + "\n", + "with open(\"2023.html\", \"r\", encoding=\"utf-8\") as file:\n", + " html_content = file.read()\n", + "\n", + "soup = BeautifulSoup(html_content, \"html.parser\")\n", + "table = soup.find(\"table\")\n", + "headers = [val.get_text().strip() for val in table.find(\"thead\").find_all(\"th\")]\n", + "headers" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "15c56c7c-e2a9-4bf1-a593-bb7f0ed9fa86", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " 
# %%
# Q28
# Turn every non-header <tr> of the parsed table into a dict keyed by column
# name, then build a DataFrame from the collected dicts.
table = soup.find("table")
headers = ["Column 1", "Column 2"]
rows = table.find_all("tr")

data = []
for row in rows[1:]:  # skip the first <tr>
    cols = row.find_all("td")
    # SOLUTION: the text of the idx-th <td> is stored under the idx-th header.
    row_data = {header: cols[idx].get_text() for idx, header in enumerate(headers)}
    data.append(row_data)

df = pd.DataFrame(data)
df

# %%
# Q29
# Look up a tag via both lookup methods and print the two results side by side.
first_anchor = soup.find("a")
all_anchors = soup.find_all("a")
print(first_anchor, all_anchors)
# SETUP FOR Q32 and beyond
# Build a four-row rankings table and load it into the SQLite database behind
# `conn` so the queries in the following cells have data to run against.
# NOTE(review): `conn` is not created in this cell; it must already be an open
# sqlite3 connection when this runs -- confirm which cell creates it.
df = pd.DataFrame({
    'Year': [2022, 2022, 2022, 2021],
    'Institution Name': ['University of Tokyo', 'University of Munich', 'Kyoto University', 'Wisconsin-Madison'],
    'Country': ['Japan', 'Germany', 'Japan', 'USA'],
    'Fac Student': [7.2, 14.3, 6.5, 15.2],
    'Cit Per Fac': [97.5, 97.2, 96.8, 96.1],
    'International': [99.7, 91.5, 98.9, 83.4]
})
# if_exists="replace" keeps the cell re-runnable: the default ("fail") raises
# ValueError once the table exists. index=False avoids storing the DataFrame
# index as an extra column, matching the to_sql call shown in Q31.
df.to_sql("rankings", conn, if_exists="replace", index=False)
# %%
# Q32
# Top 10 institutions by "Fac Student" for Brazil in 2023.
# Quoting rules: double quotes are for identifiers (column / table names) and
# are the SQL standard; single quotes are for string literals. Backticks also
# work in SQLite, but only for MySQL syntax compatibility.
# https://stackoverflow.com/questions/25141090/use-backticks-or-double-quotes-with-python-and-sqlite
pd.read_sql("""
SELECT "Institution Name", "Fac Student"
FROM rankings
WHERE Year = 2023 AND Country = 'Brazil'
ORDER BY "Fac Student" DESC
LIMIT 10
""", conn)

# %%
# Q33
# Correlation between the "International" and "Cit Per Fac" columns.
# The exam version also filtered with
#     WHERE "year" = 2020 AND "Country" = "Germany"
# That filter is left out here so the query returns some rows.
qry = """
SELECT "International", "Cit Per Fac" FROM rankings

"""
# A single .loc[row, column] lookup on the correlation matrix.
pd.read_sql(qry, conn).corr().loc["International", "Cit Per Fac"]
# %%
# Q34
# Number of institutions per country for 2020, largest counts first, top 5.
qry = """SELECT Country, COUNT(*) AS "Num Institutions" FROM rankings
WHERE Year = 2020 GROUP BY Country ORDER BY "Num Institutions" DESC
LIMIT 5"""
pd.read_sql(qry, conn)

# %%
# Q35
# Product of "Fac Student" and "Cit Per Fac", reported as "Cit Per Student".
# The FROM clause is required so the quoted names resolve to columns of the
# rankings table; the query is then executed so the cell shows its result.
qry = """
SELECT ("Fac Student" * "Cit Per Fac") AS "Cit Per Student"
FROM rankings
"""
pd.read_sql(qry, conn)

# %%
# Release the database connection once all queries are done.
conn.close()