diff --git a/sum23/lecture_materials/23_Plotting2/Exam Review.ipynb b/sum23/lecture_materials/23_Plotting2/Exam Review.ipynb index f74342769b636570c12a9f678a3ce9026702bf86..46387bf155989469d7691f263e598205db8c6638 100644 --- a/sum23/lecture_materials/23_Plotting2/Exam Review.ipynb +++ b/sum23/lecture_materials/23_Plotting2/Exam Review.ipynb @@ -2,19 +2,20 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 35, "id": "c3f52922", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from bs4 import BeautifulSoup\n", - "import sqlite3" + "import sqlite3\n", + "import os" ] }, { "cell_type": "markdown", - "id": "5c9d1d79", + "id": "fbb93a47", "metadata": {}, "source": [ "# Big picture review\n", @@ -22,12 +23,47 @@ ] }, { - "cell_type": "code", - "execution_count": null, - "id": "69afd613", + "cell_type": "markdown", + "id": "e22a2d80", "metadata": {}, - "outputs": [], - "source": [] + "source": [ + "Pandas - a library that is commonly used with python for data science. (Specific to python)\n", + "\n", + "Series\n", + "* Pandas object\n", + "* Has index, values, and integer positions\n", + "* Can make a series from a list\n", + " * \\[1,4,9\\] can become a series. When it does, 1,4,9 are the values. The index and the IP will be the same, 0,1,2\n", + "* Can make a series from a dictionary\n", + " * {'a':1, 'b':4, 'c':9} will have 1,4,9 as the values, and 'a','b','c' will be the index, and the IP will be 0,1,2\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "bc397564", + "metadata": {}, + "source": [ + "Dataframe\n", + "* Also a Pandas object\n", + "* If a series is a list or a dictionary, then a dataframe is like a list of lists, or a list of dictionaries, or a dictionary of lists, or a dictionary of dictionaries. (e.g., would be loaded in from CSV or JSON)\n", + "* Still have an index and IP. 
There are different columns with different values\n" + ] + }, + { + "cell_type": "markdown", + "id": "09598b13", + "metadata": {}, + "source": [ + "SQL - a query language that is commonly used with databases. Separate from python but there are libraries that let us integrate SQL with python\n", + "\n", + "Database - \n", + "* A CS concept, a way of storing data in an organized way\n", + "* Can have multiple tables. Each table has a name. The columns in each table all have names. Each column has a specific data type and there are no missing values (but values can be \"fake\", like NaN).\n", + "* SQL is a language that allows us to work with databases\n", + "* When we use SQL from within python, using pd.read_sql function, it returns as a pandas dataframe\n", + "* SQL has very different syntax from python (and pandas)" + ] }, { "cell_type": "markdown", @@ -39,10 +75,83 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "4ebb9401", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Name</th>\n", + " <th>color</th>\n", + " <th>length</th>\n", + " <th>habitat</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>fish</td>\n", + " <td>green</td>\n", + " <td>8</td>\n", + " <td>water</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>cat</td>\n", + " <td>brown</td>\n", + " <td>15</td>\n", + " <td>house</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>tiger</td>\n", + " <td>orange</td>\n", + " <td>48</td>\n", + " 
<td>jungle</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>dog</td>\n", + " <td>yellow</td>\n", + " <td>28</td>\n", + " <td>house</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Name color length habitat\n", + "0 fish green 8 water\n", + "1 cat brown 15 house\n", + "2 tiger orange 48 jungle\n", + "3 dog yellow 28 house" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "fish = {\"Name\":\"fish\", \"color\":\"green\", \"length\":8, \"habitat\":'water'}\n", "cat = {\"Name\":\"cat\",\"color\":\"brown\",\"length\":15,\"habitat\":'house'}\n", @@ -55,52 +164,666 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "35f32b68", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0 water\n", + "1 house\n", + "2 jungle\n", + "3 house\n", + "Name: habitat, dtype: object" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# DataFrame vs Series" + "# DataFrame vs Series\n", + "animals['habitat']\n", + "animals.habitat" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, + "id": "680f38dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 False\n", + "1 True\n", + "2 False\n", + "3 True\n", + "Name: habitat, dtype: bool" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "animals['habitat'] == 'house'" + ] + }, + { + "cell_type": "code", + "execution_count": 7, "id": "a7bb549c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + 
"</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Name</th>\n", + " <th>color</th>\n", + " <th>length</th>\n", + " <th>habitat</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>cat</td>\n", + " <td>brown</td>\n", + " <td>15</td>\n", + " <td>house</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>dog</td>\n", + " <td>yellow</td>\n", + " <td>28</td>\n", + " <td>house</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Name color length habitat\n", + "1 cat brown 15 house\n", + "3 dog yellow 28 house" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Filtering - simple and complex conditions" + "# Filtering - simple and complex conditions\n", + "animals [ animals['habitat'] == 'house' ]" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, + "id": "b42f5aa9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Name</th>\n", + " <th>color</th>\n", + " <th>length</th>\n", + " <th>habitat</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>fish</td>\n", + " <td>green</td>\n", + " <td>8</td>\n", + " <td>water</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>cat</td>\n", + " <td>brown</td>\n", + " <td>15</td>\n", + " <td>house</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + 
"text/plain": [ + " Name color length habitat\n", + "0 fish green 8 water\n", + "1 cat brown 15 house" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "animals[ animals['length'] < 20 ] " + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "c5468761", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Name</th>\n", + " <th>color</th>\n", + " <th>length</th>\n", + " <th>habitat</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>cat</td>\n", + " <td>brown</td>\n", + " <td>15</td>\n", + " <td>house</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Name color length habitat\n", + "1 cat brown 15 house" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# filter by multiple things\n", + "animals [ (animals['habitat'] == 'house') & (animals['length'] < 20) ]" + ] + }, + { + "cell_type": "code", + "execution_count": 12, "id": "f6f7d185", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0 water\n", + "1 house\n", + "2 jungle\n", + "3 house\n", + "Name: habitat, dtype: object" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Accessing individual values" + "# Accessing individual values\n", + "hs = animals['habitat']\n", + "hs" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, + "id": 
"71863d9c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'house'" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# how do we get 'house'?\n", + "hs.iloc[1]\n", + "hs.loc[1]" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "5cdf5bc5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'water'" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# special case: if a series has exactly one value\n", + "hs [ hs == 'water' ].item() # this works\n", + "\n", + "# hs [ hs == 'house' ].item() # doesn't work because multiple animals live in house" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "7f619624", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Name</th>\n", + " <th>color</th>\n", + " <th>length</th>\n", + " <th>habitat</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>fish</td>\n", + " <td>green</td>\n", + " <td>8</td>\n", + " <td>water</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>cat</td>\n", + " <td>brown</td>\n", + " <td>15</td>\n", + " <td>house</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Name color length habitat\n", + "0 fish green 8 water\n", + "1 cat brown 15 house" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# slicing\n", + 
"animals[:2]" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "78d124d3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Name</th>\n", + " <th>color</th>\n", + " <th>length</th>\n", + " <th>habitat</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>fish</td>\n", + " <td>green</td>\n", + " <td>8</td>\n", + " <td>water</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>cat</td>\n", + " <td>brown</td>\n", + " <td>15</td>\n", + " <td>house</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Name color length habitat\n", + "0 fish green 8 water\n", + "1 cat brown 15 house" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "animals.iloc[:2]" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "7b9f6d90", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Name</th>\n", + " <th>color</th>\n", + " <th>length</th>\n", + " <th>habitat</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " 
<th>0</th>\n", + " <td>fish</td>\n", + " <td>green</td>\n", + " <td>8</td>\n", + " <td>water</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>cat</td>\n", + " <td>brown</td>\n", + " <td>15</td>\n", + " <td>house</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>tiger</td>\n", + " <td>orange</td>\n", + " <td>48</td>\n", + " <td>jungle</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Name color length habitat\n", + "0 fish green 8 water\n", + "1 cat brown 15 house\n", + "2 tiger orange 48 jungle" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# weird edge case: if you use slicing with loc, it's inclusive of the endpoint \n", + "animals.loc[:2]" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "0b47daee", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>color</th>\n", + " <th>length</th>\n", + " <th>habitat</th>\n", + " </tr>\n", + " <tr>\n", + " <th>Name</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>fish</th>\n", + " <td>green</td>\n", + " <td>8</td>\n", + " <td>water</td>\n", + " </tr>\n", + " <tr>\n", + " <th>cat</th>\n", + " <td>brown</td>\n", + " <td>15</td>\n", + " <td>house</td>\n", + " </tr>\n", + " <tr>\n", + " <th>tiger</th>\n", + " <td>orange</td>\n", + " <td>48</td>\n", + " <td>jungle</td>\n", + " </tr>\n", + " <tr>\n", + " <th>dog</th>\n", + " <td>yellow</td>\n", + " <td>28</td>\n", 
+ " <td>house</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " color length habitat\n", + "Name \n", + "fish green 8 water\n", + "cat brown 15 house\n", + "tiger orange 48 jungle\n", + "dog yellow 28 house" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# setting an index (set_index creates a new object)\n", + "animals = animals.set_index('Name')\n", + "animals" + ] + }, + { + "cell_type": "code", + "execution_count": 31, "id": "2f6f8f41", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'brown'" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Understanding loc, iloc, and slicing" + "# Understanding loc, iloc, and slicing\n", + "animals['color'].loc['cat']\n", + "animals['color'].iloc[1]" ] }, { "cell_type": "code", - "execution_count": null, - "id": "62ccdd15", + "execution_count": 33, + "id": "4fbb6177", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'brown'" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Setting an index column " + "animals.loc['cat', 'color']\n", + "animals.iloc[1, 0]\n" ] }, { @@ -142,7 +865,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 42, "id": "8b44c2dd", "metadata": {}, "outputs": [], @@ -178,7 +901,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 43, "id": "83ac470b", "metadata": {}, "outputs": [], @@ -188,25 +911,72 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 44, "id": "c546e7b8", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "# find works when there's a single object that we're looking for\n", + "table = bs_obj.find('table')\n", + "# find_all is used when there might be multiple of something" + ] }, { "cell_type": 
"code", - "execution_count": null, + "execution_count": 47, + "id": "085927ea", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['University', 'Department']" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "header = [item.get_text() for item in table.find_all('th') ] \n", + "# use a list comprehension to pull out just the text\n", + "header" + ] + }, + { + "cell_type": "code", + "execution_count": 54, "id": "be3d05e0", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://www.cs.wisc.edu/ Computer Sciences\n", + "https://stat.wisc.edu/ Statistics\n", + "https://cdis.wisc.edu/ CDIS\n", + "https://eecs.berkeley.edu/ Electrical Engineering and Computer Sciences\n", + " \n" + ] + } + ], "source": [ + "rows = table.find_all('tr')\n", + "\n", + "for row in rows[1:]:\n", + " #print(row)\n", + " anchor = row.find('a')\n", + " link = anchor.attrs['href']\n", + " link_title = anchor.get_text()\n", + " print(link, link_title)\n", "# get_text, children, attrs" ] }, { "cell_type": "markdown", - "id": "753cf273", + "id": "471b6f91", "metadata": {}, "source": [ "# Databases" @@ -214,8 +984,8 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "4fda7cf8", + "execution_count": 36, + "id": "23a1e919", "metadata": {}, "outputs": [], "source": [ @@ -227,9 +997,227 @@ " return pd.read_sql(QUERY, conn)" ] }, + { + "cell_type": "code", + "execution_count": 38, + "id": "c64cc21d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: 
right;\">\n", + " <th></th>\n", + " <th>Title</th>\n", + " <th>Genre</th>\n", + " <th>Director</th>\n", + " <th>Cast</th>\n", + " <th>Year</th>\n", + " <th>Runtime</th>\n", + " <th>Rating</th>\n", + " <th>Revenue</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>Guardians of the Galaxy</td>\n", + " <td>Action,Adventure,Sci-Fi</td>\n", + " <td>James Gunn</td>\n", + " <td>Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S...</td>\n", + " <td>2014</td>\n", + " <td>121</td>\n", + " <td>8.1</td>\n", + " <td>333.13</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>Prometheus</td>\n", + " <td>Adventure,Mystery,Sci-Fi</td>\n", + " <td>Ridley Scott</td>\n", + " <td>Noomi Rapace, Logan Marshall-Green, Michael ...</td>\n", + " <td>2012</td>\n", + " <td>124</td>\n", + " <td>7.0</td>\n", + " <td>126.46</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>Split</td>\n", + " <td>Horror,Thriller</td>\n", + " <td>M. Night Shyamalan</td>\n", + " <td>James McAvoy, Anya Taylor-Joy, Haley Lu Richar...</td>\n", + " <td>2016</td>\n", + " <td>117</td>\n", + " <td>7.3</td>\n", + " <td>138.12</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>Sing</td>\n", + " <td>Animation,Comedy,Family</td>\n", + " <td>Christophe Lourdelet</td>\n", + " <td>Matthew McConaughey,Reese Witherspoon, Seth Ma...</td>\n", + " <td>2016</td>\n", + " <td>108</td>\n", + " <td>7.2</td>\n", + " <td>270.32</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>Suicide Squad</td>\n", + " <td>Action,Adventure,Fantasy</td>\n", + " <td>David Ayer</td>\n", + " <td>Will Smith, Jared Leto, Margot Robbie, Viola D...</td>\n", + " <td>2016</td>\n", + " <td>123</td>\n", + " <td>6.2</td>\n", + " <td>325.02</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " 
<tr>\n", + " <th>1063</th>\n", + " <td>Guardians of the Galaxy Vol. 2</td>\n", + " <td>Action, Adventure, Comedy</td>\n", + " <td>James Gunn</td>\n", + " <td>Chris Pratt, Zoe Saldana, Dave Bautista, Vin D...</td>\n", + " <td>2017</td>\n", + " <td>136</td>\n", + " <td>7.6</td>\n", + " <td>389.81</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1064</th>\n", + " <td>Baby Driver</td>\n", + " <td>Action, Crime, Drama</td>\n", + " <td>Edgar Wright</td>\n", + " <td>Ansel Elgort, Jon Bernthal, Jon Hamm, Eiza Gon...</td>\n", + " <td>2017</td>\n", + " <td>113</td>\n", + " <td>7.6</td>\n", + " <td>107.83</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1065</th>\n", + " <td>Only the Brave</td>\n", + " <td>Action, Biography, Drama</td>\n", + " <td>Joseph Kosinski</td>\n", + " <td>Josh Brolin, Miles Teller, Jeff Bridges, Jenni...</td>\n", + " <td>2017</td>\n", + " <td>134</td>\n", + " <td>7.6</td>\n", + " <td>18.34</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1066</th>\n", + " <td>Incredibles 2</td>\n", + " <td>Animation, Action, Adventure</td>\n", + " <td>Brad Bird</td>\n", + " <td>Craig T. Nelson, Holly Hunter, Sarah Vowell, H...</td>\n", + " <td>2018</td>\n", + " <td>118</td>\n", + " <td>7.6</td>\n", + " <td>608.58</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1067</th>\n", + " <td>A Star Is Born</td>\n", + " <td>Drama, Music, Romance</td>\n", + " <td>Bradley Cooper</td>\n", + " <td>Lady Gaga, Bradley Cooper, Sam Elliott, Greg G...</td>\n", + " <td>2018</td>\n", + " <td>136</td>\n", + " <td>7.6</td>\n", + " <td>215.29</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>1068 rows × 8 columns</p>\n", + "</div>" + ], + "text/plain": [ + " Title Genre \\\n", + "0 Guardians of the Galaxy Action,Adventure,Sci-Fi \n", + "1 Prometheus Adventure,Mystery,Sci-Fi \n", + "2 Split Horror,Thriller \n", + "3 Sing Animation,Comedy,Family \n", + "4 Suicide Squad Action,Adventure,Fantasy \n", + "... ... ... \n", + "1063 Guardians of the Galaxy Vol. 
2 Action, Adventure, Comedy \n", + "1064 Baby Driver Action, Crime, Drama \n", + "1065 Only the Brave Action, Biography, Drama \n", + "1066 Incredibles 2 Animation, Action, Adventure \n", + "1067 A Star Is Born Drama, Music, Romance \n", + "\n", + " Director Cast \\\n", + "0 James Gunn Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S... \n", + "1 Ridley Scott Noomi Rapace, Logan Marshall-Green, Michael ... \n", + "2 M. Night Shyamalan James McAvoy, Anya Taylor-Joy, Haley Lu Richar... \n", + "3 Christophe Lourdelet Matthew McConaughey,Reese Witherspoon, Seth Ma... \n", + "4 David Ayer Will Smith, Jared Leto, Margot Robbie, Viola D... \n", + "... ... ... \n", + "1063 James Gunn Chris Pratt, Zoe Saldana, Dave Bautista, Vin D... \n", + "1064 Edgar Wright Ansel Elgort, Jon Bernthal, Jon Hamm, Eiza Gon... \n", + "1065 Joseph Kosinski Josh Brolin, Miles Teller, Jeff Bridges, Jenni... \n", + "1066 Brad Bird Craig T. Nelson, Holly Hunter, Sarah Vowell, H... \n", + "1067 Bradley Cooper Lady Gaga, Bradley Cooper, Sam Elliott, Greg G... \n", + "\n", + " Year Runtime Rating Revenue \n", + "0 2014 121 8.1 333.13 \n", + "1 2012 124 7.0 126.46 \n", + "2 2016 117 7.3 138.12 \n", + "3 2016 108 7.2 270.32 \n", + "4 2016 123 6.2 325.02 \n", + "... ... ... ... ... 
\n", + "1063 2017 136 7.6 389.81 \n", + "1064 2017 113 7.6 107.83 \n", + "1065 2017 134 7.6 18.34 \n", + "1066 2018 118 7.6 608.58 \n", + "1067 2018 136 7.6 215.29 \n", + "\n", + "[1068 rows x 8 columns]" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# qry(\"SELECT * from sqlite_master\") # this tells us there's one table called movies\n", + "\n", + "qry(\"SELECT * from movies\")" + ] + }, { "cell_type": "markdown", - "id": "f98c2f0c", + "id": "be0c4ac8", "metadata": {}, "source": [ "where vs having" @@ -237,11 +1225,481 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "a76a21e2", + "execution_count": 39, + "id": "ec2d494c", "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Title</th>\n", + " <th>Genre</th>\n", + " <th>Director</th>\n", + " <th>Cast</th>\n", + " <th>Year</th>\n", + " <th>Runtime</th>\n", + " <th>Rating</th>\n", + " <th>Revenue</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>Guardians of the Galaxy</td>\n", + " <td>Action,Adventure,Sci-Fi</td>\n", + " <td>James Gunn</td>\n", + " <td>Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S...</td>\n", + " <td>2014</td>\n", + " <td>121</td>\n", + " <td>8.1</td>\n", + " <td>333.13</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>Prometheus</td>\n", + " <td>Adventure,Mystery,Sci-Fi</td>\n", + " <td>Ridley Scott</td>\n", + " <td>Noomi Rapace, Logan Marshall-Green, Michael ...</td>\n", + " <td>2012</td>\n", + " 
<td>124</td>\n", + " <td>7.0</td>\n", + " <td>126.46</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>Suicide Squad</td>\n", + " <td>Action,Adventure,Fantasy</td>\n", + " <td>David Ayer</td>\n", + " <td>Will Smith, Jared Leto, Margot Robbie, Viola D...</td>\n", + " <td>2016</td>\n", + " <td>123</td>\n", + " <td>6.2</td>\n", + " <td>325.02</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>La La Land</td>\n", + " <td>Comedy,Drama,Music</td>\n", + " <td>Damien Chazelle</td>\n", + " <td>Ryan Gosling, Emma Stone, Rosemarie DeWitt, J....</td>\n", + " <td>2016</td>\n", + " <td>128</td>\n", + " <td>8.3</td>\n", + " <td>151.06</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>The Lost City of Z</td>\n", + " <td>Action,Adventure,Biography</td>\n", + " <td>James Gray</td>\n", + " <td>Charlie Hunnam, Robert Pattinson, Sienna Mille...</td>\n", + " <td>2016</td>\n", + " <td>141</td>\n", + " <td>7.1</td>\n", + " <td>8.01</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>325</th>\n", + " <td>Nelyubov</td>\n", + " <td>Drama</td>\n", + " <td>Andrey Zvyagintsev</td>\n", + " <td>Maryana Spivak, Aleksey Rozin, Matvey Novikov,...</td>\n", + " <td>2017</td>\n", + " <td>127</td>\n", + " <td>7.6</td>\n", + " <td>0.57</td>\n", + " </tr>\n", + " <tr>\n", + " <th>326</th>\n", + " <td>Just Mercy</td>\n", + " <td>Biography, Crime, Drama</td>\n", + " <td>Destin Daniel Cretton</td>\n", + " <td>Michael B. Jordan, Jamie Foxx, Brie Larson, Ch...</td>\n", + " <td>2019</td>\n", + " <td>137</td>\n", + " <td>7.6</td>\n", + " <td>50.40</td>\n", + " </tr>\n", + " <tr>\n", + " <th>327</th>\n", + " <td>Guardians of the Galaxy Vol. 
2</td>\n", + " <td>Action, Adventure, Comedy</td>\n", + " <td>James Gunn</td>\n", + " <td>Chris Pratt, Zoe Saldana, Dave Bautista, Vin D...</td>\n", + " <td>2017</td>\n", + " <td>136</td>\n", + " <td>7.6</td>\n", + " <td>389.81</td>\n", + " </tr>\n", + " <tr>\n", + " <th>328</th>\n", + " <td>Only the Brave</td>\n", + " <td>Action, Biography, Drama</td>\n", + " <td>Joseph Kosinski</td>\n", + " <td>Josh Brolin, Miles Teller, Jeff Bridges, Jenni...</td>\n", + " <td>2017</td>\n", + " <td>134</td>\n", + " <td>7.6</td>\n", + " <td>18.34</td>\n", + " </tr>\n", + " <tr>\n", + " <th>329</th>\n", + " <td>A Star Is Born</td>\n", + " <td>Drama, Music, Romance</td>\n", + " <td>Bradley Cooper</td>\n", + " <td>Lady Gaga, Bradley Cooper, Sam Elliott, Greg G...</td>\n", + " <td>2018</td>\n", + " <td>136</td>\n", + " <td>7.6</td>\n", + " <td>215.29</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>330 rows × 8 columns</p>\n", + "</div>" + ], + "text/plain": [ + " Title Genre \\\n", + "0 Guardians of the Galaxy Action,Adventure,Sci-Fi \n", + "1 Prometheus Adventure,Mystery,Sci-Fi \n", + "2 Suicide Squad Action,Adventure,Fantasy \n", + "3 La La Land Comedy,Drama,Music \n", + "4 The Lost City of Z Action,Adventure,Biography \n", + ".. ... ... \n", + "325 Nelyubov Drama \n", + "326 Just Mercy Biography, Crime, Drama \n", + "327 Guardians of the Galaxy Vol. 2 Action, Adventure, Comedy \n", + "328 Only the Brave Action, Biography, Drama \n", + "329 A Star Is Born Drama, Music, Romance \n", + "\n", + " Director Cast \\\n", + "0 James Gunn Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S... \n", + "1 Ridley Scott Noomi Rapace, Logan Marshall-Green, Michael ... \n", + "2 David Ayer Will Smith, Jared Leto, Margot Robbie, Viola D... \n", + "3 Damien Chazelle Ryan Gosling, Emma Stone, Rosemarie DeWitt, J.... \n", + "4 James Gray Charlie Hunnam, Robert Pattinson, Sienna Mille... \n", + ".. ... ... \n", + "325 Andrey Zvyagintsev Maryana Spivak, Aleksey Rozin, Matvey Novikov,... 
\n", + "326 Destin Daniel Cretton Michael B. Jordan, Jamie Foxx, Brie Larson, Ch... \n", + "327 James Gunn Chris Pratt, Zoe Saldana, Dave Bautista, Vin D... \n", + "328 Joseph Kosinski Josh Brolin, Miles Teller, Jeff Bridges, Jenni... \n", + "329 Bradley Cooper Lady Gaga, Bradley Cooper, Sam Elliott, Greg G... \n", + "\n", + " Year Runtime Rating Revenue \n", + "0 2014 121 8.1 333.13 \n", + "1 2012 124 7.0 126.46 \n", + "2 2016 123 6.2 325.02 \n", + "3 2016 128 8.3 151.06 \n", + "4 2016 141 7.1 8.01 \n", + ".. ... ... ... ... \n", + "325 2017 127 7.6 0.57 \n", + "326 2019 137 7.6 50.40 \n", + "327 2017 136 7.6 389.81 \n", + "328 2017 134 7.6 18.34 \n", + "329 2018 136 7.6 215.29 \n", + "\n", + "[330 rows x 8 columns]" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "qry (\"\"\"\n", + " SELECT * \n", + " FROM movies \n", + " WHERE Runtime > 120\n", + "\"\"\")" + ] + }, + { + "cell_type": "markdown", + "id": "736f759a", + "metadata": {}, + "source": [ + "We want the average rating of movies for each year, only for movies that are over 2 hours long" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "9717b343", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Year</th>\n", + " <th>avg_rating</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>2006</td>\n", + " <td>7.400000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>2007</td>\n", + " <td>7.486957</td>\n", + " </tr>\n", + " <tr>\n", + " 
<th>2</th>\n", + " <td>2008</td>\n", + " <td>7.100000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>2009</td>\n", + " <td>7.326316</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>2010</td>\n", + " <td>6.958333</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>2011</td>\n", + " <td>7.338889</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>2012</td>\n", + " <td>7.176923</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>2013</td>\n", + " <td>7.236364</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>2014</td>\n", + " <td>7.245161</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>2015</td>\n", + " <td>6.997619</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10</th>\n", + " <td>2016</td>\n", + " <td>6.960714</td>\n", + " </tr>\n", + " <tr>\n", + " <th>11</th>\n", + " <td>2017</td>\n", + " <td>7.970000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>12</th>\n", + " <td>2018</td>\n", + " <td>8.030769</td>\n", + " </tr>\n", + " <tr>\n", + " <th>13</th>\n", + " <td>2019</td>\n", + " <td>8.033333</td>\n", + " </tr>\n", + " <tr>\n", + " <th>14</th>\n", + " <td>2020</td>\n", + " <td>8.333333</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Year avg_rating\n", + "0 2006 7.400000\n", + "1 2007 7.486957\n", + "2 2008 7.100000\n", + "3 2009 7.326316\n", + "4 2010 6.958333\n", + "5 2011 7.338889\n", + "6 2012 7.176923\n", + "7 2013 7.236364\n", + "8 2014 7.245161\n", + "9 2015 6.997619\n", + "10 2016 6.960714\n", + "11 2017 7.970000\n", + "12 2018 8.030769\n", + "13 2019 8.033333\n", + "14 2020 8.333333" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "qry (\"\"\"\n", + " SELECT year, AVG(Rating) AS avg_rating \n", + " FROM movies \n", + " WHERE Runtime > 120\n", + " GROUP BY year\n", + "\"\"\")" + ] + }, + { + "cell_type": "markdown", + "id": "7a2d3f89", + "metadata": {}, + "source": [ 
+ "We want the average rating of movies for each year that has more than 20 movies over 2 hrs long, only for movies that are over 2 hours long" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "ea3c391f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Year</th>\n", + " <th>avg_rating</th>\n", + " <th>num_movies</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>2007</td>\n", + " <td>7.486957</td>\n", + " <td>23</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>2012</td>\n", + " <td>7.176923</td>\n", + " <td>26</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>2013</td>\n", + " <td>7.236364</td>\n", + " <td>33</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>2014</td>\n", + " <td>7.245161</td>\n", + " <td>31</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>2015</td>\n", + " <td>6.997619</td>\n", + " <td>42</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>2016</td>\n", + " <td>6.960714</td>\n", + " <td>56</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Year avg_rating num_movies\n", + "0 2007 7.486957 23\n", + "1 2012 7.176923 26\n", + "2 2013 7.236364 33\n", + "3 2014 7.245161 31\n", + "4 2015 6.997619 42\n", + "5 2016 6.960714 56" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "qry (\"\"\"\n", + " SELECT year, AVG(Rating) AS avg_rating, COUNT(Title) as num_movies\n", + " FROM movies \n", + "
WHERE Runtime > 120\n", + " GROUP BY year\n", + " HAVING num_movies > 20\n", + "\"\"\")" + ] }, { "cell_type": "markdown", @@ -272,6 +1730,94 @@ "# #. simple recursive function\n" ] }, + { + "cell_type": "code", + "execution_count": 62, + "id": "2ac5783e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "C\n", + "B\n", + "done\n" + ] + } + ], + "source": [ + "a = 4\n", + "b = 7\n", + "c = 10\n", + "if c == 10:\n", + " print(\"C\")\n", + "if a > b:\n", + " print(\"A\")\n", + "elif a == b:\n", + " print(\"equal\")\n", + "else:\n", + " print(\"B\")\n", + " \n", + "print(\"done\")" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "5835a6cf", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\n", + "can't square a\n", + "4\n" + ] + } + ], + "source": [ + "mylist = [1,'a',2]\n", + "for item in mylist:\n", + " try:\n", + " print(item ** 2)\n", + " except:\n", + " print(\"can't square\", item)" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "id": "41bf53de", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\n" + ] + }, + { + "ename": "TypeError", + "evalue": "unsupported operand type(s) for ** or pow(): 'str' and 'int'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[64], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m mylist \u001b[38;5;241m=\u001b[39m [\u001b[38;5;241m1\u001b[39m,\u001b[38;5;124m'\u001b[39m\u001b[38;5;124ma\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;241m2\u001b[39m]\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m item \u001b[38;5;129;01min\u001b[39;00m mylist:\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[43mitem\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m2\u001b[39;49m)\n", + "\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for ** or pow(): 'str' and 'int'" + ] + } + ], + "source": [ + "mylist = [1,'a',2]\n", + "for item in mylist:\n", + " print(item ** 2)\n" + ] + }, { "cell_type": "markdown", "id": "5435a2eb", @@ -282,7 +1828,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 55, "id": "fc8d4c25", "metadata": {}, "outputs": [], @@ -291,10 +1837,92 @@ " print(salutation, name, punctuation)" ] }, + { + "cell_type": "markdown", + "id": "96af5d95", + "metadata": {}, + "source": [ + "Three types of parameters:\n", + "* Keyword\n", + " * In the function call, programmer specifies which argument corresponds to which parameter\n", + "* Positional\n", + " * Parameters filled in left to right\n", + "* Default -- user doesn't supply, take whatever was provided in the function definition\n", + " * In our example `greeting`, `salutation` and `punctuation` have default values.
`name` does not.\n", + " \n", + " \n", + "Rules\n", + "* Positional left to right\n", + "* Can use positional and keyword together, but all keyword arguments come last" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "3f0eb87c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "hi anna !\n" + ] + } + ], + "source": [ + "# bad example\n", + "# greeting(salutation=\"hi\", \"anna\")\n", + "\n", + "# better example\n", + "greeting(\"anna\", salutation='hi')" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "f625c718", + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "greeting() got multiple values for argument 'name'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[58], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# bad example\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m \u001b[43mgreeting\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43manna\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mbob\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msalutation\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mhi\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mTypeError\u001b[0m: greeting() got multiple values for argument 'name'" + ] + } + ], + "source": [ + "# bad example\n", + "greeting(\"anna\", name=\"bob\", salutation='hi')" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "fb516149", + "metadata": {}, + "outputs": [ + { + "name":
"stdout", + "output_type": "stream", + "text": [ + "Hello anna .\n" + ] + } + ], + "source": [ + "greeting(\"anna\", punctuation='.')" + ] + }, { "cell_type": "code", "execution_count": null, - "id": "8bd6614f", + "id": "a2424d77", "metadata": {}, "outputs": [], "source": [] diff --git a/sum23/projects/p13/p13_test.py b/sum23/projects/p13/p13_test.py index dd4e42a5161b9edf2206f4d7330ee6a8722e5d18..3c1f4b249791918a5e26a31796f540cc1df1e302 100644 --- a/sum23/projects/p13/p13_test.py +++ b/sum23/projects/p13/p13_test.py @@ -7,7 +7,7 @@ from bs4 import BeautifulSoup HTML_TEST_FILE = 'p13_expected.html' -MAX_FILE_SIZE = 500 # units - KB +MAX_FILE_SIZE = 550 # units - KB REL_TOL = 6e-04 # relative tolerance for floats ABS_TOL = 15e-03 # absolute tolerance for floats