From 60de967ae3b3f2484c3469a7934bad060f05ad5b Mon Sep 17 00:00:00 2001 From: Anna Meyer <annapmeyer95@gmail.com> Date: Wed, 26 Jul 2023 11:40:19 -0500 Subject: [PATCH] lec materials --- .../17_Pandas/exam2review.ipynb | 744 ++++++++++++++++++ 1 file changed, 744 insertions(+) create mode 100644 sum23/lecture_materials/17_Pandas/exam2review.ipynb diff --git a/sum23/lecture_materials/17_Pandas/exam2review.ipynb b/sum23/lecture_materials/17_Pandas/exam2review.ipynb new file mode 100644 index 0000000..8e76de4 --- /dev/null +++ b/sum23/lecture_materials/17_Pandas/exam2review.ipynb @@ -0,0 +1,744 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "0e207f19", + "metadata": {}, + "outputs": [], + "source": [ + "from collections import namedtuple\n", + "import os\n", + "import json\n", + "import csv" + ] + }, + { + "cell_type": "markdown", + "id": "a4ae0ecf", + "metadata": {}, + "source": [ + "# Sorting" + ] + }, + { + "cell_type": "markdown", + "id": "1ae8993b", + "metadata": {}, + "source": [ + "example 1" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "4d5cd750", + "metadata": {}, + "outputs": [], + "source": [ + "hurricanes = [\n", + " {\"name\": \"A\", \"year\": 2000, \"speed\": 150},\n", + " {\"name\": \"B\", \"year\": 1980, \"speed\": 100},\n", + " {\"name\": \"C\", \"year\": 1990}, # notice the missing speed key\n", + "]\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e663da6d", + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "'<' not supported between instances of 'dict' and 'dict'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[3], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m 
\u001b[38;5;28;43msorted\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mhurricanes\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mTypeError\u001b[0m: '<' not supported between instances of 'dict' and 'dict'" + ] + } + ], + "source": [ + "sorted(hurricanes)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "6409fd88", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'name': 'C', 'year': 1990},\n", + " {'name': 'B', 'year': 1980, 'speed': 100},\n", + " {'name': 'A', 'year': 2000, 'speed': 150}]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def get_speed(hurricane):\n", + " if not \"speed\" in hurricane:\n", + " return 0\n", + " return hurricane[\"speed\"]\n", + "\n", + "# by default, sort is in ascending order (smallest to largest)\n", + "sorted(hurricanes, key = get_speed)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "84091bcc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'name': 'A', 'year': 2000, 'speed': 150},\n", + " {'name': 'B', 'year': 1980, 'speed': 100},\n", + " {'name': 'C', 'year': 1990}]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# what about sorting fastest to slowest?\n", + "sorted(hurricanes, key = get_speed, reverse=True)" + ] + }, + { + "cell_type": "markdown", + "id": "06fa5ce3", + "metadata": {}, + "source": [ + "example 2" + ] + }, + { + "cell_type": "markdown", + "id": "0c37fdbd", + "metadata": {}, + "source": [ + "## lambda functions\n", + "notice we got rid of the missing value in hurricanes. 
we *could* handle this, but it makes it more complicated" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "2f713505", + "metadata": {}, + "outputs": [], + "source": [ + "hurricanes2 = [\n", + " {\"name\": \"A\", \"year\": 2000, \"speed\": 150},\n", + " {\"name\": \"B\", \"year\": 1980, \"speed\": 100},\n", + " {\"name\": \"C\", \"year\": 1990, \"speed\": 110}, \n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "5ea40390", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'name': 'B', 'year': 1980, 'speed': 100},\n", + " {'name': 'C', 'year': 1990, 'speed': 110},\n", + " {'name': 'A', 'year': 2000, 'speed': 150}]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sorted(hurricanes2, key = lambda x : x['speed'])" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "682e1b11", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'name': 'B', 'year': 1980, 'speed': 100},\n", + " {'name': 'C', 'year': 1990, 'speed': 110},\n", + " {'name': 'A', 'year': 2000, 'speed': 150}]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# sort by year\n", + "sorted(hurricanes2, key = lambda x : x['year'])" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "0f880100", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'bob': 20, 'alice': 8, 'alex': 9, 'cindy': 15}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "players = {\n", + " \"bob\": 20, \n", + " \"alice\": 8, \n", + " \"alex\": 9, \n", + " \"cindy\": 15} # Key: player_name; Value: score\n", + "players" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5b687e79", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": 
"6a326590", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('bob', 20), ('cindy', 15), ('alex', 9), ('alice', 8)]" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# sorted always returns a new object, and always returns a list\n", + "sorted(players.items(), key = lambda x: x[1], reverse=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "32c29ba5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'name': 'B', 'year': 1980, 'speed': 100},\n", + " {'name': 'C', 'year': 1990, 'speed': 110},\n", + " {'name': 'A', 'year': 2000, 'speed': 150}]" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# sort modifies the object in-place. Only works with lists\n", + "hurricanes2.sort(key = lambda x : x['year'])\n", + "hurricanes2" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "e4fb3e4a", + "metadata": {}, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'dict_items' object has no attribute 'sort'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[21], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mplayers\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mitems\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msort\u001b[49m(key \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mlambda\u001b[39;00m x: x[\u001b[38;5;241m1\u001b[39m], reverse\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n", + "\u001b[0;31mAttributeError\u001b[0m: 'dict_items' object has no attribute 'sort'" + ] + } + ], + "source": [ + "# can't .sort() things that aren't lists\n", + "players.items().sort(key = lambda x: x[1], 
reverse=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d78c1f09", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "92395297", + "metadata": {}, + "source": [ + "# Comprehensions\n", + "\n", + "\n", + "Step 1: \\[ ??? for x in y ??? \\] (fill in iteration piece)\n", + "\n", + "Step 2: \\[ ??? for x in y if ??? \\] (fill in \"if\")\n", + "\n", + "Step 3: \\[ e for x in y if ??? \\] (fill in expression)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "1502d714", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[2, 4, 8, 6, 4, 6, 2, 7]" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "words = ['My', 'very', 'educated', 'mother', 'just', 'served', 'us', 'noodles']\n", + "# create list of word length\n", + "len_list = [len(w) for w in words ]\n", + "len_list" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "ee073d66", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['educated', 'mother', 'served', 'noodles']" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "words = ['My', 'very', 'educated', 'mother', 'just', 'served', 'us', 'noodles']\n", + "# filter this list to only include words that are longer than 5 letters\n", + "[w for w in words if len(w) > 5]" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "76649dac", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['my', 'very', 'EDUCATED', 'MOTHER', 'just', 'SERVED', 'us', 'NOODLES']" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "words = ['My', 'very', 'educated', 'mother', 'just', 'served', 'us', 'noodles']\n", + "# create a new list. 
All short words should be lowercase, and all long words should be in all caps\n", + "\n", + " # if value condition # else value\n", + "# [ ??? if len(w) > 5 else ??? for w in words ]\n", + "[ w.upper() if len(w) > 5 else w.lower() for w in words ]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0c86ed63", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "6f0ec4e6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[1936, 1089, 3136, 441, 361]" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "numbers = [44, 33, 56, 21, 19]\n", + "# create list of squares\n", + "[ n ** 2 for n in numbers ]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "06856ef5", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "9f2ccc7c", + "metadata": {}, + "source": [ + "# Tuples\n", + "\n", + "1. Tuples are immutable (cannot modify, need to create a new instance)\n", + " - We can't call .sort() on a tuple\n", + " - We can call sorted() but it'll return a list, not a tuple\n", + "2. 
Tuples use indexing and slicing in the same way that lists do" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "7e030d85", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(20, 30, 40, 5)" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t = (20, 30, 40, 5)\n", + "t" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "d2f8b719", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(30, 40, 5)" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t[1:]" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "aea2aebe", + "metadata": {}, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'tuple' object has no attribute 'sort'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[31], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mt\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msort\u001b[49m()\n", + "\u001b[0;31mAttributeError\u001b[0m: 'tuple' object has no attribute 'sort'" + ] + } + ], + "source": [ + "t.sort()" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "bd454683", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[5, 20, 30, 40]\n", + "(5, 20, 30, 40)\n" + ] + } + ], + "source": [ + "print(sorted(t))\n", + "print(tuple(sorted(t)))" + ] + }, + { + "cell_type": "markdown", + "id": "a7bc5955", + "metadata": {}, + "source": [ + "## NamedTuple" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "63bf692f", + "metadata": {}, + "outputs": [], + "source": [ + "Animal = namedtuple(\"Animal\", [\"Name\", \"Legs\", \"Color\", \"Habitat\"])" + ] + }, + { + 
"cell_type": "code", + "execution_count": 58, + "id": "e0ac8e26", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Animal(Name='fish', Legs=0, Color='green', Habitat='water')" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fish = Animal(\"fish\", 0, \"green\", \"water\") # using positional parameters\n", + "fish" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "8022c9e5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Animal(Name='cat', Legs=4, Color='brown', Habitat='house')" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cat = Animal(Name = \"cat\", Color = \"brown\", Habitat = \"house\", Legs = 4) # keyword\n", + "cat" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "1edf9378", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Animal(Name='tiger', Legs=4, Color='orange', Habitat='jungle')" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tiger = Animal(\"tiger\", 4, Habitat = \"jungle\", Color = 'orange') # mix of positional (on the left) and keyword\n", + "tiger" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "6c8765a4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "I see a green fish in the water\n", + "I see a brown cat in the house\n", + "I see a orange tiger in the jungle\n" + ] + } + ], + "source": [ + "animals = [fish, cat, tiger]\n", + "for a in animals:\n", + " # print \"I see a <color> <animal name> in the <habitat>\"\n", + " print(\"I see a {} {} in the {}\".format( a.Color, a.Name, a.Habitat ))" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "48cb37c3", + "metadata": {}, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "can't set attribute", + 
"output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[56], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mfish\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mColor\u001b[49m \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mgold\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;66;03m# won't work, because namedtuples are immutable\u001b[39;00m\n", + "\u001b[0;31mAttributeError\u001b[0m: can't set attribute" + ] + } + ], + "source": [ + "fish.Color = 'gold' # won't work, because namedtuples are immutable" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "249d3cd4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Animal(Name='fish', Legs=0, Color='green', Habitat='water')\n" + ] + }, + { + "data": { + "text/plain": [ + "Animal(Name='goldfish', Legs=0, Color='gold', Habitat='water')" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(fish)\n", + "fish = Animal(\"goldfish\", fish.Legs, \"gold\", fish.Habitat)\n", + "fish" + ] + }, + { + "cell_type": "markdown", + "id": "10b6169b", + "metadata": {}, + "source": [ + "# Copying" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "35bd3c3e", + "metadata": {}, + "outputs": [], + "source": [ + "# see separate link to python tutor example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dc4a9341", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": 
"python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} -- GitLab