From 60de967ae3b3f2484c3469a7934bad060f05ad5b Mon Sep 17 00:00:00 2001
From: Anna Meyer <annapmeyer95@gmail.com>
Date: Wed, 26 Jul 2023 11:40:19 -0500
Subject: [PATCH] lec materials

---
 .../17_Pandas/exam2review.ipynb               | 744 ++++++++++++++++++
 1 file changed, 744 insertions(+)
 create mode 100644 sum23/lecture_materials/17_Pandas/exam2review.ipynb

diff --git a/sum23/lecture_materials/17_Pandas/exam2review.ipynb b/sum23/lecture_materials/17_Pandas/exam2review.ipynb
new file mode 100644
index 0000000..8e76de4
--- /dev/null
+++ b/sum23/lecture_materials/17_Pandas/exam2review.ipynb
@@ -0,0 +1,744 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "0e207f19",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from collections import namedtuple\n",
+    "import os\n",
+    "import json\n",
+    "import csv"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a4ae0ecf",
+   "metadata": {},
+   "source": [
+    "# Sorting"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1ae8993b",
+   "metadata": {},
+   "source": [
+    "example 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "4d5cd750",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "hurricanes = [\n",
+    "    {\"name\": \"A\", \"year\": 2000, \"speed\": 150},\n",
+    "    {\"name\": \"B\", \"year\": 1980, \"speed\": 100},\n",
+    "    {\"name\": \"C\", \"year\": 1990}, # notice the missing speed key\n",
+    "]\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "e663da6d",
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "TypeError",
+     "evalue": "'<' not supported between instances of 'dict' and 'dict'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[3], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;43msorted\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mhurricanes\u001b[49m\u001b[43m)\u001b[49m\n",
+      "\u001b[0;31mTypeError\u001b[0m: '<' not supported between instances of 'dict' and 'dict'"
+     ]
+    }
+   ],
+   "source": [
+    "sorted(hurricanes)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "6409fd88",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[{'name': 'C', 'year': 1990},\n",
+       " {'name': 'B', 'year': 1980, 'speed': 100},\n",
+       " {'name': 'A', 'year': 2000, 'speed': 150}]"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "def get_speed(hurricane):\n",
+    "    if not \"speed\" in hurricane:\n",
+    "        return 0\n",
+    "    return hurricane[\"speed\"]\n",
+    "\n",
+    "# by default, sort is in ascending order (smallest to largest)\n",
+    "sorted(hurricanes, key = get_speed)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "84091bcc",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[{'name': 'A', 'year': 2000, 'speed': 150},\n",
+       " {'name': 'B', 'year': 1980, 'speed': 100},\n",
+       " {'name': 'C', 'year': 1990}]"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# what about sorting fastest to slowest?\n",
+    "sorted(hurricanes, key = get_speed, reverse=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "06fa5ce3",
+   "metadata": {},
+   "source": [
+    "example 2"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0c37fdbd",
+   "metadata": {},
+   "source": [
+    "## lambda functions\n",
+    "Notice that `hurricanes2` below no longer has an entry with a missing `speed` key. We *could* handle a missing key here too, but it makes the code more complicated."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "2f713505",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "hurricanes2 = [\n",
+    "    {\"name\": \"A\", \"year\": 2000, \"speed\": 150},\n",
+    "    {\"name\": \"B\", \"year\": 1980, \"speed\": 100},\n",
+    "    {\"name\": \"C\", \"year\": 1990, \"speed\": 110}, \n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "5ea40390",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[{'name': 'B', 'year': 1980, 'speed': 100},\n",
+       " {'name': 'C', 'year': 1990, 'speed': 110},\n",
+       " {'name': 'A', 'year': 2000, 'speed': 150}]"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sorted(hurricanes2, key = lambda x : x['speed'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "682e1b11",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[{'name': 'B', 'year': 1980, 'speed': 100},\n",
+       " {'name': 'C', 'year': 1990, 'speed': 110},\n",
+       " {'name': 'A', 'year': 2000, 'speed': 150}]"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# sort by year\n",
+    "sorted(hurricanes2, key = lambda x : x['year'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "0f880100",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'bob': 20, 'alice': 8, 'alex': 9, 'cindy': 15}"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "players = {\n",
+    "    \"bob\": 20, \n",
+    "    \"alice\": 8, \n",
+    "    \"alex\": 9, \n",
+    "    \"cindy\": 15} # Key: player_name; Value: score\n",
+    "players"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5b687e79",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "6a326590",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[('bob', 20), ('cindy', 15), ('alex', 9), ('alice', 8)]"
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# sorted always returns a new object, and always returns a list\n",
+    "sorted(players.items(), key = lambda x: x[1], reverse=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "32c29ba5",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[{'name': 'B', 'year': 1980, 'speed': 100},\n",
+       " {'name': 'C', 'year': 1990, 'speed': 110},\n",
+       " {'name': 'A', 'year': 2000, 'speed': 150}]"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# sort modifies the object in-place. Only works with lists\n",
+    "hurricanes2.sort(key = lambda x : x['year'])\n",
+    "hurricanes2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "e4fb3e4a",
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "AttributeError",
+     "evalue": "'dict_items' object has no attribute 'sort'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[21], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mplayers\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mitems\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msort\u001b[49m(key \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mlambda\u001b[39;00m x: x[\u001b[38;5;241m1\u001b[39m], reverse\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n",
+      "\u001b[0;31mAttributeError\u001b[0m: 'dict_items' object has no attribute 'sort'"
+     ]
+    }
+   ],
+   "source": [
+    "# can't .sort() things that aren't lists\n",
+    "players.items().sort(key = lambda x: x[1], reverse=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d78c1f09",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "92395297",
+   "metadata": {},
+   "source": [
+    "# Comprehensions\n",
+    "\n",
+    "\n",
+    "Step 1: \\[ ??? for x in y ??? \\] (fill in iteration piece)\n",
+    "\n",
+    "Step 2: \\[ ??? for x in y  if ??? \\] (fill in \"if\")\n",
+    "\n",
+    "Step 3: \\[ e for x in y  if ??? \\]  (fill in expression)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "id": "1502d714",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[2, 4, 8, 6, 4, 6, 2, 7]"
+      ]
+     },
+     "execution_count": 24,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "words = ['My', 'very', 'educated', 'mother', 'just', 'served', 'us', 'noodles']\n",
+    "# create a list of word lengths\n",
+    "len_list = [len(w) for w in words ]\n",
+    "len_list"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "id": "ee073d66",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['educated', 'mother', 'served', 'noodles']"
+      ]
+     },
+     "execution_count": 25,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "words = ['My', 'very', 'educated', 'mother', 'just', 'served', 'us', 'noodles']\n",
+    "# filter this list to only include words that are longer than 5 letters\n",
+    "[w for w in words if len(w) > 5]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "id": "76649dac",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['my', 'very', 'EDUCATED', 'MOTHER', 'just', 'SERVED', 'us', 'NOODLES']"
+      ]
+     },
+     "execution_count": 26,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "words = ['My', 'very', 'educated', 'mother', 'just', 'served', 'us', 'noodles']\n",
+    "# create a new list. All short words should be lowercase, and all long words should be in all caps\n",
+    "\n",
+    "      # if value     condition           # else value\n",
+    "#   [   ???        if len(w) > 5    else      ???        for w in words ]\n",
+    "[   w.upper()        if len(w) > 5    else      w.lower()        for w in words ]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0c86ed63",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "id": "6f0ec4e6",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[1936, 1089, 3136, 441, 361]"
+      ]
+     },
+     "execution_count": 27,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "numbers = [44, 33, 56, 21, 19]\n",
+    "# create list of squares\n",
+    "[  n ** 2    for n in numbers   ]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "06856ef5",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9f2ccc7c",
+   "metadata": {},
+   "source": [
+    "# Tuples\n",
+    "\n",
+    "1. Tuples are immutable (cannot modify, need to create a new instance)\n",
+    "   - We can't call .sort() on a tuple\n",
+    "   - We can call sorted() but it'll return a list, not a tuple\n",
+    "2. Tuples use indexing and slicing in the same way that lists do"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "id": "7e030d85",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(20, 30, 40, 5)"
+      ]
+     },
+     "execution_count": 28,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "t = (20, 30, 40, 5)\n",
+    "t"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "id": "d2f8b719",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(30, 40, 5)"
+      ]
+     },
+     "execution_count": 30,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "t[1:]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "id": "aea2aebe",
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "AttributeError",
+     "evalue": "'tuple' object has no attribute 'sort'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[31], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mt\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msort\u001b[49m()\n",
+      "\u001b[0;31mAttributeError\u001b[0m: 'tuple' object has no attribute 'sort'"
+     ]
+    }
+   ],
+   "source": [
+    "t.sort()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "id": "bd454683",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[5, 20, 30, 40]\n",
+      "(5, 20, 30, 40)\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(sorted(t))\n",
+    "print(tuple(sorted(t)))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a7bc5955",
+   "metadata": {},
+   "source": [
+    "## NamedTuple"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 50,
+   "id": "63bf692f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "Animal = namedtuple(\"Animal\", [\"Name\", \"Legs\", \"Color\", \"Habitat\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "id": "e0ac8e26",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Animal(Name='fish', Legs=0, Color='green', Habitat='water')"
+      ]
+     },
+     "execution_count": 58,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "fish = Animal(\"fish\", 0, \"green\", \"water\") # using positional parameters\n",
+    "fish"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 52,
+   "id": "8022c9e5",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Animal(Name='cat', Legs=4, Color='brown', Habitat='house')"
+      ]
+     },
+     "execution_count": 52,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cat = Animal(Name = \"cat\", Color = \"brown\", Habitat = \"house\", Legs = 4) # keyword\n",
+    "cat"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "id": "1edf9378",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Animal(Name='tiger', Legs=4, Color='orange', Habitat='jungle')"
+      ]
+     },
+     "execution_count": 53,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tiger = Animal(\"tiger\", 4, Habitat = \"jungle\", Color = 'orange')  # mix of positional (on the left) and keyword\n",
+    "tiger"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "id": "6c8765a4",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "I see a green fish in the water\n",
+      "I see a brown cat in the house\n",
+      "I see a orange tiger in the jungle\n"
+     ]
+    }
+   ],
+   "source": [
+    "animals = [fish, cat, tiger]\n",
+    "for a in animals:\n",
+    "    # print \"I see a <color> <animal name> in the <habitat>\"\n",
+    "    print(\"I see a {} {} in the {}\".format( a.Color, a.Name, a.Habitat ))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 56,
+   "id": "48cb37c3",
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "AttributeError",
+     "evalue": "can't set attribute",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[56], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mfish\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mColor\u001b[49m \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mgold\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;66;03m# won't work, because namedtuples are immutable\u001b[39;00m\n",
+      "\u001b[0;31mAttributeError\u001b[0m: can't set attribute"
+     ]
+    }
+   ],
+   "source": [
+    "fish.Color = 'gold' # won't work, because namedtuples are immutable"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 59,
+   "id": "249d3cd4",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Animal(Name='fish', Legs=0, Color='green', Habitat='water')\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "Animal(Name='goldfish', Legs=0, Color='gold', Habitat='water')"
+      ]
+     },
+     "execution_count": 59,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "print(fish)\n",
+    "fish = Animal(\"goldfish\", fish.Legs, \"gold\", fish.Habitat)\n",
+    "fish"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "10b6169b",
+   "metadata": {},
+   "source": [
+    "# Copying"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 60,
+   "id": "35bd3c3e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# see separate link to python tutor example"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dc4a9341",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
-- 
GitLab