diff --git a/sum23/lecture_materials/17_Pandas/lec_17_pandas2_dataframe_template.ipynb b/sum23/lecture_materials/17_Pandas/lec_17_pandas2_dataframe_template.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..b775fd1c48bd7b4c55d93339a8a3944dae31e62d
--- /dev/null
+++ b/sum23/lecture_materials/17_Pandas/lec_17_pandas2_dataframe_template.ipynb
@@ -0,0 +1,1208 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "from pandas import Series, DataFrame\n",
+    "# We can explicitly import Series and DataFrame, why might we do this?"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "###  Series Review\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Series from `list`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "scores_list = [54, 22, 19, 73, 80]\n",
+    "scores_series = Series(scores_list)\n",
+    "scores_series\n",
+    "\n",
+    "# What is the terminology for:  0, 1, 2, ... ??       A:  \n",
+    "# What is the terminology for:  54, 22, 19, .... ??   A:  "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Selecting certain scores.\n",
+    "What are all the scores `> 50`?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Answer:** Boolean indexing. Try the following..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "scores_series[[True, True, False, False, True]] # often called a \"mask\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We are really writing a \"mask\" for our data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Series from `dict`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Imagine we hire students and track their weekly hours\n",
+    "week1 = Series({\"Rita\":5, \"Therese\":3, \"Janice\": 6})\n",
+    "week2 = Series({\"Rita\":3, \"Therese\":7, \"Janice\": 4})\n",
+    "week3 = Series({\"Therese\":5, \"Janice\":5, \"Rita\": 8}) # Wrong order! Will this matter?\n",
+    "print(week1)\n",
+    "print(week2)\n",
+    "print(week3)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "####  For everyone in Week 1, add 3 to their hours "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "week1"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Total up everyone's hours"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "total_hours = ???\n",
+    "total_hours"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### What is week1 / week3 ?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "???\n",
+    "# Notice that we didn't have to worry about the order of indices"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### What type of values are stored in  week1 > week2?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(week1)\n",
+    "print(week2)\n",
+    "???\n",
+    "# Notice that indices are ordered the same"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "####  What is week1 > week3?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(week1)\n",
+    "print(week3)\n",
+    "??? # Does it work?\n",
+    "\n",
+    "# How can we fix this?"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "\n",
+    "# Lecture 17:  Pandas 2 - DataFrames\n",
+    "\n",
+    "\n",
+    "Learning Objectives:\n",
+    "- Create a DataFrame from\n",
+    "  - a dictionary of Series, lists, or dicts\n",
+    "  - a list of Series, lists, or dicts\n",
+    "- Select a column, row, cell, or rectangular region of a DataFrame\n",
+    "- Convert CSV files into DataFrames and DataFrames into CSV Files\n",
+    "- Access the head or tail of a DataFrame"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Big Idea**: DataFrames store 2-dimensional data in tables! A DataFrame is a collection of Series."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## You can create a DataFrame in a variety of ways!\n",
+    "\n",
+    "- dictionary of Series\n",
+    "- dictionary of lists\n",
+    "- dictionary of dictionaries\n",
+    "- list of dictionaries\n",
+    "- list of lists\n",
+    "\n",
+    "### From a dictionary of Series"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "names = Series([\"Alice\", \"Bob\", \"Cindy\", \"Dan\"])\n",
+    "scores = Series([6, 7, 8, 9])\n",
+    "\n",
+    "# to make a dictionary of Series, need to write column names for the keys\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### From a dictionary of lists"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "name_list = [\"Alice\", \"Bob\", \"Cindy\", \"Dan\"]\n",
+    "score_list = [6, 7, 8, 9]\n",
+    "\n",
+    "# this is the same as above, reminding us that Series act like lists\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### From a dictionary of dictionaries\n",
+    "We need to make up keys to match the things in each column"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data = {\n",
+    "    \"Player name\": {0: \"Alice\", 1: \"Bob\", 2: \"Cindy\", 3: \"Dan\"},\n",
+    "    \"Score\": {0: 6, 1: 7, 2: 8, 3: 9}\n",
+    "}\n",
+    "data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### From a list of dicts"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data = [\n",
+    "    {\"Player name\": \"Alice\", \"Score\": 6},\n",
+    "    {\"Player name\": \"Bob\", \"Score\": 7},\n",
+    "    {\"Player name\": \"Cindy\", \"Score\": 8},\n",
+    "    {\"Player name\": \"Dan\", \"Score\": 9}\n",
+    "]\n",
+    "data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### From a list of lists"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data = [\n",
+    "    [\"Alice\", 6],\n",
+    "    [\"Bob\", 7],\n",
+    "    [\"Cindy\", 8],\n",
+    "    [\"Dan\", 9]\n",
+    "]\n",
+    "data\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Explicitly naming the columns\n",
+    "We have to add the column names, we do this with `columns = [name1, name2, ....]`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data = [\n",
+    "    [\"Alice\", 6],\n",
+    "    [\"Bob\", 7],\n",
+    "    [\"Cindy\", 8],\n",
+    "    [\"Dan\", 9]\n",
+    "]\n",
+    "data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Explicitly naming the indices\n",
+    "We can use `index = [name1, name2, ...]` to rename the index of each row"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data = [\n",
+    "    {\"Player name\": \"Alice\", \"Score\": 6},\n",
+    "    {\"Player name\": \"Bob\", \"Score\": 7},\n",
+    "    {\"Player name\": \"Cindy\", \"Score\": 8},\n",
+    "    {\"Player name\": \"Dan\", \"Score\": 9}\n",
+    "]\n",
+    "data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# TODO: \n",
+    "# Make a DataFrame of 4 people you know with different ages\n",
+    "# Give names to both the columns and rows\n",
+    "\n",
+    "# Share how you did this with your neighbor\n",
+    "# If you both did it the same way, try it a different way."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Select a column, row, cell, or rectangular region of a DataFrame\n",
+    "### Data lookup: Series\n",
+    "- `s.loc[X]`   <- lookup by pandas index\n",
+    "- `s.iloc[X]`  <- lookup by integer position"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "hours = Series({\"Alice\": 6, \"Bob\": 7, \"Cindy\": 8, \"Dan\": 9})\n",
+    "hours"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Lookup Bob's hours by pandas index.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Lookup Bob's hours by integer position.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Lookup Cindy's hours by pandas index.\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "###  Data lookup: DataFrame\n",
+    "\n",
+    "\n",
+    "- `d.loc[r]`     lookup ROW by pandas ROW index\n",
+    "- `d.iloc[r]`    lookup ROW by ROW integer position\n",
+    "- `d[c]`         lookup COL by pandas COL index\n",
+    "- `d.loc[r, c]`  lookup by pandas ROW index and pandas COL index\n",
+    "- `d.iloc[r, c]`  lookup by ROW integer position and COL integer position"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# We often call the object that we make df\n",
+    "data = [\n",
+    "    [\"Hope\", 10],\n",
+    "    [\"Peace\", 7],\n",
+    "    [\"Joy\", 4],\n",
+    "    [\"Love\", 11]\n",
+    "]\n",
+    "df = DataFrame(data, index = [\"H\", \"P\", \"J\", \"L\"], columns = [\"Player name\", \"Score\"])\n",
+    "df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### What are 3 different ways of accessing row L? "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### How about accessing a column?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### What are 3 different ways to access a single cell?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## How to set values for a specific entry?\n",
+    "\n",
+    "- `d.loc[r, c] = new_val`\n",
+    "- `d.iloc[r, c] = new_val`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# change player L's name\n",
+    "df.loc[\"L\", \"Player name\"] = \"Luisa\"\n",
+    "df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# then add 3 to that player's score using .loc\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# add 7 to a different player's score using .iloc\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Find the max score and the mean score"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# find the max and mean of the \"Score\" column\n",
+    "print(df[\"Score\"].max(), df[\"Score\"].mean())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Find the highest scoring player"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "##  Slicing a DataFrame\n",
+    "\n",
+    "- `df.iloc[ROW_SLICE, COL_SLICE]` <- make a rectangular slice from the DataFrame using integer positions\n",
+    "- `df.loc[ROW_SLICE, COL_SLICE]` <- make a rectangular slice from the DataFrame using index"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df.iloc[1:3, 0:2]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df.loc[\"P\":\"J\", \"Player name\":\"Score\"] # notice that this way is inclusive of endpoints"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Set values for sliced DataFrame\n",
+    "\n",
+    "- `d.loc[ROW_SLICE, COL_SLICE] = new_val` <- set value by ROW INDEX and COL INDEX\n",
+    "- `d.iloc[ROW_SLICE, COL_SLICE] = new_val` <- set value by ROW Integer position and COL Integer position"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df.loc[\"P\":\"J\", \"Score\"] += 5\n",
+    "df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Pandas allows selecting non-contiguous rows and columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# just get Player name for Index P and L\n",
+    "df.loc[[\"P\", \"L\"],\"Player name\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# add 2 to the people in rows P and L\n",
+    "df.loc[[\"P\", \"L\"],\"Score\"] += 2\n",
+    "df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Boolean indexing on a DataFrame\n",
+    "\n",
+    "- `d[BOOL SERIES]`  <- makes a new DataFrame containing only the rows where the Boolean Series is True"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Make a Series of Booleans based on Score >= 15"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "b"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### use b to slice the DataFrame\n",
+    "if b is true, include this row in the new df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### do the last two things in a single step"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Creating DataFrame from csv"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# it's that easy!  \n",
+    "df = pd.read_csv(\"IMDB-Movie-Data.csv\")\n",
+    "df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "###   View the first few lines of the DataFrame\n",
+    "- `.head(n)` gets the first n lines, 5 is the default"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### get the first 2 rows"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "###   View the last few lines of the DataFrame\n",
+    "- `.tail(n)` gets the last n lines, 5 is the default"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### What are the first and last years in our dataset?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Extract Year column\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"First year: {}, Last year: {}\".format(???))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### What are the rows that correspond to movies whose title contains \"Harry\" ? \n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### What is the movie at index 6 ? "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Notice that there are two index columns\n",
+    "- That happened because when you write a csv from pandas to a file, it writes a new index column\n",
+    "- So if the DataFrame already contains an index, you are going to get two index columns\n",
+    "- Let's fix that problem"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### How can you use slicing to get just columns with Title and Year?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df2 = ???\n",
+    "df2\n",
+    "# notice that this does not have the 'index' column"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### How can you use slicing to get rid of the first column?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = df.iloc[???] #all the rows, not column 0\n",
+    "df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Write a df to a csv file"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df.to_csv(\"better_movies.csv\", index = False)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Practice on your own.....Data Analysis with Data Frames\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### What are all the movies that have above average run time (long movies)? "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "long_movies = ???\n",
+    "long_movies"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Which long movie has the lowest rating?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# of these movies, what was the min rating? \n",
+    "min_rating = ???\n",
+    "min_rating"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Which movies had this min rating?\n",
+    "???"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### What are all long movies with someone in the cast named \"Emma\" ? "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "???"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### What is the title of the shortest movie?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "???"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### What movie had the highest revenue?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df[\"Revenue\"].max() # does not work, Why?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# We need to clean our data\n",
+    "# Some movies have M at the end and others don't.\n",
+    "# All revenues are in millions of dollars.\n",
+    "def format_revenue(revenue):\n",
+    "    \"\"\" \n",
+    "    Checks the last character of the string and formats accordingly\n",
+    "    \"\"\"\n",
+    "    if type(revenue) == float: # need this in here if we run code multiple times\n",
+    "        return revenue\n",
+    "    elif revenue[-1] == 'M': # some have an \"M\" at the end\n",
+    "        return ??? # TODO: convert relevant part of the string to float and multiply by 1e6\n",
+    "    else:\n",
+    "        return ??? # TODO: convert to float and multiply by 1e6"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# What movie had the highest revenue?\n",
+    "revenue = df[\"Revenue\"].apply(format_revenue) # apply a function to a column; returns a Series\n",
+    "print(revenue.head())\n",
+    "max_revenue = revenue.max()\n",
+    "\n",
+    "# make a copy of our df\n",
+    "rev_df = df.copy()\n",
+    "rev_df[\"Revenue (float)\"] = revenue\n",
+    "rev_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Now we can answer the question!\n",
+    "???"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Or more generally...\n",
+    "rev_df.sort_values(by = \"Revenue (float)\", ascending = False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### What is the average runtime for movies by \"Francis Lawrence\"?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### More complicated questions..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Which director had the highest average rating? \n",
+    "\n",
+    "# one way is to make a python dict of director, list of ratings\n",
+    "director_dict = dict()\n",
+    "\n",
+    "# make the dictionary: key is director, value is list of ratings\n",
+    "for i in range(len(df)):\n",
+    "    director = df.loc[i, \"Director\"]\n",
+    "    rating = df.loc[i, \"Rating\"]\n",
+    "    #print(i, director, rating)\n",
+    "    if director not in director_dict:\n",
+    "        director_dict[director] = []\n",
+    "    director_dict[director].append(rating)\n",
+    "\n",
+    "# make a ratings dict: key is director, value is average rating\n",
+    "# only include directors with > 4 movies\n",
+    "ratings_dict = {k:sum(v)/len(v) for (k,v) in director_dict.items() if len(v) > 4}\n",
+    "\n",
+    "#sort a dict by values\n",
+    "dict(sorted(ratings_dict.items(), key=lambda t:t[-1], reverse=True))\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# FOR DEMONSTRATION PURPOSES ONLY\n",
+    "# We haven't learned (and will not learn) about \"groupby\"\n",
+    "# Pandas has many operations which will be helpful!\n",
+    "\n",
+    "# Consider what you already know, and what Pandas can solve\n",
+    "# when formulating your solutions.\n",
+    "rating_groups = df.groupby(\"Director\")[\"Rating\"]\n",
+    "rating_groups.mean()[rating_groups.count() > 4].sort_values(ascending=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Extra Practice: Make up some of your own questions about the movies"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}