diff --git a/f23/Cole_Lecture_Notes/37_AdvPandas/Lec37_AdvPandas_Solution_Nelson.ipynb b/f23/Cole_Lecture_Notes/37_AdvPandas/Lec37_AdvPandas_Solution_Nelson.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..23f5dc7fbac84ee5935b443ce2259ee78cfbec8b
--- /dev/null
+++ b/f23/Cole_Lecture_Notes/37_AdvPandas/Lec37_AdvPandas_Solution_Nelson.ipynb
@@ -0,0 +1,685 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Advanced Pandas"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "CeWtFirwteFY"
+   },
+   "outputs": [],
+   "source": [
+    "# known import statements\n",
+    "import pandas as pd\n",
+    "import sqlite3\n",
+    "import os\n",
+    "\n",
+    "# new import statement\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get the Piazza data from 'piazza.db'\n",
+    "\n",
+    "db_name = \"piazza.db\"\n",
+    "assert os.path.exists(db_name)\n",
+    "conn = sqlite3.connect(db_name)\n",
+    "\n",
+    "def qry(sql):\n",
+    "    return pd.read_sql(sql, conn)\n",
+    "\n",
+    "df = qry(\"\"\"\n",
+    "    SELECT *\n",
+    "    FROM sqlite_master\n",
+    "    WHERE type='table'\n",
+    "\"\"\")\n",
+    "print(df.iloc[0]['sql'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "piazza_df = pd.read_sql(\"\"\"\n",
+    "    SELECT *\n",
+    "    FROM piazza\n",
+    "\"\"\", conn)\n",
+    "piazza_df.head(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 1: Set the student id column as the index\n",
+    "piazza_df = piazza_df.set_index(\"student_id\")\n",
+    "piazza_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 2a: Which 10 students post the most?\n",
+    "top_students = piazza_df[piazza_df[\"role\"] == \"student\"].sort_values(\"posts\", ascending=False).head(10)\n",
+    "top_students"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 2b: Can you plot their number of posts as a bar graph? Be sure to label your axes!\n",
+    "ax = top_students[\"posts\"].plot.bar()\n",
+    "ax.set_xlabel(\"Student ID\")\n",
+    "ax.set_ylabel(\"# of Posts\")\n",
+    "ax.set_title(\"Top Posting Students\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 2c: How about with their name rather than their student id?\n",
+    "ax = top_students.plot.bar(x=\"name\", y=\"posts\")\n",
+    "ax.set_xlabel(\"Student\")\n",
+    "ax.set_ylabel(\"# of Posts\")\n",
+    "ax.set_title(\"Top Posting Students\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "# Warmup 3a: Which people had more than 10 answers? Include all roles.\n",
+    "top_answers = piazza_df[piazza_df[\"answers\"] > 10].sort_values(\"answers\", ascending=False)\n",
+    "top_answers"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 3b: Plot this as a bar graph.\n",
+    "top_answers[\"answers\"].plot.bar()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 3c: Plot the contributions as a bar graph.\n",
+    "top_answers[\"role\"].value_counts().plot.bar()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 3d: Can you get this same data using SQL?\n",
+    "qry(\"\"\"\n",
+    "SELECT role, COUNT(role) as NumAnswers\n",
+    "FROM piazza\n",
+    "WHERE answers > 10\n",
+    "GROUP BY role\n",
+    "ORDER BY NumAnswers DESC\n",
+    "\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 3e: What about their average # of days online as well?\n",
+    "qry(\"\"\"\n",
+    "SELECT role, COUNT(role) as NumAnswers, AVG(days_online) as AvgDaysOnline\n",
+    "FROM piazza\n",
+    "WHERE answers > 10\n",
+    "GROUP BY role\n",
+    "ORDER BY NumAnswers DESC\n",
+    "\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 3f: Can we do that in Pandas as well?\n",
+    "# Today's topic!"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "yoLGptrqhbBo"
+   },
+   "source": [
+    "# Today's Learning Objectives: \n",
+    "\n",
+    "* Setting column as index for pandas `DataFrame`\n",
+    "* Identify, drop, or fill missing values (`np.nan`) using Pandas `isna`, `dropna`, and `fillna`\n",
+    "* Applying transformations to `DataFrame`:\n",
+    "  * Use `apply` on pandas `Series` to apply a transformation function\n",
+    "  * Use `replace` to replace all target values in Pandas `Series` and `DataFrame` rows / columns\n",
+    "* Filter, aggregate, group, and summarize information in a `DataFrame` with `groupby`\n",
+    "* Convert .groupby examples to SQL\n",
+    "* Solving the same question using SQL and pandas `DataFrame` manipulations:\n",
+    "  * filtering, grouping, and aggregation / summarization"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sort by name... What do we notice?\n",
+    "piazza_df.sort_values(\"name\") # Some names are missing!"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Not a Number\n",
+    "\n",
+    "- `np.nan` is the floating-point representation of Not a Number (NaN)\n",
+    "- You do not need to know / learn the details about the `numpy` package \n",
+    "\n",
+    "### Replacing / modifying values within the `DataFrame`\n",
+    "\n",
+    "Syntax: `df.replace(<TARGET>, <REPLACE>)`\n",
+    "\n",
+    "Let's now replace the missing values (empty strings) with `np.nan`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Replace empty strings with np.nan (the np.NaN alias was removed in NumPy 2.0).\n",
+    "piazza_df = piazza_df.replace(\"\", np.nan)\n",
+    "piazza_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sort by name again... What do we notice?\n",
+    "piazza_df.sort_values(\"name\") # NaN's are at the end!"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Checking for missing values\n",
+    "\n",
+    "Syntax: `Series.isna()`\n",
+    "- Returns a boolean Series"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Run isna() on the name column\n",
+    "piazza_df[\"name\"].isna()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# How many people are missing a name?\n",
+    "piazza_df[\"name\"].isna().value_counts()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# How many people are missing an email?\n",
+    "piazza_df[\"email\"].isna().value_counts()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# How many people are missing both a name and email?\n",
+    "((piazza_df[\"name\"].isna()) & (piazza_df[\"email\"].isna())).value_counts()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# How many people are missing either a name or email?\n",
+    "((piazza_df[\"name\"].isna()) | (piazza_df[\"email\"].isna())).value_counts()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# So... What do we do?\n",
+    "#  1. Drop those rows\n",
+    "#  2. Interpolate / Best Guess"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Option 1: Drop those rows.\n",
+    "pure_piazza_df = piazza_df.dropna()\n",
+    "pure_piazza_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Option 2a: Interpolate / Best Guess\n",
+    "anon_piazza_df = piazza_df.fillna(\"Anonymous\")\n",
+    "anon_piazza_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create a function to take an email (e.g. \"calm_star@wisc.edu\")\n",
+    "# and return the name (e.g. \"calm star\")\n",
+    "def parse_name_from_email(email):\n",
+    "    if pd.isna(email):\n",
+    "        return np.nan\n",
+    "    else:\n",
+    "        return email.split(\"@\")[0].replace(\"_\", \" \")\n",
+    "\n",
+    "# Test your function!\n",
+    "parse_name_from_email(\"calm_star@wisc.edu\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Review: `Pandas.Series.apply(...)`\n",
+    "Syntax: `Series.apply(<FUNCTION OBJECT REFERENCE>)`\n",
+    "- applies input function to every element of the Series.\n",
+    "- Returns a new `Series`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Now, apply that function to each value in email!\n",
+    "piazza_df[\"guessed_name\"] = piazza_df[\"email\"].apply(parse_name_from_email)\n",
+    "piazza_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create a function to take a name (e.g. \"calm star\")\n",
+    "# and return the email (e.g. \"calm_star@wisc.edu\")\n",
+    "def parse_email_from_name(name):\n",
+    "    if pd.isna(name):\n",
+    "        return np.nan\n",
+    "    else:\n",
+    "        return name.replace(\" \", \"_\") + \"@wisc.edu\"\n",
+    "\n",
+    "# Test your function!\n",
+    "parse_email_from_name(\"calm star\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Now, apply that function to each value in name!\n",
+    "piazza_df[\"guessed_email\"] = piazza_df[\"name\"].apply(parse_email_from_name)\n",
+    "piazza_df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### `Pandas.DataFrame.apply(...)`\n",
+    "Syntax: `DataFrame.apply(<FUNCTION OBJECT REFERENCE>, axis=1)`\n",
+    "- `axis=1` means apply to each row.\n",
+    "- returns a new `Series`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# If the name has a value, use it, otherwise use our best guess!\n",
+    "piazza_df[\"name\"] = piazza_df.apply(lambda r : r[\"guessed_name\"] if pd.isna(r[\"name\"]) else r[\"name\"], axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Same thing for email!\n",
+    "piazza_df[\"email\"] = piazza_df.apply(lambda r : r[\"guessed_email\"] if pd.isna(r[\"email\"]) else r[\"email\"], axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Drop the guessing columns\n",
+    "piazza_df = piazza_df.drop(\"guessed_name\", axis=1)\n",
+    "piazza_df = piazza_df.drop(\"guessed_email\", axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# How many rows are missing data now? (dropna() keeps only the complete rows)\n",
+    "len(piazza_df) - len(piazza_df.dropna()) # all rows minus complete rows"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Give a name of \"anonymous\" and email of \"anonymous@wisc.edu\"\n",
+    "# to anyone left with missing data.\n",
+    "piazza_df[\"name\"] = piazza_df[\"name\"].fillna(\"anonymous\")\n",
+    "piazza_df[\"email\"] = piazza_df[\"email\"].fillna(\"anonymous@wisc.edu\")\n",
+    "len(piazza_df)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### `Pandas.DataFrame.groupby(...)`\n",
+    "\n",
+    "Syntax: `DataFrame.groupby(<COLUMN>)`\n",
+    "- Returns a `groupby` object\n",
+    "- Need to apply aggregation functions to use the return value of `groupby`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# What does this return?\n",
+    "piazza_df.groupby(\"role\") # a groupby object!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Try getting the \"mean\" of this groupby object (numeric columns only).\n",
+    "piazza_df.groupby(\"role\").mean(numeric_only=True) # text columns such as name / email cannot be averaged"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# How many answers does the average instructor, student, and TA give?\n",
+    "piazza_df[[\"role\", \"answers\"]].groupby(\"role\").mean()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# How would we write this in SQL?\n",
+    "qry(\"\"\"\n",
+    "SELECT role, AVG(answers)\n",
+    "FROM piazza\n",
+    "GROUP BY role\n",
+    "\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# What is the total number of days spent online for instructors, students, and TAs?\n",
+    "# Order your answer from lowest to highest\n",
+    "piazza_df[[\"role\", \"days_online\"]].groupby(\"role\").sum().sort_values(\"days_online\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# How would we write this in SQL? SUM gives a total, so the alias says \"Total\".\n",
+    "qry(\"\"\"\n",
+    "SELECT role, SUM(days_online) as TotalDaysOnline\n",
+    "FROM piazza\n",
+    "GROUP BY role\n",
+    "ORDER BY TotalDaysOnline\n",
+    "\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Of those individuals who spend less than 100 days online,\n",
+    "# how does their average number of posts compare to those that\n",
+    "# spend 100 days or more online? Do your analysis by role as well.\n",
+    "\n",
+    "less_than_100 = piazza_df[piazza_df[\"days_online\"] < 100]\n",
+    "more_than_100 = piazza_df[piazza_df[\"days_online\"] >= 100]\n",
+    "\n",
+    "# In general, they post less...\n",
+    "print(more_than_100[\"posts\"].mean(), less_than_100[\"posts\"].mean())\n",
+    "print()\n",
+    "\n",
+    "# ... and this is also generally true.\n",
+    "print(more_than_100[[\"role\", \"posts\"]].groupby(\"role\").mean())\n",
+    "print(less_than_100[[\"role\", \"posts\"]].groupby(\"role\").mean())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# How would we write this in SQL?\n",
+    "qry(\"\"\"\n",
+    "SELECT role, AVG(posts) as AvgPosts\n",
+    "FROM piazza\n",
+    "WHERE days_online < 100\n",
+    "GROUP BY role\n",
+    "\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "qry(\"\"\"\n",
+    "SELECT role, AVG(posts) as AvgPosts\n",
+    "FROM piazza\n",
+    "WHERE days_online >= 100\n",
+    "GROUP BY role\n",
+    "\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# What percentage of instructors, students, and TAs did not write a single answer,\n",
+    "# followup, or reply to a followup? (fill_value=0 reports 0% instead of NaN for a role with no such people)\n",
+    "no_answers = piazza_df[(piazza_df[\"answers\"] == 0) & (piazza_df[\"followups\"] == 0) & (piazza_df[\"replies_to_followups\"] == 0)]\n",
+    "no_answers[\"role\"].value_counts().div(piazza_df[\"role\"].value_counts(), fill_value=0) * 100"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# How would we write this in SQL?\n",
+    "# The best we can write (without knowing subqueries) is how many!\n",
+    "qry(\"\"\"\n",
+    "SELECT role, COUNT(*)\n",
+    "FROM piazza\n",
+    "WHERE answers = 0 AND followups = 0 AND replies_to_followups = 0\n",
+    "GROUP BY role\n",
+    "\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ... and then compare this with the total #!\n",
+    "qry(\"\"\"\n",
+    "SELECT role, COUNT(*)\n",
+    "FROM piazza\n",
+    "GROUP BY role\n",
+    "\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "conn.close()"
+   ]
+  }
+ ],
+ "metadata": {
+  "colab": {
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
diff --git a/f23/Cole_Lecture_Notes/37_AdvPandas/Lec37_AdvPandas_Template_Nelson.ipynb b/f23/Cole_Lecture_Notes/37_AdvPandas/Lec37_AdvPandas_Template_Nelson.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..ad7f23ea92e17af3b4bf742e9f6d51fc4c3ba036
--- /dev/null
+++ b/f23/Cole_Lecture_Notes/37_AdvPandas/Lec37_AdvPandas_Template_Nelson.ipynb
@@ -0,0 +1,587 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Advanced Pandas"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "CeWtFirwteFY"
+   },
+   "outputs": [],
+   "source": [
+    "# known import statements\n",
+    "import pandas as pd\n",
+    "import sqlite3\n",
+    "import os\n",
+    "\n",
+    "# new import statement\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get the Piazza data from 'piazza.db'\n",
+    "\n",
+    "db_name = \"piazza.db\"\n",
+    "assert os.path.exists(db_name)\n",
+    "conn = sqlite3.connect(db_name)\n",
+    "\n",
+    "def qry(sql):\n",
+    "    return pd.read_sql(sql, conn)\n",
+    "\n",
+    "df = qry(\"\"\"\n",
+    "    SELECT *\n",
+    "    FROM sqlite_master\n",
+    "    WHERE type='table'\n",
+    "\"\"\")\n",
+    "print(df.iloc[0]['sql'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "piazza_df = pd.read_sql(\"\"\"\n",
+    "    SELECT *\n",
+    "    FROM piazza\n",
+    "\"\"\", conn)\n",
+    "piazza_df.head(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 1: Set the student id column as the index\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 2a: Which 10 students post the most?\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 2b: Can you plot their number of posts as a bar graph? Be sure to label your axes!\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 2c: How about with their name rather than their student id?\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "# Warmup 3a: Which people had more than 10 answers? Include all roles.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 3b: Plot this as a bar graph.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 3c: Plot the contributions as a bar graph.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 3d: Can you get this same data using SQL?\n",
+    "qry(\"\"\"\n",
+    "\n",
+    "\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 3e: What about their average # of days online as well?\n",
+    "qry(\"\"\"\n",
+    "\n",
+    "\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 3f: Can we do that in Pandas as well?\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "yoLGptrqhbBo"
+   },
+   "source": [
+    "# Today's Learning Objectives: \n",
+    "\n",
+    "* Setting column as index for pandas `DataFrame`\n",
+    "* Identify, drop, or fill missing values (`np.nan`) using Pandas `isna`, `dropna`, and `fillna`\n",
+    "* Applying transformations to `DataFrame`:\n",
+    "  * Use `apply` on pandas `Series` to apply a transformation function\n",
+    "  * Use `replace` to replace all target values in Pandas `Series` and `DataFrame` rows / columns\n",
+    "* Filter, aggregate, group, and summarize information in a `DataFrame` with `groupby`\n",
+    "* Convert .groupby examples to SQL\n",
+    "* Solving the same question using SQL and pandas `DataFrame` manipulations:\n",
+    "  * filtering, grouping, and aggregation / summarization"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sort by name... What do we notice?\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Not a Number\n",
+    "\n",
+    "- `np.nan` is the floating-point representation of Not a Number (NaN)\n",
+    "- You do not need to know / learn the details about the `numpy` package \n",
+    "\n",
+    "### Replacing / modifying values within the `DataFrame`\n",
+    "\n",
+    "Syntax: `df.replace(<TARGET>, <REPLACE>)`\n",
+    "\n",
+    "Let's now replace the missing values (empty strings) with `np.nan`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Let's replace these empty strings with a special value.\n",
+    "piazza_df = ???\n",
+    "piazza_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sort by name again... What do we notice?\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Checking for missing values\n",
+    "\n",
+    "Syntax: `Series.isna()`\n",
+    "- Returns a boolean Series"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Run isna() on the name column\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# How many people are missing a name?\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# How many people are missing an email?\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# How many people are missing both a name and email?\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# How many people are missing either a name or email?\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# So... What do we do?\n",
+    "#  1. Drop those rows\n",
+    "#  2. Interpolate / Best Guess"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Option 1: Drop those rows.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Option 2a: Interpolate / Best Guess\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create a function to take an email (e.g. \"calm_star@wisc.edu\")\n",
+    "# and return the name (e.g. \"calm star\")\n",
+    "def parse_name_from_email(email):\n",
+    "    if pd.isna(email):\n",
+    "        return np.nan\n",
+    "    else:\n",
+    "        pass # TODO Parse out the name!\n",
+    "\n",
+    "# Test your function!\n",
+    "parse_name_from_email(\"calm_star@wisc.edu\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Review: `Pandas.Series.apply(...)`\n",
+    "Syntax: `Series.apply(<FUNCTION OBJECT REFERENCE>)`\n",
+    "- applies input function to every element of the Series.\n",
+    "- Returns a new `Series`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Now, apply that function to each value in email!\n",
+    "piazza_df[\"guessed_name\"] = ???\n",
+    "piazza_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create a function to take a name (e.g. \"calm star\")\n",
+    "# and return the email (e.g. \"calm_star@wisc.edu\")\n",
+    "def parse_email_from_name(name):\n",
+    "    pass\n",
+    "\n",
+    "# Test your function!\n",
+    "parse_email_from_name(\"calm star\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Now, apply that function to each value in name!\n",
+    "piazza_df[\"guessed_email\"] = ???\n",
+    "piazza_df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### `Pandas.DataFrame.apply(...)`\n",
+    "Syntax: `DataFrame.apply(<FUNCTION OBJECT REFERENCE>, axis=1)`\n",
+    "- `axis=1` means apply to each row.\n",
+    "- returns a new `Series`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# If the name has a value, use it, otherwise use our best guess!\n",
+    "piazza_df[\"name\"] = piazza_df.apply(lambda r : r[\"guessed_name\"] if pd.isna(r[\"name\"]) else r[\"name\"], axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Same thing for email!\n",
+    "piazza_df[\"email\"] = piazza_df.apply(lambda r : r[\"guessed_email\"] if pd.isna(r[\"email\"]) else r[\"email\"], axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Drop the guessing columns\n",
+    "piazza_df = piazza_df.drop(\"guessed_name\", axis=1)\n",
+    "piazza_df = piazza_df.drop(\"guessed_email\", axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# How many rows are missing data now? (dropna() keeps only the complete rows)\n",
+    "len(piazza_df) - len(piazza_df.dropna()) # all rows minus complete rows"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Give a name of \"anonymous\" and email of \"anonymous@wisc.edu\"\n",
+    "# to anyone left with missing data.\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### `Pandas.DataFrame.groupby(...)`\n",
+    "\n",
+    "Syntax: `DataFrame.groupby(<COLUMN>)`\n",
+    "- Returns a `groupby` object\n",
+    "- Need to apply aggregation functions to use the return value of `groupby`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# What does this return?\n",
+    "piazza_df.groupby(\"role\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Try getting the \"mean\" of this groupby object.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# How many answers does the average instructor, student, and TA give?\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# How would we write this in SQL?\n",
+    "qry(\"\"\"\n",
+    "\n",
+    "\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# What is the total number of days spent online for instructors, students, and TAs?\n",
+    "# Order your answer from lowest to highest\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# How would we write this in SQL?\n",
+    "qry(\"\"\"\n",
+    "\n",
+    "\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Of those individuals who spend less than 100 days online,\n",
+    "# how does their average number of posts compare to those that\n",
+    "# spend 100 days or more online? Do your analysis by role as well.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# How would we write this in SQL?\n",
+    "qry(\"\"\"\n",
+    "\n",
+    "\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# What percentage of instructors, students, and TAs did not write a single answer,\n",
+    "# followup, or reply to a followup?\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# How would we write this in SQL?\n",
+    "qry(\"\"\"\n",
+    "\n",
+    "\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "conn.close()"
+   ]
+  }
+ ],
+ "metadata": {
+  "colab": {
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
diff --git a/f23/Cole_Lecture_Notes/37_AdvPandas/piazza.db b/f23/Cole_Lecture_Notes/37_AdvPandas/piazza.db
new file mode 100644
index 0000000000000000000000000000000000000000..fd42fc45f64189a8a8f6b8013651198c97100fff
Binary files /dev/null and b/f23/Cole_Lecture_Notes/37_AdvPandas/piazza.db differ