cole lec38

96be45cd · Cole Nelson · df91f45c · 96be45cd · 96be45cd · 96be45cd
Commit 96be45cd authored 1 year ago by Cole Nelson
--- a/f23/Cole_Lecture_Notes/38_Plotting3/Lec38_Plotting3_Solution_Nelson.ipynb
+++ b/f23/Cole_Lecture_Notes/38_Plotting3/Lec38_Plotting3_Solution_Nelson.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 0\n",
+    "import sqlite3\n",
+    "import pandas as pd\n",
+    "from pandas import DataFrame, Series\n",
+    "import matplotlib\n",
+    "from matplotlib import pyplot as plt\n",
+    "matplotlib.rcParams[\"font.size\"] = 16"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 1: Write a function that converts any Fahrenheit temp to Celsius\n",
+    "# Note: The final exam will have a select amount of material from earlier in the course\n",
+    "# C = (5/9) * (f-32)\n",
+    "\n",
+    "def f_to_c(f):\n",
+    "    return (5/9) * (f-32)\n",
+    "\n",
+    "\n",
+    "# test it by making several calls\n",
+    "print(f_to_c(212))\n",
+    "print(f_to_c(32))\n",
+    "print(f_to_c(67))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 2a: Save all the data from the \"piazza\" table to \"piazza_df\"\n",
+    "piazza_conn = sqlite3.connect(\"piazza.db\")\n",
+    "piazza_df = pd.read_sql(\"SELECT * FROM piazza\", piazza_conn)\n",
+    "piazza_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 2b: Set the index of piazza_df to be student_id\n",
+    "piazza_df = piazza_df.set_index(\"student_id\")\n",
+    "piazza_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 2c: Add a column \"total\" to \"piazza_df\". This should be the sum of \n",
+    "# the number of posts, answers, edits, followups, and replies_to_followups\n",
+    "piazza_df[\"total\"] = (piazza_df[\"posts\"] + piazza_df[\"answers\"] + piazza_df[\"edits\"] + piazza_df[\"followups\"] + piazza_df[\"replies_to_followups\"])\n",
+    "# piazza_df[\"total\"] = piazza_df.loc[:, \"posts\":\"replies_to_followups\"].sum(axis=1).sort_values() # advanced way!\n",
+    "piazza_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 2d: Create a new dataframe \"contributors_df\" which contains those\n",
+    "#            that had more than 0 total contributions, and sort by this\n",
+    "#            value from highest to lowest. Break ties by name in alphabetical order.\n",
+    "contributors_df = piazza_df[piazza_df[\"total\"] > 0]\n",
+    "contributors_df = contributors_df.sort_values([\"total\", \"name\"], ascending=[False, True])\n",
+    "contributors_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 2e: How would we have done this in sql?\n",
+    "pd.read_sql(\"\"\"\n",
+    "SELECT *, posts + answers + edits + followups + replies_to_followups AS total\n",
+    "FROM piazza\n",
+    "WHERE total > 0\n",
+    "ORDER BY total DESC, name ASC\n",
+    "\"\"\", piazza_conn).set_index(\"student_id\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 3a: Of those that contributed, what was their average number of contributions?\n",
+    "#            Do your analysis by role (e.g. by ta, instructor, and student)\n",
+    "contributors_df.groupby(\"role\")[\"total\"].mean()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 3b: How would we have done this in sql?\n",
+    "pd.read_sql(\"\"\"\n",
+    "SELECT\n",
+    "    role,\n",
+    "    posts + answers + edits + followups + replies_to_followups AS total,\n",
+    "    AVG(posts + answers + edits + followups + replies_to_followups) as avg_total\n",
+    "FROM piazza\n",
+    "WHERE total > 0\n",
+    "GROUP BY role\n",
+    "\"\"\", piazza_conn).set_index(\"role\")[\"avg_total\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 4: What is the correlation between all of the columns?\n",
+    "# numeric_only=True leaves out text columns such as \"name\" and \"role\";\n",
+    "# without it, pandas >= 2.0 raises an error when it meets non-numeric data.\n",
+    "contributors_df.corr(numeric_only=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 5: Close the connection.\n",
+    "piazza_conn.close()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Plotting Applications\n",
+    "\n",
+    "**Learning Objectives**\n",
+    "\n",
+    "- Make a line plot on a series or on a DataFrame\n",
+    "- Apply features of line plots and bar plots to visualize results of data investigations\n",
+    "- Clean Series data by dropping NaN values and by converting to int\n",
+    "- Make a stacked bar plot"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Line plots\n",
+    "- `SERIES.plot.line()`\n",
+    "- `DATAFRAME.plot.line()`    each column in the data frame becomes a line in the plot\n",
+    "\n",
+    "https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.plot.line.html"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# when you make a Series from a list, the default indices are 0, 1, 2, ...\n",
+    "s = Series([1758, 2002, 2408, 2898, 3814, 4803, 5713, 6661, 7618, 8391, 8764]) # y values\n",
+    "s.plot.line()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# You can make a series from a list and add indices\n",
+    "s = Series([1758, 2002, 2408, 2898, 3814, 4803, 5713, 6661, 7618, 8391, 8764], \\\n",
+    "           index=[2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020])\n",
+    "s.plot.line()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# We can save the AxesSubplot and \"beautify\" it like the other plots...\n",
+    "ax = s.plot.line()\n",
+    "ax.set_title(\"Number of Craft Breweries in the USA\")\n",
+    "ax.set_xlabel(\"Year\")\n",
+    "ax.set_ylabel(\"# Craft Breweries\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Be careful! If the indices are out of order you get a mess\n",
+    "# pandas plots each (index, value) in the order given\n",
+    "s = Series([1758, 2408, 2898, 3814, 4803, 5713, 6661, 7618, 8391, 8764, 2002], \\\n",
+    "           index=[2010, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2011])\n",
+    "s.plot.line()\n",
+    "s"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# you can fix this by calling sort_index()\n",
+    "s.sort_index().plot.line()\n",
+    "s.sort_index()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Plotting lines from a DataFrame"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# This DataFrame is made using a dict of lists\n",
+    "# City of Madison normal high and low (degrees F) by month\n",
+    "temp_df = DataFrame( \n",
+    "    {\n",
+    "    \"high\": [26, 31, 43, 57, 68, 78, 82, 79, 72, 59, 44, 30],\n",
+    "    \"low\": [11, 15, 25, 36, 46, 56, 61, 59, 50, 39, 28, 16]     }\n",
+    ")\n",
+    "temp_df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### A Line Plot made from a DataFrame automatically plots all columns\n",
+    "\n",
+    "The same is true for bar plots; we'll see this later."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# You can also add ticks and ticklabels to a line plot\n",
+    "\n",
+    "ax = temp_df.plot.line(figsize=(12, 4))\n",
+    "ax.set_title(\"Average Temperatures in Madison, WI\")\n",
+    "ax.set_xlabel(\"Month\")\n",
+    "ax.set_ylabel(\"Temp (Fahrenheit)\")\n",
+    "ax.set_xticks(range(12))   # makes a range from 0 to 11\n",
+    "ax.set_xticklabels([\"Jan\", \"Feb\", \"Mar\", \"Apr\", \"May\", \"Jun\",\n",
+    "                   \"Jul\", \"Aug\", \"Sep\", \"Oct\", \"Nov\", \"Dec\"])\n",
+    "\n",
+    "# This gets rid of the weird output\n",
+    "None"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ... Or explicitly pass the \"x\" and \"y\" parameters...\n",
+    "temp_df_with_month = DataFrame( \n",
+    "    {\n",
+    "    \"month\": [\"Jan\", \"Feb\", \"Mar\", \"Apr\", \"May\", \"Jun\",\n",
+    "                   \"Jul\", \"Aug\", \"Sep\", \"Oct\", \"Nov\", \"Dec\"],\n",
+    "    \"high\": [26, 31, 43, 57, 68, 78, 82, 79, 72, 59, 44, 30],\n",
+    "    \"low\": [11, 15, 25, 36, 46, 56, 61, 59, 50, 39, 28, 16]     }\n",
+    ")\n",
+    "\n",
+    "ax = temp_df_with_month.plot.line(x=\"month\", y=[\"high\", \"low\"], figsize=(12, 4))\n",
+    "ax.set_title(\"Average Temperatures in Madison, WI\")\n",
+    "ax.set_xlabel(\"Month\")\n",
+    "ax.set_ylabel(\"Temp (Fahrenheit)\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### We can perform a calculation on an entire DataFrame\n",
+    "Let's change the entire DataFrame to Celsius"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# call the function on the dataframe\n",
+    "celcius_df = f_to_c(temp_df)\n",
+    "celcius_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# here is one way to add a horizontal line to our line plots\n",
+    "celcius_df[\"freezing\"] = 0\n",
+    "celcius_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# this plots each column as lines\n",
+    "# with rotation for the tick labels\n",
+    "ax = celcius_df.plot.line(y=[\"high\", \"low\", \"freezing\"], figsize = (12,4))\n",
+    "ax.set_xlabel(\"Month\")\n",
+    "ax.set_ylabel(\"Temp (Celcius)\")\n",
+    "ax.set_xticks(range(12))\n",
+    "ax.set_xticklabels([\"Jan\", \"Feb\", \"Mar\", \"Apr\", \"May\", \"Jun\",\n",
+    "                    \"Jul\", \"Aug\", \"Sep\", \"Oct\", \"Nov\", \"Dec\"], rotation=45)\n",
+    "None"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Bar Plot Example w/ Fire Hydrants\n",
+    "\n",
+    "- General review of Pandas\n",
+    "- Some new Bar Plot options\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "hdf = pd.read_csv(\"Fire_Hydrants.csv\")\n",
+    "hdf.tail()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# grab just the column names\n",
+    "hdf.columns"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Let's create a *bar plot* to visualize *colors* of fire hydrants."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# make a Series called color_counts which stores the value counts of the \"nozzle_color\" column\n",
+    "color_counts = hdf[\"nozzle_color\"].value_counts()\n",
+    "color_counts # what type is this? "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# TODO: Clean the data ......use str.upper()\n",
+    "\n",
+    "color_counts= hdf[\"nozzle_color\"].str.upper().value_counts()\n",
+    "color_counts"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# make a horizontal bar plot of counts of colors and have the colors match\n",
+    "# use color list: [\"b\", \"g\", \"darkorange\", \"r\", \"c\", \"0.5\"]\n",
+    "ax = color_counts.plot.barh(color=[\"b\", \"g\", \"darkorange\", \"r\", \"c\", \"0.5\"])\n",
+    "# barh draws the counts along the x-axis, so the count label goes there;\n",
+    "# the y-axis holds one bar per nozzle color\n",
+    "ax.set_xlabel(\"Fire hydrant count\")\n",
+    "ax.set_ylabel(\"Nozzle color\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Let's create a *bar plot* to visualize *style* of fire hydrants."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Do the same thing as we did for the colors but this time for the \"Style\"\n",
+    "style_counts = hdf[\"Style\"].str.upper().value_counts()\n",
+    "style_counts"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "style_counts.plot.bar()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Grab the top 12 \n",
+    "top12 = style_counts.iloc[:12]\n",
+    "\n",
+    "# and then add an \"other\" entry to our Series holding the sum of all the remaining styles\n",
+    "top12[\"other\"] = style_counts.iloc[12:].sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Plot the results\n",
+    "ax = top12.plot.bar(color=\"firebrick\")\n",
+    "ax.set_ylabel(\"Hydrant Count\")\n",
+    "ax.set_xlabel(\"Hydrant Type\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Plot the year manufactured for the Pacer Style as opposed to other styles"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Let's get the year manufactured for all of the \"Pacer\" hydrants.\n",
+    "\n",
+    "pacer_years = hdf[hdf[\"Style\"] == \"Pacer\"][\"year_manufactured\"]\n",
+    "\n",
+    "# Note: We can do this either way\n",
+    "# pacer_years = hdf[\"year_manufactured\"][hdf[\"Style\"] == \"Pacer\"]\n",
+    "\n",
+    "pacer_years\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# then do the same for all the other data\n",
+    "other_years = hdf[\"year_manufactured\"][hdf[\"Style\"] != \"Pacer\"]\n",
+    "other_years"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Round each year down to the start of the decade.\n",
+    "# e.g. 1987 --> 1980,   2003 --> 2000\n",
+    "pacer_decades = (pacer_years // 10 * 10)\n",
+    "pacer_decades"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Drop the NaN values, convert to int, and do value counts\n",
+    "pacer_decades = pacer_decades.dropna()\n",
+    "pacer_decades = pacer_decades.astype(int).value_counts()\n",
+    "pacer_decades"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Do the same thing for other_years. Save to a variable called \"other_decades\"\n",
+    "other_decades = (other_years // 10 * 10).dropna()\n",
+    "other_decades = other_decades.astype(int).value_counts()\n",
+    "other_decades"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Build a DataFrame from a dictionary of key, Series\n",
+    "plot_df = DataFrame({\n",
+    "    \"pacer\": pacer_decades,\n",
+    "    \"other\": other_decades,\n",
+    "})\n",
+    "plot_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# make a bar plot\n",
+    "\n",
+    "ax = plot_df.plot.bar()\n",
+    "ax.set_xlabel(\"Decade\")\n",
+    "ax.set_ylabel(\"Hydrant Count\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Ignore data from before 1950 using boolean indexing.\n",
+    "ax = plot_df[plot_df.index >= 1950].plot.bar()\n",
+    "ax.set_xlabel(\"Decade\")\n",
+    "ax.set_ylabel(\"Hydrant Count\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Make a Stacked Bar Chart!\n",
+    "ax = plot_df[plot_df.index >= 1950].plot.bar(stacked=True)\n",
+    "ax.set_xlabel(\"Decade\")\n",
+    "ax.set_ylabel(\"Hydrant Count\")\n",
+    "None"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
+%% Cell type:code id: tags:
+
+``` python
+# Warmup 0
+import sqlite3
+import pandas as pd
+from pandas import DataFrame, Series
+import matplotlib
+from matplotlib import pyplot as plt
+matplotlib.rcParams["font.size"] = 16
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Warmup 1: Write a function that converts any Fahrenheit temp to Celsius
+# Note: The final exam will have a select amount of material from earlier in the course
+# C = (5/9) * (f-32)
+
+def f_to_c(f):
+    return (5/9) * (f-32)
+
+
+# test it by making several calls
+print(f_to_c(212))
+print(f_to_c(32))
+print(f_to_c(67))
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Warmup 2a: Save all the data from the "piazza" table to "piazza_df"
+piazza_conn = sqlite3.connect("piazza.db")
+piazza_df = pd.read_sql("SELECT * FROM piazza", piazza_conn)
+piazza_df
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Warmup 2b: Set the index of piazza_df to be student_id
+piazza_df = piazza_df.set_index("student_id")
+piazza_df
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Warmup 2c: Add a column "total" to "piazza_df". This should be the sum of
+# the number of posts, answers, edits, followups, and replies_to_followups
+piazza_df["total"] = (piazza_df["posts"] + piazza_df["answers"] + piazza_df["edits"] + piazza_df["followups"] + piazza_df["replies_to_followups"])
+# piazza_df["total"] = piazza_df.loc[:, "posts":"replies_to_followups"].sum(axis=1).sort_values() # advanced way!
+piazza_df
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Warmup 2d: Create a new dataframe "contributors_df" which contains those
+#            that had more than 0 total contributions, and sort by this
+#            value from highest to lowest. Break ties by name in alphabetical order.
+contributors_df = piazza_df[piazza_df["total"] > 0]
+contributors_df = contributors_df.sort_values(["total", "name"], ascending=[False, True])
+contributors_df
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Warmup 2e: How would we have done this in sql?
+pd.read_sql("""
+SELECT *, posts + answers + edits + followups + replies_to_followups AS total
+FROM piazza
+WHERE total > 0
+ORDER BY total DESC, name ASC
+""", piazza_conn).set_index("student_id")
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Warmup 3a: Of those that contributed, what was their average number of contributions?
+#            Do your analysis by role (e.g. by ta, instructor, and student)
+contributors_df.groupby("role")["total"].mean()
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Warmup 3b: How would we have done this in sql?
+pd.read_sql("""
+SELECT
+    role,
+    posts + answers + edits + followups + replies_to_followups AS total,
+    AVG(posts + answers + edits + followups + replies_to_followups) as avg_total
+FROM piazza
+WHERE total > 0
+GROUP BY role
+""", piazza_conn).set_index("role")["avg_total"]
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Warmup 4: What is the correlation between all of the columns?
+contributors_df.corr()
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Warmup 5: Close the connection.
+piazza_conn.close()
+```
+
+%% Cell type:markdown id: tags:
+
+# Plotting Applications
+
+**Learning Objectives**
+
+- Make a line plot on a series or on a DataFrame
+- Apply features of line plots and bar plots to visualize results of data investigations
+- Clean Series data by dropping NaN values and by converting to int
+- Make a stacked bar plot
+
+%% Cell type:markdown id: tags:
+
+## Line plots
+- `SERIES.plot.line()`
+- `DATAFRAME.plot.line()`    each column in the data frame becomes a line in the plot
+
+https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.plot.line.html
+
+%% Cell type:code id: tags:
+
+``` python
+# when you make a Series from a list, the default indices are 0, 1, 2, ...
+s = Series([1758, 2002, 2408, 2898, 3814, 4803, 5713, 6661, 7618, 8391, 8764]) # y values
+s.plot.line()
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# You can make a series from a list and add indices
+s = Series([1758, 2002, 2408, 2898, 3814, 4803, 5713, 6661, 7618, 8391, 8764], \
+           index=[2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020])
+s.plot.line()
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# We can save the AxesSubplot and "beautify" it like the other plots...
+ax = s.plot.line()
+ax.set_title("Number of Craft Breweries in the USA")
+ax.set_xlabel("Year")
+ax.set_ylabel("# Craft Breweries")
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Be careful! If the indices are out of order you get a mess
+# pandas plots each (index, value) in the order given
+s = Series([1758, 2408, 2898, 3814, 4803, 5713, 6661, 7618, 8391, 8764, 2002], \
+           index=[2010, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2011])
+s.plot.line()
+s
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# you can fix this by calling sort_index()
+s.sort_index().plot.line()
+s.sort_index()
+```
+
+%% Cell type:markdown id: tags:
+
+### Plotting lines from a DataFrame
+
+%% Cell type:code id: tags:
+
+``` python
+# This DataFrame is made using a dict of lists
+# City of Madison normal high and low (degrees F) by month
+temp_df = DataFrame(
+    {
+    "high": [26, 31, 43, 57, 68, 78, 82, 79, 72, 59, 44, 30],
+    "low": [11, 15, 25, 36, 46, 56, 61, 59, 50, 39, 28, 16]     }
+)
+temp_df
+```
+
+%% Cell type:markdown id: tags:
+
+### A Line Plot made from a DataFrame automatically plots all columns
+
+The same is true for bar plots; we'll see this later.
+
+%% Cell type:code id: tags:
+
+``` python
+# You can also add ticks and ticklabels to a line plot
+
+ax = temp_df.plot.line(figsize=(12, 4))
+ax.set_title("Average Temperatures in Madison, WI")
+ax.set_xlabel("Month")
+ax.set_ylabel("Temp (Fahrenheit)")
+ax.set_xticks(range(12))   # makes a range from 0 to 11
+ax.set_xticklabels(["Jan", "Feb", "Mar", "Apr", "May", "Jun",
+                   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"])
+
+# This gets rid of the weird output
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# ... Or explicitly pass the "x" and "y" parameters...
+temp_df_with_month = DataFrame(
+    {
+    "month": ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
+                   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"],
+    "high": [26, 31, 43, 57, 68, 78, 82, 79, 72, 59, 44, 30],
+    "low": [11, 15, 25, 36, 46, 56, 61, 59, 50, 39, 28, 16]     }
+)
+
+ax = temp_df_with_month.plot.line(x="month", y=["high", "low"], figsize=(12, 4))
+ax.set_title("Average Temperatures in Madison, WI")
+ax.set_xlabel("Month")
+ax.set_ylabel("Temp (Fahrenheit)")
+```
+
+%% Cell type:markdown id: tags:
+
+### We can perform a calculation on an entire DataFrame
+Let's change the entire DataFrame to Celsius
+
+%% Cell type:code id: tags:
+
+``` python
+# call the function on the dataframe
+celcius_df = f_to_c(temp_df)
+celcius_df
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# here is one way to add a horizontal line to our line plots
+celcius_df["freezing"] = 0
+celcius_df
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# this plots each column as lines
+# with rotation for the tick labels
+ax = celcius_df.plot.line(y=["high", "low", "freezing"], figsize = (12,4))
+ax.set_xlabel("Month")
+ax.set_ylabel("Temp (Celcius)")
+ax.set_xticks(range(12))
+ax.set_xticklabels(["Jan", "Feb", "Mar", "Apr", "May", "Jun",
+                    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"], rotation=45)
+None
+```
+
+%% Cell type:markdown id: tags:
+
+### Bar Plot Example w/ Fire Hydrants
+
+- General review of Pandas
+- Some new Bar Plot options
+
+%% Cell type:code id: tags:
+
+``` python
+hdf = pd.read_csv("Fire_Hydrants.csv")
+hdf.tail()
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# grab just the column names
+hdf.columns
+```
+
+%% Cell type:markdown id: tags:
+
+### Let's create a *bar plot* to visualize *colors* of fire hydrants.
+
+%% Cell type:code id: tags:
+
+``` python
+# make a Series called color_counts which stores the value counts of the "nozzle_color" column
+color_counts = hdf["nozzle_color"].value_counts()
+color_counts # what type is this?
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# TODO: Clean the data ......use str.upper()
+
+color_counts= hdf["nozzle_color"].str.upper().value_counts()
+color_counts
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# make a horizontal bar plot of counts of colors and have the colors match
+# use color list: ["b", "g", "darkorange", "r", "c", "0.5"]
+ax = color_counts.plot.barh(color=["b", "g", "darkorange", "r", "c", "0.5"])
+ax.set_ylabel("Fire hydrant count")
+```
+
+%% Cell type:markdown id: tags:
+
+### Let's create a *bar plot* to visualize *style* of fire hydrants.
+
+%% Cell type:code id: tags:
+
+``` python
+# Do the same thing as we did for the colors but this time for the "Style"
+style_counts = hdf["Style"].str.upper().value_counts()
+style_counts
+```
+
+%% Cell type:code id: tags:
+
+``` python
+style_counts.plot.bar()
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Grab the top 12
+top12 = style_counts.iloc[:12]
+
+# and then add an "other" entry to our Series holding the sum of all the remaining styles
+top12["other"] = style_counts.iloc[12:].sum()
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Plot the results
+ax = top12.plot.bar(color="firebrick")
+ax.set_ylabel("Hydrant Count")
+ax.set_xlabel("Hydrant Type")
+```
+
+%% Cell type:markdown id: tags:
+
+### Plot the year manufactured for the Pacer Style as opposed to other styles
+
+%% Cell type:code id: tags:
+
+``` python
+# Let's get the year manufactured for all of the "Pacer" hydrants.
+
+pacer_years = hdf[hdf["Style"] == "Pacer"]["year_manufactured"]
+
+# Note: We can do this either way
+# pacer_years = hdf["year_manufactured"][hdf["Style"] == "Pacer"]
+
+pacer_years
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# then do the same for all the other data
+other_years = hdf["year_manufactured"][hdf["Style"] != "Pacer"]
+other_years
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Round each year down to the start of the decade.
+# e.g. 1987 --> 1980,   2003 --> 2000
+pacer_decades = (pacer_years // 10 * 10)
+pacer_decades
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Drop the NaN values, convert to int, and do value counts
+pacer_decades = pacer_decades.dropna()
+pacer_decades = pacer_decades.astype(int).value_counts()
+pacer_decades
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Do the same thing for other_years. Save to a variable called "other_decades"
+other_decades = (other_years // 10 * 10).dropna()
+other_decades = other_decades.astype(int).value_counts()
+other_decades
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Build a DataFrame from a dictionary of key, Series
+plot_df = DataFrame({
+    "pacer": pacer_decades,
+    "other": other_decades,
+})
+plot_df
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# make a bar plot
+
+ax = plot_df.plot.bar()
+ax.set_xlabel("Decade")
+ax.set_ylabel("Hydrant Count")
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Ignore data from before 1950 using boolean indexing.
+ax = plot_df[plot_df.index >= 1950].plot.bar()
+ax.set_xlabel("Decade")
+ax.set_ylabel("Hydrant Count")
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Make a Stacked Bar Chart!
+ax = plot_df[plot_df.index >= 1950].plot.bar(stacked=True)
+ax.set_xlabel("Decade")
+ax.set_ylabel("Hydrant Count")
+None
+```
--- a/f23/Cole_Lecture_Notes/38_Plotting3/Lec38_Plotting3_Template_Nelson.ipynb
+++ b/f23/Cole_Lecture_Notes/38_Plotting3/Lec38_Plotting3_Template_Nelson.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 0\n",
+    "import sqlite3\n",
+    "import pandas as pd\n",
+    "from pandas import DataFrame, Series\n",
+    "import matplotlib\n",
+    "from matplotlib import pyplot as plt\n",
+    "matplotlib.rcParams[\"font.size\"] = 15"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 1: Write a function that converts any Fahrenheit temp to Celsius\n",
+    "# Note: The final exam will have a select amount of material from earlier in the course\n",
+    "# C = (5/9) * (f-32)\n",
+    "\n",
+    "def f_to_c(f):\n",
+    "    pass\n",
+    "\n",
+    "\n",
+    "# test it by making several calls\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 2a: Save all the data from the \"piazza\" table to \"piazza_df\"\n",
+    "piazza_conn = sqlite3.connect(\"piazza.db\")\n",
+    "piazza_df = ???\n",
+    "piazza_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 2b: Set the index of piazza_df to be student_id\n",
+    "piazza_df = ???\n",
+    "piazza_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 2c: Add a column \"total\" to \"piazza_df\". This should be the sum of \n",
+    "# the number of posts, answers, edits, followups, and replies_to_followups\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 2d: Create a new dataframe \"contributors_df\" which contains those\n",
+    "#            that had more than 0 total contributions, and sort by this\n",
+    "#            value from highest to lowest. Break ties by name in alphabetical order.\n",
+    "contributors_df = ???"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 2e: How would we have done this in sql?\n",
+    "pd.read_sql(\"\"\"\n",
+    "\n",
+    "\"\"\", piazza_conn)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 3a: Of those that contributed, what was their average number of contributions?\n",
+    "#            Do your analysis by role (e.g. by ta, instructor, and student)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 3b: How would we have done this in sql?\n",
+    "pd.read_sql(\"\"\"\n",
+    "\n",
+    "\"\"\", piazza_conn)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 4: What is the correlation between all of the columns?\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warmup 5: Close the connection.\n",
+    "piazza_conn.close()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Plotting Applications\n",
+    "\n",
+    "**Learning Objectives**\n",
+    "\n",
+    "- Make a line plot on a series or on a DataFrame\n",
+    "- Apply features of line plots and bar plots to visualize results of data investigations\n",
+    "- Clean Series data by dropping NaN values and by converting to int\n",
+    "- Make a stacked bar plot"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Line plots\n",
+    "- `SERIES.plot.line()`\n",
+    "- `DATAFRAME.plot.line()`    each column in the data frame becomes a line in the plot\n",
+    "\n",
+    "https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.plot.line.html"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# when you make a series from a list, the default indices are 0, 1, 2, ...\n",
+    "s = Series([1758, 2002, 2408, 2898, 3814, 4803, 5713, 6661, 7618, 8391, 8764]) # y values\n",
+    "s.plot.line()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# You can make a series from a list and add indices\n",
+    "s = Series([1758, 2002, 2408, 2898, 3814, 4803, 5713, 6661, 7618, 8391, 8764], \\\n",
+    "           index=[2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020])\n",
+    "s.plot.line()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# We can save the AxesSubplot and \"beautify\" it like the other plots...\n",
+    "s.plot.line()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Be careful! If the indices are out of order you get a mess\n",
+    "# pandas plots each (index, value) in the order given\n",
+    "s = Series([1758, 2408, 2898, 3814, 4803, 5713, 6661, 7618, 8391, 8764, 2002], \\\n",
+    "           index=[2010, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2011])\n",
+    "s.plot.line()\n",
+    "s"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# you can fix this by calling sort_index()\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Plotting lines from a DataFrame"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# This DataFrame is made using a dict of lists\n",
+    "# City of Madison normal high and low (degrees F) by month\n",
+    "temp_df = DataFrame( \n",
+    "    {\n",
+    "    \"high\": [26, 31, 43, 57, 68, 78, 82, 79, 72, 59, 44, 30],\n",
+    "    \"low\": [11, 15, 25, 36, 46, 56, 61, 59, 50, 39, 28, 16]     }\n",
+    ")\n",
+    "temp_df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### A Line Plot made from a DataFrame automatically plots all columns\n",
+    "\n",
+    "The same is true for bar plots; we'll see this later."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# You can also add ticks and ticklabels to a line plot\n",
+    "\n",
+    "ax = temp_df.plot.line(figsize=(12, 4))\n",
+    "ax.set_title(\"Average Temperatures in Madison, WI\")\n",
+    "ax.set_xlabel(\"Month\")\n",
+    "ax.set_ylabel(\"Temp (Fahrenheit)\")\n",
+    "ax.set_xticks(range(12))   # makes a range from 0 to 11\n",
+    "ax.set_xticklabels([\"Jan\", \"Feb\", \"Mar\", \"Apr\", \"May\", \"Jun\",\n",
+    "                   \"Jul\", \"Aug\", \"Sep\", \"Oct\", \"Nov\", \"Dec\"])\n",
+    "\n",
+    "# This gets rid of the weird output\n",
+    "None"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ... Or explicitly pass the \"x\" and \"y\" parameters...\n",
+    "# Here the month names live in their own column instead of the index.\n",
+    "temp_df_with_month = DataFrame( \n",
+    "    {\n",
+    "    \"month\": [\"Jan\", \"Feb\", \"Mar\", \"Apr\", \"May\", \"Jun\",\n",
+    "                   \"Jul\", \"Aug\", \"Sep\", \"Oct\", \"Nov\", \"Dec\"],\n",
+    "    \"high\": [26, 31, 43, 57, 68, 78, 82, 79, 72, 59, 44, 30],\n",
+    "    \"low\": [11, 15, 25, 36, 46, 56, 61, 59, 50, 39, 28, 16]     }\n",
+    ")\n",
+    "\n",
+    "# x picks the column used for the horizontal axis; y lists the columns drawn as lines.\n",
+    "# Without x=\"month\" the lines are plotted against the default 0-11 index and\n",
+    "# the month names never reach the axis.\n",
+    "ax = temp_df_with_month.plot.line(x=\"month\", y=[\"high\", \"low\"], figsize=(12, 4))\n",
+    "ax.set_title(\"Average Temperatures in Madison, WI\")\n",
+    "ax.set_xlabel(\"Month\")\n",
+    "ax.set_ylabel(\"Temp (Fahrenheit)\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### We can perform a calculation on an entire DataFrame\n",
+    "Let's change the entire DataFrame to Celsius"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# call the function on the dataframe\n",
+    "celcius_df = ???\n",
+    "celcius_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# here is one way to add a horizontal line to our line plots\n",
+    "celcius_df[\"freezing\"] = 0\n",
+    "celcius_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Plot the \"high\", \"low\" and \"freezing\" columns as separate lines,\n",
+    "# label the 12 x positions with month names, and rotate the tick labels 45 degrees.\n",
+    "ax = celcius_df.plot.line(y=[\"high\", \"low\", \"freezing\"], figsize = (12,4))\n",
+    "ax.set_xlabel(\"Month\")\n",
+    "ax.set_ylabel(\"Temp (Celsius)\")\n",
+    "ax.set_xticks(range(12))\n",
+    "ax.set_xticklabels([\"Jan\", \"Feb\", \"Mar\", \"Apr\", \"May\", \"Jun\",\n",
+    "                    \"Jul\", \"Aug\", \"Sep\", \"Oct\", \"Nov\", \"Dec\"], rotation=45)\n",
+    "# Ending the cell with None keeps the tick-label objects from being echoed as output\n",
+    "None"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Bar Plot Example w/ Fire Hydrants\n",
+    "\n",
+    "- General review of Pandas\n",
+    "- Some new Bar Plot options\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "hdf = ???\n",
+    "hdf.tail()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# grab just the column names\n",
+    "???"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Let's create a *bar plot* to visualize *colors* of fire hydrants."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# make a series called color_counts which stores the value counts of the \"nozzle_color\" column\n",
+    "color_counts = ???\n",
+    "color_counts # what type is this? "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# TODO: Clean the data ......use str.upper()\n",
+    "\n",
+    "color_counts= ???\n",
+    "color_counts"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# make a horizontal bar plot of counts of colors and have the colors match\n",
+    "# use color list: [\"b\", \"g\", \"darkorange\", \"r\", \"c\", \"0.5\"]\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Let's create a *bar plot* to visualize *style* of fire hydrants."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Do the same thing as we did for the colors but this time for the \"Style\"\n",
+    "style_counts = ???\n",
+    "style_counts"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "style_counts.plot.bar()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Grab the top 12 \n",
+    "\n",
+    "\n",
+    "# and then add an index to our Series for the sum of all the \"other\" styles\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Plot the results\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Plot the year manufactured for the Pacer Style as opposed to other styles"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Let's get the year manufactured for all of the \"Pacer\" hydrants.\n",
+    "pacer_years = ???\n",
+    "pacer_years"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# then do the same for all the other data\n",
+    "other_years = ???\n",
+    "other_years"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Round each year down to the start of the decade.\n",
+    "# e.g. 1987 --> 1980,   2003 --> 2000\n",
+    "pacer_decades = ???\n",
+    "pacer_decades"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Drop the NaN values, convert to int, and do value counts\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Do the same thing for other_years. Save to a variable called \"other_decades\"\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Build a DataFrame from a dictionary of key, Series\n",
+    "plot_df = DataFrame({\n",
+    "    \"pacer\": pacer_decades,\n",
+    "    \"other\": other_decades,\n",
+    "})\n",
+    "plot_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Make a bar plot\n",
+    "ax = plot_df.plot.bar()\n",
+    "ax.set_xlabel(\"Decade\")\n",
+    "ax.set_ylabel(\"Hydrant Count\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Ignore data from before 1950 using boolean indexing.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Make a Stacked Bar Chart!\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
+%% Cell type:code id: tags:
+
+``` python
+# Warmup 0
+import sqlite3
+import pandas as pd
+from pandas import DataFrame, Series
+import matplotlib
+from matplotlib import pyplot as plt
+matplotlib.rcParams["font.size"] = 15
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Warmup 1: Write a function that converts any Fahrenheit temp to Celsius
+# Note: The final exam will have a select amount of material from earlier in the course
+# C = (5/9) * (f-32)
+
+def f_to_c(f):
+    # TODO: replace `pass` with the conversion C = (5/9) * (f - 32),
+    # returning the Celsius value for the Fahrenheit temperature f.
+    pass
+
+
+# test it by making several calls
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Warmup 2a: Save all the data from the "piazza" table to "piazza_df"
+piazza_conn = sqlite3.connect("piazza.db")
+piazza_df = ???
+piazza_df
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Warmup 2b: Set the index of piazza_df to be student_id
+piazza_df = ???
+piazza_df
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Warmup 2c: Add a column "total" to "piazza_df". This should be the sum of
+# the number of posts, answers, edits, followups, and replies_to_followups
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Warmup 2d: Create a new dataframe "contributors_df" which contains those
+#            that had more than 0 total contributions, and sort by this
+#            value from highest to lowest. Break ties by name in alphabetical order.
+contributors_df = ???
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Warmup 2e: How would we have done this in sql?
+pd.read_sql("""
+
+""", piazza_conn)
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Warmup 3a: Of those that contributed, what was their average number of contributions?
+#            Do your analysis by role (e.g. by ta, instructor, and student)
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Warmup 3b: How would we have done this in sql?
+pd.read_sql("""
+
+""", piazza_conn)
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Warmup 4: What is the correlation between all of the columns?
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Warmup 5: Close the connection.
+piazza_conn.close()
+```
+
+%% Cell type:markdown id: tags:
+
+# Plotting Applications
+
+**Learning Objectives**
+
+- Make a line plot on a series or on a DataFrame
+- Apply features of line plots and bar plots to visualize results of data investigations
+- Clean Series data by dropping NaN values and by converting to int
+- Make a stacked bar plot
+
+%% Cell type:markdown id: tags:
+
+## Line plots
+- `SERIES.plot.line()`
+- `DATAFRAME.plot.line()`    each column in the data frame becomes a line in the plot
+
+https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.plot.line.html
+
+%% Cell type:code id: tags:
+
+``` python
+# when you make a series from a list, the default indices are 0, 1, 2, ...
+s = Series([1758, 2002, 2408, 2898, 3814, 4803, 5713, 6661, 7618, 8391, 8764]) # y values
+s.plot.line()
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# You can make a series from a list and add indices
+s = Series([1758, 2002, 2408, 2898, 3814, 4803, 5713, 6661, 7618, 8391, 8764], \
+           index=[2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020])
+s.plot.line()
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# We can save the AxesSubplot and "beautify" it like the other plots...
+s.plot.line()
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Be careful! If the indices are out of order you get a mess
+# pandas plots each (index, value) in the order given
+s = Series([1758, 2408, 2898, 3814, 4803, 5713, 6661, 7618, 8391, 8764, 2002], \
+           index=[2010, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2011])
+s.plot.line()
+s
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# you can fix this by calling sort_index()
+```
+
+%% Cell type:markdown id: tags:
+
+### Plotting lines from a DataFrame
+
+%% Cell type:code id: tags:
+
+``` python
+# This DataFrame is made using a dict of lists
+# City of Madison normal high and low (degrees F) by month
+temp_df = DataFrame(
+    {
+    "high": [26, 31, 43, 57, 68, 78, 82, 79, 72, 59, 44, 30],
+    "low": [11, 15, 25, 36, 46, 56, 61, 59, 50, 39, 28, 16]     }
+)
+temp_df
+```
+
+%% Cell type:markdown id: tags:
+
+### A Line Plot made from a DataFrame automatically plots all columns
+
+The same is true for bar plots; we'll see this later.
+
+%% Cell type:code id: tags:
+
+``` python
+# You can also add ticks and ticklabels to a line plot
+
+ax = temp_df.plot.line(figsize=(12, 4))
+ax.set_title("Average Temperatures in Madison, WI")
+ax.set_xlabel("Month")
+ax.set_ylabel("Temp (Fahrenheit)")
+ax.set_xticks(range(12))   # makes a range from 0 to 11
+ax.set_xticklabels(["Jan", "Feb", "Mar", "Apr", "May", "Jun",
+                   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"])
+
+# This gets rid of the weird output
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# ... Or explicitly pass the "x" and "y" parameters...
+temp_df_with_month = DataFrame(
+    {
+    "month": ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
+                   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"],
+    "high": [26, 31, 43, 57, 68, 78, 82, 79, 72, 59, 44, 30],
+    "low": [11, 15, 25, 36, 46, 56, 61, 59, 50, 39, 28, 16]     }
+)
+
+ax = temp_df_with_month.plot.line(figsize=(12, 4))
+ax.set_title("Average Temperatures in Madison, WI")
+ax.set_xlabel("Month")
+ax.set_ylabel("Temp (Fahrenheit)")
+```
+
+%% Cell type:markdown id: tags:
+
+### We can perform a calculation on an entire DataFrame
+Let's change the entire DataFrame to Celsius
+
+%% Cell type:code id: tags:
+
+``` python
+# call the function on the dataframe
+celcius_df = ???
+celcius_df
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# here is one way to add a horizontal line to our line plots
+celcius_df["freezing"] = 0
+celcius_df
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# this plots each column as lines
+# with rotation for the tick labels
+ax = celcius_df.plot.line(y=["high", "low", "freezing"], figsize = (12,4))
+ax.set_xlabel("Month")
+ax.set_ylabel("Temp (Celcius)")
+ax.set_xticks(range(12))
+ax.set_xticklabels(["Jan", "Feb", "Mar", "Apr", "May", "Jun",
+                    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"], rotation=45)
+None
+```
+
+%% Cell type:markdown id: tags:
+
+### Bar Plot Example w/ Fire Hydrants
+
+- General review of Pandas
+- Some new Bar Plot options
+
+%% Cell type:code id: tags:
+
+``` python
+hdf = ???
+hdf.tail()
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# grab just the column names
+???
+```
+
+%% Cell type:markdown id: tags:
+
+### Let's create a *bar plot* to visualize *colors* of fire hydrants.
+
+%% Cell type:code id: tags:
+
+``` python
+# make a series called color_counts which stores the value counts of the "nozzle_color" column
+color_counts = ???
+color_counts # what type is this?
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# TODO: Clean the data ......use str.upper()
+
+color_counts= ???
+color_counts
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# make a horizontal bar plot of counts of colors and have the colors match
+# use color list: ["b", "g", "darkorange", "r", "c", "0.5"]
+```
+
+%% Cell type:markdown id: tags:
+
+### Let's create a *bar plot* to visualize *style* of fire hydrants.
+
+%% Cell type:code id: tags:
+
+``` python
+# Do the same thing as we did for the colors but this time for the "Style"
+style_counts = ???
+style_counts
+```
+
+%% Cell type:code id: tags:
+
+``` python
+style_counts.plot.bar()
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Grab the top 12
+
+
+# and then add an index to our Series for the sum of all the "other" styles
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Plot the results
+```
+
+%% Cell type:markdown id: tags:
+
+### Plot the year manufactured for the Pacer Style as opposed to other styles
+
+%% Cell type:code id: tags:
+
+``` python
+# Let's get the year manufactured for all of the "Pacer" hydrants.
+pacer_years = ???
+pacer_years
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# then do the same for all the other data
+other_years = ???
+other_years
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Round each year down to the start of the decade.
+# e.g. 1987 --> 1980,   2003 --> 2000
+pacer_decades = ???
+pacer_decades
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Drop the NaN values, convert to int, and do value counts
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Do the same thing for other_years. Save to a variable called "other_decades"
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Build a DataFrame from a dictionary of key, Series
+plot_df = DataFrame({
+    "pacer": pacer_decades,
+    "other": other_decades,
+})
+plot_df
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Make a bar plot
+ax = plot_df.plot.bar()
+ax.set_xlabel("Decade")
+ax.set_ylabel("Hydrant Count")
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Ignore data from before 1950 using boolean indexing.
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Make a Stacked Bar Chart!
+```
--- a/f23/Cole_Lecture_Notes/38_Plotting3/fire_hydrants.csv
+++ b/f23/Cole_Lecture_Notes/38_Plotting3/fire_hydrants.csv
--- a/f23/Cole_Lecture_Notes/38_Plotting3/piazza.db
+++ b/f23/Cole_Lecture_Notes/38_Plotting3/piazza.db