From d3aca98c8d67620169dc94bfc388e6ec6120979d Mon Sep 17 00:00:00 2001
From: gsingh58 <gurmail-singh@wisc.edu>
Date: Fri, 8 Dec 2023 02:08:40 -0600
Subject: [PATCH] Lec38 updated

---
 .../lec_38_plotting3_line_plots.ipynb         |  25 +-
 ...g3_line_plots_template_Gurmail_lec1.ipynb} |  25 +-
 ...ng3_line_plots_template_Gurmail_lec2.ipynb | 938 ++++++++++++++++++
 3 files changed, 986 insertions(+), 2 deletions(-)
 rename f23/Gurmail_Lecture_Notes/38_Plotting-3/{lec_38_plotting3_line_plots_template.ipynb => lec_38_plotting3_line_plots_template_Gurmail_lec1.ipynb} (96%)
 create mode 100644 f23/Gurmail_Lecture_Notes/38_Plotting-3/lec_38_plotting3_line_plots_template_Gurmail_lec2.ipynb

diff --git a/f23/Gurmail_Lecture_Notes/38_Plotting-3/lec_38_plotting3_line_plots.ipynb b/f23/Gurmail_Lecture_Notes/38_Plotting-3/lec_38_plotting3_line_plots.ipynb
index 12857f9..287363b 100644
--- a/f23/Gurmail_Lecture_Notes/38_Plotting-3/lec_38_plotting3_line_plots.ipynb
+++ b/f23/Gurmail_Lecture_Notes/38_Plotting-3/lec_38_plotting3_line_plots.ipynb
@@ -1,5 +1,28 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Announcements - Friday, December 8\n",
+    "* Download ALL files for today's lecture\n",
+    "* Q10 is due tonight at 11:59 pm\n",
+    "* <b>If you have any problem with P8-P11 grades, please send me (Gurmail.Singh@wisc.edu) an email by December 11.</b>\n",
+    "* Late days may not be used on P13\n",
+    "* If you have questions, it is almost always faster to \n",
+    "  * Post on Piazza\n",
+    "  * Go to [office hours](https://sites.google.com/wisc.edu/cs220-oh-f23/home?pli=1) \n",
+    "### Conflict Form\n",
+    "  * [Final - December 19, 7:45 am](https://cs220.cs.wisc.edu/f23/surveys.html)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Plotting 3"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 1,
@@ -2633,7 +2656,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.12"
+   "version": "3.11.4"
   }
  },
  "nbformat": 4,
diff --git a/f23/Gurmail_Lecture_Notes/38_Plotting-3/lec_38_plotting3_line_plots_template.ipynb b/f23/Gurmail_Lecture_Notes/38_Plotting-3/lec_38_plotting3_line_plots_template_Gurmail_lec1.ipynb
similarity index 96%
rename from f23/Gurmail_Lecture_Notes/38_Plotting-3/lec_38_plotting3_line_plots_template.ipynb
rename to f23/Gurmail_Lecture_Notes/38_Plotting-3/lec_38_plotting3_line_plots_template_Gurmail_lec1.ipynb
index 80f463f..70a1b8b 100644
--- a/f23/Gurmail_Lecture_Notes/38_Plotting-3/lec_38_plotting3_line_plots_template.ipynb
+++ b/f23/Gurmail_Lecture_Notes/38_Plotting-3/lec_38_plotting3_line_plots_template_Gurmail_lec1.ipynb
@@ -1,5 +1,28 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Announcements - Friday, December 8\n",
+    "* Download ALL files for today's lecture\n",
+    "* Q10 is due tonight at 11:59 pm\n",
+    "* <b>If you have any problem with P8-P11 grades, please send me (Gurmail.Singh@wisc.edu) an email by December 11.</b>\n",
+    "* Late days may not be used on P13\n",
+    "* If you have questions, it is almost always faster to \n",
+    "  * Post on Piazza\n",
+    "  * Go to [office hours](https://sites.google.com/wisc.edu/cs220-oh-f23/home?pli=1) \n",
+    "### Conflict Form\n",
+    "  * [Final - December 19, 7:45 am](https://cs220.cs.wisc.edu/f23/surveys.html)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Plotting 3"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -907,7 +930,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.12"
+   "version": "3.11.4"
   }
  },
  "nbformat": 4,
diff --git a/f23/Gurmail_Lecture_Notes/38_Plotting-3/lec_38_plotting3_line_plots_template_Gurmail_lec2.ipynb b/f23/Gurmail_Lecture_Notes/38_Plotting-3/lec_38_plotting3_line_plots_template_Gurmail_lec2.ipynb
new file mode 100644
index 0000000..70a1b8b
--- /dev/null
+++ b/f23/Gurmail_Lecture_Notes/38_Plotting-3/lec_38_plotting3_line_plots_template_Gurmail_lec2.ipynb
@@ -0,0 +1,938 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Announcements - Friday, December 8\n",
+    "* Download ALL files for today's lecture\n",
+    "* Q10 is due tonight at 11:59 pm\n",
+    "* <b>If you have any problem with P8-P11 grades, please send me (Gurmail.Singh@wisc.edu) an email by December 11.</b>\n",
+    "* Late days may not be used on P13\n",
+    "* If you have questions, it is almost always faster to \n",
+    "  * Post on Piazza\n",
+    "  * Go to [office hours](https://sites.google.com/wisc.edu/cs220-oh-f23/home?pli=1) \n",
+    "### Conflict Form\n",
+    "  * [Final - December 19, 7:45 am](https://cs220.cs.wisc.edu/f23/surveys.html)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Plotting 3"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "# import statements\n",
+    "import sqlite3\n",
+    "import pandas as pd\n",
+    "from pandas import DataFrame, Series\n",
+    "import matplotlib\n",
+    "from matplotlib import pyplot as plt\n",
+    "matplotlib.rcParams[\"font.size\"] = 16"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Warmup 1: Write a function that converts any Fahrenheit temp to Celsius\n",
+    "C = (5/9) * (f-32)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def f_to_c(f):\n",
+    "    return (5/9) * (f-32)\n",
+    "\n",
+    "# test it by making several calls\n",
+    "print(f_to_c(212))\n",
+    "print(f_to_c(32))\n",
+    "print(f_to_c(67))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Warmup 2a: What is the name of the only table inside of iris-flowers.db?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Establish a connection to \"iris-flowers.db\" database\n",
+    "iris_conn = sqlite3.connect(\"iris-flowers.db\")\n",
+    "pd.read_sql(\"SELECT * FROM sqlite_master WHERE type='table'\", iris_conn)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Warmup 2b: Save & display all the data from this table to a variable called \"iris_df\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "iris_df = pd.read_sql(\"SELECT * FROM iris\", iris_conn)\n",
+    "iris_df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Warmup 3a: What are all the class types?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# v1: pandas\n",
+    "varietes = iris_df[\"class\"]\n",
+    "varietes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# v2: SQL\n",
+    "varietes = list(pd.read_sql(\"\"\"\n",
+    "    SELECT DISTINCT class\n",
+    "    FROM iris\n",
+    "\"\"\", iris_conn)[\"class\"])\n",
+    "varietes"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Warmup 3b: Scatter plot to visualize relationship between `pet-width` and `pet-length`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# complete this code to make 3 plots in one\n",
+    "\n",
+    "colors = [\"blue\", \"green\", \"red\"]\n",
+    "markers = [\"o\", \"^\", \"v\"]\n",
+    "\n",
+    "# getting unique class column values\n",
+    "varietes = list(set(iris_df[\"class\"]))\n",
+    "\n",
+    "# Iterate over indices of varieties list\n",
+    "# Q: Why are we iterating over indices instead of values here?\n",
+    "#    Discuss how it will be useful to extract information from other lists \n",
+    "#    like colors and markers\n",
+    "for i in range(len(varietes)):\n",
+    "    variety = varietes[i]\n",
+    "    curr_color = ??? # write code to extract color\n",
+    "    curr_marker = ??? # write code to extract marker\n",
+    "    \n",
+    "    # make a df of just the data for this variety\n",
+    "    variety_df = iris_df[iris_df[\"class\"] == variety] \n",
+    "    # print each subset DataFrame and verify that the output is correct\n",
+    "    \n",
+    "    #make a scatter plot for this variety\n",
+    "    #variety_df.plot.scatter(x = \"pet-width\", y = \"pet-length\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Let's focus on \"Iris-virginica\" data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "iris_virginica = ???\n",
+    "# assert that length of iris_virginica is exactly 50\n",
+    "???\n",
+    "iris_virginica.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Create scatter plot to visualize relationship between `pet-width` and `pet-length`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "iris_virginica.plot.scatter(x = \"pet-width\", y = \"pet-length\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Let's learn about *xlim* and *ylim*\n",
+    "- Allows us to set x-axis and y-axis limits\n",
+    "- Takes either a single value (LOWER-BOUND) or a tuple containing two values (LOWER-BOUND, UPPER-BOUND)\n",
+    "- You need to be careful about setting the UPPER-BOUND"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "iris_virginica.plot.scatter(x = \"pet-width\", y = \"pet-length\", xlim = ???, ylim = ???)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ax = iris_virginica.plot.scatter(x = \"pet-width\", y = \"pet-length\",\n",
+    "                    xlim = ???, ylim = ???,\n",
+    "                    figsize = (3, 3))\n",
+    "\n",
+    "# What is wrong with this plot?"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "What is the maximum `pet-length`?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "iris_virginica[\"pet-length\"]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For every set method, there is a corresponding get method. Try `ax.get_ylim()`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Let's include assert statements to make sure we don't crop the plot!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ax = iris_virginica.plot.scatter(x = \"pet-width\", y = \"pet-length\",\n",
+    "                     xlim = (0, 6), ylim = (0, 6),\n",
+    "                     figsize = (3, 3))\n",
+    "\n",
+    "#print(\"Ran into AssertionError while checking axes limits\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Now let's try all 4 assert statements\n",
+    "\n",
+    "```\n",
+    "assert iris_virginica[ax.get_xlabel()].min() >= ax.get_xlim()[0]\n",
+    "assert iris_virginica[ax.get_xlabel()].max() <= ax.get_xlim()[1]\n",
+    "assert iris_virginica[ax.get_ylabel()].min() >= ax.get_ylim()[0]\n",
+    "assert iris_virginica[ax.get_ylabel()].max() <= ax.get_ylim()[1]\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ax = iris_virginica.plot.scatter(x = \"pet-width\", y = \"pet-length\",\n",
+    "                     xlim = (0, 7), ylim = (0, 7),\n",
+    "                     figsize = (3, 3))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Close the database connection.\n",
+    "iris_conn.close()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Plotting Applications\n",
+    "\n",
+    "**Learning Objectives**\n",
+    "\n",
+    "- Make a line plot on a series or on a DataFrame\n",
+    "- Apply features of line plots and bar plots to visualize results of data investigations\n",
+    "- Clean Series data by dropping NaN values and by converting to int\n",
+    "- Make a stacked bar plot"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Line plots\n",
+    "- `SERIES.plot.line()`       each value in the Series becomes y-value and each index becomes x-value\n",
+    "- `DATAFRAME.plot.line()`    each column in the data frame becomes a line in the plot\n",
+    "- ***IMPORTANT***: lines in line plots shouldn't be crooked, you need to sort the values based on increasing order of indices!\n",
+    "\n",
+    "https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.plot.line.html"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Plotting line from a Series"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# when you make a series from a list, the default indices are 0, 1, 2, ...\n",
+    "s = Series([0, 100, 300, 200, 400])\n",
+    "s"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "s = Series([0, 100, 300, 200, 400], index = [0, 20, 21, 22, 1])\n",
+    "s # oops this produces a crooked line plot!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Let's fix it by sorting the Series values based on the indices\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Craft breweries example"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# You can make a series from a list and add indices\n",
+    "s = Series([1758, 2002, 2408, 2898, 3814, 4803, 5713, 6661, 7618, 8391, 8764], \\\n",
+    "           index=[2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020])\n",
+    "\n",
+    "# We can save the AxesSubplot and \"beautify\" it like the other plots...\n",
+    "\n",
+    "# Set title to \"Craft Breweries in the USA\"\n",
+    "\n",
+    "# Set x-axis label to \"Year\"\n",
+    "\n",
+    "# Set y-axis label to \"# Craft Breweries\"\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Be careful! If the indices are out of order you get a mess\n",
+    "# pandas plots each (index, value) in the order given\n",
+    "s = Series([1758, 2408, 2898, 3814, 4803, 5713, 6661, 7618, 8391, 8764, 2002], \\\n",
+    "           index=[2010, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2011])\n",
+    "# TODO: fix this crooked line plot\n",
+    "s.plot.line()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Fix it here\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Temperature example\n",
+    "Plotting lines from a DataFrame\n",
+    "\n",
+    "- `DATAFRAME.plot.line()`    each column in the data frame becomes a line in the plot\n",
+    "- ***IMPORTANT***: lines in line plots shouldn't be crooked, you need to sort the values based on increasing order of indices!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# This DataFrame is made using a dict of lists\n",
+    "# City of Madison normal high and low (degrees F) by month\n",
+    "temp_df = DataFrame( {\n",
+    "    \"high\": [26, 31, 43, 57, 68, 78, 82, 79, 72, 59, 44, 30],\n",
+    "    \"low\": [11, 15, 25, 36, 46, 56, 61, 59, 50, 39, 28, 16]}\n",
+    ")\n",
+    "\n",
+    "# Q: do \"high\" and \"low\" become rows or columns within the DataFrame? \n",
+    "# A: \n",
+    "temp_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Let's create line plots\n",
+    " # not a nice plot\n",
+    "    \n",
+    "# Let's fix the aesthetics"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### A Line Plot made from a DataFrame automatically plots all columns\n",
+    "\n",
+    "The same is true for bar plots; we'll see this later.\n",
+    "\n",
+    "`ax.set_xticks(...)`: takes a sequence of numbers as its argument and adds ticks at those locations."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# You can also add ticks and ticklabels to a line plot\n",
+    "# TODOs:\n",
+    "# 1. Also add figure size as (8, 4)\n",
+    "# 2. Add xticks - how many do we need?\n",
+    "# 3. Add xticklabels and rotate them by 45 degrees\n",
+    "#[\"Jan\", \"Feb\", \"Mar\", \"Apr\", \"May\", \"Jun\", \"Jul\", \"Aug\", \"Sep\", \"Oct\", \"Nov\", \"Dec\"]\n",
+    "\n",
+    "ax = temp_df.plot.line(???)\n",
+    "ax.set_title(\"Average Temperatures in Madison, WI\")\n",
+    "ax.set_xlabel(\"Month\")\n",
+    "ax.set_ylabel(\"Temp (Fahrenheit)\")\n",
+    "ax.set_xticks(???)   # makes a sequence of integers from 0 to 11\n",
+    "ax.set_xticklabels(???, ???)\n",
+    "\n",
+    "# This gets rid of the weird output\n",
+    "None"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# We could explicitly pass arguments to the \"x\" and \"y\" parameters\n",
+    "temp_df_with_month = DataFrame( \n",
+    "    {\n",
+    "    \"month\": [\"Jan\", \"Feb\", \"Mar\", \"Apr\", \"May\", \"Jun\",\n",
+    "                   \"Jul\", \"Aug\", \"Sep\", \"Oct\", \"Nov\", \"Dec\"],\n",
+    "    \"high\": [26, 31, 43, 57, 68, 78, 82, 79, 72, 59, 44, 30],\n",
+    "    \"low\": [11, 15, 25, 36, 46, 56, 61, 59, 50, 39, 28, 16]}\n",
+    ")\n",
+    "\n",
+    "ax = temp_df_with_month.plot.line(x = ???, y = ???, figsize = (8, 4))\n",
+    "ax.set_title(\"Average Temperatures in Madison, WI\")\n",
+    "ax.set_xlabel(\"Month\")\n",
+    "ax.set_ylabel(\"Temp (Fahrenheit)\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### We can perform a calculation on an entire DataFrame\n",
+    "Let's change the entire DataFrame to Celsius"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# call the function on the dataframe\n",
+    "celcius_df = ???\n",
+    "celcius_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# here is one way to add a horizontal line to our line plots\n",
+    "celcius_df[???] = ???\n",
+    "celcius_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# this plots each column as lines\n",
+    "# with rotation for the tick labels\n",
+    "ax = celcius_df.plot.line(figsize = (8, 4))\n",
+    "ax.set_xlabel(\"Month\")\n",
+    "ax.set_ylabel(\"Temp (Celcius)\")\n",
+    "ax.set_xticks(range(12))\n",
+    "ax.set_xticklabels([\"Jan\", \"Feb\", \"Mar\", \"Apr\", \"May\", \"Jun\",\n",
+    "                    \"Jul\", \"Aug\", \"Sep\", \"Oct\", \"Nov\", \"Dec\"], rotation = 45)\n",
+    "ax.grid()\n",
+    "None"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Bar plots using DataFrames"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Bar Plot Example w/ Fire Hydrants\n",
+    "\n",
+    "- General review of pandas\n",
+    "- Some new bar plot options"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# TODO: read \"Fire_Hydrants.csv\" into a DataFrame\n",
+    "hdf = ???\n",
+    "hdf.tail()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Extract just the column names\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Let's create a *bar plot* to visualize *colors* of fire hydrants."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Make a Series called color_counts which stores the value counts of the \"nozzle_color\" column\n",
+    "color_counts = ???\n",
+    "color_counts # what is wrong with this data?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# TODO: Clean the data ......use str.upper()\n",
+    "\n",
+    "color_counts = ???\n",
+    "color_counts"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Make a horizontal bar plot of counts of colors and have the colors match\n",
+    "# use color list: [\"b\", \"g\", \"darkorange\", \"r\", \"c\", \"0.5\"]\n",
+    "ax = ???\n",
+    "ax.set_xlabel(\"Fire hydrant count\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Let's create a *bar plot* to visualize *style* of fire hydrants."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Do the same thing as we did for the colors but this time for the \"Style\"\n",
+    "style_counts = ???\n",
+    "style_counts"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Grab the top 12 \n",
+    "top12 = ???\n",
+    "\n",
+    "# and then add an index to our Series for the sum of all the \"other\" styles\n",
+    "top12[???] = ???"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Plot the results\n",
+    "ax = ???(color = \"firebrick\")\n",
+    "ax.set_ylabel(\"Hydrant Count\")\n",
+    "ax.set_xlabel(\"Hydrant Type\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### In what *decade* were *pacers manufactured*?\n",
+    "### Take a peek at the *Style* column data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "hdf[\"Style\"]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Which *column* gives *year* information?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "hdf.columns"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### How to get the *year_manufactured* for *pacers* and *others*?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Let's get the year manufactured for all of the \"Pacer\" hydrants.\n",
+    "pacer_years = ???\n",
+    "\n",
+    "# Note: We can do this either way\n",
+    "# pacer_years = hdf[\"year_manufactured\"][hdf[\"Style\"] == \"Pacer\"]\n",
+    "\n",
+    "pacer_years"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# then do the same for all the other data\n",
+    "other_years = ???\n",
+    "other_years"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### How to get the *decade* for *pacers*?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Round each year down to the start of the decade.\n",
+    "# e.g. 1987 --> 1980, 2003 --> 2000\n",
+    "pacer_decades = ???\n",
+    "pacer_decades"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### How to convert the *decades* back to *int*?\n",
+    "- `astype(...)` method\n",
+    "- `dropna(...)` method"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Drop the NaN values and convert to int (value counts happen in the next step)\n",
+    "pacer_decades = ???"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### How to *count the decades* for pacers?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pacer_decades_count = ???\n",
+    "pacer_decades_count"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Count the *decades* for others."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Do the same thing for other_years. Save to a variable called \"other_decades\"\n",
+    "other_decades = ???\n",
+    "other_decades_count = ???\n",
+    "other_decades_count"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Build a DataFrame from a dictionary of key, Series"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot_df = DataFrame(???)\n",
+    "plot_df # observe the NaN values"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# make a bar plot\n",
+    "\n",
+    "ax = ???\n",
+    "ax.set_xlabel(\"Decade\")\n",
+    "ax.set_ylabel(\"Hydrant Count\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Ignore data from before 1950 using boolean indexing."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ax = ???\n",
+    "ax.set_xlabel(\"Decade\")\n",
+    "ax.set_ylabel(\"Hydrant Count\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Stacked Bar Chart\n",
+    "`stacked` parameter accepts boolean value as argument"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ax = ???\n",
+    "ax.set_xlabel(\"Decade\")\n",
+    "ax.set_ylabel(\"Hydrant Count\")\n",
+    "None"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
-- 
GitLab