From d3aca98c8d67620169dc94bfc388e6ec6120979d Mon Sep 17 00:00:00 2001 From: gsingh58 <gurmail-singh@wisc.edu> Date: Fri, 8 Dec 2023 02:08:40 -0600 Subject: [PATCH] Lec38 updated --- .../lec_38_plotting3_line_plots.ipynb | 25 +- ...g3_line_plots_template_Gurmail_lec1.ipynb} | 25 +- ...ng3_line_plots_template_Gurmail_lec2.ipynb | 938 ++++++++++++++++++ 3 files changed, 986 insertions(+), 2 deletions(-) rename f23/Gurmail_Lecture_Notes/38_Plotting-3/{lec_38_plotting3_line_plots_template.ipynb => lec_38_plotting3_line_plots_template_Gurmail_lec1.ipynb} (96%) create mode 100644 f23/Gurmail_Lecture_Notes/38_Plotting-3/lec_38_plotting3_line_plots_template_Gurmail_lec2.ipynb diff --git a/f23/Gurmail_Lecture_Notes/38_Plotting-3/lec_38_plotting3_line_plots.ipynb b/f23/Gurmail_Lecture_Notes/38_Plotting-3/lec_38_plotting3_line_plots.ipynb index 12857f9..287363b 100644 --- a/f23/Gurmail_Lecture_Notes/38_Plotting-3/lec_38_plotting3_line_plots.ipynb +++ b/f23/Gurmail_Lecture_Notes/38_Plotting-3/lec_38_plotting3_line_plots.ipynb @@ -1,5 +1,28 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Announcements - Friday, December 8\n", + "* Download ALL files for today's lecture\n", + "* Q10 is due tonight at 11:59 pm\n", + "* <b>If you have any problem with P8-P11 grades, please send me (Gurmail.Singh@wisc.edu) an email by December 11.</b>\n", + "* Late days may not be used on P13\n", + "* If you have questions, it is almost always faster to \n", + " * Post on Piazza\n", + " * Go to [office hours](https://sites.google.com/wisc.edu/cs220-oh-f23/home?pli=1) \n", + "### Conflict Form\n", + " * [Final - December 19, 7:45 am](https://cs220.cs.wisc.edu/f23/surveys.html)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Plotting 3" + ] + }, { "cell_type": "code", "execution_count": 1, @@ -2633,7 +2656,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.12" + "version": "3.11.4" } 
}, "nbformat": 4, diff --git a/f23/Gurmail_Lecture_Notes/38_Plotting-3/lec_38_plotting3_line_plots_template.ipynb b/f23/Gurmail_Lecture_Notes/38_Plotting-3/lec_38_plotting3_line_plots_template_Gurmail_lec1.ipynb similarity index 96% rename from f23/Gurmail_Lecture_Notes/38_Plotting-3/lec_38_plotting3_line_plots_template.ipynb rename to f23/Gurmail_Lecture_Notes/38_Plotting-3/lec_38_plotting3_line_plots_template_Gurmail_lec1.ipynb index 80f463f..70a1b8b 100644 --- a/f23/Gurmail_Lecture_Notes/38_Plotting-3/lec_38_plotting3_line_plots_template.ipynb +++ b/f23/Gurmail_Lecture_Notes/38_Plotting-3/lec_38_plotting3_line_plots_template_Gurmail_lec1.ipynb @@ -1,5 +1,28 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Announcements - Friday, December 8\n", + "* Download ALL files for today's lecture\n", + "* Q10 is due tonight at 11:59 pm\n", + "* <b>If you have any problem with P8-P11 grades, please send me (Gurmail.Singh@wisc.edu) an email by December 11.</b>\n", + "* Late days may not be used on P13\n", + "* If you have questions, it is almost always faster to \n", + " * Post on Piazza\n", + " * Go to [office hours](https://sites.google.com/wisc.edu/cs220-oh-f23/home?pli=1) \n", + "### Conflict Form\n", + " * [Final - December 19, 7:45 am](https://cs220.cs.wisc.edu/f23/surveys.html)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Plotting 3" + ] + }, { "cell_type": "code", "execution_count": null, @@ -907,7 +930,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.12" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/f23/Gurmail_Lecture_Notes/38_Plotting-3/lec_38_plotting3_line_plots_template_Gurmail_lec2.ipynb b/f23/Gurmail_Lecture_Notes/38_Plotting-3/lec_38_plotting3_line_plots_template_Gurmail_lec2.ipynb new file mode 100644 index 0000000..70a1b8b --- /dev/null +++ 
b/f23/Gurmail_Lecture_Notes/38_Plotting-3/lec_38_plotting3_line_plots_template_Gurmail_lec2.ipynb @@ -0,0 +1,938 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Announcements - Friday, December 8\n", + "* Download ALL files for today's lecture\n", + "* Q10 is due tonight at 11:59 pm\n", + "* <b>If you have any problem with P8-P11 grades, please send me (Gurmail.Singh@wisc.edu) an email by December 11.</b>\n", + "* Late days may not be used on P13\n", + "* If you have questions, it is almost always faster to \n", + " * Post on Piazza\n", + " * Go to [office hours](https://sites.google.com/wisc.edu/cs220-oh-f23/home?pli=1) \n", + "### Conflict Form\n", + " * [Final - December 19, 7:45 am](https://cs220.cs.wisc.edu/f23/surveys.html)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Plotting 3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# import statements\n", + "import sqlite3\n", + "import pandas as pd\n", + "from pandas import DataFrame, Series\n", + "import matplotlib\n", + "from matplotlib import pyplot as plt\n", + "matplotlib.rcParams[\"font.size\"] = 16" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Warmup 1: Write a function that converts any Fahrenheit temp to Celsius\n", + "C = (5/9) * (f-32)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def f_to_c(f):\n", + " return (5/9) * (f-32)\n", + "\n", + "# test it by making several calls\n", + "print(f_to_c(212))\n", + "print(f_to_c(32))\n", + "print(f_to_c(67))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Warmup 2a: What is the name of the only table inside of iris-flowers.db?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Establish a connection to \"iris-flowers.db\" database\n", + "iris_conn = sqlite3.connect(\"iris-flowers.db\")\n", + "pd.read_sql(\"SELECT * FROM sqlite_master WHERE type='table'\", iris_conn)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Warmup 2b: Save & display all the data from this table to a variable called \"iris_df\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "iris_df = pd.read_sql(\"SELECT * FROM iris\", iris_conn)\n", + "iris_df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Warmup 3a: What are all the class types?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# v1: pandas\n", + "varietes = iris_df[\"class\"]\n", + "varietes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# v2: SQL\n", + "varietes = list(pd.read_sql(\"\"\"\n", + " SELECT DISTINCT class\n", + " FROM iris\n", + "\"\"\", iris_conn)[\"class\"])\n", + "varietes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Warmup 3b: Scatter plot to visualize relationship between `pet-width` and `pet-length`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# complete this code to make 3 plots in one\n", + "\n", + "colors = [\"blue\", \"green\", \"red\"]\n", + "markers = [\"o\", \"^\", \"v\"]\n", + "\n", + "# getting unique class column values\n", + "varietes = list(set(iris_df[\"class\"]))\n", + "\n", + "# Iterate over indices of varieties list\n", + "# Q: Why are we iterating over indices instead values here?\n", + "# Discuss how it will be useful to extract information from other lists \n", + "# like colors and 
markers\n", + "for i in range(len(varietes)):\n", + " variety = varietes[i]\n", + " curr_color = ??? # write code to extract color\n", + " curr_marker = ??? # write code to extract marker\n", + " \n", + " # make a df just of just the data for this variety\n", + " variety_df = iris_df[iris_df[\"class\"] == variety] \n", + " # print each subset DataFrame and verify that the output is correct\n", + " \n", + " #make a scatter plot for this variety\n", + " #variety_df.plot.scatter(x = \"pet-width\", y = \"pet-length\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Let's focus on \"Iris-virginica\" data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "iris_virginica = ???\n", + "# assert that length of iris_virginica is exactly 50\n", + "???\n", + "iris_virginica.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Create scatter plot to visualize relationship between `pet-width` and `pet-length`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "iris_virginica.plot.scatter(x = \"pet-width\", y = \"pet-length\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Let's learn about *xlim* and *ylim*\n", + "- Allows us to set x-axis and y-axis limits\n", + "- Takes either a single value (LOWER-BOUND) or a tuple containing two values (LOWER-BOUND, UPPER-BOUND)\n", + "- You need to be careful about setting the UPPER-BOUND" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "iris_virginica.plot.scatter(x = \"pet-width\", y = \"pet-length\", xlim = ???, ylim = ???)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ax = iris_virginica.plot.scatter(x = \"pet-width\", y = \"pet-length\",\n", + " xlim = ???, ylim = ???,\n", + " 
figsize = (3, 3))\n", + "\n", + "# What is wrong with this plot?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "What is the maximum `pet-length`?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "iris_virginica[\"pet-length\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For every set method, there is a corresponding get method. Try `ax.get_ylim()`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's include assert statements to make sure we don't crop the plot!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ax = iris_virginica.plot.scatter(x = \"pet-width\", y = \"pet-length\",\n", + " xlim = (0, 6), ylim = (0, 6),\n", + " figsize = (3, 3))\n", + "\n", + "#print(\"Ran into AssertionError while checking axes limits\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Now let's try all 4 assert statements\n", + "\n", + "```\n", + "assert iris_virginica[ax.get_xlabel()].min() >= ax.get_xlim()[0]\n", + "assert iris_virginica[ax.get_xlabel()].max() <= ax.get_xlim()[1]\n", + "assert iris_virginica[ax.get_ylabel()].min() >= ax.get_ylim()[0]\n", + "assert iris_virginica[ax.get_ylabel()].max() <= ax.get_ylim()[1]\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ax = iris_virginica.plot.scatter(x = \"pet-width\", y = \"pet-length\",\n", + " xlim = (0, 7), ylim = (0, 7),\n", + " figsize = (3, 3))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Close the database connection.\n", + "iris_conn.close()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ 
+ "# Plotting Applications\n", + "\n", + "**Learning Objectives**\n", + "\n", + "- Make a line plot on a series or on a DataFrame\n", + "- Apply features of line plots and bar plots to visualize results of data investigations\n", + "- Clean Series data by dropping NaN values and by converting to int\n", + "- Make a stacked bar plot" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Line plots\n", + "- `SERIES.plot.line()` each value in the Series becomes y-value and each index becomes x-value\n", + "- `DATAFRAME.plot.line()` each column in the data frame becomes a line in the plot\n", + "- ***IMPORTANT***: lines in line plots shouldn't be crooked, you need to sort the values based on increasing order of indices!\n", + "\n", + "https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.plot.line.html" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Plotting line from a Series" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# when you make a series from a list, the default indices 0, 1, 2, ...\n", + "s = Series([0, 100, 300, 200, 400])\n", + "s" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "s = Series([0, 100, 300, 200, 400], index = [0, 20, 21, 22, 1])\n", + "s # oops this produces a crooked line plot!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Let's fix it by sorting the Series values based on the indices\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Craft breweries example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# You can make a series from a list and add indices\n", + "s = Series([1758, 2002, 2408, 2898, 3814, 4803, 5713, 6661, 7618, 8391, 8764], \\\n", + " index=[2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020])\n", + "\n", + "# We can save the AxesSubplot and \"beautify\" it like the other plots...\n", + "\n", + "# Set title to \"Craft Breweries in the USA\"\n", + "\n", + "# Set x-axis label to \"Year\"\n", + "\n", + "# Set y-axis label to \"# Craft Breweries\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Be careful! If the indices are out of order you get a mess\n", + "# pandas plots each (index, value) in the order given\n", + "s = Series([1758, 2408, 2898, 3814, 4803, 5713, 6661, 7618, 8391, 8764, 2002], \\\n", + " index=[2010, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2011])\n", + "# TODO: fix this crooked line plot\n", + "s.plot.line()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Fix it here\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Temperature example\n", + "Plotting lines from a DataFrame\n", + "\n", + "- `DATAFRAME.plot.line()` each column in the data frame becomes a line in the plot\n", + "- ***IMPORTANT***: lines in line plots shouldn't be crooked, you need to sort the values based on increasing order of indices!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# This DataFrame is made using a dict of lists\n", + "# City of Madison normal high and low (degrees F) by month\n", + "temp_df = DataFrame( {\n", + " \"high\": [26, 31, 43, 57, 68, 78, 82, 79, 72, 59, 44, 30],\n", + " \"low\": [11, 15, 25, 36, 46, 56, 61, 59, 50, 39, 28, 16]}\n", + ")\n", + "\n", + "# Q: do \"high\" and \"low\" become rows or columns within the DataFrame? \n", + "# A: \n", + "temp_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Let's create line plots\n", + " # not a nice plot\n", + " \n", + "# Let's fix the aesthetics" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### A Line Plot made from a DataFrame automatically plots all columns\n", + "\n", + "The same is true for bar plots; we'll see this later.\n", + "\n", + "`ax.set_xticks(...)`: takes as argument a sequence of numbers and adds ticks at those locations." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# You can also add ticks and ticklabels to a line plot\n", + "# TODOs:\n", + "# 1. Also add figure size as (8, 4)\n", + "# 2. Add xticks - how many do we need?\n", + "# 3. Add xticklabels and rotate them by 45 degrees\n", + "#[\"Jan\", \"Feb\", \"Mar\", \"Apr\", \"May\", \"Jun\", \"Jul\", \"Aug\", \"Sep\", \"Oct\", \"Nov\", \"Dec\"]\n", + "\n", + "ax = temp_df.plot.line(???)\n", + "ax.set_title(\"Average Temperatures in Madison, WI\")\n", + "ax.set_xlabel(\"Month\")\n", + "ax.set_ylabel(\"Temp (Fahrenheit)\")\n", + "ax.set_xticks(???) 
# makes a sequence of integers from 0 to 11\n", + "ax.set_xticklabels(???, ???)\n", + "\n", + "# This gets rid of the weird output\n", + "None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# We could explicitly pass arguments to the \"x\" and \"y\" parameters\n", + "temp_df_with_month = DataFrame( \n", + " {\n", + " \"month\": [\"Jan\", \"Feb\", \"Mar\", \"Apr\", \"May\", \"Jun\",\n", + " \"Jul\", \"Aug\", \"Sep\", \"Oct\", \"Nov\", \"Dec\"],\n", + " \"high\": [26, 31, 43, 57, 68, 78, 82, 79, 72, 59, 44, 30],\n", + " \"low\": [11, 15, 25, 36, 46, 56, 61, 59, 50, 39, 28, 16]}\n", + ")\n", + "\n", + "ax = temp_df_with_month.plot.line(x = ???, y = ???, figsize = (8, 4))\n", + "ax.set_title(\"Average Temperatures in Madison, WI\")\n", + "ax.set_xlabel(\"Month\")\n", + "ax.set_ylabel(\"Temp (Fahrenheit)\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### We can perform a calculation on an entire DataFrame\n", + "Let's change the entire DataFrame to Celsius" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# call the function on the dataframe\n", + "celcius_df = ???\n", + "celcius_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# here is one way to add a horizontal line to our line plots\n", + "celcius_df[???] 
= ???\n", + "celcius_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this plots each column as lines\n", + "# with rotation for the tick labels\n", + "ax = celcius_df.plot.line(figsize = (8, 4))\n", + "ax.set_xlabel(\"Month\")\n", + "ax.set_ylabel(\"Temp (Celcius)\")\n", + "ax.set_xticks(range(12))\n", + "ax.set_xticklabels([\"Jan\", \"Feb\", \"Mar\", \"Apr\", \"May\", \"Jun\",\n", + " \"Jul\", \"Aug\", \"Sep\", \"Oct\", \"Nov\", \"Dec\"], rotation = 45)\n", + "ax.grid()\n", + "None" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Bar plots using DataFrames" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Bar Plot Example w/ Fire Hydrants\n", + "\n", + "- General review of pandas\n", + "- Some new bar plot options" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# TODO: read \"Fire_Hydrants.csv\" into a DataFrame\n", + "hdf = ???\n", + "hdf.tail()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Extract just the column names\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Let's create a *bar plot* to visualize *colors* of fire hydrants." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Make a series called color_counts which stores the value counts of the \"nozzle_color\"\n", + "color_counts = ???\n", + "color_counts # what is wrong with this data?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# TODO: Clean the data ......use str.upper()\n", + "\n", + "color_counts = ???\n", + "color_counts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Make a horizontal bar plot of counts of colors and have the colors match\n", + "# use color list: [\"b\", \"g\", \"darkorange\", \"r\", \"c\", \"0.5\"]\n", + "ax = ???\n", + "ax.set_xlabel(\"Fire hydrant count\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Let's create a *bar plot* to visualize *style* of fire hydrants." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Do the same thing as we did for the colors but this time for the \"Style\"\n", + "style_counts = ???\n", + "style_counts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Grab the top 12 \n", + "top12 = ???\n", + "\n", + "# and then add an index to our Series for the sum of all the \"other\" styles\n", + "top12[???] = ???" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Plot the results\n", + "ax = ???(color = \"firebrick\")\n", + "ax.set_ylabel(\"Hydrant Count\")\n", + "ax.set_xlabel(\"Hydrant Type\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### In what *decade* were *pacers manufactured*?\n", + "### Take a peek at the *Style* column data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "hdf[\"Style\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Which *column* gives *year* information?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "hdf.columns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### How to get the *year_manufactured* for *pacers* and *others*?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Let's get the year manufactured for all of the \"Pacer\" hydrants.\n", + "pacer_years = ???\n", + "\n", + "# Note: We can do this either way\n", + "# pacer_years = hdf[\"year_manufactured\"][hdf[\"Style\"] == \"Pacer\"]\n", + "\n", + "pacer_years" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# then do the same for all the other data\n", + "other_years = ???\n", + "other_years" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### How to get the *decade* for *pacers*?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Round each year down to the start of the decade.\n", + "# e.g. 1987 --> 1980, 2003 --> 2000\n", + "pacer_decades = ???\n", + "pacer_decades" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### How to convert the *decades* back to *int*?\n", + "- `astype(...)` method\n", + "- `dropna(...)` method" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Drop the NaN values, convert to int, and do value counts\n", + "pacer_decades = ???" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### How to *count the decades* for pacers?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pacer_decades_count = ???\n", + "pacer_decades_count" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Count the *decades* for others." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Do the same thing for other_years. Save to a variable called \"other_decades\"\n", + "other_decades = ???\n", + "other_decades_count = ???\n", + "other_decades_count" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Build a DataFrame from a dictionary of key, Series" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plot_df = DataFrame(???)\n", + "plot_df # observe the NaN values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# make a bar plot\n", + "\n", + "ax = ???\n", + "ax.set_xlabel(\"Decade\")\n", + "ax.set_ylabel(\"Hydrant Count\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Ignore data from before 1950 using boolean indexing." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ax = ???\n", + "ax.set_xlabel(\"Decade\")\n", + "ax.set_ylabel(\"Hydrant Count\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Stacked Bar Chart\n", + "`stacked` parameter accepts boolean value as argument" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ax = ???\n", + "ax.set_xlabel(\"Decade\")\n", + "ax.set_ylabel(\"Hydrant Count\")\n", + "None" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} -- GitLab