diff --git a/f22/andy_lec_notes/lec15_Oct12_CSV_Files/lec15_csvfiles_template.ipynb b/f22/andy_lec_notes/lec15_Oct12_CSV_Files/lec15_csvfiles_template.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..1d090a7674fa7e57e72aaa0f72b7d791ddc36d23 --- /dev/null +++ b/f22/andy_lec_notes/lec15_Oct12_CSV_Files/lec15_csvfiles_template.ipynb @@ -0,0 +1,644 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "# Warmup #1: Take a look at these list methods \n", + "# https://www.w3schools.com/python/python_ref_list.asp\n", + "dairy = [\"milk\", \"ice cream\", \"cheese\", \"yogurt\" ]\n", + "\n", + "#use the .index() method to get the index of \"ice cream\"\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "peanut butter is dairy\n", + "milk is dairy\n", + "bread is dairy\n", + "cheese is dairy\n", + "YOGURT is dairy\n" + ] + } + ], + "source": [ + "# Warmup #2: Because a list is a sequence, we can use the 'in' operator\n", + "food_shelf = [\"peanut butter\", \"milk\", \"bread\", \"cheese\", \"YOGURT\"]\n", + "for item in food_shelf:\n", + " if ... 
: \n", + " print(item, \"is dairy\")\n", + " else:\n", + " print(item, \"is not dairy\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Lecture 15: CSV Files\n", + "\n", + "\n", + "## Learning Objectives\n", + "After this lecture you will be able to...\n", + "- Open an Excel file and export it to a Comma Separated Value file.\n", + "\n", + "- Open a CSV file in TextEditor/Jupyter and connect the elements of the CSV file to the rows and columns in the spreadsheet.\n", + "\n", + "- Use pre-written Python code to read a CSV file into a list of lists.\n", + "\n", + "- Write Python statements with double list indexing to access any element of a CSV file via a list of lists.\n", + "\n", + "- Write code that answers questions about CSV data by writing for loops on lists of lists.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# Open the file of student survey data in Jupyter\n", + "\n", + "# Then open it in Windows ... what program opened? " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Open a CSV file in TextEditor/Jupyter and connect the elements of the CSV file to the rows and columns in the spreadsheet." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "# What do you notice? Take notes here\n", + "# " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Use pre-written Python code to read a CSV file into a list of lists." 
def process_csv(filename):
    """Read a CSV file and return its contents as a list of lists.

    Parameters:
        filename: path of the CSV file to read. The file is decoded as UTF-8.

    Returns:
        A list with one inner list per CSV row, in file order (a header row,
        if the file has one, is simply the first inner list). Every cell is a
        string; an empty file yields an empty list.

    Raises:
        OSError: if the file cannot be opened (e.g. FileNotFoundError).
    """
    # It's a UTF-8 text file. newline="" hands line-ending handling to the csv
    # module, so quoted cells containing line breaks are read back unchanged.
    # The with-block closes the file even if parsing raises part-way through.
    with open(filename, encoding="utf-8", newline="") as example_file:
        # csv.reader yields one list of strings per row; list() collects
        # them all while the file is still open.
        return list(csv.reader(example_file))
"metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Store the header row into cs220_header\n", + "cs220_header = cs220_csv[0]\n", + "cs220_header" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[['mushroom',\n", + " 'Florida',\n", + " '7',\n", + " 'early bird',\n", + " 'March',\n", + " '',\n", + " '30.263214888389417, -81.54792098150529'],\n", + " ['pineapple',\n", + " 'Wisconsin',\n", + " '4',\n", + " 'night owl',\n", + " 'April',\n", + " 'other',\n", + " '43.1581437, -89.2921125'],\n", + " ['sausage',\n", + " 'Wisconsin',\n", + " '10',\n", + " 'early bird',\n", + " 'July',\n", + " 'other',\n", + " '43.15645, -89.28814'],\n", + " ['pepperoni',\n", + " 'WI',\n", + " '7',\n", + " 'no preference',\n", + " 'September',\n", + " 'dog,cat',\n", + " '43.073051, -89.401230'],\n", + " ['mushroom', 'madison', '7', 'early bird', 'November', '', ''],\n", + " ['pepperoni',\n", + " 'FL',\n", + " '1',\n", + " 'no preference',\n", + " 'December',\n", + " 'dog',\n", + " '42.35623761108948, -71.05691488946681'],\n", + " ['pepperoni',\n", + " 'Wisconsin',\n", + " '2',\n", + " 'night owl',\n", + " 'February',\n", + " '',\n", + " '43.159045128642774, -89.29146323507756'],\n", + " ['mushroom',\n", + " 'Florida',\n", + " '0.5',\n", + " 'night owl',\n", + " 'May',\n", + " 'other',\n", + " '43.160601, -89.287671'],\n", + " ['mushroom',\n", + " 'Wisconsin',\n", + " '10',\n", + " 'no preference',\n", + " 'January',\n", + " 'dog,fish',\n", + " '43.1562216,-89.2880086'],\n", + " ['pineapple',\n", + " 'Wisconsin',\n", + " '8',\n", + " 'night owl',\n", + " 'July',\n", + " 'dog',\n", + " '43.158655, -89.289895'],\n", + " ['sausage',\n", + " 'Minnesota',\n", + " '15',\n", + " 'no preference',\n", + " 'August',\n", + " 'dog,cat',\n", + " '45.13881645889933, -93.47636590830673'],\n", + " ['pepperoni',\n", + " 'New Jersey',\n", + " '1',\n", + " 'night owl',\n", + " 'May',\n", + " 'other',\n", 
+ " '43.07148896663423, -89.40567798752735'],\n", + " ['basil',\n", + " 'Rhode Island',\n", + " '1',\n", + " 'night owl',\n", + " 'March',\n", + " 'dog',\n", + " '43.156490793353775, -89.28796434617352'],\n", + " ['mushroom', 'TX', '1', 'no preference', 'January', 'dog', ''],\n", + " ['pineapple',\n", + " 'Florida',\n", + " '3',\n", + " 'early bird',\n", + " 'July',\n", + " 'other',\n", + " '27.979191147972834, -82.33356380365498'],\n", + " ['sausage',\n", + " 'Wisconsin',\n", + " '0',\n", + " 'early bird',\n", + " 'December',\n", + " 'dog,cat',\n", + " '43.15631441766965, -89.28785659081201'],\n", + " ['pineapple',\n", + " 'Wisconsin',\n", + " '6',\n", + " 'no preference',\n", + " 'June',\n", + " 'dog',\n", + " '43.157716440341964, -89.28939262164963'],\n", + " ['mushroom',\n", + " 'Florida',\n", + " '7',\n", + " 'no preference',\n", + " 'July',\n", + " 'other',\n", + " '30.053546, -81.514610'],\n", + " ['sausage',\n", + " 'Florida',\n", + " '3',\n", + " 'early bird',\n", + " 'January',\n", + " 'dog,fish',\n", + " '30.263357, -81.547884'],\n", + " ['mac&cheese',\n", + " 'Wisconsin',\n", + " '5',\n", + " 'night owl',\n", + " 'July',\n", + " 'dog',\n", + " '43.158328032172754, -89.28946714938327'],\n", + " ['pepperoni',\n", + " 'Wisconsin',\n", + " '10',\n", + " 'early bird',\n", + " 'April',\n", + " 'other',\n", + " '43.1884213,-89.2762121'],\n", + " ['other',\n", + " 'Wisconsin',\n", + " '10',\n", + " 'early bird',\n", + " 'August',\n", + " 'other',\n", + " '43.15833, -89.28988'],\n", + " ['sausage',\n", + " 'WI',\n", + " '14',\n", + " 'night owl',\n", + " 'September',\n", + " 'dog,cat',\n", + " '43.15733597381252, -89.29013010509833'],\n", + " ['sausage',\n", + " 'Wisconsin',\n", + " '6',\n", + " 'no preference',\n", + " 'August',\n", + " 'dog,cat',\n", + " '43.159061371631616, -89.29141118826759'],\n", + " ['pepperoni',\n", + " 'Wisconsin',\n", + " '8',\n", + " 'early bird',\n", + " 'September',\n", + " 'dog,cat,fish',\n", + " '43.158359 -89.289972'],\n", + " 
['pineapple',\n", + " 'Florida',\n", + " '8',\n", + " 'night owl',\n", + " 'October',\n", + " '',\n", + " '30.263432655702932, -81.54807118535949'],\n", + " ['pineapple',\n", + " 'TX',\n", + " '4',\n", + " 'night owl',\n", + " 'October',\n", + " 'dog',\n", + " '42.3558293029345, -71.05683171712127'],\n", + " ['other', 'WI', '2', 'early bird', 'June', '', ''],\n", + " ['mushroom',\n", + " 'Wisconsin',\n", + " '20',\n", + " 'early bird',\n", + " 'September',\n", + " 'dog',\n", + " '43.15826500058843, -89.28945716165009'],\n", + " ['sausage',\n", + " 'Wisconsin',\n", + " '8',\n", + " 'night owl',\n", + " 'June',\n", + " 'dog',\n", + " '43.15839022178169, -89.28998287477457'],\n", + " ['sausage',\n", + " 'Wisconsin',\n", + " '20',\n", + " 'night owl',\n", + " 'April',\n", + " 'bird',\n", + " '43.15648555750267, -89.28783647996661'],\n", + " ['pineapple', 'Texas', '0.5', 'early bird', 'August', 'other', '43, 89']]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Store all of the data rows into cs220_data\n", + "cs220_data = cs220_csv[1:]\n", + "cs220_data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## CSVs as a List of Lists" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "# Determine how many students completed the survey.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "# show the entire 1st row of actual data\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Find the pizza topping for the 2nd student...by hardcoding its row and column....\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Find the lat-long of the 4th student...by hardcoding its row and column....\n" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Print out every student's sleep habit and pets\n", + "# use for \n", + "for row in cs220_data:\n", + " current_sleep_habit = None\n", + " pets = None\n", + " print(current_sleep_habit + '\\t\\t' + pets)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create a list containing the years of people who do not live in Wisconsin, then compute the average\n", + "\n", + "\n", + "\n", + "for row in cs220_data:\n", + " pass" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## It would be nice to have a helper function!\n", + "A function that easily accesses a `cell`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# How do we get the names of all the columns?\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get the column index of \"pizza topping\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# We want to invoke something like...\n", + "# cell(24, <colName>)\n", + "def cell_v1(row_idx, col_name):\n", + " col_idx = ??? # get the index of col_name\n", + " val = ??? # get the value of cs220_data at the specified cell\n", + " return val" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Print out the lat-long of the 4th student... 
using the cell function\n", + "cell_v1(???, ???)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Redo the above problem using the cell function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# redo .... using the cell function\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Improve the cell function so it returns the appropriate type.\n", + "# If there is nothing in the cell, return None\n", + "# otherwise, use the col_name to convert to the expected type\n", + "def cell(row_idx, col_name):\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Redo again using the improved cell function" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Practice problems" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# What percentage of students chose a non-meat pizza topping?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# What do you want to find out? " + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}