diff --git a/s24/AmFam_Ashwin/15_CSV_Files/Lecture Code/Lec_15_CSVs_Template.ipynb b/s24/AmFam_Ashwin/15_CSV_Files/Lecture Code/Lec_15_CSVs_Template.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..5fdbc86bdc4a7fe5f66218327406d70402bab023 --- /dev/null +++ b/s24/AmFam_Ashwin/15_CSV_Files/Lecture Code/Lec_15_CSVs_Template.ipynb @@ -0,0 +1,555 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Warmup 1: Take a look at some list methods\n", + "\n", + "List methods [here](https://www.w3schools.com/python/python_ref_list.asp)..." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dairy = [\"milk\", \"ice cream\", \"cheese\", \"yogurt\", \"butter\"]\n", + "\n", + "# use the .index() method to get the index of \"ice cream\"\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Warmup 2: Because a list is a sequence, we can use the `in` operator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "food_shelf = [\"peanut butter\", \"milk\", \"bread\", \"cheese\", \"YOGURT\"]\n", + "for item in food_shelf:\n", + " if ...:\n", + " print(item, \"is dairy\")\n", + " else:\n", + " print(item, \"is not dairy\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# CS220: Lecture 15\n", + "\n", + "\n", + "## Learning Objectives\n", + "After this lecture you will be able to...\n", + "- Open an Excel file and export it to a Comma Separated Value file.\n", + "- Open a CSV file in TextEditor/Jupyter and connect the elements of the CSV file to the rows and columns in the spreadsheet.\n", + "- Use pre-written Python code to read a CSV file into a list of lists.\n", + "- Write Python statements with double list indexing to access any element of a CSV file via a list of lists.\n", + "- Write code that answers questions about CSV data by writing for 
loops on lists of lists." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Reading a CSV" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example 1: Store the contents of the CSV file into Python lists" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# adapted from https://automatetheboringstuff.com/chapter14/\n", + "import csv\n", + "\n", + "def process_csv(filename):\n", + " # open the file, its a text file utf-8\n", + " example_file = open(filename, encoding=\"utf-8\")\n", + " \n", + " # prepare it for reading as a CSV object\n", + " example_reader = csv.reader(example_file)\n", + " \n", + " # use the built-in list function to convert this into a list of lists\n", + " example_data = list(example_reader)\n", + " \n", + " # close the file to tidy up our workspace\n", + " example_file.close()\n", + " \n", + " # return the list of lists\n", + " return example_data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Call the `process_csv` function and store the list of lists in `cs220_csv`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cs220_csv = process_csv('cs220_survey_data.csv')\n", + "cs220_csv" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Store the header row into `cs220_header`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cs220_header = cs220_csv[0]\n", + "cs220_header" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Store all of the data rows into `cs220_data`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cs220_data = cs220_csv[1:]\n", + "cs220_data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example 
2: CSVs as a List of Lists" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Print out the lecture number of the 4th student by hardcoding its row and column" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cs220_data[...][...] # [row][col]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Print out the sleeping habit for the 2nd student by hardcoding its row and column" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cs220_data[...][...] " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Print out how many students completed the survey" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "len(...)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Print out every student's sleep habits and major" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for i in range(len(cs220_data)):\n", + " current_sleep_habit = ...\n", + " current_major = ...\n", + " print(current_sleep_habit + '\\t\\t' + current_major)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Print out every student's age in 10 years\n", + "\n", + "Fix the bug in the code below" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for i in range(...):\n", + " current_age = cs220_data[i][2]\n", + " print(current_age + 10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## It would be nice to have a helper function!\n", + "Let's introduce `cell`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Remember creating `cs220_header`?"
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cs220_header" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Get the column index of `\"Pizza topping\"`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cs220_header.index(...)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example 3: Create a `cell` function\n", + "\n", + "We want to invoke something like...\n", + "* `cell(24, \"Pet owner\")`\n", + "* `cell(63, \"Zip Code\")`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def cell(row_idx, col_name):\n", + " col_idx = ... # get the index of col_name\n", + " val = ... # get the value of cs220_data at the specified cell\n", + " return val" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Print out the lecture number of the 4th student using the `cell` function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cell(..., ...)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Print out every student's sleep habits and major using the `cell` function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for i in range(len(cs220_data)):\n", + " current_sleep_habit = ...\n", + " current_major = ...\n", + " print(current_sleep_habit + '\\t\\t' + current_major)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Print out every student's age in 10 years using the `cell` function\n", + "\n", + "This does not quite work. 
Fix this code:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for i in range(len(cs220_data)):\n", + " current_age = cell(i, \"Age\")\n", + " if current_age != None:\n", + " print(current_age + 10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example 4: Improve the `cell` function \n", + "\n", + "Improve the `cell` function so it returns the appropriate type. If there is **nothing** in the cell, return `None`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def cell(row_idx, col_name):\n", + " col_idx = cs220_header.index(col_name)\n", + " val = cs220_data[row_idx][col_idx]\n", + " if ...:\n", + " return None\n", + " elif ...:\n", + " return int(val)\n", + " else:\n", + " return val" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Print out every student's age in 10 years using the `cell` function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for i in range(len(cs220_data)):\n", + " current_age = cell(i, \"Age\")\n", + " if current_age != None:\n", + " print(current_age + 10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example 5: Get the average age of each lecture" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "students_lec_001 = []\n", + "students_lec_002 = []\n", + "students_lec_003 = []\n", + "students_lec_004 = []\n", + "students_lec_005 = []\n", + "for i in range(len(cs220_data)):\n", + " current_lec = ...\n", + " current_age = ...\n", + " if ...: # TODO: check for missing data\n", + " continue\n", + " if current_lec == \"LEC001\":\n", + " students_lec_001.append(current_age)\n", + " elif current_lec == \"LEC002\":\n", + " students_lec_002.append(current_age)\n", + " elif current_lec 
== \"LEC003\":\n", + " students_lec_003.append(current_age)\n", + " elif current_lec == \"LEC004\":\n", + " students_lec_004.append(current_age)\n", + " elif current_lec == \"LEC005\":\n", + " students_lec_005.append(current_age)\n", + " \n", + "print(\"Average age for LEC001 is\", round(sum(students_lec_001) / len(students_lec_001), 2))\n", + "print(\"Average age for LEC002 is\", round(sum(students_lec_002) / len(students_lec_002), 2))\n", + "print(\"Average age for LEC003 is\", round(sum(students_lec_003) / len(students_lec_003), 2))\n", + "print(\"Average age for LEC004 is\", round(sum(students_lec_004) / len(students_lec_004), 2))\n", + "print(\"Average age for LEC005 is\", round(sum(students_lec_005) / len(students_lec_005), 2))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Bonus challenge: Can you do this with a little less hardcoding?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lectures_of_ages = [\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " []\n", + "]\n", + "for i in range(len(cs220_data)):\n", + " current_lec = int(cell(i, \"Lecture\")[-1]) - 1 # Will be a number 0 - 4\n", + " current_age = cell(i, \"Age\")\n", + " if current_age != None:\n", + " lectures_of_ages[current_lec].append(current_age)\n", + "\n", + "# TODO: Print the average ages" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example 6: What are the unique ages for each lecture?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for i in range(len(lectures_of_ages)):\n", + " ages_of_lecture_i = ...\n", + " unique_ages = ...\n", + " print(unique_ages)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## You try!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Complete the challenges below. 
First try completing the problem directly using the list of lists (e.g. double indexing `[][]`), then try using the `cell` function!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Exercise 1: Of all runners, how many are procrastinators? " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Exercise 2: What percentage of 18-year-olds have their major declared as \"Other\"?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Exercise 3: Does the oldest basil/spinach-loving Business major prefer cats, dogs, or neither?" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}