Skip to content
Snippets Groups Projects
Commit eb6334f2 authored by Andy Kuemmel's avatar Andy Kuemmel
Browse files

Update...

Update f22/andy_lec_notes/lec17_Oct17_Dictionaries1/lec17_dictionaries1_template.ipynb, f22/andy_lec_notes/lec17_Oct17_Dictionaries1/amfam_survey_data.csv
parent 091ef0e3
No related branches found
No related tags found
No related merge requests found
pizza topping,state,years,sleep preference,month,pets,lat-long
mushroom,Florida,7,early bird,March,,"30.263214888389417, -81.54792098150529"
pineapple,Wisconsin,4,night owl,April,other,"43.1581437, -89.2921125"
sausage,Wisconsin,10,early bird,July,other,"43.15645, -89.28814"
pepperoni,WI,7,no preference,September,"dog,cat","43.073051, -89.401230"
mushroom,madison,7,early bird,November,,
pepperoni,FL,1,no preference,December,dog,"42.35623761108948, -71.05691488946681"
pepperoni,Wisconsin,2,night owl,February,,"43.159045128642774, -89.29146323507756"
mushroom,Florida,0.5,night owl,May,other,"43.160601, -89.287671"
mushroom,Wisconsin,10,no preference,January,"dog,fish","43.1562216,-89.2880086"
pineapple,Wisconsin,8,night owl,July,dog,"43.158655, -89.289895"
sausage,Minnesota,15,no preference,August,"dog,cat","45.13881645889933, -93.47636590830673"
pepperoni,New Jersey,1,night owl,May,other,"43.07148896663423, -89.40567798752735"
basil,Rhode Island,1,night owl,March,dog,"43.156490793353775, -89.28796434617352"
mushroom,TX,1,no preference,January,dog,
pineapple,Florida,3,early bird,July,other,"27.979191147972834, -82.33356380365498"
sausage,Wisconsin,0,early bird,December,"dog,cat","43.15631441766965, -89.28785659081201"
pineapple,Wisconsin,6,no preference,June,dog,"43.157716440341964, -89.28939262164963"
mushroom,Florida,7,no preference,July,other,"30.053546, -81.514610"
sausage,Florida,3,early bird,January,"dog,fish","30.263357, -81.547884"
mac&cheese,Wisconsin,5,night owl,July,dog,"43.158328032172754, -89.28946714938327"
pepperoni,Wisconsin,10,early bird,April,other,"43.1884213,-89.2762121"
other,Wisconsin,10,early bird,August,other,"43.15833, -89.28988"
sausage,WI,14,night owl,September,"dog,cat","43.15733597381252, -89.29013010509833"
sausage,Wisconsin,6,no preference,August,"dog,cat","43.159061371631616, -89.29141118826759"
pepperoni,Wisconsin,8,early bird,September,"dog,cat,fish",43.158359 -89.289972
pineapple,Florida,8,night owl,October,,"30.263432655702932, -81.54807118535949"
pineapple,TX,4,night owl,October,dog,"42.3558293029345, -71.05683171712127"
other,WI,2,early bird,June,,
mushroom,Wisconsin,20,early bird,September,dog,"43.15826500058843, -89.28945716165009"
sausage,Wisconsin,8,night owl,June,dog,"43.15839022178169, -89.28998287477457"
sausage,Wisconsin,20,night owl,April,bird,"43.15648555750267, -89.28783647996661"
pineapple,Texas,0.5,early bird,August,other,"43, 89"
\ No newline at end of file
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
import csv import csv
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Warmup 1: Read in the file 'cs220_survey_data.csv' into a list of lists # Warmup 1: Read in the file 'amfam_survey_data.csv' into a list of lists
# source: Automate the Boring Stuff with Python Ch 12 # source: Automate the Boring Stuff with Python Ch 12
def process_csv(filename): def process_csv(filename):
exampleFile = open(filename, encoding="utf-8") exampleFile = open(filename, encoding="utf-8")
exampleReader = csv.reader(exampleFile) exampleReader = csv.reader(exampleFile)
exampleData = list(exampleReader) exampleData = list(exampleReader)
exampleFile.close() exampleFile.close()
return exampleData return exampleData
survey_data = None # change this survey_data = None # change this
# show the length of this list of lists # show the length of this list of lists
``` ```
%% Output
721
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Warmup 2: store the first row in a variable called header # Warmup 2: store the first row in a variable called header
header = None header = None
header header
``` ```
%% Output
['Lecture',
'Age',
'Primary major',
'Other majors',
'Zip Code',
'Pizza topping',
'Pet owner',
'Runner',
'Sleep habit',
'Procrastinator']
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Warmup 3: store the rest of the data in a variable called rows # Warmup 3: store the rest of the data in a variable called rows
rows = None rows = None
rows
# show the first 3 rows
``` ```
%% Output %% Output
[['LEC001', ---------------------------------------------------------------------------
'19', TypeError Traceback (most recent call last)
'Business: Other', Input In [5], in <cell line: 3>()
'', 1 # Warmup 3: store the rest of the data in a variable called rows
'53706', 2 rows = None
'pepperoni', ----> 3 rows[0]
'Yes', TypeError: 'NoneType' object is not subscriptable
'No',
'early bird',
'Yes'],
['LEC001',
'21',
'Other',
'Economics/Philosophy, Data Science Certificate',
'53703',
'pepperoni',
'Yes',
'No',
'no preference',
'Yes'],
['LEC003',
'19',
'Computer Science',
'Data science',
'53706',
'pineapple',
'Yes',
'Yes',
'night owl',
'Yes']]
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Warmup 4: show the last 3 rows of data # Warmup 4: show the last 3 rows of data
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Warmup 5: Write a function that counts the frequency of a value in a column # Warmup 5: Write a function that counts the frequency of a value in a column
def count_col_frequency(value, col_name): def count_col_frequency(value, col_name):
''' returns the frequency of value in col_name ''' ''' returns the frequency of value in col_name '''
count = 0
for row in rows: #Hint: use if row[header.index(col_name)] == value:
if row[header.index(col_name)] == value: return None
count += 1
return count
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
#test your function #test your function
count_col_frequency("pineapple", "Pizza topping") count_col_frequency("pineapple", "Pizza topping")
``` ```
%% Output %% Output
80 80
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Warmup 6: Think about it: Is there an easy way to count *every* topping frequency? # Warmup 6: Think about it: Is there an easy way to count *every* topping frequency?
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
## March 7: Dictionaries ## Lecture 17: Dictionaries
Learning Objectives: Learning Objectives:
- Use correct dictionary syntax - Use correct dictionary syntax
- to create a dictionary using either {} or dict() - to create a dictionary using either {} or dict()
- to lookup, insert, update, and pop key/value pairs - to lookup, insert, update, and pop key/value pairs
- Use a for loop, the in operator, and common methods when working with dictionaries. - Use a for loop, the in operator, and common methods when working with dictionaries.
- Write code that uses a dictionary - Write code that uses a dictionary
- to store frequencies - to store frequencies
- to iterate through all key/value pairs - to iterate through all key/value pairs
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
As we are getting more sophisticated in this course, it's time to define... As we are getting more sophisticated in this course, it's time to define...
### Data Structure <br> ### Data Structure <br>
a data structure is a collection of data values, the relationships among them, and the functions or operations that can be applied to the data (Wikipedia) a data structure is a collection of data values, the relationships among them, and the functions or operations that can be applied to the data (Wikipedia)
Python contains built-in Data Structures called Collections Python contains built-in Data Structures called Collections
![collections.png](attachment:collections.png) ![collections.png](attachment:collections.png)
Today you will learn how to store data in Dictionaries. Today you will learn how to store data in Dictionaries.
#### Dictionary <br> #### Dictionary <br>
A dictionary is like a list, but more general. In a list, the indices have to be integers; but in a dictionary they can be any **immutable** type. A dictionary is like a list, but more general. In a list, the indices have to be integers; but in a dictionary they can be any **immutable** type.
You can think of a dictionary as a mapping between a set of indices (which are called keys) and a set of values. Each key maps to a value. The association of a key and a value is called a key-value pair or sometimes an item. You can think of a dictionary as a mapping between a set of indices (which are called keys) and a set of values. Each key maps to a value. The association of a key and a value is called a key-value pair or sometimes an item.
(from Think Python, Chapter 11) (from Think Python, Chapter 11)
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# a dictionary that stores prices of bakery items # a dictionary that stores prices of bakery items
# create a dictionary of key/value pairs # create a dictionary of key/value pairs
# notice the curly brackets # notice the curly brackets
# notice it can span over more than one line, indenting doesn't matter # notice it can span over more than one line, indenting doesn't matter
price_dict = { 'pie': 3.95, price_dict = { 'pie': 3.95,
'ala mode':1.50, 'ala mode':1.50,
'donut': 1.25, 'muffin': 2.25, 'donut': 1.25, 'muffin': 2.25,
'brownie': 3.15, 'brownie': 3.15,
'cookie': 0.79, 'milk':1.65, 'loaf': 5.99, 'cookie': 0.79, 'milk':1.65, 'loaf': 5.99,
'hot dog': 4.99} # feel free to add some of your own here 'hot dog': 4.99} # feel free to add some of your own here
price_dict price_dict
``` ```
%% Output %% Output
{'pie': 3.95, {'pie': 3.95,
'ala mode': 1.5, 'ala mode': 1.5,
'donut': 1.25, 'donut': 1.25,
'muffin': 2.25, 'muffin': 2.25,
'brownie': 3.15, 'brownie': 3.15,
'cookie': 0.79, 'cookie': 0.79,
'milk': 1.65, 'milk': 1.65,
'loaf': 5.99, 'loaf': 5.99,
'hot dog': 4.99} 'hot dog': 4.99}
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# print the length of the dictionary # print the length of the dictionary
print(price_dict) # number of key/value pairs print(price_dict) # number of key/value pairs
#get the price for a certain item #get the price for a certain item
print(price_dict) # name of dict [ key] print(...) # name of dict [ key]
#get the price for donut #get the price for donut
print(price_dict) print(...)
# what's wrong with this line? # what's wrong with this line?
# print(price_dict[1.25]) # print(price_dict[1.25])
``` ```
%% Output %% Output
9 9
5.99 5.99
1.25 1.25
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Dictionaries are Mutable ### Dictionaries are Mutable
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# add a new key/value pair using [ ] notation # add a new key/value pair using [ ] notation
# add one of your own # add one of your own
``` ```
%% Output %% Output
{'pie': 3.95, {'pie': 3.95,
'ala mode': 1.5, 'ala mode': 1.5,
'donut': 1.25, 'donut': 1.25,
'muffin': 2.25, 'muffin': 2.25,
'brownie': 3.15, 'brownie': 3.15,
'cookie': 0.79, 'cookie': 0.79,
'milk': 1.65, 'milk': 1.65,
'loaf': 5.99, 'loaf': 5.99,
'hot dog': 4.99, 'hot dog': 4.99,
'drink': 2.49} 'drink': 2.49}
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# change the value associated with a key....syntax is like add # change the value associated with a key....syntax is like add
# now you add one # now you add one
price_dict price_dict
``` ```
%% Output %% Output
{'pie': 3.95, {'pie': 3.95,
'ala mode': 1.5, 'ala mode': 1.5,
'donut': 1.25, 'donut': 1.25,
'muffin': 2.25, 'muffin': 2.25,
'brownie': 3.15, 'brownie': 3.15,
'cookie': 0.79, 'cookie': 0.79,
'milk': 1.65, 'milk': 1.65,
'loaf': 5.99, 'loaf': 5.99,
'hot dog': 4.99, 'hot dog': 4.99,
'drink': 2.99} 'drink': 2.99}
%% Cell type:markdown id: tags:
### The .pop() method removes a key/value pair
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# use pop to delete a key/value pair # use pop to delete a key/value pair
price_dict.pop('hot dog') # or del(price_dict['hot dog']) price_dict.pop('hot dog') # or del(price_dict['hot dog'])
# delete another key/value pair # delete another key/value pair
price_dict.pop() price_dict.pop()
# try deleting something that is not there # try deleting something that is not there
#price_dict.pop('pizza') #price_dict.pop('pizza')
# fix this with an if statement # fix this with an if statement
if 'pizza' in price_dict: if 'pizza' in price_dict:
price_dict.pop('pizza') price_dict.pop('pizza')
``` ```
%% Cell type:markdown id: tags:
### The .keys() and .values() methods return a list-like object
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# get all keys and convert to a list # get all keys and convert to a list
print(price_dict.keys()) print(price_dict.keys())
``` ```
%% Output %% Output
dict_keys(['pie', 'ala mode', 'donut', 'muffin', 'brownie', 'cookie', 'milk']) dict_keys(['pie', 'ala mode', 'donut', 'muffin', 'brownie', 'cookie', 'milk'])
['pie', 'ala mode', 'donut', 'muffin', 'brownie', 'cookie', 'milk'] ['pie', 'ala mode', 'donut', 'muffin', 'brownie', 'cookie', 'milk']
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# get all values and convert to a list # get all values and convert to a list
print(price_dict.values()) print(price_dict.values())
``` ```
%% Output %% Output
dict_values([3.95, 1.5, 1.25, 2.25, 3.15, 0.79, 1.65]) dict_values([3.95, 1.5, 1.25, 2.25, 3.15, 0.79, 1.65])
[3.95, 1.5, 1.25, 2.25, 3.15, 0.79, 1.65] [3.95, 1.5, 1.25, 2.25, 3.15, 0.79, 1.65]
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# use 'in' price_dict, price_dict.keys(), price_dict.values() # use 'in' price_dict, price_dict.keys(), price_dict.values()
#print('donut' in price_dict) # default is to check the keys #print('donut' in price_dict) # default is to check the keys
#print(9.95 in price_dict) # default is NOT values #print(9.95 in price_dict) # default is NOT values
#print('apple' in price_dict.keys()) # can call out the keys #print('apple' in price_dict.keys()) # can call out the keys
#print(3.95 in price_dict.values()) # can check the values #print(3.95 in price_dict.values()) # can check the values
``` ```
%% Output %% Output
True True
False False
False False
True True
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Applications # Applications
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Example 1: given a list of items, find the total cost of the order # Example 1: given a list of items, find the total cost of the order
order = ['pie', 'donut', 'milk', 'cookie', 'tofu'] # add more items to the order order = ['pie', 'donut', 'milk', 'cookie', 'tofu'] # add more items to the order
print(order) print(order)
total_cost = 0 total_cost = 0
for item in order: for item in order:
pass pass
# find the total of the items in the order # find the total of the items in the order
print ("Your total is ${:.2f}".format(total_cost)) print ("Your total is ${:.2f}".format(total_cost))
``` ```
%% Output %% Output
['pie', 'donut', 'milk', 'cookie', 'tofu'] ['pie', 'donut', 'milk', 'cookie', 'tofu']
tofu is not in the dictionary tofu is not in the dictionary
Your total is $7.64 Your total is $7.64
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Example 2a: find the frequency of characters in a sentence # Example 2a: find the frequency of characters in a sentence
# start with an empty dictionary # start with an empty dictionary
letter_freq = {} letter_freq = {}
# letter_freq = dict() # other way # letter_freq = dict() # other way
sentence = "Meet me at the bike racks after school at 3:30 today." sentence = "Meet me at the bike racks after school at 3:30 today."
for letter in sentence: for letter in sentence:
pass pass
print(letter_freq) print(letter_freq)
``` ```
%% Output %% Output
{'m': 2, 'e': 6, 't': 6, ' ': 10, 'a': 5, 'h': 2, 'b': 1, 'i': 1, 'k': 2, 'r': 2, 'c': 2, 's': 2, 'f': 1, 'o': 3, 'l': 1, '3': 2, ':': 1, '0': 1, 'd': 1, 'y': 1, '.': 1} {'m': 2, 'e': 6, 't': 6, ' ': 10, 'a': 5, 'h': 2, 'b': 1, 'i': 1, 'k': 2, 'r': 2, 'c': 2, 's': 2, 'f': 1, 'o': 3, 'l': 1, '3': 2, ':': 1, '0': 1, 'd': 1, 'y': 1, '.': 1}
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Example 2b: find the letter that occurred the most # Example 2b: find the letter that occurred the most
most_used_key = None most_used_key = None
max_value = 0 max_value = 0
for key in letter_freq: for key in letter_freq:
pass pass
print("the character {} appeared {} times".format(str(most_used_key), max_value)) print("the character {} appeared {} times".format(str(most_used_key), max_value))
``` ```
%% Output %% Output
the character appeared 10 times the character appeared 10 times
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# think about it...why did we use None ? # think about it...why did we use None ?
# why use 0 ? # why use 0 ?
# why not use for i in range? # why not use for i in range?
for i in range (len(letter_freq)): for i in range (len(letter_freq)):
print (i) print (i)
``` ```
%% Output %% Output
--------------------------------------------------------------------------- ---------------------------------------------------------------------------
NameError Traceback (most recent call last) NameError Traceback (most recent call last)
<ipython-input-18-51a323b4acb5> in <module> <ipython-input-18-51a323b4acb5> in <module>
3 # why not use for i in range? 3 # why not use for i in range?
4 4
----> 5 for i in range (len(letter_freq)): ----> 5 for i in range (len(letter_freq)):
6 print (i) 6 print (i)
7 7
NameError: name 'letter_freq' is not defined NameError: name 'letter_freq' is not defined
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Recall: survey data # Recall: survey data
# rows # rows
rows[-1] rows[-1]
``` ```
%% Output %% Output
['LEC003', ['LEC003',
'19', '19',
'Computer Science', 'Computer Science',
'Data science', 'Data science',
'53706', '53706',
'pineapple', 'pineapple',
'Yes', 'Yes',
'Yes', 'Yes',
'night owl', 'night owl',
'Yes'] 'Yes']
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Example 3a. Same as 2a, but use the survey_data # Example 3a. Same as 2a, but use the survey_data for pizza topping
major_freq = dict() # another way to make a dictionary major_freq = dict() # another way to make a dictionary
major_freq major_freq
``` ```
%% Output %% Output
{} {}
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Example 3b: use the algorithm from 2b to find the major with the highest frequency # Example 3b: use the algorithm from 2b to find the pizza topping with the highest frequency
# find the frequency of each major # find the frequency of each pizza topping
print("the major {} appeared {} times".format(str(most_used_key), max_value)) print("the pizza topping {} appeared {} times".format(str(most_used_key), max_value))
``` ```
%% Output %% Output
the major Engineering appeared 328 times ---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Input In [6], in <cell line: 6>()
1 # Example 3b: use the algorithm from 2b to find the pizza topping with the highest frequency
2
3 # find the frequency of each major
----> 6 print("the pizza topping {} appeared {} times".format(str(most_used_key), max_value))
NameError: name 'most_used_key' is not defined
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
![Screen%20Shot%202022-03-07%20at%208.32.53%20AM.png](attachment:Screen%20Shot%202022-03-07%20at%208.32.53%20AM.png) ![Screen%20Shot%202022-03-07%20at%208.32.53%20AM.png](attachment:Screen%20Shot%202022-03-07%20at%208.32.53%20AM.png)
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### After Lecture ### After Lecture
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# 1. Read the slides to learn how dictionaries relate to lists and sets. # 1. Read the slides to learn how dictionaries relate to lists and sets.
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# 2. Review this summary of common dictionary methods...do not need to memorize # 2. Review this summary of common dictionary methods...do not need to memorize
# https://www.w3schools.com/python/python_ref_dictionary.asp # https://www.w3schools.com/python/python_ref_dictionary.asp
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
``` ```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment