From a58c5160b1df3fde7bb3a61efe0fb488d48a4f1e Mon Sep 17 00:00:00 2001
From: gsingh58 <gurmail-singh@wisc.edu>
Date: Tue, 6 Feb 2024 08:16:41 -0600
Subject: [PATCH] lec5 notes updated

---
 lecture_material/05-oop1/solution.ipynb       | 1981 +++++++++++++++++
 .../05-oop1/template_lec_001.ipynb            | 1157 ++++++++++
 .../05-oop1/template_lec_002.ipynb            | 1157 ++++++++++
 3 files changed, 4295 insertions(+)
 create mode 100644 lecture_material/05-oop1/solution.ipynb
 create mode 100644 lecture_material/05-oop1/template_lec_001.ipynb
 create mode 100644 lecture_material/05-oop1/template_lec_002.ipynb

diff --git a/lecture_material/05-oop1/solution.ipynb b/lecture_material/05-oop1/solution.ipynb
new file mode 100644
index 0000000..f0fde29
--- /dev/null
+++ b/lecture_material/05-oop1/solution.ipynb
@@ -0,0 +1,1981 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "d617eefb",
+   "metadata": {},
+   "source": [
+    "# Performance 3"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "783117c5-146f-454a-963e-ed2873b8a6d3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# known import statements\n",
+    "import pandas as pd\n",
+    "import csv\n",
+    "from subprocess import check_output\n",
+    "\n",
+    "# new import statements\n",
+    "import zipfile\n",
+    "from io import TextIOWrapper"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4e2be82d",
+   "metadata": {},
+   "source": [
+    "### Let's take a look at the files inside the current working directory."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "4eaa8a8d",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['total 21M',\n",
+       " 'drwxrwxr-x 2 gurmail.singh gurmail.singh 4.0K Jan 25 20:58 01-repro1',\n",
+       " 'drwxrwxr-x 3 gurmail.singh gurmail.singh 4.0K Jan 25 21:24 02-repro2',\n",
+       " 'drwxrwxr-x 3 gurmail.singh gurmail.singh 4.0K Feb  1 20:12 03-performance1',\n",
+       " 'drwxrwxr-x 3 gurmail.singh gurmail.singh 4.0K Feb  5 22:48 04-performance2',\n",
+       " '-rw-rw-r-- 1 gurmail.singh gurmail.singh 7.7K Jan 30 20:12 Untitled.ipynb',\n",
+       " 'drwxrwxr-x 2 gurmail.singh gurmail.singh 4.0K Jan 27 10:17 img',\n",
+       " '-rw------- 1 gurmail.singh gurmail.singh  21K Feb  6 13:55 nohup.out',\n",
+       " '-rw-rw-r-- 1 gurmail.singh gurmail.singh 6.6K Jan 30 19:16 out.mp4',\n",
+       " '-rw-rw-r-- 1 gurmail.singh gurmail.singh 122K Feb  5 21:27 reading1.ipynb',\n",
+       " '-rw-rw-r-- 1 gurmail.singh gurmail.singh  52K Feb  6 13:54 solution.ipynb',\n",
+       " '-rw-rw-r-- 1 gurmail.singh gurmail.singh  29K Jan 30 14:26 template_lec_001.ipynb',\n",
+       " '-rw-rw-r-- 1 gurmail.singh gurmail.singh  21M Feb  6 13:48 wi.zip',\n",
+       " '']"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "str(check_output([\"ls\", \"-lh\"]), encoding=\"utf-8\").split(\"\\n\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b8c7dc7f",
+   "metadata": {},
+   "source": [
+    "### Let's `unzip` \"wi.zip\"."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "ed32cf4c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "b'Archive:  wi.zip\\n  inflating: wi.csv                  \\n'"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "check_output([\"unzip\", \"wi.zip\"])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4eac1b48",
+   "metadata": {},
+   "source": [
+    "### Let's take a look at the files inside the current working directory."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "a6852e43",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['total 198M',\n",
+       " 'drwxrwxr-x 2 gurmail.singh gurmail.singh 4.0K Jan 25 20:58 01-repro1',\n",
+       " 'drwxrwxr-x 3 gurmail.singh gurmail.singh 4.0K Jan 25 21:24 02-repro2',\n",
+       " 'drwxrwxr-x 3 gurmail.singh gurmail.singh 4.0K Feb  1 20:12 03-performance1',\n",
+       " 'drwxrwxr-x 3 gurmail.singh gurmail.singh 4.0K Feb  5 22:48 04-performance2',\n",
+       " '-rw-rw-r-- 1 gurmail.singh gurmail.singh 7.7K Jan 30 20:12 Untitled.ipynb',\n",
+       " 'drwxrwxr-x 2 gurmail.singh gurmail.singh 4.0K Jan 27 10:17 img',\n",
+       " '-rw------- 1 gurmail.singh gurmail.singh  21K Feb  6 13:55 nohup.out',\n",
+       " '-rw-rw-r-- 1 gurmail.singh gurmail.singh 6.6K Jan 30 19:16 out.mp4',\n",
+       " '-rw-rw-r-- 1 gurmail.singh gurmail.singh 122K Feb  5 21:27 reading1.ipynb',\n",
+       " '-rw-rw-r-- 1 gurmail.singh gurmail.singh  52K Feb  6 13:54 solution.ipynb',\n",
+       " '-rw-rw-r-- 1 gurmail.singh gurmail.singh  29K Jan 30 14:26 template_lec_001.ipynb',\n",
+       " '-rw-rw-r-- 1 gurmail.singh gurmail.singh 177M Jan 14  2022 wi.csv',\n",
+       " '-rw-rw-r-- 1 gurmail.singh gurmail.singh  21M Feb  6 13:48 wi.zip',\n",
+       " '']"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "str(check_output([\"ls\", \"-lh\"]), encoding=\"utf-8\").split(\"\\n\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8ba94151",
+   "metadata": {},
+   "source": [
+    "### Traditional way of reading data using pandas"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "529a4bd2",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/tmp/ipykernel_12578/3756477020.py:1: DtypeWarning: Columns (22,23,24,26,27,28,29,30,31,32,33,38,43,44) have mixed types. Specify dtype option on import or set low_memory=False.\n",
+      "  df = pd.read_csv(\"wi.csv\")\n"
+     ]
+    }
+   ],
+   "source": [
+    "df = pd.read_csv(\"wi.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "570485b8",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>activity_year</th>\n",
+       "      <th>lei</th>\n",
+       "      <th>derived_msa-md</th>\n",
+       "      <th>state_code</th>\n",
+       "      <th>county_code</th>\n",
+       "      <th>census_tract</th>\n",
+       "      <th>conforming_loan_limit</th>\n",
+       "      <th>derived_loan_product_type</th>\n",
+       "      <th>derived_dwelling_category</th>\n",
+       "      <th>derived_ethnicity</th>\n",
+       "      <th>...</th>\n",
+       "      <th>denial_reason-2</th>\n",
+       "      <th>denial_reason-3</th>\n",
+       "      <th>denial_reason-4</th>\n",
+       "      <th>tract_population</th>\n",
+       "      <th>tract_minority_population_percent</th>\n",
+       "      <th>ffiec_msa_md_median_family_income</th>\n",
+       "      <th>tract_to_msa_income_percentage</th>\n",
+       "      <th>tract_owner_occupied_units</th>\n",
+       "      <th>tract_one_to_four_family_homes</th>\n",
+       "      <th>tract_median_age_of_housing_units</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>2020</td>\n",
+       "      <td>549300FX7K8PTEQUU487</td>\n",
+       "      <td>31540</td>\n",
+       "      <td>WI</td>\n",
+       "      <td>55025.0</td>\n",
+       "      <td>5.502500e+10</td>\n",
+       "      <td>C</td>\n",
+       "      <td>Conventional:First Lien</td>\n",
+       "      <td>Single Family (1-4 Units):Site-Built</td>\n",
+       "      <td>Not Hispanic or Latino</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>3572</td>\n",
+       "      <td>41.15</td>\n",
+       "      <td>96600</td>\n",
+       "      <td>64</td>\n",
+       "      <td>812</td>\n",
+       "      <td>910</td>\n",
+       "      <td>45</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2020</td>\n",
+       "      <td>549300FX7K8PTEQUU487</td>\n",
+       "      <td>99999</td>\n",
+       "      <td>WI</td>\n",
+       "      <td>55013.0</td>\n",
+       "      <td>5.501397e+10</td>\n",
+       "      <td>C</td>\n",
+       "      <td>Conventional:First Lien</td>\n",
+       "      <td>Single Family (1-4 Units):Site-Built</td>\n",
+       "      <td>Not Hispanic or Latino</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>2333</td>\n",
+       "      <td>9.90</td>\n",
+       "      <td>68000</td>\n",
+       "      <td>87</td>\n",
+       "      <td>1000</td>\n",
+       "      <td>2717</td>\n",
+       "      <td>34</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2020</td>\n",
+       "      <td>549300FX7K8PTEQUU487</td>\n",
+       "      <td>99999</td>\n",
+       "      <td>WI</td>\n",
+       "      <td>55127.0</td>\n",
+       "      <td>5.512700e+10</td>\n",
+       "      <td>C</td>\n",
+       "      <td>VA:First Lien</td>\n",
+       "      <td>Single Family (1-4 Units):Site-Built</td>\n",
+       "      <td>Not Hispanic or Latino</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>5943</td>\n",
+       "      <td>13.26</td>\n",
+       "      <td>68000</td>\n",
+       "      <td>104</td>\n",
+       "      <td>1394</td>\n",
+       "      <td>1856</td>\n",
+       "      <td>44</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>2020</td>\n",
+       "      <td>549300FX7K8PTEQUU487</td>\n",
+       "      <td>99999</td>\n",
+       "      <td>WI</td>\n",
+       "      <td>55127.0</td>\n",
+       "      <td>5.512700e+10</td>\n",
+       "      <td>C</td>\n",
+       "      <td>Conventional:Subordinate Lien</td>\n",
+       "      <td>Single Family (1-4 Units):Site-Built</td>\n",
+       "      <td>Ethnicity Not Available</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>5650</td>\n",
+       "      <td>7.63</td>\n",
+       "      <td>68000</td>\n",
+       "      <td>124</td>\n",
+       "      <td>1712</td>\n",
+       "      <td>2104</td>\n",
+       "      <td>36</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>2020</td>\n",
+       "      <td>549300FX7K8PTEQUU487</td>\n",
+       "      <td>33460</td>\n",
+       "      <td>WI</td>\n",
+       "      <td>55109.0</td>\n",
+       "      <td>5.510912e+10</td>\n",
+       "      <td>C</td>\n",
+       "      <td>VA:First Lien</td>\n",
+       "      <td>Single Family (1-4 Units):Site-Built</td>\n",
+       "      <td>Not Hispanic or Latino</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>7210</td>\n",
+       "      <td>4.36</td>\n",
+       "      <td>97300</td>\n",
+       "      <td>96</td>\n",
+       "      <td>2101</td>\n",
+       "      <td>2566</td>\n",
+       "      <td>22</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 99 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   activity_year                   lei  derived_msa-md state_code  \\\n",
+       "0           2020  549300FX7K8PTEQUU487           31540         WI   \n",
+       "1           2020  549300FX7K8PTEQUU487           99999         WI   \n",
+       "2           2020  549300FX7K8PTEQUU487           99999         WI   \n",
+       "3           2020  549300FX7K8PTEQUU487           99999         WI   \n",
+       "4           2020  549300FX7K8PTEQUU487           33460         WI   \n",
+       "\n",
+       "   county_code  census_tract conforming_loan_limit  \\\n",
+       "0      55025.0  5.502500e+10                     C   \n",
+       "1      55013.0  5.501397e+10                     C   \n",
+       "2      55127.0  5.512700e+10                     C   \n",
+       "3      55127.0  5.512700e+10                     C   \n",
+       "4      55109.0  5.510912e+10                     C   \n",
+       "\n",
+       "       derived_loan_product_type             derived_dwelling_category  \\\n",
+       "0        Conventional:First Lien  Single Family (1-4 Units):Site-Built   \n",
+       "1        Conventional:First Lien  Single Family (1-4 Units):Site-Built   \n",
+       "2                  VA:First Lien  Single Family (1-4 Units):Site-Built   \n",
+       "3  Conventional:Subordinate Lien  Single Family (1-4 Units):Site-Built   \n",
+       "4                  VA:First Lien  Single Family (1-4 Units):Site-Built   \n",
+       "\n",
+       "         derived_ethnicity  ... denial_reason-2 denial_reason-3  \\\n",
+       "0   Not Hispanic or Latino  ...             NaN             NaN   \n",
+       "1   Not Hispanic or Latino  ...             NaN             NaN   \n",
+       "2   Not Hispanic or Latino  ...             NaN             NaN   \n",
+       "3  Ethnicity Not Available  ...             NaN             NaN   \n",
+       "4   Not Hispanic or Latino  ...             NaN             NaN   \n",
+       "\n",
+       "   denial_reason-4  tract_population  tract_minority_population_percent  \\\n",
+       "0              NaN              3572                              41.15   \n",
+       "1              NaN              2333                               9.90   \n",
+       "2              NaN              5943                              13.26   \n",
+       "3              NaN              5650                               7.63   \n",
+       "4              NaN              7210                               4.36   \n",
+       "\n",
+       "   ffiec_msa_md_median_family_income  tract_to_msa_income_percentage  \\\n",
+       "0                              96600                              64   \n",
+       "1                              68000                              87   \n",
+       "2                              68000                             104   \n",
+       "3                              68000                             124   \n",
+       "4                              97300                              96   \n",
+       "\n",
+       "   tract_owner_occupied_units  tract_one_to_four_family_homes  \\\n",
+       "0                         812                             910   \n",
+       "1                        1000                            2717   \n",
+       "2                        1394                            1856   \n",
+       "3                        1712                            2104   \n",
+       "4                        2101                            2566   \n",
+       "\n",
+       "   tract_median_age_of_housing_units  \n",
+       "0                                 45  \n",
+       "1                                 34  \n",
+       "2                                 44  \n",
+       "3                                 36  \n",
+       "4                                 22  \n",
+       "\n",
+       "[5 rows x 99 columns]"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.head(5) # Top 5 rows within the DataFrame"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bad7dce4",
+   "metadata": {},
+   "source": [
+    "### How can we see all the column names?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "d0a98751",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['activity_year', 'lei', 'derived_msa-md', 'state_code', 'county_code',\n",
+       "       'census_tract', 'conforming_loan_limit', 'derived_loan_product_type',\n",
+       "       'derived_dwelling_category', 'derived_ethnicity', 'derived_race',\n",
+       "       'derived_sex', 'action_taken', 'purchaser_type', 'preapproval',\n",
+       "       'loan_type', 'loan_purpose', 'lien_status', 'reverse_mortgage',\n",
+       "       'open-end_line_of_credit', 'business_or_commercial_purpose',\n",
+       "       'loan_amount', 'loan_to_value_ratio', 'interest_rate', 'rate_spread',\n",
+       "       'hoepa_status', 'total_loan_costs', 'total_points_and_fees',\n",
+       "       'origination_charges', 'discount_points', 'lender_credits', 'loan_term',\n",
+       "       'prepayment_penalty_term', 'intro_rate_period', 'negative_amortization',\n",
+       "       'interest_only_payment', 'balloon_payment',\n",
+       "       'other_nonamortizing_features', 'property_value', 'construction_method',\n",
+       "       'occupancy_type', 'manufactured_home_secured_property_type',\n",
+       "       'manufactured_home_land_property_interest', 'total_units',\n",
+       "       'multifamily_affordable_units', 'income', 'debt_to_income_ratio',\n",
+       "       'applicant_credit_score_type', 'co-applicant_credit_score_type',\n",
+       "       'applicant_ethnicity-1', 'applicant_ethnicity-2',\n",
+       "       'applicant_ethnicity-3', 'applicant_ethnicity-4',\n",
+       "       'applicant_ethnicity-5', 'co-applicant_ethnicity-1',\n",
+       "       'co-applicant_ethnicity-2', 'co-applicant_ethnicity-3',\n",
+       "       'co-applicant_ethnicity-4', 'co-applicant_ethnicity-5',\n",
+       "       'applicant_ethnicity_observed', 'co-applicant_ethnicity_observed',\n",
+       "       'applicant_race-1', 'applicant_race-2', 'applicant_race-3',\n",
+       "       'applicant_race-4', 'applicant_race-5', 'co-applicant_race-1',\n",
+       "       'co-applicant_race-2', 'co-applicant_race-3', 'co-applicant_race-4',\n",
+       "       'co-applicant_race-5', 'applicant_race_observed',\n",
+       "       'co-applicant_race_observed', 'applicant_sex', 'co-applicant_sex',\n",
+       "       'applicant_sex_observed', 'co-applicant_sex_observed', 'applicant_age',\n",
+       "       'co-applicant_age', 'applicant_age_above_62',\n",
+       "       'co-applicant_age_above_62', 'submission_of_application',\n",
+       "       'initially_payable_to_institution', 'aus-1', 'aus-2', 'aus-3', 'aus-4',\n",
+       "       'aus-5', 'denial_reason-1', 'denial_reason-2', 'denial_reason-3',\n",
+       "       'denial_reason-4', 'tract_population',\n",
+       "       'tract_minority_population_percent',\n",
+       "       'ffiec_msa_md_median_family_income', 'tract_to_msa_income_percentage',\n",
+       "       'tract_owner_occupied_units', 'tract_one_to_four_family_homes',\n",
+       "       'tract_median_age_of_housing_units'],\n",
+       "      dtype='object')"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.columns"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "26311838",
+   "metadata": {},
+   "source": [
+    "### How to extract `interest_rate`?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "c4bae34a",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0          NaN\n",
+       "1          3.0\n",
+       "2          NaN\n",
+       "3         3.75\n",
+       "4          2.5\n",
+       "          ... \n",
+       "468267    2.25\n",
+       "468268     2.5\n",
+       "468269    4.25\n",
+       "468270     NaN\n",
+       "468271     NaN\n",
+       "Name: interest_rate, Length: 468272, dtype: object"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df[\"interest_rate\"] # observe that there are missing values"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "148b1243",
+   "metadata": {},
+   "source": [
+    "### How to count unique values in a column `Series`?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "f310b537",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "interest_rate\n",
+       "Exempt    37959\n",
+       "3.0       22584\n",
+       "2.75      22480\n",
+       "3.25      21343\n",
+       "2.875     21201\n",
+       "          ...  \n",
+       "3.023         1\n",
+       "2.632         1\n",
+       "3.345         1\n",
+       "3.364         1\n",
+       "2.32          1\n",
+       "Name: count, Length: 2080, dtype: int64"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df[\"interest_rate\"].value_counts()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e0da9824",
+   "metadata": {},
+   "source": [
+    "### Let's eliminate the strings (Exempt) and missing values (NaN).\n",
+    "Let's try `pd.to_numeric(...)`. We need a way to specify that strings need to be converted into NaN values."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "3beaae6e",
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "ValueError",
+     "evalue": "Unable to parse string \"Exempt\" at position 1505",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
+      "File \u001b[0;32mlib.pyx:2368\u001b[0m, in \u001b[0;36mpandas._libs.lib.maybe_convert_numeric\u001b[0;34m()\u001b[0m\n",
+      "\u001b[0;31mValueError\u001b[0m: Unable to parse string \"Exempt\"",
+      "\nDuring handling of the above exception, another exception occurred:\n",
+      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[10], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_numeric\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43minterest_rate\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m \n\u001b[1;32m      2\u001b[0m \u001b[38;5;66;03m# TODO: open the documentation and figure out what parameter will help us\u001b[39;00m\n\u001b[1;32m      3\u001b[0m \u001b[38;5;66;03m# Recall that we can press shift + tab after a function name to open the documentation\u001b[39;00m\n",
+      "File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/core/tools/numeric.py:222\u001b[0m, in \u001b[0;36mto_numeric\u001b[0;34m(arg, errors, downcast, dtype_backend)\u001b[0m\n\u001b[1;32m    220\u001b[0m coerce_numeric \u001b[38;5;241m=\u001b[39m errors \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m (\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mraise\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m    221\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 222\u001b[0m     values, new_mask \u001b[38;5;241m=\u001b[39m \u001b[43mlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmaybe_convert_numeric\u001b[49m\u001b[43m(\u001b[49m\u001b[43m  \u001b[49m\u001b[38;5;66;43;03m# type: ignore[call-overload]  # noqa: E501\u001b[39;49;00m\n\u001b[1;32m    223\u001b[0m \u001b[43m        \u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    224\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mset\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    225\u001b[0m \u001b[43m        \u001b[49m\u001b[43mcoerce_numeric\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcoerce_numeric\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    226\u001b[0m \u001b[43m        \u001b[49m\u001b[43mconvert_to_masked_nullable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype_backend\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mis\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mno_default\u001b[49m\n\u001b[1;32m    227\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43misinstance\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mvalues_dtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mStringDtype\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    228\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    229\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mValueError\u001b[39;00m, \u001b[38;5;167;01mTypeError\u001b[39;00m):\n\u001b[1;32m    230\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m errors \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mraise\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n",
+      "File \u001b[0;32mlib.pyx:2410\u001b[0m, in \u001b[0;36mpandas._libs.lib.maybe_convert_numeric\u001b[0;34m()\u001b[0m\n",
+      "\u001b[0;31mValueError\u001b[0m: Unable to parse string \"Exempt\" at position 1505"
+     ]
+    }
+   ],
+   "source": [
+    "pd.to_numeric(df[\"interest_rate\"]) \n",
+    "# TODO: open the documentation and figure out what parameter will help us\n",
+    "# Recall that we can press shift + tab after a function name to open the documentation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "9c342dce",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0          NaN\n",
+       "1         3.00\n",
+       "2          NaN\n",
+       "3         3.75\n",
+       "4         2.50\n",
+       "          ... \n",
+       "468267    2.25\n",
+       "468268    2.50\n",
+       "468269    4.25\n",
+       "468270     NaN\n",
+       "468271     NaN\n",
+       "Name: interest_rate, Length: 468272, dtype: float64"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.to_numeric(df[\"interest_rate\"], errors=\"coerce\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "546b218b",
+   "metadata": {},
+   "source": [
+    "### Let's drop the NaN values and compute average interest rate."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "e4f21269",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1         3.00\n",
+       "3         3.75\n",
+       "4         2.50\n",
+       "5         2.75\n",
+       "7         3.75\n",
+       "          ... \n",
+       "468265    4.50\n",
+       "468266    2.50\n",
+       "468267    2.25\n",
+       "468268    2.50\n",
+       "468269    4.25\n",
+       "Name: interest_rate, Length: 324658, dtype: float64"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.to_numeric(df[\"interest_rate\"], errors=\"coerce\").dropna()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "113dd8a5",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "3.266264315063852"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.to_numeric(df[\"interest_rate\"], errors=\"coerce\").dropna().mean()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8bfd7d99-805b-4843-a3cc-4109e403d1f1",
+   "metadata": {},
+   "source": [
+    "### Clearing memory using re-assignment.\n",
+    "In Python, you can free the memory used by an object simply by getting rid of all the active references to it. But we cannot do that in the current notebook because we used \"df\" to perform other operations, so there is more than one active reference. In fact, we don't even have direct access to some of the active references (for example, Jupyter's output history `Out[...]` keeps references to results displayed earlier). In that case, you can only free up the memory by shutting down the current notebook."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "90a99ef9-6b88-4bc5-8b41-57bf64aab41a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = \"some string\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8b902599",
+   "metadata": {},
+   "source": [
+    "### How can we read the data without creating an uncompressed version called \"wi.csv\"?\n",
+    "\n",
+    "- Why would we want to do something like that?\n",
+    "    1. lower memory usage (we can try to load information on one loan at a time, instead of all the loans): that will still work for average interest rate computation\n",
+    "    2. lower storage usage (you can directly work with compressed data)\n",
+    "    \n",
+    "**IMPORTANT**: do not run the code cell below unless you have first shut down the notebook to free its memory - otherwise your kernel will crash (you will run out of memory)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4c59ae54",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# IMPORTANT: do not run this cell unless you have first shut down the notebook to free its memory - otherwise your kernel will crash (you will run out of memory)\n",
+    "f = open(\"wi.csv\")\n",
+    "# instead of passing relative path of file name, we can pass a file object instance reference\n",
+    "df = pd.read_csv(f) \n",
+    "f.close()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "36c1faa7-67f5-4135-9b89-1a5e897f5c68",
+   "metadata": {},
+   "source": [
+    "### Let's free up memory and delete \"wi.csv\"."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "4bd8134d-1dea-449d-bea6-34a62b6d38b7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = \"some string\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "8aad54fc-eb70-4e7d-bcbc-334cb81d11ac",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['total 21M',\n",
+       " 'drwxrwxr-x 2 gurmail.singh gurmail.singh 4.0K Jan 25 20:58 01-repro1',\n",
+       " 'drwxrwxr-x 3 gurmail.singh gurmail.singh 4.0K Jan 25 21:24 02-repro2',\n",
+       " 'drwxrwxr-x 3 gurmail.singh gurmail.singh 4.0K Feb  1 20:12 03-performance1',\n",
+       " 'drwxrwxr-x 3 gurmail.singh gurmail.singh 4.0K Feb  5 22:48 04-performance2',\n",
+       " '-rw-rw-r-- 1 gurmail.singh gurmail.singh 7.7K Jan 30 20:12 Untitled.ipynb',\n",
+       " 'drwxrwxr-x 2 gurmail.singh gurmail.singh 4.0K Jan 27 10:17 img',\n",
+       " '-rw------- 1 gurmail.singh gurmail.singh  21K Feb  6 13:56 nohup.out',\n",
+       " '-rw-rw-r-- 1 gurmail.singh gurmail.singh 6.6K Jan 30 19:16 out.mp4',\n",
+       " '-rw-rw-r-- 1 gurmail.singh gurmail.singh 122K Feb  5 21:27 reading1.ipynb',\n",
+       " '-rw-rw-r-- 1 gurmail.singh gurmail.singh  50K Feb  6 13:56 solution.ipynb',\n",
+       " '-rw-rw-r-- 1 gurmail.singh gurmail.singh  29K Jan 30 14:26 template_lec_001.ipynb',\n",
+       " '-rw-rw-r-- 1 gurmail.singh gurmail.singh  21M Feb  6 13:48 wi.zip',\n",
+       " '']"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "check_output([\"rm\", \"wi.csv\"])\n",
+    "str(check_output([\"ls\", \"-lh\"]), encoding=\"utf-8\").split(\"\\n\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "38c7d946",
+   "metadata": {},
+   "source": [
+    "### How can we read data directly from a zip file?\n",
+    "`zipfile.ZipFile(...)`"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7d81fe7c",
+   "metadata": {},
+   "source": [
+    "### Goals:\n",
+    "1. directly access the data without extracting it to disk: `zipfile.ZipFile(...)` - saves storage space by reading the file straight out of the zip archive\n",
+    "2. only look at one row at a time: `csv.DictReader(...)` - saves memory space by enabling us to read one row at a time (as `dict`)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "2a158f48",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/tmp/ipykernel_12609/3313127204.py:5: DtypeWarning: Columns (22,23,24,26,27,28,29,30,31,32,33,38,43,44) have mixed types. Specify dtype option on import or set low_memory=False.\n",
+      "  df = pd.read_csv(f)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# code for goal 1\n",
+    "zf = zipfile.ZipFile(\"wi.zip\")\n",
+    "f = zf.open(\"wi.csv\")\n",
+    "\n",
+    "df = pd.read_csv(f) \n",
+    "\n",
+    "f.close()\n",
+    "zf.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "a5e7c032-0def-4eb2-b5b9-04f7240911f8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Free up the memory again\n",
+    "df = \"some string\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "4fddde38",
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "Error",
+     "evalue": "iterator should return strings, not bytes (the file should be opened in text mode)",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mError\u001b[0m                                     Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[8], line 7\u001b[0m\n\u001b[1;32m      3\u001b[0m f \u001b[38;5;241m=\u001b[39m zf\u001b[38;5;241m.\u001b[39mopen(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwi.csv\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m      5\u001b[0m reader \u001b[38;5;241m=\u001b[39m csv\u001b[38;5;241m.\u001b[39mDictReader(f)\n\u001b[0;32m----> 7\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m row \u001b[38;5;129;01min\u001b[39;00m reader:\n\u001b[1;32m      8\u001b[0m     \u001b[38;5;28mprint\u001b[39m(row)\n\u001b[1;32m      9\u001b[0m     \u001b[38;5;28;01mbreak\u001b[39;00m\n",
+      "File \u001b[0;32m/usr/lib/python3.10/csv.py:110\u001b[0m, in \u001b[0;36mDictReader.__next__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    107\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__next__\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m    108\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mline_num \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m    109\u001b[0m         \u001b[38;5;66;03m# Used only for its side effect.\u001b[39;00m\n\u001b[0;32m--> 110\u001b[0m         \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfieldnames\u001b[49m\n\u001b[1;32m    111\u001b[0m     row \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mnext\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mreader)\n\u001b[1;32m    112\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mline_num \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mreader\u001b[38;5;241m.\u001b[39mline_num\n",
+      "File \u001b[0;32m/usr/lib/python3.10/csv.py:97\u001b[0m, in \u001b[0;36mDictReader.fieldnames\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m     95\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fieldnames \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m     96\u001b[0m     \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 97\u001b[0m         \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fieldnames \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mnext\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreader\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     98\u001b[0m     \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m:\n\u001b[1;32m     99\u001b[0m         \u001b[38;5;28;01mpass\u001b[39;00m\n",
+      "\u001b[0;31mError\u001b[0m: iterator should return strings, not bytes (the file should be opened in text mode)"
+     ]
+    }
+   ],
+   "source": [
+    "# code for goal 2 & goal 1\n",
+    "zf = zipfile.ZipFile(\"wi.zip\")\n",
+    "f = zf.open(\"wi.csv\")\n",
+    "\n",
+    "reader = csv.DictReader(f)\n",
+    "\n",
+    "for row in reader:\n",
+    "    print(row)\n",
+    "    break\n",
+    "\n",
+    "f.close()\n",
+    "zf.close()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c1e9cfba",
+   "metadata": {},
+   "source": [
+    "### Let's learn more modes for `open` built-in function\n",
+    "- `open(..., mode=\"r\")`   => text (default)\n",
+    "- `open(..., mode=\"rb\")`  => bytes\n",
+    "- `zf.open(...)`          => always bytes\n",
+    "\n",
+    "With the `zipfile` module, there isn't a way for us to specify that we need text: `zf.open(...)` always gives us `bytes`.\n",
+    "\n",
+    "### `TextIOWrapper` inside the `io` module wraps a bytes file object so that reading from it gives us `str`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "6f52b0ae",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'activity_year': '2020', 'lei': '549300FX7K8PTEQUU487', 'derived_msa-md': '31540', 'state_code': 'WI', 'county_code': '55025', 'census_tract': '55025002402', 'conforming_loan_limit': 'C', 'derived_loan_product_type': 'Conventional:First Lien', 'derived_dwelling_category': 'Single Family (1-4 Units):Site-Built', 'derived_ethnicity': 'Not Hispanic or Latino', 'derived_race': 'White', 'derived_sex': 'Male', 'action_taken': '3', 'purchaser_type': '0', 'preapproval': '2', 'loan_type': '1', 'loan_purpose': '4', 'lien_status': '1', 'reverse_mortgage': '2', 'open-end_line_of_credit': '1', 'business_or_commercial_purpose': '2', 'loan_amount': '225000.0', 'loan_to_value_ratio': '78.671', 'interest_rate': 'NA', 'rate_spread': 'NA', 'hoepa_status': '3', 'total_loan_costs': 'NA', 'total_points_and_fees': 'NA', 'origination_charges': 'NA', 'discount_points': 'NA', 'lender_credits': 'NA', 'loan_term': '360', 'prepayment_penalty_term': 'NA', 'intro_rate_period': '1', 'negative_amortization': '2', 'interest_only_payment': '2', 'balloon_payment': '2', 'other_nonamortizing_features': '2', 'property_value': '285000', 'construction_method': '1', 'occupancy_type': '1', 'manufactured_home_secured_property_type': '3', 'manufactured_home_land_property_interest': '5', 'total_units': '1', 'multifamily_affordable_units': 'NA', 'income': '0', 'debt_to_income_ratio': '>60%', 'applicant_credit_score_type': '1', 'co-applicant_credit_score_type': '10', 'applicant_ethnicity-1': '2', 'applicant_ethnicity-2': '', 'applicant_ethnicity-3': '', 'applicant_ethnicity-4': '', 'applicant_ethnicity-5': '', 'co-applicant_ethnicity-1': '5', 'co-applicant_ethnicity-2': '', 'co-applicant_ethnicity-3': '', 'co-applicant_ethnicity-4': '', 'co-applicant_ethnicity-5': '', 'applicant_ethnicity_observed': '2', 'co-applicant_ethnicity_observed': '4', 'applicant_race-1': '5', 'applicant_race-2': '', 'applicant_race-3': '', 'applicant_race-4': '', 'applicant_race-5': '', 'co-applicant_race-1': '8', 
'co-applicant_race-2': '', 'co-applicant_race-3': '', 'co-applicant_race-4': '', 'co-applicant_race-5': '', 'applicant_race_observed': '2', 'co-applicant_race_observed': '4', 'applicant_sex': '1', 'co-applicant_sex': '5', 'applicant_sex_observed': '2', 'co-applicant_sex_observed': '4', 'applicant_age': '55-64', 'co-applicant_age': '9999', 'applicant_age_above_62': 'Yes', 'co-applicant_age_above_62': 'NA', 'submission_of_application': '1', 'initially_payable_to_institution': '1', 'aus-1': '6', 'aus-2': '', 'aus-3': '', 'aus-4': '', 'aus-5': '', 'denial_reason-1': '1', 'denial_reason-2': '', 'denial_reason-3': '', 'denial_reason-4': '', 'tract_population': '3572', 'tract_minority_population_percent': '41.1499999999999986', 'ffiec_msa_md_median_family_income': '96600', 'tract_to_msa_income_percentage': '64', 'tract_owner_occupied_units': '812', 'tract_one_to_four_family_homes': '910', 'tract_median_age_of_housing_units': '45'}\n"
+     ]
+    }
+   ],
+   "source": [
+    "# code for goal 2 & goal 1\n",
+    "zf = zipfile.ZipFile(\"wi.zip\")\n",
+    "f = zf.open(\"wi.csv\")\n",
+    "\n",
+    "reader = csv.DictReader(TextIOWrapper(f))\n",
+    "\n",
+    "for row in reader:\n",
+    "    print(row)\n",
+    "    break\n",
+    "\n",
+    "f.close()\n",
+    "zf.close()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3f138285",
+   "metadata": {},
+   "source": [
+    "### Let's go back to calculating average interest rate.\n",
+    "- Algorithm / Pseudocode steps:\n",
+    "    1. print \"interest rate\" and type of \"interest rate\"\n",
+    "    2. convert \"interest rate\" into `float` - how can we handle errors? `try` ... `except` ... (*IMPORTANT*: always have your `except` block catch specific exceptions)\n",
+    "    3. calculate running total, count for each row of data\n",
+    "    4. calculate average"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "6be87e1a",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "3.266264315063054"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "total = 0  # running sum of the numeric interest rates\n",
+    "count = 0  # number of rows that contributed to total\n",
+    "\n",
+    "# with-blocks close both handles even if the loop raises unexpectedly\n",
+    "with zipfile.ZipFile(\"wi.zip\") as zf, zf.open(\"wi.csv\") as f:\n",
+    "    reader = csv.DictReader(TextIOWrapper(f))\n",
+    "\n",
+    "    for row in reader:\n",
+    "        try:\n",
+    "            total += float(row[\"interest_rate\"])\n",
+    "            count += 1\n",
+    "        except ValueError:\n",
+    "            pass # not a number (e.g. \"NA\"): skip this row\n",
+    "\n",
+    "total / count"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c000b405",
+   "metadata": {},
+   "source": [
+    "### Let's generalize the code to read \"interest rate\" into a function.\n",
+    "\n",
+    "- This does make things worse because we are going back to reading all the data before doing the computation.\n",
+    "- But this sets us up to learn about generators."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "aaf33408",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "3.266264315063054"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "def get_rates_v1():\n",
+    "    rates = []\n",
+    "    \n",
+    "    zf = zipfile.ZipFile(\"wi.zip\")\n",
+    "    f = zf.open(\"wi.csv\")\n",
+    "\n",
+    "    reader = csv.DictReader(TextIOWrapper(f))\n",
+    "    \n",
+    "    for row in reader:\n",
+    "        try:\n",
+    "            rates.append(float(row[\"interest_rate\"]))\n",
+    "        except ValueError:\n",
+    "            pass # do nothing\n",
+    "\n",
+    "    f.close()\n",
+    "    zf.close()\n",
+    "    \n",
+    "    return rates\n",
+    "\n",
+    "rates = get_rates_v1()\n",
+    "sum(rates) / len(rates)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6eced472",
+   "metadata": {},
+   "source": [
+    "### Using a generator\n",
+    "- `yield` each value\n",
+    "- use `next` to get the next value => internally `for` loop invokes `next` for each iteration"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "22c8a41b-1356-4aad-8f92-c3db19ce4c03",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_rates_v2():\n",
+    "    print(\"Starting generator\")\n",
+    "    \n",
+    "    zf = zipfile.ZipFile(\"wi.zip\")\n",
+    "    f = zf.open(\"wi.csv\")\n",
+    "    \n",
+    "    reader = csv.DictReader(TextIOWrapper(f))\n",
+    "    \n",
+    "    for row in reader:\n",
+    "        try:\n",
+    "            yield float(row[\"interest_rate\"])\n",
+    "        except ValueError:\n",
+    "            pass # do nothing\n",
+    "\n",
+    "    f.close()\n",
+    "    zf.close()\n",
+    "\n",
+    "rates = get_rates_v2()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "509a76db",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Starting generator\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "3.0"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "next(rates) # gives us the next value"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "8d6fc162",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "3.75"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "next(rates) # gives us the next value"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "a863d383",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2.5"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "next(rates) # gives us the next value"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "69848d5a",
+   "metadata": {},
+   "source": [
+    "### Let's use `for` loop to keep getting all the rates.\n",
+    "\n",
+    "- `len` function doesn't work with generators\n",
+    "- indexing doesn't work with generators"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "d31f9f9f",
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "TypeError",
+     "evalue": "object of type 'generator' has no len()",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[16], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mrates\u001b[49m\u001b[43m)\u001b[49m\n",
+      "\u001b[0;31mTypeError\u001b[0m: object of type 'generator' has no len()"
+     ]
+    }
+   ],
+   "source": [
+    "len(rates)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "0c73ca83",
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "TypeError",
+     "evalue": "'generator' object is not subscriptable",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[17], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mrates\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m4\u001b[39;49m\u001b[43m]\u001b[49m\n",
+      "\u001b[0;31mTypeError\u001b[0m: 'generator' object is not subscriptable"
+     ]
+    }
+   ],
+   "source": [
+    "rates[4]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "bdddd858",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Starting generator\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "3.266264315063054"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "rates = get_rates_v2()\n",
+    "\n",
+    "total = 0\n",
+    "count = 0\n",
+    "\n",
+    "for rate in rates: # keeps calling next(rates) to get values from yield\n",
+    "    total += rate\n",
+    "    count += 1\n",
+    "    \n",
+    "total / count"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c93d951c",
+   "metadata": {},
+   "source": [
+    "This approach doesn't work for median calculation. Why? Remember we have to sort, so we need all values in memory."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "ace73646-1b9c-45d6-881f-832a5e170766",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Starting generator\n"
+     ]
+    }
+   ],
+   "source": [
+    "rates = list(get_rates_v2())\n",
+    "rates.sort()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "18d452b1",
+   "metadata": {},
+   "source": [
+    "# OOP 1: Classes\n",
+    "\n",
+    "- Creating new types using classes\n",
+    "- Types have specific attributes and methods (special functions)\n",
+    "- Using new types (classes), we can create object instances of those types\n",
+    "- class creation and instantiation syntax: \n",
+    "```python\n",
+    "class Person:\n",
+    "    # some code\n",
+    "p1 = Person() # object instantiation using constructor\n",
+    "p2 = Person() # object instantiation using constructor\n",
+    "```\n",
+    "- attribute / method access syntax:\n",
+    "```python\n",
+    "p1.fname = \"...\" # attribute initialization\n",
+    "p1.lname = \"...\" # attribute initialization\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "350c45f3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "p1 = {\"fname\": \"Bob\", \"lname\": \"Baker\"}\n",
+    "\n",
+    "p2 = dict()\n",
+    "p2[\"fname\"] = \"Cindy\"\n",
+    "p2[\"lname\"] = \"Cooper\"\n",
+    "\n",
+    "p3 = {\"Fname\": \"Alice\", \"lname\": \"Anderson\"}\n",
+    "\n",
+    "# TODO: Let's define a Person class\n",
+    "class Person:\n",
+    "    pass\n",
+    "\n",
+    "p4 = Person()\n",
+    "p4.fname = \"Meena\"\n",
+    "p4.lname = \"Syamkumar\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "691f9170",
+   "metadata": {},
+   "source": [
+    "### Let's create a `Dog` class."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "2b462cdd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class Dog:\n",
+    "    pass # eventually we will learn how to write code inside a class"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a8a6311b",
+   "metadata": {},
+   "source": [
+    "### Let's create `Dog` object instances and add attributes."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "5c65755d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dog1 = Dog()\n",
+    "dog1.name = \"Jimmy\"\n",
+    "dog1.age = 2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "id": "3a7a3c62",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dog2 = Dog()\n",
+    "dog2.name = \"Buster\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "244ddc14",
+   "metadata": {},
+   "source": [
+    "### Let's define a `speak` function that will make the `Dog` bark.\n",
+    "- Algorithm / pseudocode steps:\n",
+    "    1. puppies bark thrice (age < 2)\n",
+    "    2. dogs bark once"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fe66d607",
+   "metadata": {},
+   "source": [
+    "### `f-strings`\n",
+    "\n",
+    "- aka formatted string literals\n",
+    "- easier and quicker way of formatting `str` than `str.format(...)` method\n",
+    "\n",
+    "- Syntax: \n",
+    "```python\n",
+    "f\"{expression} ...\"\n",
+    "```\n",
+    "- inside `{}` you can specify a variable or even call a function or a method"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "id": "a9e7f827",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def speak(dog):\n",
+    "    if dog.age < 2:\n",
+    "        print(f\"{dog.name}: bark bark bark!\")\n",
+    "    else:\n",
+    "        print(f\"{dog.name}: bark!\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "33815738",
+   "metadata": {},
+   "source": [
+    "### Let's invoke `speak` for dog1 and dog2."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "id": "d92fc8a4",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Jimmy: bark!\n"
+     ]
+    }
+   ],
+   "source": [
+    "speak(dog1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "id": "1de32f33",
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "AttributeError",
+     "evalue": "'Dog' object has no attribute 'age'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[26], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mspeak\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdog2\u001b[49m\u001b[43m)\u001b[49m\n",
+      "Cell \u001b[0;32mIn[24], line 2\u001b[0m, in \u001b[0;36mspeak\u001b[0;34m(dog)\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mspeak\u001b[39m(dog):\n\u001b[0;32m----> 2\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[43mdog\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mage\u001b[49m \u001b[38;5;241m<\u001b[39m \u001b[38;5;241m2\u001b[39m:\n\u001b[1;32m      3\u001b[0m         \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdog\u001b[38;5;241m.\u001b[39mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m: bark bark bark!\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m      4\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n",
+      "\u001b[0;31mAttributeError\u001b[0m: 'Dog' object has no attribute 'age'"
+     ]
+    }
+   ],
+   "source": [
+    "speak(dog2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4424e7cd",
+   "metadata": {},
+   "source": [
+    "### How can we standardize the attribute initialization to avoid bugs?\n",
+    "\n",
+    "- Eventually we will learn about how to define methods inside the class, which will include `__init__` method.\n",
+    "- For now, let's define an `init` function."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "id": "9420fad8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def init(dog, name, how_old):\n",
+    "    dog.name = name\n",
+    "    dog.age = how_old"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "id": "10397e6b",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Spark: bark!\n"
+     ]
+    }
+   ],
+   "source": [
+    "dog2 = Dog()\n",
+    "init(dog2, \"Spark\", 10)\n",
+    "speak(dog2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "780546f0",
+   "metadata": {},
+   "source": [
+    "### What if there are two `speak` functions? Let's define a Cat class and corresponding `speak` function."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "id": "68f59e90",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class Cat:\n",
+    "    pass\n",
+    "\n",
+    "cat1 = Cat()\n",
+    "\n",
+    "def speak(cat):\n",
+    "    \"\"\"\n",
+    "    Cats meow!\n",
+    "    \"\"\"\n",
+    "    print(\"meow!\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3e59a318",
+   "metadata": {},
+   "source": [
+    "### What will be the output of the below function calls?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "id": "d99e9a47",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "meow!\n",
+      "meow!\n",
+      "meow!\n"
+     ]
+    }
+   ],
+   "source": [
+    "speak(dog1)\n",
+    "speak(dog2)\n",
+    "speak(cat1)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f28b640d",
+   "metadata": {},
+   "source": [
+    "### We lost the previous (dog) definition of `speak` because the second `def speak` replaced it - a plain function name can only refer to one definition at a time. What if `speak` were a method instead?\n",
+    "\n",
+    "### **IMPORTANT**: it is not recommended to re-define the same `class`. This is shown only for example purposes. You must always go back to the original cell and update the definition there."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "id": "245b0f24",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class Dog:\n",
+    "    # regular method\n",
+    "    def init(dog, name, how_old): \n",
+    "        dog.name = name\n",
+    "        dog.age = how_old\n",
+    "    \n",
+    "    # regular method\n",
+    "    def speak(dog):\n",
+    "        \"\"\"\n",
+    "        Puppies (age < 2) bark thrice, whereas dogs bark once.\n",
+    "        \"\"\"\n",
+    "        if dog.age < 2:\n",
+    "            #print(dog.name + \": bark bark bark!\")\n",
+    "            print(f\"{dog.name}: bark bark bark!\")\n",
+    "        else:\n",
+    "            #print(dog.name + \": bark!\")\n",
+    "            print(f\"{dog.name}: bark!\")\n",
+    "\n",
+    "class Cat:\n",
+    "    def speak(cat):\n",
+    "        \"\"\"\n",
+    "        Cats meow!\n",
+    "        \"\"\"\n",
+    "        print(\"meow!\")\n",
+    "        \n",
+    "# Let's create object instances\n",
+    "dog1 = Dog()\n",
+    "Dog.init(dog1, \"Jimmy\", 1)\n",
+    "\n",
+    "dog2 = Dog()\n",
+    "Dog.init(dog2, \"Buster\", 10)\n",
+    "\n",
+    "cat1 = Cat()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "id": "537614a0",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Jimmy: bark bark bark!\n",
+      "Buster: bark!\n",
+      "meow!\n"
+     ]
+    }
+   ],
+   "source": [
+    "# speak now is a method, so we need to use . attribute operator for invocation\n",
+    "Dog.speak(dog1)\n",
+    "Dog.speak(dog2)\n",
+    "Cat.speak(cat1)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0f175520",
+   "metadata": {},
+   "source": [
+    "### Type-based dispatch"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "300fd480",
+   "metadata": {},
+   "source": [
+    "#### Let's create a list of animals and print `type` of each animal."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "id": "87d16d48",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class '__main__.Dog'>\n",
+      "<class '__main__.Dog'>\n",
+      "<class '__main__.Cat'>\n"
+     ]
+    }
+   ],
+   "source": [
+    "animals = [dog1, dog2, cat1]\n",
+    "\n",
+    "for animal in animals:\n",
+    "    print(type(animal))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f82dacea",
+   "metadata": {},
+   "source": [
+    "#### Even though `type` output displays additional details, in essence the type is just the class: `Dog`, `Cat`, etc."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "id": "a88e4859",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 39,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "type(dog1) == Dog"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "id": "3a765271",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 40,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "type(cat1) == Cat"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "81bb5aee",
+   "metadata": {},
+   "source": [
+    "#### Let's invoke speak for all animals."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "id": "a6696743",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Jimmy: bark bark bark!\n",
+      "Buster: bark!\n",
+      "meow!\n"
+     ]
+    }
+   ],
+   "source": [
+    "# v1: bad version\n",
+    "for animal in animals:\n",
+    "    if type(animal) == Dog:\n",
+    "        Dog.speak(animal)\n",
+    "    elif type(animal) == Cat:\n",
+    "        Cat.speak(animal)\n",
+    "    # this conditional will keep growing as we add more and \n",
+    "    # more animal classes!"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "920e7522",
+   "metadata": {},
+   "source": [
+    "#### Here is a slightly better version"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "id": "31100bf6",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Jimmy: bark bark bark!\n",
+      "Buster: bark!\n",
+      "meow!\n"
+     ]
+    }
+   ],
+   "source": [
+    "for animal in animals:\n",
+    "    type(animal).speak(animal)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8ef0373c",
+   "metadata": {},
+   "source": [
+    "#### Notice how `animal` appears twice in `type(animal).speak(animal)`, which is redundant. There is a better way to invoke methods.\n",
+    "\n",
+    "- Syntax: `obj_ref.method()`\n",
+    "- `obj_ref` itself will be the first argument to the method."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "id": "50e4d329",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Jimmy: bark bark bark!\n",
+      "Buster: bark!\n",
+      "meow!\n"
+     ]
+    }
+   ],
+   "source": [
+    "for animal in animals:\n",
+    "    # this is equivalent to type(animal).speak(animal)\n",
+    "    animal.speak()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "id": "000bd875",
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "TypeError",
+     "evalue": "Dog.speak() takes 1 positional argument but 2 were given",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[44], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdog1\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mspeak\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mhello\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m      2\u001b[0m \u001b[38;5;66;03m# Observe how TypeError says 1 positional argument expected\u001b[39;00m\n",
+      "\u001b[0;31mTypeError\u001b[0m: Dog.speak() takes 1 positional argument but 2 were given"
+     ]
+    }
+   ],
+   "source": [
+    "dog1.speak(\"hello\")\n",
+    "# Observe how TypeError says 1 positional argument expected"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "887fabde-5e7a-4fb7-bb3b-cc69a2b7b6a4",
+   "metadata": {},
+   "source": [
+    "## `self`\n",
+    "\n",
+    "- refers to the current object instance (aka receiver) inside a class\n",
+    "- attribute access inside the class **must** always use `self.<attribute>` syntax"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 46,
+   "id": "152f774c",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Jimmy: bark bark bark!\n",
+      "Buster: bark!\n"
+     ]
+    }
+   ],
+   "source": [
+    "class Dog:\n",
+    "    # regular method\n",
+    "    def init(self, name, how_old): \n",
+    "        self.name = name\n",
+    "        self.age = how_old\n",
+    "    \n",
+    "    # regular method\n",
+    "    def speak(self):\n",
+    "        \"\"\"\n",
+    "        Puppies (age < 2) bark thrice, whereas dogs bark once.\n",
+    "        \"\"\"\n",
+    "        if self.age < 2:\n",
+    "            #print(dog.name + \": bark bark bark!\")\n",
+    "            print(f\"{self.name}: bark bark bark!\")\n",
+    "        else:\n",
+    "            #print(dog.name + \": bark!\")\n",
+    "            print(f\"{self.name}: bark!\")\n",
+    "\n",
+    "\n",
+    "# Let's create Dog object instances\n",
+    "dog1 = Dog() \n",
+    "Dog.init(dog1, \"Jimmy\", 1)\n",
+    "\n",
+    "dog2 = Dog()\n",
+    "Dog.init(dog2, \"Buster\", 10)\n",
+    "\n",
+    "# Invoke speak for dog1 and dog2\n",
+    "dog1.speak()\n",
+    "dog2.speak()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d6284d16",
+   "metadata": {},
+   "source": [
+    "# OOP: Special Methods\n",
+    "\n",
+    "\"Special methods\" is a technical term referring to methods that get called automatically. In Python, they usually begin and end with double underscores.\n",
+    "- **Note:** you could define a regular method with `__<method>__`."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6454cdaf",
+   "metadata": {},
+   "source": [
+    "### `__init__` special method (aka Constructor)\n",
+    "\n",
+    "- automatically invoked when creating an object instance\n",
+    "- only one possible constructor in Python"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 47,
+   "id": "d7820ba3",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Creating a dog!\n",
+      "Creating a dog!\n",
+      "Jimmy: bark bark bark!\n",
+      "Buster: bark!\n"
+     ]
+    }
+   ],
+   "source": [
+    "# This is the correct and final version of Dog class\n",
+    "class Dog:\n",
+    "    # special method\n",
+    "    def __init__(self, name, how_old): \n",
+    "        print(\"Creating a dog!\")\n",
+    "        self.name = name\n",
+    "        self.age = how_old\n",
+    "    \n",
+    "    # regular method\n",
+    "    def speak(self):\n",
+    "        \"\"\"\n",
+    "        Puppies (age < 2) bark thrice, whereas dogs bark once.\n",
+    "        \"\"\"\n",
+    "        if self.age < 2:\n",
+    "            #print(dog.name + \": bark bark bark!\")\n",
+    "            print(f\"{self.name}: bark bark bark!\")\n",
+    "        else:\n",
+    "            #print(dog.name + \": bark!\")\n",
+    "            print(f\"{self.name}: bark!\")\n",
+    "\n",
+    "\n",
+    "# Let's create Dog object instances\n",
+    "dog1 = Dog(\"Jimmy\", 1)\n",
+    "dog2 = Dog(\"Buster\", 10)\n",
+    "\n",
+    "# Invoke speak for dog1 and dog2\n",
+    "dog1.speak()\n",
+    "dog2.speak()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/lecture_material/05-oop1/template_lec_001.ipynb b/lecture_material/05-oop1/template_lec_001.ipynb
new file mode 100644
index 0000000..1cf280b
--- /dev/null
+++ b/lecture_material/05-oop1/template_lec_001.ipynb
@@ -0,0 +1,1157 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "1a6cc54c",
+   "metadata": {},
+   "source": [
+    "# Performance 3"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "783117c5-146f-454a-963e-ed2873b8a6d3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# known import statements\n",
+    "import pandas as pd\n",
+    "import csv\n",
+    "from subprocess import check_output\n",
+    "\n",
+    "# new import statements\n",
+    "import zipfile\n",
+    "from io import TextIOWrapper"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "66db2ad0",
+   "metadata": {},
+   "source": [
+    "### Let's take a look at the files inside the current working directory."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6cef713e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "str(check_output([\"ls\", \"-lh\"]), encoding=\"utf-8\").split(\"\\n\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c76f819d",
+   "metadata": {},
+   "source": [
+    "### Let's `unzip` \"wi.zip\"."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0e87ec01",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "check_output([\"unzip\", \"wi.zip\"])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "274fa49a",
+   "metadata": {},
+   "source": [
+    "### Let's take a look at the files inside the current working directory."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a2da3cd0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "str(check_output([\"ls\", \"-lh\"]), encoding=\"utf-8\").split(\"\\n\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "90b11343",
+   "metadata": {},
+   "source": [
+    "### Traditional way of reading data using pandas"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a3175526",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.read_csv(\"wi.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "13e6e034",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df.head(5) # Top 5 rows within the DataFrame"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5c79984c",
+   "metadata": {},
+   "source": [
+    "### How can we see all the column names?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "08d9501d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df.columns"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a519f383",
+   "metadata": {},
+   "source": [
+    "### How to extract `interest_rate`?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "400b885c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df # observe that there are missing values"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "890c6d2c",
+   "metadata": {},
+   "source": [
+    "### How to count unique values in a column `Series`?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ca108069",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df[\"interest_rate\"]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "715853ee",
+   "metadata": {},
+   "source": [
+    "### Let's eliminate the strings (Exempt) and missing values (NaN).\n",
+    "Let's try `pd.to_numeric(...)`. We need a way to specify that strings need to be converted into NaN values."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "69b00b57",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "# TODO: open the documentation and figure out what parameter will help us\n",
+    "# Recall that we can press shift + tab after a function name to open the documentation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c51c8952",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pd.to_numeric(df[\"interest_rate\"], errors=\"coerce\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "60344f67",
+   "metadata": {},
+   "source": [
+    "### Let's drop the NaN values and compute average interest rate."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0de2786f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pd.to_numeric(df[\"interest_rate\"], errors=\"coerce\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ba74550a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pd.to_numeric(df[\"interest_rate\"], errors=\"coerce\").dropna()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ed860a64-6d5f-4169-9217-073e54979028",
+   "metadata": {},
+   "source": [
+    "### Clearing memory using re-assignment.\n",
+    "In Python, you can free the memory used by an object simply by getting rid of all the active references to it. But we cannot do that for the current notebook because we used \"df\" to perform other operations, so there is more than one active reference. In fact, we don't even have access to some of the active references. In that case, you can only free up the memory after you \"shut down\" the current notebook."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f28cb0b7-beb5-44e0-a4ad-9d10dbd427bd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = \"some string\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d0d56c80",
+   "metadata": {},
+   "source": [
+    "### How can we read the data without creating an uncompressed version called \"wi.csv\"?\n",
+    "\n",
+    "- Why would we want to do something like that?\n",
+    "    1. lower memory usage (we can try to load information on one loan at a time, instead of all the loans): that will still work for average interest rate computation\n",
+    "    2. lower storage usage (you can directly work with compressed data)\n",
+    "    \n",
+    "**IMPORTANT**: do not run this cell's code unless you shut down the notebook - your kernel will crash (you will run out of memory space)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b24c0723",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# IMPORTANT: do not run this cell code unless you shutdown the notebook - your kernel will crash (you will run out of memory space)\n",
+    "f = open(\"wi.csv\")\n",
+    "# instead of passing relative path of file name, we can pass a file object instance reference\n",
+    "df = pd.read_csv(f) \n",
+    "f.close()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f8a9da46-53a5-4a7a-9d0e-6102aed9ea13",
+   "metadata": {},
+   "source": [
+    "### Let's free up memory and delete \"wi.csv\"."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "457ac313-eb56-445b-b899-65a060ac8b07",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = \"some string\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f26e069f-d5c5-46e0-bc91-8a8e55aae427",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "check_output([\"rm\", \"wi.csv\"])\n",
+    "str(check_output([\"ls\", \"-lh\"]), encoding=\"utf-8\").split(\"\\n\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "16a150df",
+   "metadata": {},
+   "source": [
+    "### How can we read data directly from a zip file?\n",
+    "`zipfile.ZipFile(...)`"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0eff57fa",
+   "metadata": {},
+   "source": [
+    "### Goals:\n",
+    "1. directly access the data without decompressing: `zipfile.ZipFile(...)` - saves storage space by directly opening a zip file\n",
+    "2. only look at one row at a time: `csv.DictReader(...)` - saves memory space by enabling us to read one row at a time (as `dict`)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a85ca8a2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# code for goal 1\n",
+    "\n",
+    "f = open(\"wi.csv\")\n",
+    "df = pd.read_csv(f) \n",
+    "f.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4eee3add-cb92-4654-ab29-2b5ae68c10d5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Free up the memory again\n",
+    "df = \"some string\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6a194b9b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# code for goal 2 & goal 1\n",
+    "zf = zipfile.ZipFile(\"wi.zip\")\n",
+    "f = zf.open(\"wi.csv\")\n",
+    "\n",
+    "df = pd.read_csv(f) \n",
+    "\n",
+    "f.close()\n",
+    "zf.close()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ca72997f",
+   "metadata": {},
+   "source": [
+    "### Let's learn more modes for the `open` built-in function\n",
+    "- `open(..., mode=\"r\")`   => text (default)\n",
+    "- `open(..., mode=\"rb\")`  => bytes\n",
+    "- `zf.open(...)`          => always bytes\n",
+    "\n",
+    "With the `zipfile` module, there isn't a way for us to specify that we need text.\n",
+    "\n",
+    "### `TextIOWrapper` inside the `io` module enables us to convert `bytes` into `str`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ccc4954f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# code for goal 2 & goal 1\n",
+    "zf = zipfile.ZipFile(\"wi.zip\")\n",
+    "f = zf.open(\"wi.csv\")\n",
+    "\n",
+    "reader = csv.DictReader(f)\n",
+    "\n",
+    "for row in reader:\n",
+    "    print(row)\n",
+    "    break\n",
+    "\n",
+    "f.close()\n",
+    "zf.close()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8af3971c",
+   "metadata": {},
+   "source": [
+    "### Let's go back to calculating average interest rate.\n",
+    "- Algorithm / Pseudocode steps:\n",
+    "    1. print \"interest rate\" and type of \"interest rate\"\n",
+    "    2. convert \"interest rate\" into `float` - how can we handle errors? `try` ... `except` ... (*IMPORTANT*: always have your `except` block catch specific exceptions)\n",
+    "    3. calculate running total, count for each row of data\n",
+    "    4. calculate average"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8ad2b730",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "zf = zipfile.ZipFile(\"wi.zip\")\n",
+    "f = zf.open(\"wi.csv\")\n",
+    "\n",
+    "reader = csv.DictReader(TextIOWrapper(f))\n",
+    "\n",
+    "for row in reader:\n",
+    "    print(row)\n",
+    "    break\n",
+    "\n",
+    "f.close()\n",
+    "zf.close()\n",
+    "\n",
+    "total / count"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "51e7081b",
+   "metadata": {},
+   "source": [
+    "### Let's generalize the code to read \"interest rate\" into a function.\n",
+    "\n",
+    "- This does make things worse because we are going back to reading all the data before doing the computation.\n",
+    "- But this sets us up to learn about generators."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "79334762",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_rates_v1():\n",
+    "    pass"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "41f4bfe4",
+   "metadata": {},
+   "source": [
+    "### Using a generator\n",
+    "- `yield` each value\n",
+    "- use `next` to get the next value => internally `for` loop invokes `next` for each iteration"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "22c8a41b-1356-4aad-8f92-c3db19ce4c03",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_rates_v2():\n",
+    "    pass"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fe2f060b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "next(rates) # gives us the next value"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ea5c188d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "next(rates) # gives us the next value"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3101b71c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "next(rates) # gives us the next value"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b47aab60",
+   "metadata": {},
+   "source": [
+    "### Let's use `for` loop to keep getting all the rates.\n",
+    "\n",
+    "- `len` function doesn't work with generators\n",
+    "- indexing doesn't work with generators"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5a074669",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "len(rates)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d536ef4d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "rates[4]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0f5dbd13",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "rates = get_rates_v2()\n",
+    "\n",
+    "total = 0\n",
+    "count = 0\n",
+    "\n",
+    " # keeps calling next(rates) to get values from yield\n",
+    "\n",
+    "    \n",
+    "total / count"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ff9e77e5",
+   "metadata": {},
+   "source": [
+    "This approach doesn't work for median calculation. Why? Remember we have to sort, so we need all values in memory."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ace73646-1b9c-45d6-881f-832a5e170766",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "rates = list(get_rates_v2())\n",
+    "rates.sort()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f6ff9655",
+   "metadata": {},
+   "source": [
+    "# OOP 1: Classes\n",
+    "\n",
+    "- Creating new types using classes\n",
+    "- Types have specific attributes and methods (special functions)\n",
+    "- Using new types (classes), we can create object instances of those types\n",
+    "- class creation and instantiation syntax: \n",
+    "```python\n",
+    "class Person:\n",
+    "    # some code\n",
+    "p1 = Person() # object instantiation using constructor\n",
+    "p2 = Person() # object instantiation using constructor\n",
+    "```\n",
+    "- attribute / method access syntax:\n",
+    "```python\n",
+    "p1.fname = \"...\" # attribute initialization\n",
+    "p1.lname = \"...\" # attribute initialization\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9b693dec",
+   "metadata": {},
+   "source": [
+    "#### PythonTutor example"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8372eccf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "p1 = {\"fname\": \"Bob\", \"lname\": \"Baker\"}\n",
+    "\n",
+    "p2 = dict()\n",
+    "p2[\"fname\"] = \"Cindy\"\n",
+    "p2[\"lname\"] = \"Cooper\"\n",
+    "\n",
+    "p3 = {\"Fname\": \"Alice\", \"lname\": \"Anderson\"}\n",
+    "\n",
+    "# TODO: Let's define a Person class"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6427a25c",
+   "metadata": {},
+   "source": [
+    "### Let's create a `Dog` class."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5154fd9e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# eventually we will learn how to write code inside a class"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7d3a0c2a",
+   "metadata": {},
+   "source": [
+    "### Let's create `Dog` object instances and add attributes."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "db5558da",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0600c5b8",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "93b2e3a9",
+   "metadata": {},
+   "source": [
+    "### Let's define a `speak` function that will make the `Dog` bark.\n",
+    "- Algorithm / pseudocode steps:\n",
+    "    1. puppies bark thrice (age < 2)\n",
+    "    2. dogs bark once"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2abbf327",
+   "metadata": {},
+   "source": [
+    "### `f-strings`\n",
+    "\n",
+    "- aka formatted string literals\n",
+    "- easier and quicker way of formatting `str` than `str.format(...)` method\n",
+    "\n",
+    "- Syntax: \n",
+    "```python\n",
+    "f\"{} ...\"\n",
+    "```\n",
+    "- inside `{}` you can specify a variable or even call a function or a method"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1592767d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def speak(dog):\n",
+    "    if dog.age < 2:\n",
+    "        pass\n",
+    "    else:\n",
+    "        pass"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b3cbd57e",
+   "metadata": {},
+   "source": [
+    "### Let's invoke `speak` for dog1 and dog2."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0f9fa462",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "speak(dog1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "61b30ab8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "speak(dog2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "38c31465",
+   "metadata": {},
+   "source": [
+    "### How can we standardize the attribute initialization to avoid bugs?\n",
+    "\n",
+    "- Eventually we will learn about how to define methods inside the class, which will include `__init__` method.\n",
+    "- For now, let's define an `init` function."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2682bdb7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def init(???):\n",
+    "    pass"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4cc6c3a1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dog2 = Dog()\n",
+    "init(???)\n",
+    "speak(dog2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "79bfd963",
+   "metadata": {},
+   "source": [
+    "### What if there are two `speak` functions? Let's define a Cat class and corresponding `speak` function."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "595a80e8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class Cat:\n",
+    "    pass\n",
+    "\n",
+    "cat1 = Cat()\n",
+    "\n",
+    "def speak(cat):\n",
+    "    \"\"\"\n",
+    "    Cats meow!\n",
+    "    \"\"\"\n",
+    "    print(\"meow!\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b41e88d5",
+   "metadata": {},
+   "source": [
+    "### What will be the output of the below function calls?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c800545f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "speak(dog1)\n",
+    "speak(dog2)\n",
+    "speak(cat1)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bf69e4ff",
+   "metadata": {},
+   "source": [
+    "### We lost the previous definition of the `speak` function because it is a function. What if `speak` were a method instead?"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "be101310",
+   "metadata": {},
+   "source": [
+    "### **IMPORTANT**: it is not recommended to re-define the same `class`. This is shown only for example purposes. You must always go back to the original cell and update the definition there."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7ef5425a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class Dog:\n",
+    "    pass # eventually we will learn how to write code inside a class\n",
+    "\n",
+    "# Regular function that accepts an object instance of the new type\n",
+    "def speak(dog):\n",
+    "    \"\"\"\n",
+    "    Puppies (age < 2) bark thrice, whereas dogs bark once.\n",
+    "    \"\"\"\n",
+    "    if dog.age < 2:\n",
+    "        #print(dog.name + \": bark bark bark!\")\n",
+    "        print(f\"{dog.name}: bark bark bark!\")\n",
+    "    else:\n",
+    "        #print(dog.name + \": bark!\")\n",
+    "        print(f\"{dog.name}: bark!\")\n",
+    "        \n",
+    "# Regular function that accepts an object instance of the new type along with attribute values\n",
+    "def init(dog, name, how_old):\n",
+    "    dog.name = name\n",
+    "    dog.age = how_old\n",
+    "        \n",
+    "class Cat:\n",
+    "    pass\n",
+    "\n",
+    "def speak(cat):\n",
+    "    \"\"\"\n",
+    "    Cats meow!\n",
+    "    \"\"\"\n",
+    "    print(\"meow!\")\n",
+    "    \n",
+    "# Let's create object instances\n",
+    "dog1 = Dog()\n",
+    "init(dog1, \"Jimmy\", 1)\n",
+    "\n",
+    "dog2 = Dog()\n",
+    "init(dog2, \"Buster\", 10)\n",
+    "\n",
+    "cat1 = Cat()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d24e147a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# speak now is a method, so we need to use . attribute operator for invocation\n",
+    "speak(dog1)\n",
+    "speak(dog2)\n",
+    "speak(cat1)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "90da8bc6",
+   "metadata": {},
+   "source": [
+    "### Type-based dispatch"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "92f3c459",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "animals = [dog1, dog2, cat1]\n",
+    "\n",
+    "for animal in animals:\n",
+    "    print(type(animal))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9680a740",
+   "metadata": {},
+   "source": [
+    "#### Even though the `type` output displays additional details, in essence the type is just the name of the class: `Dog`, `Cat`, etc."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7787e0fe",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "type(dog1) == Dog"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "49f9f9b7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "type(cat1) == Cat"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1e280958",
+   "metadata": {},
+   "source": [
+    "#### Let's invoke speak for all animals."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3cd9d9fb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# v1: bad version\n",
+    "for animal in animals:\n",
+    "    if type(animal) == Dog:\n",
+    "        Dog.speak(animal)\n",
+    "    elif type(animal) == Cat:\n",
+    "        Cat.speak(animal)\n",
+    "    # this conditional will keep growing as we add more and \n",
+    "    # more animal classes!"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6cda6524",
+   "metadata": {},
+   "source": [
+    "#### Here is a slightly better version"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "423471d5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for animal in animals:\n",
+    "    type(animal).speak(animal)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f1f81bb9",
+   "metadata": {},
+   "source": [
+    "### Method invocation (most commonly used syntax)\n",
+    "\n",
+    "Notice how `animal` appears twice in `type(animal).speak(animal)`, which is redundant. There is a better way to invoke methods.\n",
+    "\n",
+    "- Syntax: `obj_ref.method()`\n",
+    "- `obj_ref` itself will be the first argument to the method."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "78ece030",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for animal in animals:\n",
+    "    # this is equivalent to type(animal).speak(animal)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3a264105",
+   "metadata": {},
+   "source": [
+    "#### Let's try passing an argument to `speak` method."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4270ed59",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dog1.speak(\"hello\")\n",
+    "# Observe how TypeError says 1 positional argument expected"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "dc77d748",
+   "metadata": {},
+   "source": [
+    "## `self`\n",
+    "\n",
+    "- conventional name for the first parameter of a method; it refers to the current object instance (aka receiver) inside a class\n",
+    "- attribute access inside the class **must** always use `self.<attribute>` syntax"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9af178a3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class Dog:\n",
+    "    # regular method\n",
+    "    def init(dog, name, how_old): \n",
+    "        dog.name = name\n",
+    "        dog.age = how_old\n",
+    "    \n",
+    "    # regular method\n",
+    "    def speak(dog):\n",
+    "        \"\"\"\n",
+    "        Puppies (age < 2) bark thrice, whereas dogs bark once.\n",
+    "        \"\"\"\n",
+    "        if dog.age < 2:\n",
+    "            #print(dog.name + \": bark bark bark!\")\n",
+    "            print(f\"{dog.name}: bark bark bark!\")\n",
+    "        else:\n",
+    "            #print(dog.name + \": bark!\")\n",
+    "            print(f\"{dog.name}: bark!\")\n",
+    "\n",
+    "# Let's create Dog object instances\n",
+    "dog1 = Dog() \n",
+    "dog1.init(\"Jimmy\", 1)\n",
+    "\n",
+    "dog2 = Dog()\n",
+    "dog2.init(\"Buster\", 10)\n",
+    "\n",
+    "# Invoke speak for dog1 and dog2\n",
+    "dog1.speak()\n",
+    "dog2.speak()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "32631b72",
+   "metadata": {},
+   "source": [
+    "# OOP: Special Methods\n",
+    "\n",
+    "\"Special methods\" is a technical term referring to methods that get called automatically. In Python, they usually begin and end with double underscores.\n",
+    "- **Note:** you could define a regular method with `__<method>__`."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2f7640b1",
+   "metadata": {},
+   "source": [
+    "### `__init__` special method (aka Constructor)\n",
+    "\n",
+    "- automatically invoked when creating an object instance\n",
+    "- only one possible constructor in Python"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "66ac7d8c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# This is the correct and final version of Dog class\n",
+    "class Dog:\n",
+    "    # regular method\n",
+    "    def init(self, name, how_old): \n",
+    "        self.name = name\n",
+    "        self.age = how_old\n",
+    "    \n",
+    "    # regular method\n",
+    "    def speak(self):\n",
+    "        \"\"\"\n",
+    "        Puppies (age < 2) bark thrice, whereas dogs bark once.\n",
+    "        \"\"\"\n",
+    "        if self.age < 2:\n",
+    "            #print(dog.name + \": bark bark bark!\")\n",
+    "            print(f\"{self.name}: bark bark bark!\")\n",
+    "        else:\n",
+    "            #print(dog.name + \": bark!\")\n",
+    "            print(f\"{self.name}: bark!\")\n",
+    "\n",
+    "\n",
+    "# Let's create Dog object instances\n",
+    "dog1 = Dog() \n",
+    "dog1.init(\"Jimmy\", 1)\n",
+    "\n",
+    "dog2 = Dog()\n",
+    "dog2.init(\"Buster\", 10)\n",
+    "\n",
+    "# Invoke speak for dog1 and dog2\n",
+    "dog1.speak()\n",
+    "dog2.speak()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/lecture_material/05-oop1/template_lec_002.ipynb b/lecture_material/05-oop1/template_lec_002.ipynb
new file mode 100644
index 0000000..cb2f581
--- /dev/null
+++ b/lecture_material/05-oop1/template_lec_002.ipynb
@@ -0,0 +1,1157 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "1a6cc54c",
+   "metadata": {},
+   "source": [
+    "# Performance 3"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "783117c5-146f-454a-963e-ed2873b8a6d3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# known import statements\n",
+    "import pandas as pd\n",
+    "import csv\n",
+    "from subprocess import check_output\n",
+    "\n",
+    "# new import statements\n",
+    "import zipfile\n",
+    "from io import TextIOWrapper"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "66db2ad0",
+   "metadata": {},
+   "source": [
+    "### Let's take a look at the files inside the current working directory."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6cef713e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "str(check_output([\"ls\", \"-lh\"]), encoding=\"utf-8\").split(\"\\n\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c76f819d",
+   "metadata": {},
+   "source": [
+    "### Let's `unzip` \"wi.zip\"."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0e87ec01",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "check_output([\"unzip\", \"wi.zip\"])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "274fa49a",
+   "metadata": {},
+   "source": [
+    "### Let's take a look at the files inside the current working directory."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a2da3cd0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "str(check_output([\"ls\", \"-lh\"]), encoding=\"utf-8\").split(\"\\n\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "90b11343",
+   "metadata": {},
+   "source": [
+    "### Traditional way of reading data using pandas"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a3175526",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.read_csv(\"wi.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "13e6e034",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df.head(5) # Top 5 rows within the DataFrame"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5c79984c",
+   "metadata": {},
+   "source": [
+    "### How can we see all the column names?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "08d9501d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df.columns"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a519f383",
+   "metadata": {},
+   "source": [
+    "### How to extract `interest_rate`?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "400b885c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df # observe that there are missing values"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "890c6d2c",
+   "metadata": {},
+   "source": [
+    "### How to count unique values in a column `Series`?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ca108069",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df[\"interest_rate\"]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "715853ee",
+   "metadata": {},
+   "source": [
+    "### Let's eliminate the strings (Exempt) and missing values (NaN).\n",
+    "Let's try `pd.to_numeric(...)`. We need a way to specify that strings need to be converted into NaN values."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "69b00b57",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "# TODO: open the documentation and figure out what parameter will help us\n",
+    "# Recall that we can press shift + tab after a function name to open the documentation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c51c8952",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pd.to_numeric(df[\"interest_rate\"], errors=\"coerce\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "60344f67",
+   "metadata": {},
+   "source": [
+    "### Let's drop the NaN values and compute average interest rate."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0de2786f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pd.to_numeric(df[\"interest_rate\"], errors=\"coerce\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ba74550a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pd.to_numeric(df[\"interest_rate\"], errors=\"coerce\").dropna()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ed860a64-6d5f-4169-9217-073e54979028",
+   "metadata": {},
+   "source": [
+    "### Clearing memory using re-assignment.\n",
+    "In Python, you can free the memory used by an object simply by getting rid of all the active references to it. But we cannot do that in the current notebook because we used \"df\" to perform other operations, so there is more than one active reference. In fact, we don't even have access to some of the active references. In that case, you can only free up the memory after you \"shut down\" the current notebook."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f28cb0b7-beb5-44e0-a4ad-9d10dbd427bd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = \"some string\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d0d56c80",
+   "metadata": {},
+   "source": [
+    "### How can we read the data without creating an uncompressed version called \"wi.csv\"?\n",
+    "\n",
+    "- Why would we want to do something like that?\n",
+    "    1. lower memory usage (we can try to load information on one loan at a time, instead of all the loans): that will still work for average interest rate computation\n",
+    "    2. lower storage usage (you can directly work with compressed data)\n",
+    "    \n",
+    "**IMPORTANT**: do not run the code cell below unless you have first shut down the notebook to free memory - otherwise your kernel will crash (you will run out of memory)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b24c0723",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# IMPORTANT: do not run this cell unless you have first shut down the notebook to free memory - otherwise your kernel will crash (you will run out of memory)\n",
+    "f = open(\"wi.csv\")\n",
+    "# instead of passing relative path of file name, we can pass a file object instance reference\n",
+    "df = pd.read_csv(f) \n",
+    "f.close()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f8a9da46-53a5-4a7a-9d0e-6102aed9ea13",
+   "metadata": {},
+   "source": [
+    "### Let's free up memory and delete \"wi.csv\"."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "457ac313-eb56-445b-b899-65a060ac8b07",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = \"some string\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f26e069f-d5c5-46e0-bc91-8a8e55aae427",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "check_output([\"rm\", \"wi.csv\"])\n",
+    "str(check_output([\"ls\", \"-lh\"]), encoding=\"utf-8\").split(\"\\n\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "16a150df",
+   "metadata": {},
+   "source": [
+    "### How can we read data directly from a zip file?\n",
+    "`zipfile.ZipFile(...)`"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0eff57fa",
+   "metadata": {},
+   "source": [
+    "### Goals:\n",
+    "1. directly access the data without decompressing: `zipfile.ZipFile(...)` - saves storage space by directly opening a zip file\n",
+    "2. only look at one row at a time: `csv.DictReader(...)` - saves memory space by enabling us to read one row at a time (as `dict`)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a85ca8a2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# code for goal 1\n",
+    "\n",
+    "f = open(\"wi.csv\")\n",
+    "df = pd.read_csv(f) \n",
+    "f.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4eee3add-cb92-4654-ab29-2b5ae68c10d5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Free up the memory again\n",
+    "df = \"some string\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6a194b9b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# code for goal 2 & goal 1\n",
+    "zf = zipfile.ZipFile(\"wi.zip\")\n",
+    "f = zf.open(\"wi.csv\")\n",
+    "\n",
+    "df = pd.read_csv(f) \n",
+    "\n",
+    "f.close()\n",
+    "zf.close()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ca72997f",
+   "metadata": {},
+   "source": [
+    "### Let's learn more modes for the built-in `open` function\n",
+    "- `open(..., mode=\"r\")`   => text (default)\n",
+    "- `open(..., mode=\"rb\")`  => bytes\n",
+    "- `zf.open(...)`          => always bytes\n",
+    "\n",
+    "With the `zipfile` module, there isn't a way for us to specify that we need text.\n",
+    "\n",
+    "### `TextIOWrapper` inside `io` module enables us to convert `bytes` into `str`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ccc4954f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# code for goal 2 & goal 1\n",
+    "zf = zipfile.ZipFile(\"wi.zip\")\n",
+    "f = zf.open(\"wi.csv\")\n",
+    "\n",
+    "reader = csv.DictReader(f)\n",
+    "\n",
+    "for row in reader:\n",
+    "    print(row)\n",
+    "    break\n",
+    "\n",
+    "f.close()\n",
+    "zf.close()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8af3971c",
+   "metadata": {},
+   "source": [
+    "### Let's go back to calculating average interest rate.\n",
+    "- Algorithm / Pseudocode steps:\n",
+    "    1. print \"interest rate\" and type of \"interest rate\"\n",
+    "    2. convert \"interest rate\" into `float` - how can we handle errors? `try` ... `except` ... (*IMPORTANT*: always have your `except` block catch specific exceptions)\n",
+    "    3. calculate running total, count for each row of data\n",
+    "    4. calculate average"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8ad2b730",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "zf = zipfile.ZipFile(\"wi.zip\")\n",
+    "f = zf.open(\"wi.csv\")\n",
+    "\n",
+    "reader = csv.DictReader(TextIOWrapper(f))\n",
+    "\n",
+    "for row in reader:\n",
+    "    print(row)\n",
+    "    break\n",
+    "\n",
+    "f.close()\n",
+    "zf.close()\n",
+    "\n",
+    "total / count"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "51e7081b",
+   "metadata": {},
+   "source": [
+    "### Let's generalize the code to read \"interest rate\" into a function.\n",
+    "\n",
+    "- This does make things worse because we are going back to reading all the data before doing the computation.\n",
+    "- But this sets us up to learn about generators."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "79334762",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_rates_v1():\n",
+    "    pass"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "41f4bfe4",
+   "metadata": {},
+   "source": [
+    "### Using a generator\n",
+    "- `yield` each value\n",
+    "- use `next` to get the next value => internally, a `for` loop invokes `next` on each iteration"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "22c8a41b-1356-4aad-8f92-c3db19ce4c03",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_rates_v2():\n",
+    "    pass"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fe2f060b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "next(rates) # gives us the next value"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ea5c188d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "next(rates) # gives us the next value"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3101b71c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "next(rates) # gives us the next value"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b47aab60",
+   "metadata": {},
+   "source": [
+    "### Let's use `for` loop to keep getting all the rates.\n",
+    "\n",
+    "- `len` function doesn't work with generators\n",
+    "- indexing doesn't work with generators"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5a074669",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "len(rates)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d536ef4d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "rates[4]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0f5dbd13",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "rates = get_rates_v2()\n",
+    "\n",
+    "total = 0\n",
+    "count = 0\n",
+    "\n",
+    " # keeps calling next(rates) to get values from yield\n",
+    "\n",
+    "    \n",
+    "total / count"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ff9e77e5",
+   "metadata": {},
+   "source": [
+    "This approach doesn't work for median calculation. Why? Remember we have to sort, so we need all values in memory."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ace73646-1b9c-45d6-881f-832a5e170766",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "rates = list(get_rates_v2())\n",
+    "rates.sort()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f6ff9655",
+   "metadata": {},
+   "source": [
+    "# OOP 1: Classes\n",
+    "\n",
+    "- Creating new types using classes\n",
+    "- Types have specific attributes and methods (special functions)\n",
+    "- Using new types (classes), we can create object instances of those types\n",
+    "- class creation and instantiation syntax: \n",
+    "```python\n",
+    "class Person:\n",
+    "    # some code\n",
+    "p1 = Person() # object instantiation using constructor\n",
+    "p2 = Person() # object instantiation using constructor\n",
+    "```\n",
+    "- attribute / method access syntax:\n",
+    "```python\n",
+    "p1.fname = \"...\" # attribute initialization\n",
+    "p1.lname = \"...\" # attribute initialization\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0593b013",
+   "metadata": {},
+   "source": [
+    "#### PythonTutor example"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a99b7194",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "p1 = {\"fname\": \"Bob\", \"lname\": \"Baker\"}\n",
+    "\n",
+    "p2 = dict()\n",
+    "p2[\"fname\"] = \"Cindy\"\n",
+    "p2[\"lname\"] = \"Cooper\"\n",
+    "\n",
+    "p3 = {\"Fname\": \"Alice\", \"lname\": \"Anderson\"}\n",
+    "\n",
+    "# TODO: Let's define a Person class"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6427a25c",
+   "metadata": {},
+   "source": [
+    "### Let's create a `Dog` class."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5154fd9e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# eventually we will learn how to write code inside a class"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7d3a0c2a",
+   "metadata": {},
+   "source": [
+    "### Let's create `Dog` object instances and add attributes."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "db5558da",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0600c5b8",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "93b2e3a9",
+   "metadata": {},
+   "source": [
+    "### Let's define a `speak` function that will make the `Dog` bark.\n",
+    "- Algorithm / pseudocode steps:\n",
+    "    1. puppies bark thrice (age < 2)\n",
+    "    2. dogs bark once"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1084ec49",
+   "metadata": {},
+   "source": [
+    "### `f-strings`\n",
+    "\n",
+    "- aka formatted string literals\n",
+    "- easier and quicker way of formatting `str` than `str.format(...)` method\n",
+    "\n",
+    "- Syntax: \n",
+    "```python\n",
+    "f\"{expression} ...\"\n",
+    "```\n",
+    "- inside `{}` you can specify a variable or even call a function or a method"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1592767d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def speak(dog):\n",
+    "    if dog.age < 2:\n",
+    "        pass\n",
+    "    else:\n",
+    "        pass"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b3cbd57e",
+   "metadata": {},
+   "source": [
+    "### Let's invoke `speak` for dog1 and dog2."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0f9fa462",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "speak(dog1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "61b30ab8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "speak(dog2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "38c31465",
+   "metadata": {},
+   "source": [
+    "### How can we standardize the attribute initialization to avoid bugs?\n",
+    "\n",
+    "- Eventually we will learn about how to define methods inside the class, which will include `__init__` method.\n",
+    "- For now, let's define an `init` function."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2682bdb7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def init(???):\n",
+    "    pass"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4cc6c3a1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dog2 = Dog()\n",
+    "init(???)\n",
+    "speak(dog2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "144913e6",
+   "metadata": {},
+   "source": [
+    "### What if there are two `speak` functions? Let's define a Cat class and corresponding `speak` function."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "eb5d407e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class Cat:\n",
+    "    pass\n",
+    "\n",
+    "cat1 = Cat()\n",
+    "\n",
+    "def speak(cat):\n",
+    "    \"\"\"\n",
+    "    Cats meow!\n",
+    "    \"\"\"\n",
+    "    print(\"meow!\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ab8106b8",
+   "metadata": {},
+   "source": [
+    "### What will be the output of the below function calls?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "747a8cc1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "speak(dog1)\n",
+    "speak(dog2)\n",
+    "speak(cat1)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "94dae812",
+   "metadata": {},
+   "source": [
+    "### We lost the previous definition of the `speak` function because the second `speak` function has the same name and replaced it. What if `speak` were a method instead?"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "51c27380",
+   "metadata": {},
+   "source": [
+    "### **IMPORTANT**: it is not recommended to re-define the same `class`. This is shown only for example purposes. You must always go back to the original cell and update the definition there."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d311c1fb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class Dog:\n",
+    "    pass # eventually we will learn how to write code inside a class\n",
+    "\n",
+    "# Regular function that accepts an object instance of the new type\n",
+    "def speak(dog):\n",
+    "    \"\"\"\n",
+    "    Puppies (age < 2) bark thrice, whereas dogs bark once.\n",
+    "    \"\"\"\n",
+    "    if dog.age < 2:\n",
+    "        #print(dog.name + \": bark bark bark!\")\n",
+    "        print(f\"{dog.name}: bark bark bark!\")\n",
+    "    else:\n",
+    "        #print(dog.name + \": bark!\")\n",
+    "        print(f\"{dog.name}: bark!\")\n",
+    "        \n",
+    "# Regular function that accepts an object instance of the new type along with attribute values\n",
+    "def init(dog, name, how_old):\n",
+    "    dog.name = name\n",
+    "    dog.age = how_old\n",
+    "        \n",
+    "class Cat:\n",
+    "    pass\n",
+    "\n",
+    "def speak(cat):\n",
+    "    \"\"\"\n",
+    "    Cats meow!\n",
+    "    \"\"\"\n",
+    "    print(\"meow!\")\n",
+    "    \n",
+    "# Let's create object instances\n",
+    "dog1 = Dog()\n",
+    "init(dog1, \"Jimmy\", 1)\n",
+    "\n",
+    "dog2 = Dog()\n",
+    "init(dog2, \"Buster\", 10)\n",
+    "\n",
+    "cat1 = Cat()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "62165284",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# speak now is a method, so we need to use . attribute operator for invocation\n",
+    "speak(dog1)\n",
+    "speak(dog2)\n",
+    "speak(cat1)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "809c208e",
+   "metadata": {},
+   "source": [
+    "### Type-based dispatch"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f930cc92",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "animals = [dog1, dog2, cat1]\n",
+    "\n",
+    "for animal in animals:\n",
+    "    print(type(animal))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "44b76bbb",
+   "metadata": {},
+   "source": [
+    "#### Even though the `type` output displays additional details, in essence the type is just the name of the class: `Dog`, `Cat`, etc."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fc89bc34",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "type(dog1) == Dog"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c1dff233",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "type(cat1) == Cat"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c9311d53",
+   "metadata": {},
+   "source": [
+    "#### Let's invoke speak for all animals."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f1f6f851",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# v1: bad version\n",
+    "for animal in animals:\n",
+    "    if type(animal) == Dog:\n",
+    "        Dog.speak(animal)\n",
+    "    elif type(animal) == Cat:\n",
+    "        Cat.speak(animal)\n",
+    "    # this conditional will keep growing as we add more and \n",
+    "    # more animal classes!"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fc2d9ce4",
+   "metadata": {},
+   "source": [
+    "#### Here is a slightly better version"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d8d69a30",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for animal in animals:\n",
+    "    type(animal).speak(animal)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "187b4671",
+   "metadata": {},
+   "source": [
+    "### Method invocation (most commonly used syntax)\n",
+    "\n",
+    "Notice how `animal` appears twice in `type(animal).speak(animal)`, which is redundant. There is a better way to invoke methods.\n",
+    "\n",
+    "- Syntax: `obj_ref.method()`\n",
+    "- `obj_ref` itself will be the first argument to the method."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8d5f3794",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for animal in animals:\n",
+    "    # this is equivalent to type(animal).speak(animal)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3c7ddcd5",
+   "metadata": {},
+   "source": [
+    "#### Let's try passing an argument to `speak` method."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b55d4a90",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dog1.speak(\"hello\")\n",
+    "# Observe how TypeError says 1 positional argument expected"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c201d284",
+   "metadata": {},
+   "source": [
+    "## `self`\n",
+    "\n",
+    "- conventional name for the first parameter of a method; it refers to the current object instance (aka receiver) the method was invoked on\n",
+    "- attribute access inside the class **must** always use `self.<attribute>` syntax"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "50ce0774",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class Dog:\n",
+    "    # regular method\n",
+    "    def init(dog, name, how_old): \n",
+    "        dog.name = name\n",
+    "        dog.age = how_old\n",
+    "    \n",
+    "    # regular method\n",
+    "    def speak(dog):\n",
+    "        \"\"\"\n",
+    "        Puppies (age < 2) bark thrice, whereas dogs bark once.\n",
+    "        \"\"\"\n",
+    "        if dog.age < 2:\n",
+    "            #print(dog.name + \": bark bark bark!\")\n",
+    "            print(f\"{dog.name}: bark bark bark!\")\n",
+    "        else:\n",
+    "            #print(dog.name + \": bark!\")\n",
+    "            print(f\"{dog.name}: bark!\")\n",
+    "\n",
+    "# Let's create Dog object instances\n",
+    "dog1 = Dog() \n",
+    "dog1.init(\"Jimmy\", 1)\n",
+    "\n",
+    "dog2 = Dog()\n",
+    "dog2.init(\"Buster\", 10)\n",
+    "\n",
+    "# Invoke speak for dog1 and dog2\n",
+    "dog1.speak()\n",
+    "dog2.speak()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f801c756",
+   "metadata": {},
+   "source": [
+    "# OOP: Special Methods\n",
+    "\n",
+    "\"Special methods\" is a technical term referring to methods that get called automatically. In Python, they usually begin and end with double underscores.\n",
+    "- **Note:** you could define a regular method with `__<method>__`."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "67b222d8",
+   "metadata": {},
+   "source": [
+    "### `__init__` special method (aka Constructor)\n",
+    "\n",
+    "- automatically invoked when creating an object instance\n",
+    "- only one possible constructor in Python"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c71681c9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# This is the correct and final version of Dog class\n",
+    "class Dog:\n",
+    "    # regular method\n",
+    "    def init(self, name, how_old): \n",
+    "        self.name = name\n",
+    "        self.age = how_old\n",
+    "    \n",
+    "    # regular method\n",
+    "    def speak(self):\n",
+    "        \"\"\"\n",
+    "        Puppies (age < 2) bark thrice, whereas dogs bark once.\n",
+    "        \"\"\"\n",
+    "        if self.age < 2:\n",
+    "            #print(dog.name + \": bark bark bark!\")\n",
+    "            print(f\"{self.name}: bark bark bark!\")\n",
+    "        else:\n",
+    "            #print(dog.name + \": bark!\")\n",
+    "            print(f\"{self.name}: bark!\")\n",
+    "\n",
+    "\n",
+    "# Let's create Dog object instances\n",
+    "dog1 = Dog() \n",
+    "dog1.init(\"Jimmy\", 1)\n",
+    "\n",
+    "dog2 = Dog()\n",
+    "dog2.init(\"Buster\", 10)\n",
+    "\n",
+    "# Invoke speak for dog1 and dog2\n",
+    "dog1.speak()\n",
+    "dog2.speak()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
-- 
GitLab