From a58c5160b1df3fde7bb3a61efe0fb488d48a4f1e Mon Sep 17 00:00:00 2001 From: gsingh58 <gurmail-singh@wisc.edu> Date: Tue, 6 Feb 2024 08:16:41 -0600 Subject: [PATCH] lec5 notes updated --- lecture_material/05-oop1/solution.ipynb | 1981 +++++++++++++++++ .../05-oop1/template_lec_001.ipynb | 1157 ++++++++++ .../05-oop1/template_lec_002.ipynb | 1157 ++++++++++ 3 files changed, 4295 insertions(+) create mode 100644 lecture_material/05-oop1/solution.ipynb create mode 100644 lecture_material/05-oop1/template_lec_001.ipynb create mode 100644 lecture_material/05-oop1/template_lec_002.ipynb diff --git a/lecture_material/05-oop1/solution.ipynb b/lecture_material/05-oop1/solution.ipynb new file mode 100644 index 0000000..f0fde29 --- /dev/null +++ b/lecture_material/05-oop1/solution.ipynb @@ -0,0 +1,1981 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "d617eefb", + "metadata": {}, + "source": [ + "# Performance 3" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "783117c5-146f-454a-963e-ed2873b8a6d3", + "metadata": {}, + "outputs": [], + "source": [ + "# known import statements\n", + "import pandas as pd\n", + "import csv\n", + "from subprocess import check_output\n", + "\n", + "# new import statements\n", + "import zipfile\n", + "from io import TextIOWrapper" + ] + }, + { + "cell_type": "markdown", + "id": "4e2be82d", + "metadata": {}, + "source": [ + "### Let's take a look at the files inside the current working directory." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "4eaa8a8d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['total 21M',\n", + " 'drwxrwxr-x 2 gurmail.singh gurmail.singh 4.0K Jan 25 20:58 01-repro1',\n", + " 'drwxrwxr-x 3 gurmail.singh gurmail.singh 4.0K Jan 25 21:24 02-repro2',\n", + " 'drwxrwxr-x 3 gurmail.singh gurmail.singh 4.0K Feb 1 20:12 03-performance1',\n", + " 'drwxrwxr-x 3 gurmail.singh gurmail.singh 4.0K Feb 5 22:48 04-performance2',\n", + " '-rw-rw-r-- 1 gurmail.singh gurmail.singh 7.7K Jan 30 20:12 Untitled.ipynb',\n", + " 'drwxrwxr-x 2 gurmail.singh gurmail.singh 4.0K Jan 27 10:17 img',\n", + " '-rw------- 1 gurmail.singh gurmail.singh 21K Feb 6 13:55 nohup.out',\n", + " '-rw-rw-r-- 1 gurmail.singh gurmail.singh 6.6K Jan 30 19:16 out.mp4',\n", + " '-rw-rw-r-- 1 gurmail.singh gurmail.singh 122K Feb 5 21:27 reading1.ipynb',\n", + " '-rw-rw-r-- 1 gurmail.singh gurmail.singh 52K Feb 6 13:54 solution.ipynb',\n", + " '-rw-rw-r-- 1 gurmail.singh gurmail.singh 29K Jan 30 14:26 template_lec_001.ipynb',\n", + " '-rw-rw-r-- 1 gurmail.singh gurmail.singh 21M Feb 6 13:48 wi.zip',\n", + " '']" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "str(check_output([\"ls\", \"-lh\"]), encoding=\"utf-8\").split(\"\\n\")" + ] + }, + { + "cell_type": "markdown", + "id": "b8c7dc7f", + "metadata": {}, + "source": [ + "### Let's `unzip` \"wi.zip\"." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "ed32cf4c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "b'Archive: wi.zip\\n inflating: wi.csv \\n'" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "check_output([\"unzip\", \"wi.zip\"])" + ] + }, + { + "cell_type": "markdown", + "id": "4eac1b48", + "metadata": {}, + "source": [ + "### Let's take a look at the files inside the current working directory." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "a6852e43", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['total 198M',\n", + " 'drwxrwxr-x 2 gurmail.singh gurmail.singh 4.0K Jan 25 20:58 01-repro1',\n", + " 'drwxrwxr-x 3 gurmail.singh gurmail.singh 4.0K Jan 25 21:24 02-repro2',\n", + " 'drwxrwxr-x 3 gurmail.singh gurmail.singh 4.0K Feb 1 20:12 03-performance1',\n", + " 'drwxrwxr-x 3 gurmail.singh gurmail.singh 4.0K Feb 5 22:48 04-performance2',\n", + " '-rw-rw-r-- 1 gurmail.singh gurmail.singh 7.7K Jan 30 20:12 Untitled.ipynb',\n", + " 'drwxrwxr-x 2 gurmail.singh gurmail.singh 4.0K Jan 27 10:17 img',\n", + " '-rw------- 1 gurmail.singh gurmail.singh 21K Feb 6 13:55 nohup.out',\n", + " '-rw-rw-r-- 1 gurmail.singh gurmail.singh 6.6K Jan 30 19:16 out.mp4',\n", + " '-rw-rw-r-- 1 gurmail.singh gurmail.singh 122K Feb 5 21:27 reading1.ipynb',\n", + " '-rw-rw-r-- 1 gurmail.singh gurmail.singh 52K Feb 6 13:54 solution.ipynb',\n", + " '-rw-rw-r-- 1 gurmail.singh gurmail.singh 29K Jan 30 14:26 template_lec_001.ipynb',\n", + " '-rw-rw-r-- 1 gurmail.singh gurmail.singh 177M Jan 14 2022 wi.csv',\n", + " '-rw-rw-r-- 1 gurmail.singh gurmail.singh 21M Feb 6 13:48 wi.zip',\n", + " '']" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "str(check_output([\"ls\", \"-lh\"]), encoding=\"utf-8\").split(\"\\n\")" + ] + }, + { + "cell_type": "markdown", + "id": "8ba94151", + "metadata": {}, + "source": [ + "### Traditional way of reading data using pandas" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "529a4bd2", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_12578/3756477020.py:1: DtypeWarning: Columns (22,23,24,26,27,28,29,30,31,32,33,38,43,44) have mixed types. 
Specify dtype option on import or set low_memory=False.\n", + " df = pd.read_csv(\"wi.csv\")\n" + ] + } + ], + "source": [ + "df = pd.read_csv(\"wi.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "570485b8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>activity_year</th>\n", + " <th>lei</th>\n", + " <th>derived_msa-md</th>\n", + " <th>state_code</th>\n", + " <th>county_code</th>\n", + " <th>census_tract</th>\n", + " <th>conforming_loan_limit</th>\n", + " <th>derived_loan_product_type</th>\n", + " <th>derived_dwelling_category</th>\n", + " <th>derived_ethnicity</th>\n", + " <th>...</th>\n", + " <th>denial_reason-2</th>\n", + " <th>denial_reason-3</th>\n", + " <th>denial_reason-4</th>\n", + " <th>tract_population</th>\n", + " <th>tract_minority_population_percent</th>\n", + " <th>ffiec_msa_md_median_family_income</th>\n", + " <th>tract_to_msa_income_percentage</th>\n", + " <th>tract_owner_occupied_units</th>\n", + " <th>tract_one_to_four_family_homes</th>\n", + " <th>tract_median_age_of_housing_units</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>2020</td>\n", + " <td>549300FX7K8PTEQUU487</td>\n", + " <td>31540</td>\n", + " <td>WI</td>\n", + " <td>55025.0</td>\n", + " <td>5.502500e+10</td>\n", + " <td>C</td>\n", + " <td>Conventional:First Lien</td>\n", + " <td>Single Family (1-4 Units):Site-Built</td>\n", + " <td>Not Hispanic or Latino</td>\n", + " <td>...</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>3572</td>\n", + " 
<td>41.15</td>\n", + " <td>96600</td>\n", + " <td>64</td>\n", + " <td>812</td>\n", + " <td>910</td>\n", + " <td>45</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>2020</td>\n", + " <td>549300FX7K8PTEQUU487</td>\n", + " <td>99999</td>\n", + " <td>WI</td>\n", + " <td>55013.0</td>\n", + " <td>5.501397e+10</td>\n", + " <td>C</td>\n", + " <td>Conventional:First Lien</td>\n", + " <td>Single Family (1-4 Units):Site-Built</td>\n", + " <td>Not Hispanic or Latino</td>\n", + " <td>...</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>2333</td>\n", + " <td>9.90</td>\n", + " <td>68000</td>\n", + " <td>87</td>\n", + " <td>1000</td>\n", + " <td>2717</td>\n", + " <td>34</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>2020</td>\n", + " <td>549300FX7K8PTEQUU487</td>\n", + " <td>99999</td>\n", + " <td>WI</td>\n", + " <td>55127.0</td>\n", + " <td>5.512700e+10</td>\n", + " <td>C</td>\n", + " <td>VA:First Lien</td>\n", + " <td>Single Family (1-4 Units):Site-Built</td>\n", + " <td>Not Hispanic or Latino</td>\n", + " <td>...</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>5943</td>\n", + " <td>13.26</td>\n", + " <td>68000</td>\n", + " <td>104</td>\n", + " <td>1394</td>\n", + " <td>1856</td>\n", + " <td>44</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>2020</td>\n", + " <td>549300FX7K8PTEQUU487</td>\n", + " <td>99999</td>\n", + " <td>WI</td>\n", + " <td>55127.0</td>\n", + " <td>5.512700e+10</td>\n", + " <td>C</td>\n", + " <td>Conventional:Subordinate Lien</td>\n", + " <td>Single Family (1-4 Units):Site-Built</td>\n", + " <td>Ethnicity Not Available</td>\n", + " <td>...</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>5650</td>\n", + " <td>7.63</td>\n", + " <td>68000</td>\n", + " <td>124</td>\n", + " <td>1712</td>\n", + " <td>2104</td>\n", + " <td>36</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>2020</td>\n", + " 
<td>549300FX7K8PTEQUU487</td>\n", + " <td>33460</td>\n", + " <td>WI</td>\n", + " <td>55109.0</td>\n", + " <td>5.510912e+10</td>\n", + " <td>C</td>\n", + " <td>VA:First Lien</td>\n", + " <td>Single Family (1-4 Units):Site-Built</td>\n", + " <td>Not Hispanic or Latino</td>\n", + " <td>...</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>7210</td>\n", + " <td>4.36</td>\n", + " <td>97300</td>\n", + " <td>96</td>\n", + " <td>2101</td>\n", + " <td>2566</td>\n", + " <td>22</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>5 rows × 99 columns</p>\n", + "</div>" + ], + "text/plain": [ + " activity_year lei derived_msa-md state_code \\\n", + "0 2020 549300FX7K8PTEQUU487 31540 WI \n", + "1 2020 549300FX7K8PTEQUU487 99999 WI \n", + "2 2020 549300FX7K8PTEQUU487 99999 WI \n", + "3 2020 549300FX7K8PTEQUU487 99999 WI \n", + "4 2020 549300FX7K8PTEQUU487 33460 WI \n", + "\n", + " county_code census_tract conforming_loan_limit \\\n", + "0 55025.0 5.502500e+10 C \n", + "1 55013.0 5.501397e+10 C \n", + "2 55127.0 5.512700e+10 C \n", + "3 55127.0 5.512700e+10 C \n", + "4 55109.0 5.510912e+10 C \n", + "\n", + " derived_loan_product_type derived_dwelling_category \\\n", + "0 Conventional:First Lien Single Family (1-4 Units):Site-Built \n", + "1 Conventional:First Lien Single Family (1-4 Units):Site-Built \n", + "2 VA:First Lien Single Family (1-4 Units):Site-Built \n", + "3 Conventional:Subordinate Lien Single Family (1-4 Units):Site-Built \n", + "4 VA:First Lien Single Family (1-4 Units):Site-Built \n", + "\n", + " derived_ethnicity ... denial_reason-2 denial_reason-3 \\\n", + "0 Not Hispanic or Latino ... NaN NaN \n", + "1 Not Hispanic or Latino ... NaN NaN \n", + "2 Not Hispanic or Latino ... NaN NaN \n", + "3 Ethnicity Not Available ... NaN NaN \n", + "4 Not Hispanic or Latino ... 
NaN NaN \n", + "\n", + " denial_reason-4 tract_population tract_minority_population_percent \\\n", + "0 NaN 3572 41.15 \n", + "1 NaN 2333 9.90 \n", + "2 NaN 5943 13.26 \n", + "3 NaN 5650 7.63 \n", + "4 NaN 7210 4.36 \n", + "\n", + " ffiec_msa_md_median_family_income tract_to_msa_income_percentage \\\n", + "0 96600 64 \n", + "1 68000 87 \n", + "2 68000 104 \n", + "3 68000 124 \n", + "4 97300 96 \n", + "\n", + " tract_owner_occupied_units tract_one_to_four_family_homes \\\n", + "0 812 910 \n", + "1 1000 2717 \n", + "2 1394 1856 \n", + "3 1712 2104 \n", + "4 2101 2566 \n", + "\n", + " tract_median_age_of_housing_units \n", + "0 45 \n", + "1 34 \n", + "2 44 \n", + "3 36 \n", + "4 22 \n", + "\n", + "[5 rows x 99 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head(5) # Top 5 rows within the DataFrame" + ] + }, + { + "cell_type": "markdown", + "id": "bad7dce4", + "metadata": {}, + "source": [ + "### How can we see all the column names?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "d0a98751", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['activity_year', 'lei', 'derived_msa-md', 'state_code', 'county_code',\n", + " 'census_tract', 'conforming_loan_limit', 'derived_loan_product_type',\n", + " 'derived_dwelling_category', 'derived_ethnicity', 'derived_race',\n", + " 'derived_sex', 'action_taken', 'purchaser_type', 'preapproval',\n", + " 'loan_type', 'loan_purpose', 'lien_status', 'reverse_mortgage',\n", + " 'open-end_line_of_credit', 'business_or_commercial_purpose',\n", + " 'loan_amount', 'loan_to_value_ratio', 'interest_rate', 'rate_spread',\n", + " 'hoepa_status', 'total_loan_costs', 'total_points_and_fees',\n", + " 'origination_charges', 'discount_points', 'lender_credits', 'loan_term',\n", + " 'prepayment_penalty_term', 'intro_rate_period', 'negative_amortization',\n", + " 'interest_only_payment', 'balloon_payment',\n", + " 'other_nonamortizing_features', 'property_value', 'construction_method',\n", + " 'occupancy_type', 'manufactured_home_secured_property_type',\n", + " 'manufactured_home_land_property_interest', 'total_units',\n", + " 'multifamily_affordable_units', 'income', 'debt_to_income_ratio',\n", + " 'applicant_credit_score_type', 'co-applicant_credit_score_type',\n", + " 'applicant_ethnicity-1', 'applicant_ethnicity-2',\n", + " 'applicant_ethnicity-3', 'applicant_ethnicity-4',\n", + " 'applicant_ethnicity-5', 'co-applicant_ethnicity-1',\n", + " 'co-applicant_ethnicity-2', 'co-applicant_ethnicity-3',\n", + " 'co-applicant_ethnicity-4', 'co-applicant_ethnicity-5',\n", + " 'applicant_ethnicity_observed', 'co-applicant_ethnicity_observed',\n", + " 'applicant_race-1', 'applicant_race-2', 'applicant_race-3',\n", + " 'applicant_race-4', 'applicant_race-5', 'co-applicant_race-1',\n", + " 'co-applicant_race-2', 'co-applicant_race-3', 'co-applicant_race-4',\n", + " 'co-applicant_race-5', 'applicant_race_observed',\n", + " 
'co-applicant_race_observed', 'applicant_sex', 'co-applicant_sex',\n", + " 'applicant_sex_observed', 'co-applicant_sex_observed', 'applicant_age',\n", + " 'co-applicant_age', 'applicant_age_above_62',\n", + " 'co-applicant_age_above_62', 'submission_of_application',\n", + " 'initially_payable_to_institution', 'aus-1', 'aus-2', 'aus-3', 'aus-4',\n", + " 'aus-5', 'denial_reason-1', 'denial_reason-2', 'denial_reason-3',\n", + " 'denial_reason-4', 'tract_population',\n", + " 'tract_minority_population_percent',\n", + " 'ffiec_msa_md_median_family_income', 'tract_to_msa_income_percentage',\n", + " 'tract_owner_occupied_units', 'tract_one_to_four_family_homes',\n", + " 'tract_median_age_of_housing_units'],\n", + " dtype='object')" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.columns" + ] + }, + { + "cell_type": "markdown", + "id": "26311838", + "metadata": {}, + "source": [ + "### How to extract `interest_rate`?" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "c4bae34a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 NaN\n", + "1 3.0\n", + "2 NaN\n", + "3 3.75\n", + "4 2.5\n", + " ... \n", + "468267 2.25\n", + "468268 2.5\n", + "468269 4.25\n", + "468270 NaN\n", + "468271 NaN\n", + "Name: interest_rate, Length: 468272, dtype: object" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"interest_rate\"] # observe that there are missing values" + ] + }, + { + "cell_type": "markdown", + "id": "148b1243", + "metadata": {}, + "source": [ + "### How to count unique values in a column `Series`?" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "f310b537", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "interest_rate\n", + "Exempt 37959\n", + "3.0 22584\n", + "2.75 22480\n", + "3.25 21343\n", + "2.875 21201\n", + " ... 
\n", + "3.023 1\n", + "2.632 1\n", + "3.345 1\n", + "3.364 1\n", + "2.32 1\n", + "Name: count, Length: 2080, dtype: int64" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"interest_rate\"].value_counts()" + ] + }, + { + "cell_type": "markdown", + "id": "e0da9824", + "metadata": {}, + "source": [ + "### Let's eliminate the strings (Exempt) and missing values (NaN).\n", + "Let's try `pd.to_numeric(...)`. We need a way to specify that strings need to be converted into NaN values." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "3beaae6e", + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "Unable to parse string \"Exempt\" at position 1505", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32mlib.pyx:2368\u001b[0m, in \u001b[0;36mpandas._libs.lib.maybe_convert_numeric\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: Unable to parse string \"Exempt\"", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[10], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_numeric\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43minterest_rate\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m \n\u001b[1;32m 2\u001b[0m \u001b[38;5;66;03m# TODO: open the documentation and figure out what parameter will help us\u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# Recall that we can press shift + tab after a function name to open the documentation\u001b[39;00m\n", + "File
\u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/core/tools/numeric.py:222\u001b[0m, in \u001b[0;36mto_numeric\u001b[0;34m(arg, errors, downcast, dtype_backend)\u001b[0m\n\u001b[1;32m 220\u001b[0m coerce_numeric \u001b[38;5;241m=\u001b[39m errors \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m (\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mraise\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 221\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 222\u001b[0m values, new_mask \u001b[38;5;241m=\u001b[39m \u001b[43mlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmaybe_convert_numeric\u001b[49m\u001b[43m(\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# type: ignore[call-overload] # noqa: E501\u001b[39;49;00m\n\u001b[1;32m 223\u001b[0m \u001b[43m \u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 224\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mset\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 225\u001b[0m \u001b[43m \u001b[49m\u001b[43mcoerce_numeric\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcoerce_numeric\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 226\u001b[0m \u001b[43m \u001b[49m\u001b[43mconvert_to_masked_nullable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype_backend\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mis\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mno_default\u001b[49m\n\u001b[1;32m 227\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43misinstance\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mvalues_dtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mStringDtype\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 
228\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 229\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mValueError\u001b[39;00m, \u001b[38;5;167;01mTypeError\u001b[39;00m):\n\u001b[1;32m 230\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m errors \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mraise\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n", + "File \u001b[0;32mlib.pyx:2410\u001b[0m, in \u001b[0;36mpandas._libs.lib.maybe_convert_numeric\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: Unable to parse string \"Exempt\" at position 1505" + ] + } + ], + "source": [ + "pd.to_numeric(df[\"interest_rate\"]) \n", + "# TODO: open the documentation and figure out what parameter will help us\n", + "# Recall that we can press shift + tab after a function name to open the documentation" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "9c342dce", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 NaN\n", + "1 3.00\n", + "2 NaN\n", + "3 3.75\n", + "4 2.50\n", + " ... \n", + "468267 2.25\n", + "468268 2.50\n", + "468269 4.25\n", + "468270 NaN\n", + "468271 NaN\n", + "Name: interest_rate, Length: 468272, dtype: float64" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.to_numeric(df[\"interest_rate\"], errors=\"coerce\")" + ] + }, + { + "cell_type": "markdown", + "id": "546b218b", + "metadata": {}, + "source": [ + "### Let's drop the NaN values and compute average interest rate." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "e4f21269", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1 3.00\n", + "3 3.75\n", + "4 2.50\n", + "5 2.75\n", + "7 3.75\n", + " ... 
\n", + "468265 4.50\n", + "468266 2.50\n", + "468267 2.25\n", + "468268 2.50\n", + "468269 4.25\n", + "Name: interest_rate, Length: 324658, dtype: float64" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.to_numeric(df[\"interest_rate\"], errors=\"coerce\").dropna()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "113dd8a5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3.266264315063852" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.to_numeric(df[\"interest_rate\"], errors=\"coerce\").dropna().mean()" + ] + }, + { + "cell_type": "markdown", + "id": "8bfd7d99-805b-4843-a3cc-4109e403d1f1", + "metadata": {}, + "source": [ + "### Clearing memory using re-assignment.\n", + "In Python, you can clear memory used up for an object simply by getting rid of all the active references. But we cannot do that for the current notebook because we used \"df\" to perform other operations, so there is more than one active reference. In fact, we don't even have access to some of the active references. In that case, you can only free up the memory after you \"shutdown\" the current notebook." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "90a99ef9-6b88-4bc5-8b41-57bf64aab41a", + "metadata": {}, + "outputs": [], + "source": [ + "df = \"some string\"" + ] + }, + { + "cell_type": "markdown", + "id": "8b902599", + "metadata": {}, + "source": [ + "### How can we read the data without creating an uncompressed version called \"wi.csv\"?\n", + "\n", + "- Why would we want to do something like that?\n", + " 1. lower memory usage (we can try to load information on one loan at a time, instead of all the loans): that will still work for average interest rate computation\n", + " 2.
lower storage usage (you can directly work with compressed data)\n", + " \n", + "**IMPORTANT**: do not run the next code cell unless you have first shut down the notebook - otherwise your kernel will crash (you will run out of memory space)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4c59ae54", + "metadata": {}, + "outputs": [], + "source": [ + "# IMPORTANT: do not run this cell code unless you shutdown the notebook - your kernel will crash (you will run out of memory space)\n", + "f = open(\"wi.csv\")\n", + "# instead of passing relative path of file name, we can pass a file object instance reference\n", + "df = pd.read_csv(f) \n", + "f.close()" + ] + }, + { + "cell_type": "markdown", + "id": "36c1faa7-67f5-4135-9b89-1a5e897f5c68", + "metadata": {}, + "source": [ + "### Let's free up memory and delete \"wi.csv\"." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "4bd8134d-1dea-449d-bea6-34a62b6d38b7", + "metadata": {}, + "outputs": [], + "source": [ + "df = \"some string\"" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "8aad54fc-eb70-4e7d-bcbc-334cb81d11ac", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['total 21M',\n", + " 'drwxrwxr-x 2 gurmail.singh gurmail.singh 4.0K Jan 25 20:58 01-repro1',\n", + " 'drwxrwxr-x 3 gurmail.singh gurmail.singh 4.0K Jan 25 21:24 02-repro2',\n", + " 'drwxrwxr-x 3 gurmail.singh gurmail.singh 4.0K Feb 1 20:12 03-performance1',\n", + " 'drwxrwxr-x 3 gurmail.singh gurmail.singh 4.0K Feb 5 22:48 04-performance2',\n", + " '-rw-rw-r-- 1 gurmail.singh gurmail.singh 7.7K Jan 30 20:12 Untitled.ipynb',\n", + " 'drwxrwxr-x 2 gurmail.singh gurmail.singh 4.0K Jan 27 10:17 img',\n", + " '-rw------- 1 gurmail.singh gurmail.singh 21K Feb 6 13:56 nohup.out',\n", + " '-rw-rw-r-- 1 gurmail.singh gurmail.singh 6.6K Jan 30 19:16 out.mp4',\n", + " '-rw-rw-r-- 1 gurmail.singh gurmail.singh 122K Feb 5 21:27 reading1.ipynb',\n", + " '-rw-rw-r-- 1 gurmail.singh gurmail.singh 50K Feb 6
13:56 solution.ipynb',\n", + " '-rw-rw-r-- 1 gurmail.singh gurmail.singh 29K Jan 30 14:26 template_lec_001.ipynb',\n", + " '-rw-rw-r-- 1 gurmail.singh gurmail.singh 21M Feb 6 13:48 wi.zip',\n", + " '']" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "check_output([\"rm\", \"wi.csv\"])\n", + "str(check_output([\"ls\", \"-lh\"]), encoding=\"utf-8\").split(\"\\n\")" + ] + }, + { + "cell_type": "markdown", + "id": "38c7d946", + "metadata": {}, + "source": [ + "### How can we read data directly from a zip file?\n", + "`zipfile.ZipFile(...)`" + ] + }, + { + "cell_type": "markdown", + "id": "7d81fe7c", + "metadata": {}, + "source": [ + "### Goals:\n", + "1. directly access the data without decompressing: `zipfile.ZipFile(...)` - saves storage space by directly opening a zip file\n", + "2. only look at one row at a time: `csv.DictReader(...)` - saves memory space by enabling us to read one row at a time (as `dict`)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "2a158f48", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_12609/3313127204.py:5: DtypeWarning: Columns (22,23,24,26,27,28,29,30,31,32,33,38,43,44) have mixed types. 
Specify dtype option on import or set low_memory=False.\n", + " df = pd.read_csv(f)\n" + ] + } + ], + "source": [ + "# code for goal 1\n", + "zf = zipfile.ZipFile(\"wi.zip\")\n", + "f = zf.open(\"wi.csv\")\n", + "\n", + "df = pd.read_csv(f) \n", + "\n", + "f.close()\n", + "zf.close()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "a5e7c032-0def-4eb2-b5b9-04f7240911f8", + "metadata": {}, + "outputs": [], + "source": [ + "# Free up the memory again\n", + "df = \"some string\"" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "4fddde38", + "metadata": {}, + "outputs": [ + { + "ename": "Error", + "evalue": "iterator should return strings, not bytes (the file should be opened in text mode)", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[8], line 7\u001b[0m\n\u001b[1;32m 3\u001b[0m f \u001b[38;5;241m=\u001b[39m zf\u001b[38;5;241m.\u001b[39mopen(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwi.csv\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 5\u001b[0m reader \u001b[38;5;241m=\u001b[39m csv\u001b[38;5;241m.\u001b[39mDictReader(f)\n\u001b[0;32m----> 7\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m row \u001b[38;5;129;01min\u001b[39;00m reader:\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28mprint\u001b[39m(row)\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n", + "File \u001b[0;32m/usr/lib/python3.10/csv.py:110\u001b[0m, in \u001b[0;36mDictReader.__next__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 107\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__next__\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 108\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mline_num \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 109\u001b[0m \u001b[38;5;66;03m# Used 
only for its side effect.\u001b[39;00m\n\u001b[0;32m--> 110\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfieldnames\u001b[49m\n\u001b[1;32m 111\u001b[0m row \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mnext\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mreader)\n\u001b[1;32m 112\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mline_num \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mreader\u001b[38;5;241m.\u001b[39mline_num\n", + "File \u001b[0;32m/usr/lib/python3.10/csv.py:97\u001b[0m, in \u001b[0;36mDictReader.fieldnames\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fieldnames \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 96\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 97\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fieldnames \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mnext\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreader\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 98\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m:\n\u001b[1;32m 99\u001b[0m \u001b[38;5;28;01mpass\u001b[39;00m\n", + "\u001b[0;31mError\u001b[0m: iterator should return strings, not bytes (the file should be opened in text mode)" + ] + } + ], + "source": [ + "# code for goal 2 & goal 1\n", + "zf = zipfile.ZipFile(\"wi.zip\")\n", + "f = zf.open(\"wi.csv\")\n", + "\n", + "reader = csv.DictReader(f)\n", + "\n", + "for row in reader:\n", + " print(row)\n", + " break\n", + "\n", + "f.close()\n", + "zf.close()" + ] + }, + { + "cell_type": "markdown", + "id": "c1e9cfba", + "metadata": {}, + "source": [ + "### Let's learn more modes for `open` built-in function\n", + "- `open(..., mode=\"r\")` => text 
(default)\n", + "- `open(..., mode=\"rb\")` => bytes\n", + "- `zf.open(...)` => always bytes\n", + "\n", + "With `zipfile` module there isn't a way for us to specify that we need text.\n", + "\n", + "### `TextIOWrapper` inside `io` module enables us to convert `bytes` into `str`" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "6f52b0ae", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'activity_year': '2020', 'lei': '549300FX7K8PTEQUU487', 'derived_msa-md': '31540', 'state_code': 'WI', 'county_code': '55025', 'census_tract': '55025002402', 'conforming_loan_limit': 'C', 'derived_loan_product_type': 'Conventional:First Lien', 'derived_dwelling_category': 'Single Family (1-4 Units):Site-Built', 'derived_ethnicity': 'Not Hispanic or Latino', 'derived_race': 'White', 'derived_sex': 'Male', 'action_taken': '3', 'purchaser_type': '0', 'preapproval': '2', 'loan_type': '1', 'loan_purpose': '4', 'lien_status': '1', 'reverse_mortgage': '2', 'open-end_line_of_credit': '1', 'business_or_commercial_purpose': '2', 'loan_amount': '225000.0', 'loan_to_value_ratio': '78.671', 'interest_rate': 'NA', 'rate_spread': 'NA', 'hoepa_status': '3', 'total_loan_costs': 'NA', 'total_points_and_fees': 'NA', 'origination_charges': 'NA', 'discount_points': 'NA', 'lender_credits': 'NA', 'loan_term': '360', 'prepayment_penalty_term': 'NA', 'intro_rate_period': '1', 'negative_amortization': '2', 'interest_only_payment': '2', 'balloon_payment': '2', 'other_nonamortizing_features': '2', 'property_value': '285000', 'construction_method': '1', 'occupancy_type': '1', 'manufactured_home_secured_property_type': '3', 'manufactured_home_land_property_interest': '5', 'total_units': '1', 'multifamily_affordable_units': 'NA', 'income': '0', 'debt_to_income_ratio': '>60%', 'applicant_credit_score_type': '1', 'co-applicant_credit_score_type': '10', 'applicant_ethnicity-1': '2', 'applicant_ethnicity-2': '', 'applicant_ethnicity-3': '', 
'applicant_ethnicity-4': '', 'applicant_ethnicity-5': '', 'co-applicant_ethnicity-1': '5', 'co-applicant_ethnicity-2': '', 'co-applicant_ethnicity-3': '', 'co-applicant_ethnicity-4': '', 'co-applicant_ethnicity-5': '', 'applicant_ethnicity_observed': '2', 'co-applicant_ethnicity_observed': '4', 'applicant_race-1': '5', 'applicant_race-2': '', 'applicant_race-3': '', 'applicant_race-4': '', 'applicant_race-5': '', 'co-applicant_race-1': '8', 'co-applicant_race-2': '', 'co-applicant_race-3': '', 'co-applicant_race-4': '', 'co-applicant_race-5': '', 'applicant_race_observed': '2', 'co-applicant_race_observed': '4', 'applicant_sex': '1', 'co-applicant_sex': '5', 'applicant_sex_observed': '2', 'co-applicant_sex_observed': '4', 'applicant_age': '55-64', 'co-applicant_age': '9999', 'applicant_age_above_62': 'Yes', 'co-applicant_age_above_62': 'NA', 'submission_of_application': '1', 'initially_payable_to_institution': '1', 'aus-1': '6', 'aus-2': '', 'aus-3': '', 'aus-4': '', 'aus-5': '', 'denial_reason-1': '1', 'denial_reason-2': '', 'denial_reason-3': '', 'denial_reason-4': '', 'tract_population': '3572', 'tract_minority_population_percent': '41.1499999999999986', 'ffiec_msa_md_median_family_income': '96600', 'tract_to_msa_income_percentage': '64', 'tract_owner_occupied_units': '812', 'tract_one_to_four_family_homes': '910', 'tract_median_age_of_housing_units': '45'}\n" + ] + } + ], + "source": [ + "# code for goal 2 & goal 1\n", + "zf = zipfile.ZipFile(\"wi.zip\")\n", + "f = zf.open(\"wi.csv\")\n", + "\n", + "reader = csv.DictReader(TextIOWrapper(f))\n", + "\n", + "for row in reader:\n", + " print(row)\n", + " break\n", + "\n", + "f.close()\n", + "zf.close()" + ] + }, + { + "cell_type": "markdown", + "id": "3f138285", + "metadata": {}, + "source": [ + "### Let's go back to calculating average interest rate.\n", + "- Algorithm / Pseudocode steps:\n", + " 1. print \"interest rate\" and type of \"interest rate\"\n", + " 2. 
convert \"interest rate\" into `float` - how can we handle errors? `try` ... `except` ... (*IMPORTANT*: always have your `except` block catch specific exceptions)\n", + " 3. calculate running total, count for each row of data\n", + " 4. calculate average" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "6be87e1a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3.266264315063054" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "zf = zipfile.ZipFile(\"wi.zip\")\n", + "f = zf.open(\"wi.csv\")\n", + "\n", + "total = 0\n", + "count = 0\n", + "\n", + "reader = csv.DictReader(TextIOWrapper(f))\n", + "\n", + "for row in reader:\n", + " try:\n", + " total += float(row[\"interest_rate\"])\n", + " count += 1\n", + " except ValueError:\n", + " pass # do nothing\n", + "\n", + "f.close()\n", + "zf.close()\n", + "\n", + "total / count" + ] + }, + { + "cell_type": "markdown", + "id": "c000b405", + "metadata": {}, + "source": [ + "### Let's generalize the code to read \"interest rate\" into a function.\n", + "\n", + "- This does make things worse because we are going back to reading all the data before doing the computation.\n", + "- But this sets us up to learn about generators." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "aaf33408", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3.266264315063054" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def get_rates_v1():\n", + " rates = []\n", + " \n", + " zf = zipfile.ZipFile(\"wi.zip\")\n", + " f = zf.open(\"wi.csv\")\n", + "\n", + " reader = csv.DictReader(TextIOWrapper(f))\n", + " \n", + " for row in reader:\n", + " try:\n", + " rates.append(float(row[\"interest_rate\"]))\n", + " except ValueError:\n", + " pass # do nothing\n", + "\n", + " f.close()\n", + " zf.close()\n", + " \n", + " return rates\n", + "\n", + "rates = get_rates_v1()\n", + "sum(rates) / len(rates)" + ] + }, + { + "cell_type": "markdown", + "id": "6eced472", + "metadata": {}, + "source": [ + "### Using a generator\n", + "- `yield` each value\n", + "- use `next` to get the next value => internally `for` loop invokes `next` for each iteration" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "22c8a41b-1356-4aad-8f92-c3db19ce4c03", + "metadata": {}, + "outputs": [], + "source": [ + "def get_rates_v2():\n", + " print(\"Starting generator\")\n", + " \n", + " zf = zipfile.ZipFile(\"wi.zip\")\n", + " f = zf.open(\"wi.csv\")\n", + " \n", + " reader = csv.DictReader(TextIOWrapper(f))\n", + " \n", + " for row in reader:\n", + " try:\n", + " yield float(row[\"interest_rate\"])\n", + " except ValueError:\n", + " pass # do nothing\n", + "\n", + " f.close()\n", + " zf.close()\n", + "\n", + "rates = get_rates_v2()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "509a76db", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Starting generator\n" + ] + }, + { + "data": { + "text/plain": [ + "3.0" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "next(rates) # gives us the next 
value" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "8d6fc162", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3.75" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "next(rates) # gives us the next value" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "a863d383", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2.5" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "next(rates) # gives us the next value" + ] + }, + { + "cell_type": "markdown", + "id": "69848d5a", + "metadata": {}, + "source": [ + "### Let's use `for` loop to keep getting all the rates.\n", + "\n", + "- `len` function doesn't work with generators\n", + "- indexing doesn't work with generators" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "d31f9f9f", + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "object of type 'generator' has no len()", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[16], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mrates\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mTypeError\u001b[0m: object of type 'generator' has no len()" + ] + } + ], + "source": [ + "len(rates)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "0c73ca83", + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "'generator' object is not subscriptable", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call 
last)", + "Cell \u001b[0;32mIn[17], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mrates\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m4\u001b[39;49m\u001b[43m]\u001b[49m\n", + "\u001b[0;31mTypeError\u001b[0m: 'generator' object is not subscriptable" + ] + } + ], + "source": [ + "rates[4]" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "bdddd858", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Starting generator\n" + ] + }, + { + "data": { + "text/plain": [ + "3.266264315063054" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rates = get_rates_v2()\n", + "\n", + "total = 0\n", + "count = 0\n", + "\n", + "for rate in rates: # keeps calling next(rates) to get values from yield\n", + " total += rate\n", + " count += 1\n", + " \n", + "total / count" + ] + }, + { + "cell_type": "markdown", + "id": "c93d951c", + "metadata": {}, + "source": [ + "This approach doesn't work for median calculation. Why? Remember we have to sort, so we need all values in memory." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "ace73646-1b9c-45d6-881f-832a5e170766", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Starting generator\n" + ] + } + ], + "source": [ + "rates = list(get_rates_v2())\n", + "rates.sort()" + ] + }, + { + "cell_type": "markdown", + "id": "18d452b1", + "metadata": {}, + "source": [ + "# OOP 1: Classes\n", + "\n", + "- Creating new types using classes\n", + "- Types have specific attributes and methods (special functions)\n", + "- Using new types (classes), we can create object instances of those types\n", + "- class creation and instantiation syntax: \n", + "```python\n", + "class Person:\n", + " # some code\n", + "p1 = Person() # object instantiation using constructor\n", + "p2 = Person() # object instantiation using constructor\n", + "```\n", + "- attribute / method access syntax:\n", + "```python\n", + "p1.fname = \"...\" # attribute initialization\n", + "p1.lname = \"...\" # attribute initialization\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "350c45f3", + "metadata": {}, + "outputs": [], + "source": [ + "p1 = {\"fname\": \"Bob\", \"lname\": \"Baker\"}\n", + "\n", + "p2 = dict()\n", + "p2[\"fname\"] = \"Cindy\"\n", + "p2[\"lname\"] = \"Cooper\"\n", + "\n", + "p3 = {\"Fname\": \"Alice\", \"lname\": \"Anderson\"}\n", + "\n", + "# TODO: Let's define a Person class\n", + "class Person:\n", + " pass\n", + "\n", + "p4 = Person()\n", + "p4.fname = \"Meena\"\n", + "p4.lname = \"Syamkumar\"" + ] + }, + { + "cell_type": "markdown", + "id": "691f9170", + "metadata": {}, + "source": [ + "### Let's create a `Dog` class." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "2b462cdd", + "metadata": {}, + "outputs": [], + "source": [ + "class Dog:\n", + " pass # eventually we will learn how to write code inside a class" + ] + }, + { + "cell_type": "markdown", + "id": "a8a6311b", + "metadata": {}, + "source": [ + "### Let's create `Dog` object instances and add attributes." + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "5c65755d", + "metadata": {}, + "outputs": [], + "source": [ + "dog1 = Dog()\n", + "dog1.name = \"Jimmy\"\n", + "dog1.age = 2" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "3a7a3c62", + "metadata": {}, + "outputs": [], + "source": [ + "dog2 = Dog()\n", + "dog2.name = \"Buster\"" + ] + }, + { + "cell_type": "markdown", + "id": "244ddc14", + "metadata": {}, + "source": [ + "### Let's define a `speak` function that will make the `Dog` bark.\n", + "- Algorithm / pseudocode steps:\n", + " 1. puppies bark thrice (age < 2)\n", + " 2. dogs bark once" + ] + }, + { + "cell_type": "markdown", + "id": "fe66d607", + "metadata": {}, + "source": [ + "### `f-strings`\n", + "\n", + "- aka formatted string literals\n", + "- easier and quicker way of formatting `str` than `str.format(...)` method\n", + "\n", + "- Syntax: \n", + "```python\n", + "f\"{} ...\"\n", + "```\n", + "- inside `{}` you can specify a variable or even call a function or a method" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "a9e7f827", + "metadata": {}, + "outputs": [], + "source": [ + "def speak(dog):\n", + " if dog.age < 2:\n", + " print(f\"{dog.name}: bark bark bark!\")\n", + " else:\n", + " print(f\"{dog.name}: bark!\")" + ] + }, + { + "cell_type": "markdown", + "id": "33815738", + "metadata": {}, + "source": [ + "### Let's invoke `speak` for dog1 and dog2." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "d92fc8a4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Jimmy: bark!\n" + ] + } + ], + "source": [ + "speak(dog1)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "1de32f33", + "metadata": {}, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'Dog' object has no attribute 'age'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[26], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mspeak\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdog2\u001b[49m\u001b[43m)\u001b[49m\n", + "Cell \u001b[0;32mIn[24], line 2\u001b[0m, in \u001b[0;36mspeak\u001b[0;34m(dog)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mspeak\u001b[39m(dog):\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[43mdog\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mage\u001b[49m \u001b[38;5;241m<\u001b[39m \u001b[38;5;241m2\u001b[39m:\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdog\u001b[38;5;241m.\u001b[39mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m: bark bark bark!\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", + "\u001b[0;31mAttributeError\u001b[0m: 'Dog' object has no attribute 'age'" + ] + } + ], + "source": [ + "speak(dog2)" + ] + }, + { + "cell_type": "markdown", + "id": "4424e7cd", + "metadata": {}, + "source": [ + "### How can we standardize the attribute initialization to avoid bugs?\n", + "\n", + "- Eventually we will learn about how to define methods inside the class, which will include `__init__` method.\n", + "- For now, 
let's define an `init` function." + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "9420fad8", + "metadata": {}, + "outputs": [], + "source": [ + "def init(dog, name, how_old):\n", + " dog.name = name\n", + " dog.age = how_old" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "10397e6b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Spark: bark!\n" + ] + } + ], + "source": [ + "dog2 = Dog()\n", + "init(dog2, \"Spark\", 10)\n", + "speak(dog2)" + ] + }, + { + "cell_type": "markdown", + "id": "780546f0", + "metadata": {}, + "source": [ + "### What if there are two `speak` functions? Let's define a Cat class and corresponding `speak` function." + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "68f59e90", + "metadata": {}, + "outputs": [], + "source": [ + "class Cat:\n", + " pass\n", + "\n", + "cat1 = Cat()\n", + "\n", + "def speak(cat):\n", + " \"\"\"\n", + " Cats meow!\n", + " \"\"\"\n", + " print(\"meow!\")" + ] + }, + { + "cell_type": "markdown", + "id": "3e59a318", + "metadata": {}, + "source": [ + "### What will be the output of the below function calls?" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "d99e9a47", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "meow!\n", + "meow!\n", + "meow!\n" + ] + } + ], + "source": [ + "speak(dog1)\n", + "speak(dog2)\n", + "speak(cat1)" + ] + }, + { + "cell_type": "markdown", + "id": "f28b640d", + "metadata": {}, + "source": [ + "### We lost the previous definition of the `speak` function because the second `def speak` replaced it: two functions cannot share the same name. What if `speak` were a method instead?\n", + "\n", + "### **IMPORTANT**: it is not recommended to re-define the same `class`. This is shown only for example purposes. You must always go back to the original cell and update the definition there."
+ ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "245b0f24", + "metadata": {}, + "outputs": [], + "source": [ + "class Dog:\n", + " # regular method\n", + " def init(dog, name, how_old): \n", + " dog.name = name\n", + " dog.age = how_old\n", + " \n", + " # regular method\n", + " def speak(dog):\n", + " \"\"\"\n", + " Puppies (age < 2) bark thrice, whereas dogs bark once.\n", + " \"\"\"\n", + " if dog.age < 2:\n", + " #print(dog.name + \": bark bark bark!\")\n", + " print(f\"{dog.name}: bark bark bark!\")\n", + " else:\n", + " #print(dog.name + \": bark!\")\n", + " print(f\"{dog.name}: bark!\")\n", + "\n", + "class Cat:\n", + " def speak(cat):\n", + " \"\"\"\n", + " Cats meow!\n", + " \"\"\"\n", + " print(\"meow!\")\n", + " \n", + "# Let's create object instances\n", + "dog1 = Dog()\n", + "Dog.init(dog1, \"Jimmy\", 1)\n", + "\n", + "dog2 = Dog()\n", + "Dog.init(dog2, \"Buster\", 10)\n", + "\n", + "cat1 = Cat()" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "537614a0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Jimmy: bark bark bark!\n", + "Buster: bark!\n", + "meow!\n" + ] + } + ], + "source": [ + "# speak now is a method, so we need to use . attribute operator for invocation\n", + "Dog.speak(dog1)\n", + "Dog.speak(dog2)\n", + "Cat.speak(cat1)" + ] + }, + { + "cell_type": "markdown", + "id": "0f175520", + "metadata": {}, + "source": [ + "### Type-based dispatch" + ] + }, + { + "cell_type": "markdown", + "id": "300fd480", + "metadata": {}, + "source": [ + "#### Let's create a list of animals and print `type` of each animal." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "87d16d48", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "<class '__main__.Dog'>\n", + "<class '__main__.Dog'>\n", + "<class '__main__.Cat'>\n" + ] + } + ], + "source": [ + "animals = [dog1, dog2, cat1]\n", + "\n", + "for animal in animals:\n", + " print(type(animal))" + ] + }, + { + "cell_type": "markdown", + "id": "f82dacea", + "metadata": {}, + "source": [ + "#### Even though `type` output displays additional details, in essence a type is just the name of the class: `Dog`, `Cat`, etc." + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "a88e4859", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(dog1) == Dog" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "3a765271", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(cat1) == Cat" + ] + }, + { + "cell_type": "markdown", + "id": "81bb5aee", + "metadata": {}, + "source": [ + "#### Let's invoke speak for all animals." + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "a6696743", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Jimmy: bark bark bark!\n", + "Buster: bark!\n", + "meow!\n" + ] + } + ], + "source": [ + "# v1: bad version\n", + "for animal in animals:\n", + " if type(animal) == Dog:\n", + " Dog.speak(animal)\n", + " elif type(animal) == Cat:\n", + " Cat.speak(animal)\n", + " # this conditional will keep growing as we add more and \n", + " # more animal classes!"
+ ] + }, + { + "cell_type": "markdown", + "id": "920e7522", + "metadata": {}, + "source": [ + "#### Here is a slightly better version" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "31100bf6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Jimmy: bark bark bark!\n", + "Buster: bark!\n", + "meow!\n" + ] + } + ], + "source": [ + "for animal in animals:\n", + " type(animal).speak(animal)" + ] + }, + { + "cell_type": "markdown", + "id": "8ef0373c", + "metadata": {}, + "source": [ + "#### Notice how the animal is redundant. There is a better way to invoke methods.\n", + "\n", + "- Syntax: `obj_ref.method()`\n", + "- `obj_ref` itself will be the first argument to the method." + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "50e4d329", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Jimmy: bark bark bark!\n", + "Buster: bark!\n", + "meow!\n" + ] + } + ], + "source": [ + "for animal in animals:\n", + " # this is equivalent to type(animal).speak(animal)\n", + " animal.speak()" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "000bd875", + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "Dog.speak() takes 1 positional argument but 2 were given", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[44], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdog1\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mspeak\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mhello\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;66;03m# Observe how TypeError says 1 positional argument expected\u001b[39;00m\n", + 
"\u001b[0;31mTypeError\u001b[0m: Dog.speak() takes 1 positional argument but 2 were given" + ] + } + ], + "source": [ + "dog1.speak(\"hello\")\n", + "# Observe how TypeError says 1 positional argument expected" + ] + }, + { + "cell_type": "markdown", + "id": "887fabde-5e7a-4fb7-bb3b-cc69a2b7b6a4", + "metadata": {}, + "source": [ + "## `self`\n", + "\n", + "- refers to the current object instance (aka receiver) inside a class\n", + "- attribute access inside the class **must** always use `self.<attribute>` syntax" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "152f774c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Jimmy: bark bark bark!\n", + "Buster: bark!\n" + ] + } + ], + "source": [ + "class Dog:\n", + " # regular method\n", + " def init(self, name, how_old): \n", + " self.name = name\n", + " self.age = how_old\n", + " \n", + " # regular method\n", + " def speak(self):\n", + " \"\"\"\n", + " Puppies (age < 2) bark thrice, whereas dogs bark once.\n", + " \"\"\"\n", + " if self.age < 2:\n", + " #print(dog.name + \": bark bark bark!\")\n", + " print(f\"{self.name}: bark bark bark!\")\n", + " else:\n", + " #print(dog.name + \": bark!\")\n", + " print(f\"{self.name}: bark!\")\n", + "\n", + "\n", + "# Let's create Dog object instances\n", + "dog1 = Dog() \n", + "Dog.init(dog1, \"Jimmy\", 1)\n", + "\n", + "dog2 = Dog()\n", + "init(dog2, \"Buster\", 10)\n", + "\n", + "# Invoke speak for dog1 and dog2\n", + "dog1.speak()\n", + "dog2.speak()" + ] + }, + { + "cell_type": "markdown", + "id": "d6284d16", + "metadata": {}, + "source": [ + "# OOP: Special Methods\n", + "\n", + "\"Special methods\" is a technical term referring to methods that get called automatically. In Python, they usually begin and end with double underscores.\n", + "- **Note:** you could define a regular method with `__<method>__`." 
+ ] + }, + { + "cell_type": "markdown", + "id": "6454cdaf", + "metadata": {}, + "source": [ + "### `__init__` special method (aka Constructor)\n", + "\n", + "- automatically invoked when creating an object instance\n", + "- only one possible constructor in Python" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "d7820ba3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Creating a dog!\n", + "Creating a dog!\n", + "Jimmy: bark bark bark!\n", + "Buster: bark!\n" + ] + } + ], + "source": [ + "# This is the correct and final version of Dog class\n", + "class Dog:\n", + " # special method\n", + " def __init__(self, name, how_old): \n", + " print(\"Creating a dog!\")\n", + " self.name = name\n", + " self.age = how_old\n", + " \n", + " # regular method\n", + " def speak(self):\n", + " \"\"\"\n", + " Puppies (age < 2) bark thrice, whereas dogs bark once.\n", + " \"\"\"\n", + " if self.age < 2:\n", + " #print(dog.name + \": bark bark bark!\")\n", + " print(f\"{self.name}: bark bark bark!\")\n", + " else:\n", + " #print(dog.name + \": bark!\")\n", + " print(f\"{self.name}: bark!\")\n", + "\n", + "\n", + "# Let's create Dog object instances\n", + "dog1 = Dog(\"Jimmy\", 1)\n", + "dog2 = Dog(\"Buster\", 10)\n", + "\n", + "# Invoke speak for dog1 and dog2\n", + "dog1.speak()\n", + "dog2.speak()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/lecture_material/05-oop1/template_lec_001.ipynb b/lecture_material/05-oop1/template_lec_001.ipynb new file mode 100644 index 0000000..1cf280b --- /dev/null +++ 
b/lecture_material/05-oop1/template_lec_001.ipynb @@ -0,0 +1,1157 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "1a6cc54c", + "metadata": {}, + "source": [ + "# Performance 3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "783117c5-146f-454a-963e-ed2873b8a6d3", + "metadata": {}, + "outputs": [], + "source": [ + "# known import statements\n", + "import pandas as pd\n", + "import csv\n", + "from subprocess import check_output\n", + "\n", + "# new import statements\n", + "import zipfile\n", + "from io import TextIOWrapper" + ] + }, + { + "cell_type": "markdown", + "id": "66db2ad0", + "metadata": {}, + "source": [ + "### Let's take a look at the files inside the current working directory." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6cef713e", + "metadata": {}, + "outputs": [], + "source": [ + "str(check_output([\"ls\", \"-lh\"]), encoding=\"utf-8\").split(\"\\n\")" + ] + }, + { + "cell_type": "markdown", + "id": "c76f819d", + "metadata": {}, + "source": [ + "### Let's `unzip` \"wi.zip\"." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0e87ec01", + "metadata": {}, + "outputs": [], + "source": [ + "check_output([\"unzip\", \"wi.zip\"])" + ] + }, + { + "cell_type": "markdown", + "id": "274fa49a", + "metadata": {}, + "source": [ + "### Let's take a look at the files inside the current working directory." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a2da3cd0", + "metadata": {}, + "outputs": [], + "source": [ + "str(check_output([\"ls\", \"-lh\"]), encoding=\"utf-8\").split(\"\\n\")" + ] + }, + { + "cell_type": "markdown", + "id": "90b11343", + "metadata": {}, + "source": [ + "### Traditional way of reading data using pandas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a3175526", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv(\"wi.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13e6e034", + "metadata": {}, + "outputs": [], + "source": [ + "df.head(5) # Top 5 rows within the DataFrame" + ] + }, + { + "cell_type": "markdown", + "id": "5c79984c", + "metadata": {}, + "source": [ + "### How can we see all the column names?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "08d9501d", + "metadata": {}, + "outputs": [], + "source": [ + "df.columns" + ] + }, + { + "cell_type": "markdown", + "id": "a519f383", + "metadata": {}, + "source": [ + "### How to extract `interest_rate`?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "400b885c", + "metadata": {}, + "outputs": [], + "source": [ + "df # observe that there are missing values" + ] + }, + { + "cell_type": "markdown", + "id": "890c6d2c", + "metadata": {}, + "source": [ + "### How to count unique values in a column `Series`?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ca108069", + "metadata": {}, + "outputs": [], + "source": [ + "df[\"interest_rate\"]" + ] + }, + { + "cell_type": "markdown", + "id": "715853ee", + "metadata": {}, + "source": [ + "### Let's eliminate the strings (Exempt) and missing values (NaN).\n", + "Let's try `pd.to_numeric(...)`. We need a way to specify that strings need to be converted into NaN values."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "69b00b57", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# TODO: open the documentation and figure out what parameter will help us\n", + "# Recall that we can press shift + tab after a function name to open the documentation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c51c8952", + "metadata": {}, + "outputs": [], + "source": [ + "pd.to_numeric(df[\"interest_rate\"], errors=\"coerce\")" + ] + }, + { + "cell_type": "markdown", + "id": "60344f67", + "metadata": {}, + "source": [ + "### Let's drop the NaN values and compute average interest rate." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0de2786f", + "metadata": {}, + "outputs": [], + "source": [ + "pd.to_numeric(df[\"interest_rate\"], errors=\"coerce\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ba74550a", + "metadata": {}, + "outputs": [], + "source": [ + "pd.to_numeric(df[\"interest_rate\"], errors=\"coerce\").dropna()" + ] + }, + { + "cell_type": "markdown", + "id": "ed860a64-6d5f-4169-9217-073e54979028", + "metadata": {}, + "source": [ + "### Clearing memory using re-assignment.\n", + "In Python, you can clear memory used up for an object simply by getting rid of all the active references. But we cannot do that for the current notebook because we used \"df\" to perform other operations, so there is more than one active reference. In fact, we don't even have access to some of the active references. In that case, you can only free up the memory after you \"shutdown\" the current notebook."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f28cb0b7-beb5-44e0-a4ad-9d10dbd427bd", + "metadata": {}, + "outputs": [], + "source": [ + "df = \"some string\"" + ] + }, + { + "cell_type": "markdown", + "id": "d0d56c80", + "metadata": {}, + "source": [ + "### How can we read the data without creating an uncompressed version called \"wi.csv\"?\n", + "\n", + "- Why would we want to do something like that?\n", + " 1. lower memory usage (we can try to load information on one loan at a time, instead of all the loans): that will still work for average interest rate computation\n", + " 2. lower storage usage (you can directly work with compressed data)\n", + " \n", + "**IMPORTANT**: do not run this cell code unless you shutdown the notebook - your kernel will crash (you will run out of memory space)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b24c0723", + "metadata": {}, + "outputs": [], + "source": [ + "# IMPORTANT: do not run this cell code unless you shutdown the notebook - your kernel will crash (you will run out of memory space)\n", + "f = open(\"wi.csv\")\n", + "# instead of passing relative path of file name, we can pass a file object instance reference\n", + "df = pd.read_csv(f) \n", + "f.close()" + ] + }, + { + "cell_type": "markdown", + "id": "f8a9da46-53a5-4a7a-9d0e-6102aed9ea13", + "metadata": {}, + "source": [ + "### Let's free up memory and delete \"wi.csv\"." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "457ac313-eb56-445b-b899-65a060ac8b07", + "metadata": {}, + "outputs": [], + "source": [ + "df = \"some string\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f26e069f-d5c5-46e0-bc91-8a8e55aae427", + "metadata": {}, + "outputs": [], + "source": [ + "check_output([\"rm\", \"wi.csv\"])\n", + "str(check_output([\"ls\", \"-lh\"]), encoding=\"utf-8\").split(\"\\n\")" + ] + }, + { + "cell_type": "markdown", + "id": "16a150df", + "metadata": {}, + "source": [ + "### How can we read data directly from a zip file?\n", + "`zipfile.ZipFile(...)`" + ] + }, + { + "cell_type": "markdown", + "id": "0eff57fa", + "metadata": {}, + "source": [ + "### Goals:\n", + "1. directly access the data without decompressing: `zipfile.ZipFile(...)` - saves storage space by directly opening a zip file\n", + "2. only look at one row at a time: `csv.DictReader(...)` - saves memory space by enabling us to read one row at a time (as `dict`)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a85ca8a2", + "metadata": {}, + "outputs": [], + "source": [ + "# code for goal 1\n", + "\n", + "f = open(\"wi.csv\")\n", + "df = pd.read_csv(f) \n", + "f.close()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4eee3add-cb92-4654-ab29-2b5ae68c10d5", + "metadata": {}, + "outputs": [], + "source": [ + "# Free up the memory again\n", + "df = \"some string\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6a194b9b", + "metadata": {}, + "outputs": [], + "source": [ + "# code for goal 2 & goal 1\n", + "zf = zipfile.ZipFile(\"wi.zip\")\n", + "f = zf.open(\"wi.csv\")\n", + "\n", + "df = pd.read_csv(f) \n", + "\n", + "f.close()\n", + "zf.close()" + ] + }, + { + "cell_type": "markdown", + "id": "ca72997f", + "metadata": {}, + "source": [ + "### Let's learn more modes for `open` built-in function\n", + "- `open(..., mode=\"r\")` => text (default)\n", + "- 
`open(..., mode=\"rb\")` => bytes\n", + "- `zf.open(...)` => always bytes\n", + "\n", + "With `zipfile` module there isn't a way for us to specify that we need text.\n", + "\n", + "### `TextIOWrapper` inside `io` module enables us to convert `bytes` into `str`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ccc4954f", + "metadata": {}, + "outputs": [], + "source": [ + "# code for goal 2 & goal 1\n", + "zf = zipfile.ZipFile(\"wi.zip\")\n", + "f = zf.open(\"wi.csv\")\n", + "\n", + "reader = csv.DictReader(f)\n", + "\n", + "for row in reader:\n", + " print(row)\n", + " break\n", + "\n", + "f.close()\n", + "zf.close()" + ] + }, + { + "cell_type": "markdown", + "id": "8af3971c", + "metadata": {}, + "source": [ + "### Let's go back to calculating average interest rate.\n", + "- Algorithm / Pseudocode steps:\n", + " 1. print \"interest rate\" and type of \"interest rate\"\n", + " 2. convert \"interest rate\" into `float` - how can we handle errors? `try` ... `except` ... (*IMPORTANT*: always have your `except` block catch specific exceptions)\n", + " 3. calculate running total, count for each row of data\n", + " 4. calculate average" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8ad2b730", + "metadata": {}, + "outputs": [], + "source": [ + "zf = zipfile.ZipFile(\"wi.zip\")\n", + "f = zf.open(\"wi.csv\")\n", + "\n", + "reader = csv.DictReader(TextIOWrapper(f))\n", + "\n", + "for row in reader:\n", + " print(row)\n", + " break\n", + "\n", + "f.close()\n", + "zf.close()\n", + "\n", + "total / count" + ] + }, + { + "cell_type": "markdown", + "id": "51e7081b", + "metadata": {}, + "source": [ + "### Let's generalize the code to read \"interest rate\" into a function.\n", + "\n", + "- This does make things worse because we are going back to reading all the data before doing the computation.\n", + "- But this sets us up to learn about generators." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79334762", + "metadata": {}, + "outputs": [], + "source": [ + "def get_rates_v1():\n", + " pass" + ] + }, + { + "cell_type": "markdown", + "id": "41f4bfe4", + "metadata": {}, + "source": [ + "### Using a generator\n", + "- `yield` each value\n", + "- use `next` to get the next value => internally `for` loop invokes `next` for each iteration" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22c8a41b-1356-4aad-8f92-c3db19ce4c03", + "metadata": {}, + "outputs": [], + "source": [ + "def get_rates_v2():\n", + " pass" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fe2f060b", + "metadata": {}, + "outputs": [], + "source": [ + "next(rates) # gives us the next value" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ea5c188d", + "metadata": {}, + "outputs": [], + "source": [ + "next(rates) # gives us the next value" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3101b71c", + "metadata": {}, + "outputs": [], + "source": [ + "next(rates) # gives us the next value" + ] + }, + { + "cell_type": "markdown", + "id": "b47aab60", + "metadata": {}, + "source": [ + "### Let's use `for` loop to keep getting all the rates.\n", + "\n", + "- `len` function doesn't work with generators\n", + "- indexing doesn't work with generators" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a074669", + "metadata": {}, + "outputs": [], + "source": [ + "len(rates)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d536ef4d", + "metadata": {}, + "outputs": [], + "source": [ + "rates[4]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f5dbd13", + "metadata": {}, + "outputs": [], + "source": [ + "rates = get_rates_v2()\n", + "\n", + "total = 0\n", + "count = 0\n", + "\n", + " # keeps calling next(rates) to get values from yield\n", + "\n", + " \n", + "total / count" + ] 
+ }, + { + "cell_type": "markdown", + "id": "ff9e77e5", + "metadata": {}, + "source": [ + "This approach doesn't work for median calculation. Why? Remember we have to sort, so we need all values in memory." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ace73646-1b9c-45d6-881f-832a5e170766", + "metadata": {}, + "outputs": [], + "source": [ + "rates = list(get_rates_v2())\n", + "rates.sort()" + ] + }, + { + "cell_type": "markdown", + "id": "f6ff9655", + "metadata": {}, + "source": [ + "# OOP 1: Classes\n", + "\n", + "- Creating new types using classes\n", + "- Types have specific attributes and methods (special functions)\n", + "- Using new types (classes), we can create object instances of those types\n", + "- class creation and instantiation syntax: \n", + "```python\n", + "class Person:\n", + " # some code\n", + "p1 = Person() # object instantiation using constructor\n", + "p2 = Person() # object instantiation using constructor\n", + "```\n", + "- attribute / method access syntax:\n", + "```python\n", + "p1.fname = \"...\" # attribute initialization\n", + "p1.lname = \"...\" # attribute initialization\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "9b693dec", + "metadata": {}, + "source": [ + "#### PythonTutor example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8372eccf", + "metadata": {}, + "outputs": [], + "source": [ + "p1 = {\"fname\": \"Bob\", \"lname\": \"Baker\"}\n", + "\n", + "p2 = dict()\n", + "p2[\"fname\"] = \"Cindy\"\n", + "p2[\"lname\"] = \"Cooper\"\n", + "\n", + "p3 = {\"Fname\": \"Alice\", \"lname\": \"Anderson\"}\n", + "\n", + "# TODO: Let's define a Person class" + ] + }, + { + "cell_type": "markdown", + "id": "6427a25c", + "metadata": {}, + "source": [ + "### Let's create a `Dog` class." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5154fd9e", + "metadata": {}, + "outputs": [], + "source": [ + "# eventually we will learn how to write code inside a class" + ] + }, + { + "cell_type": "markdown", + "id": "7d3a0c2a", + "metadata": {}, + "source": [ + "### Let's create `Dog` object instances and add attributes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "db5558da", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0600c5b8", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "93b2e3a9", + "metadata": {}, + "source": [ + "### Let's define a `speak` function that will make the `Dog` bark.\n", + "- Algorithm / pseudocode steps:\n", + " 1. puppies bark thrice (age < 2)\n", + " 2. dogs bark once" + ] + }, + { + "cell_type": "markdown", + "id": "2abbf327", + "metadata": {}, + "source": [ + "### `f-strings`\n", + "\n", + "- aka formatted string literals\n", + "- easier and quicker way of formatting `str` than `str.format(...)` method\n", + "\n", + "- Syntax: \n", + "```python\n", + "f\"{} ...\"\n", + "```\n", + "- inside `{}` you can specify a variable or even call a function or a method" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1592767d", + "metadata": {}, + "outputs": [], + "source": [ + "def speak(dog):\n", + " if dog.age < 2:\n", + " pass\n", + " else:\n", + " pass" + ] + }, + { + "cell_type": "markdown", + "id": "b3cbd57e", + "metadata": {}, + "source": [ + "### Let's invoke `speak` for dog1 and dog2." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f9fa462", + "metadata": {}, + "outputs": [], + "source": [ + "speak(dog1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "61b30ab8", + "metadata": {}, + "outputs": [], + "source": [ + "speak(dog2)" + ] + }, + { + "cell_type": "markdown", + "id": "38c31465", + "metadata": {}, + "source": [ + "### How can we standardize the attribute initialization to avoid bugs?\n", + "\n", + "- Eventually we will learn about how to define methods inside the class, which will include `__init__` method.\n", + "- For now, let's define an `init` function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2682bdb7", + "metadata": {}, + "outputs": [], + "source": [ + "def init(???):\n", + " pass" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4cc6c3a1", + "metadata": {}, + "outputs": [], + "source": [ + "dog2 = Dog()\n", + "init(???)\n", + "speak(dog2)" + ] + }, + { + "cell_type": "markdown", + "id": "79bfd963", + "metadata": {}, + "source": [ + "### What if there are two `speak` functions? Let's define a Cat class and corresponding `speak` function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "595a80e8", + "metadata": {}, + "outputs": [], + "source": [ + "class Cat:\n", + " pass\n", + "\n", + "cat1 = Cat()\n", + "\n", + "def speak(cat):\n", + " \"\"\"\n", + " Cats meow!\n", + " \"\"\"\n", + " print(\"meow!\")" + ] + }, + { + "cell_type": "markdown", + "id": "b41e88d5", + "metadata": {}, + "source": [ + "### What will be the output of the below function calls?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c800545f", + "metadata": {}, + "outputs": [], + "source": [ + "speak(dog1)\n", + "speak(dog2)\n", + "speak(cat1)" + ] + }, + { + "cell_type": "markdown", + "id": "bf69e4ff", + "metadata": {}, + "source": [ + "### We lost the previous definition of the `speak` function because it is a function. What if `speak` were a method instead?" + ] + }, + { + "cell_type": "markdown", + "id": "be101310", + "metadata": {}, + "source": [ + "### **IMPORTANT**: it is not recommended to re-define same `class`. This is shown only for example purposes. You must always go back to the original cell and update the definition there." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7ef5425a", + "metadata": {}, + "outputs": [], + "source": [ + "class Dog:\n", + " pass # eventually we will learn how to write code inside a class\n", + "\n", + "# Regular function that accepts an object instance of the new type\n", + "def speak(dog):\n", + " \"\"\"\n", + " Puppies (age < 2) bark thrice, whereas dogs bark once.\n", + " \"\"\"\n", + " if dog.age < 2:\n", + " #print(dog.name + \": bark bark bark!\")\n", + " print(f\"{dog.name}: bark bark bark!\")\n", + " else:\n", + " #print(dog.name + \": bark!\")\n", + " print(f\"{dog.name}: bark!\")\n", + " \n", + "# Regular function that accepts an object instance of the new type along with attribute values\n", + "def init(dog, name, how_old):\n", + " dog.name = name\n", + " dog.age = how_old\n", + " \n", + "class Cat:\n", + " pass\n", + "\n", + "def speak(cat):\n", + " \"\"\"\n", + " Cats meow!\n", + " \"\"\"\n", + " print(\"meow!\")\n", + " \n", + "# Let's create object instances\n", + "dog1 = Dog()\n", + "init(dog1, \"Jimmy\", 1)\n", + "\n", + "dog2 = Dog()\n", + "init(dog2, \"Buster\", 10)\n", + "\n", + "cat1 = Cat()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d24e147a", + "metadata": {}, + "outputs": [], + "source": [ + "# speak now 
is a method, so we need to use . attribute operator for invocation\n", + "speak(dog1)\n", + "speak(dog2)\n", + "speak(cat1)" + ] + }, + { + "cell_type": "markdown", + "id": "90da8bc6", + "metadata": {}, + "source": [ + "### Type-based dispatch" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "92f3c459", + "metadata": {}, + "outputs": [], + "source": [ + "animals = [dog1, dog2, cat1]\n", + "\n", + "for animal in animals:\n", + " print(type(animal))" + ] + }, + { + "cell_type": "markdown", + "id": "9680a740", + "metadata": {}, + "source": [ + "#### Even though `type` output displays additional details, in essence the type is just the name of the class: `Dog`, `Cat`, etc." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7787e0fe", + "metadata": {}, + "outputs": [], + "source": [ + "type(dog1) == Dog" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "49f9f9b7", + "metadata": {}, + "outputs": [], + "source": [ + "type(cat1) == Cat" + ] + }, + { + "cell_type": "markdown", + "id": "1e280958", + "metadata": {}, + "source": [ + "#### Let's invoke speak for all animals." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3cd9d9fb", + "metadata": {}, + "outputs": [], + "source": [ + "# v1: bad version\n", + "for animal in animals:\n", + " if type(animal) == Dog:\n", + " Dog.speak(animal)\n", + " elif type(animal) == Cat:\n", + " Cat.speak(animal)\n", + " # this conditional will keep growing as we add more and \n", + " # more animal classes!"
+ ] + }, + { + "cell_type": "markdown", + "id": "6cda6524", + "metadata": {}, + "source": [ + "#### Here is a slightly better version" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "423471d5", + "metadata": {}, + "outputs": [], + "source": [ + "for animal in animals:\n", + " type(animal).speak(animal)" + ] + }, + { + "cell_type": "markdown", + "id": "f1f81bb9", + "metadata": {}, + "source": [ + "### Method invocation (most commonly used syntax)\n", + "\n", + "Notice how the animal is redundant. There is a better way to invoke methods.\n", + "\n", + "- Syntax: `obj_ref.method()`\n", + "- `obj_ref` itself will be the first argument to the method." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "78ece030", + "metadata": {}, + "outputs": [], + "source": [ + "for animal in animals:\n", + " # this is equivalent to type(animal).speak(animal)" + ] + }, + { + "cell_type": "markdown", + "id": "3a264105", + "metadata": {}, + "source": [ + "#### Let's try passing an argument to `speak` method." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4270ed59", + "metadata": {}, + "outputs": [], + "source": [ + "dog1.speak(\"hello\")\n", + "# Observe how TypeError says 1 positional argument expected" + ] + }, + { + "cell_type": "markdown", + "id": "dc77d748", + "metadata": {}, + "source": [ + "## `self`\n", + "\n", + "- dedicated special variable that refers to the current object instance (aka receiver) inside a class\n", + "- attribute access inside the class **must** always use `self.<attribute>` syntax" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9af178a3", + "metadata": {}, + "outputs": [], + "source": [ + "class Dog:\n", + " # regular method\n", + " def init(dog, name, how_old): \n", + " dog.name = name\n", + " dog.age = how_old\n", + " \n", + " # regular method\n", + " def speak(dog):\n", + " \"\"\"\n", + " Puppies (age < 2) bark thrice, whereas dogs bark once.\n", + " \"\"\"\n", + " if dog.age < 2:\n", + " #print(dog.name + \": bark bark bark!\")\n", + " print(f\"{dog.name}: bark bark bark!\")\n", + " else:\n", + " #print(dog.name + \": bark!\")\n", + " print(f\"{dog.name}: bark!\")\n", + "\n", + "# Let's create Dog object instances\n", + "dog1 = Dog() \n", + "dog1.init(\"Jimmy\", 1)\n", + "\n", + "dog2 = Dog()\n", + "dog2.init(\"Buster\", 10)\n", + "\n", + "# Invoke speak for dog1 and dog2\n", + "dog1.speak()\n", + "dog2.speak()" + ] + }, + { + "cell_type": "markdown", + "id": "32631b72", + "metadata": {}, + "source": [ + "# OOP: Special Methods\n", + "\n", + "\"Special methods\" is a technical term referring to methods that get called automatically. In Python, they usually begin and end with double underscores.\n", + "- **Note:** you could define a regular method with `__<method>__`." 
+ ] + }, + { + "cell_type": "markdown", + "id": "2f7640b1", + "metadata": {}, + "source": [ + "### `__init__` special method (aka Constructor)\n", + "\n", + "- automatically invoked when creating an object instance\n", + "- only one possible constructor in Python" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "66ac7d8c", + "metadata": {}, + "outputs": [], + "source": [ + "# This is the correct and final version of Dog class\n", + "class Dog:\n", + " # regular method\n", + " def init(self, name, how_old): \n", + " self.name = name\n", + " self.age = how_old\n", + " \n", + " # regular method\n", + " def speak(self):\n", + " \"\"\"\n", + " Puppies (age < 2) bark thrice, whereas dogs bark once.\n", + " \"\"\"\n", + " if self.age < 2:\n", + " #print(dog.name + \": bark bark bark!\")\n", + " print(f\"{self.name}: bark bark bark!\")\n", + " else:\n", + " #print(dog.name + \": bark!\")\n", + " print(f\"{self.name}: bark!\")\n", + "\n", + "\n", + "# Let's create Dog object instances\n", + "dog1 = Dog() \n", + "dog1.init(\"Jimmy\", 1)\n", + "\n", + "dog2 = Dog()\n", + "dog2.init(\"Buster\", 10)\n", + "\n", + "# Invoke speak for dog1 and dog2\n", + "dog1.speak()\n", + "dog2.speak()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/lecture_material/05-oop1/template_lec_002.ipynb b/lecture_material/05-oop1/template_lec_002.ipynb new file mode 100644 index 0000000..cb2f581 --- /dev/null +++ b/lecture_material/05-oop1/template_lec_002.ipynb @@ -0,0 +1,1157 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "1a6cc54c", + "metadata": {}, + "source": [ + 
"# Performance 3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "783117c5-146f-454a-963e-ed2873b8a6d3", + "metadata": {}, + "outputs": [], + "source": [ + "# known import statements\n", + "import pandas as pd\n", + "import csv\n", + "from subprocess import check_output\n", + "\n", + "# new import statements\n", + "import zipfile\n", + "from io import TextIOWrapper" + ] + }, + { + "cell_type": "markdown", + "id": "66db2ad0", + "metadata": {}, + "source": [ + "### Let's take a look at the files inside the current working directory." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6cef713e", + "metadata": {}, + "outputs": [], + "source": [ + "str(check_output([\"ls\", \"-lh\"]), encoding=\"utf-8\").split(\"\\n\")" + ] + }, + { + "cell_type": "markdown", + "id": "c76f819d", + "metadata": {}, + "source": [ + "### Let's `unzip` \"wi.zip\"." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0e87ec01", + "metadata": {}, + "outputs": [], + "source": [ + "check_output([\"unzip\", \"wi.zip\"])" + ] + }, + { + "cell_type": "markdown", + "id": "274fa49a", + "metadata": {}, + "source": [ + "### Let's take a look at the files inside the current working directory." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a2da3cd0", + "metadata": {}, + "outputs": [], + "source": [ + "str(check_output([\"ls\", \"-lh\"]), encoding=\"utf-8\").split(\"\\n\")" + ] + }, + { + "cell_type": "markdown", + "id": "90b11343", + "metadata": {}, + "source": [ + "### Traditional way of reading data using pandas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a3175526", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv(\"wi.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13e6e034", + "metadata": {}, + "outputs": [], + "source": [ + "df.head(5) # Top 5 rows within the DataFrame" + ] + }, + { + "cell_type": "markdown", + "id": "5c79984c", + "metadata": {}, + "source": [ + "### How can we see all the column names?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "08d9501d", + "metadata": {}, + "outputs": [], + "source": [ + "df.columns" + ] + }, + { + "cell_type": "markdown", + "id": "a519f383", + "metadata": {}, + "source": [ + "### How to extract `interest_rate`?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "400b885c", + "metadata": {}, + "outputs": [], + "source": [ + "df # observe that there are missing values" + ] + }, + { + "cell_type": "markdown", + "id": "890c6d2c", + "metadata": {}, + "source": [ + "### How to count unique values in a column `Series`?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ca108069", + "metadata": {}, + "outputs": [], + "source": [ + "df[\"interest_rate\"]" + ] + }, + { + "cell_type": "markdown", + "id": "715853ee", + "metadata": {}, + "source": [ + "### Let's eliminate the strings (Exempt) and missing values (NaN).\n", + "Let's try `pd.to_numeric(...)`. We need a way to specify that strings need to be converted into NaN values."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "69b00b57", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# TODO: open the documentation and figure out what parameter will help us\n", + "# Recall that we can press shift + tab after a function name to open the documentation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c51c8952", + "metadata": {}, + "outputs": [], + "source": [ + "pd.to_numeric(df[\"interest_rate\"], errors=\"coerce\")" + ] + }, + { + "cell_type": "markdown", + "id": "60344f67", + "metadata": {}, + "source": [ + "### Let's drop the NaN values and compute average interest rate." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0de2786f", + "metadata": {}, + "outputs": [], + "source": [ + "pd.to_numeric(df[\"interest_rate\"], errors=\"coerce\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ba74550a", + "metadata": {}, + "outputs": [], + "source": [ + "pd.to_numeric(df[\"interest_rate\"], errors=\"coerce\").dropna()" + ] + }, + { + "cell_type": "markdown", + "id": "ed860a64-6d5f-4169-9217-073e54979028", + "metadata": {}, + "source": [ + "### Clearing memory using re-assignment.\n", + "In Python, you can clear memory used up for an object simply by getting rid of all the active references. But we cannot do that for the current notebook because we used \"df\" to perform other operations, so there is more than one active reference. In fact, we don't even have access to some of the active references. In that case, you can only free up the memory after you \"shut down\" the current notebook."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f28cb0b7-beb5-44e0-a4ad-9d10dbd427bd", + "metadata": {}, + "outputs": [], + "source": [ + "df = \"some string\"" + ] + }, + { + "cell_type": "markdown", + "id": "d0d56c80", + "metadata": {}, + "source": [ + "### How can we read the data without creating an uncompressed version called \"wi.csv\"?\n", + "\n", + "- Why would we want to do something like that?\n", + " 1. lower memory usage (we can try to load information on one loan at a time, instead of all the loans): that will still work for average interest rate computation\n", + " 2. lower storage usage (you can directly work with compressed data)\n", + " \n", + "**IMPORTANT**: do not run this cell code unless you shutdown the notebook - your kernel will crash (you will run out of memory space)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b24c0723", + "metadata": {}, + "outputs": [], + "source": [ + "# IMPORTANT: do not run this cell code unless you shutdown the notebook - your kernel will crash (you will run out of memory space)\n", + "f = open(\"wi.csv\")\n", + "# instead of passing relative path of file name, we can pass a file object instance reference\n", + "df = pd.read_csv(f) \n", + "f.close()" + ] + }, + { + "cell_type": "markdown", + "id": "f8a9da46-53a5-4a7a-9d0e-6102aed9ea13", + "metadata": {}, + "source": [ + "### Let's free up memory and delete \"wi.csv\"." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "457ac313-eb56-445b-b899-65a060ac8b07", + "metadata": {}, + "outputs": [], + "source": [ + "df = \"some string\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f26e069f-d5c5-46e0-bc91-8a8e55aae427", + "metadata": {}, + "outputs": [], + "source": [ + "check_output([\"rm\", \"wi.csv\"])\n", + "str(check_output([\"ls\", \"-lh\"]), encoding=\"utf-8\").split(\"\\n\")" + ] + }, + { + "cell_type": "markdown", + "id": "16a150df", + "metadata": {}, + "source": [ + "### How can we read data directly from a zip file?\n", + "`zipfile.ZipFile(...)`" + ] + }, + { + "cell_type": "markdown", + "id": "0eff57fa", + "metadata": {}, + "source": [ + "### Goals:\n", + "1. directly access the data without decompressing: `zipfile.ZipFile(...)` - saves storage space by directly opening a zip file\n", + "2. only look at one row at a time: `csv.DictReader(...)` - saves memory space by enabling us to read one row at a time (as `dict`)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a85ca8a2", + "metadata": {}, + "outputs": [], + "source": [ + "# code for goal 1\n", + "\n", + "f = open(\"wi.csv\")\n", + "df = pd.read_csv(f) \n", + "f.close()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4eee3add-cb92-4654-ab29-2b5ae68c10d5", + "metadata": {}, + "outputs": [], + "source": [ + "# Free up the memory again\n", + "df = \"some string\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6a194b9b", + "metadata": {}, + "outputs": [], + "source": [ + "# code for goal 2 & goal 1\n", + "zf = zipfile.ZipFile(\"wi.zip\")\n", + "f = zf.open(\"wi.csv\")\n", + "\n", + "df = pd.read_csv(f) \n", + "\n", + "f.close()\n", + "zf.close()" + ] + }, + { + "cell_type": "markdown", + "id": "ca72997f", + "metadata": {}, + "source": [ + "### Let's learn more modes for `open` built-in function\n", + "- `open(..., mode=\"r\")` => text (default)\n", + "- 
`open(..., mode=\"rb\")` => bytes\n", + "- `zf.open(...)` => always bytes\n", + "\n", + "With `zipfile` module there isn't a way for us to specify that we need text.\n", + "\n", + "### `TextIOWrapper` inside `io` module enables us to convert `bytes` into `str`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ccc4954f", + "metadata": {}, + "outputs": [], + "source": [ + "# code for goal 2 & goal 1\n", + "zf = zipfile.ZipFile(\"wi.zip\")\n", + "f = zf.open(\"wi.csv\")\n", + "\n", + "reader = csv.DictReader(f)\n", + "\n", + "for row in reader:\n", + " print(row)\n", + " break\n", + "\n", + "f.close()\n", + "zf.close()" + ] + }, + { + "cell_type": "markdown", + "id": "8af3971c", + "metadata": {}, + "source": [ + "### Let's go back to calculating average interest rate.\n", + "- Algorithm / Pseudocode steps:\n", + " 1. print \"interest rate\" and type of \"interest rate\"\n", + " 2. convert \"interest rate\" into `float` - how can we handle errors? `try` ... `except` ... (*IMPORTANT*: always have your `except` block catch specific exceptions)\n", + " 3. calculate running total, count for each row of data\n", + " 4. calculate average" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8ad2b730", + "metadata": {}, + "outputs": [], + "source": [ + "zf = zipfile.ZipFile(\"wi.zip\")\n", + "f = zf.open(\"wi.csv\")\n", + "\n", + "reader = csv.DictReader(TextIOWrapper(f))\n", + "\n", + "for row in reader:\n", + " print(row)\n", + " break\n", + "\n", + "f.close()\n", + "zf.close()\n", + "\n", + "total / count" + ] + }, + { + "cell_type": "markdown", + "id": "51e7081b", + "metadata": {}, + "source": [ + "### Let's generalize the code to read \"interest rate\" into a function.\n", + "\n", + "- This does make things worse because we are going back to reading all the data before doing the computation.\n", + "- But this sets us up to learn about generators." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79334762", + "metadata": {}, + "outputs": [], + "source": [ + "def get_rates_v1():\n", + " pass" + ] + }, + { + "cell_type": "markdown", + "id": "41f4bfe4", + "metadata": {}, + "source": [ + "### Using a generator\n", + "- `yield` each value\n", + "- use `next` to get the next value => internally `for` loop invokes `next` for each iteration" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22c8a41b-1356-4aad-8f92-c3db19ce4c03", + "metadata": {}, + "outputs": [], + "source": [ + "def get_rates_v2():\n", + " pass" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fe2f060b", + "metadata": {}, + "outputs": [], + "source": [ + "next(rates) # gives us the next value" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ea5c188d", + "metadata": {}, + "outputs": [], + "source": [ + "next(rates) # gives us the next value" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3101b71c", + "metadata": {}, + "outputs": [], + "source": [ + "next(rates) # gives us the next value" + ] + }, + { + "cell_type": "markdown", + "id": "b47aab60", + "metadata": {}, + "source": [ + "### Let's use `for` loop to keep getting all the rates.\n", + "\n", + "- `len` function doesn't work with generators\n", + "- indexing doesn't work with generators" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a074669", + "metadata": {}, + "outputs": [], + "source": [ + "len(rates)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d536ef4d", + "metadata": {}, + "outputs": [], + "source": [ + "rates[4]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f5dbd13", + "metadata": {}, + "outputs": [], + "source": [ + "rates = get_rates_v2()\n", + "\n", + "total = 0\n", + "count = 0\n", + "\n", + " # keeps calling next(rates) to get values from yield\n", + "\n", + " \n", + "total / count" + ] 
+ }, + { + "cell_type": "markdown", + "id": "ff9e77e5", + "metadata": {}, + "source": [ + "This approach doesn't work for median calculation. Why? Remember we have to sort, so we need all values in memory." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ace73646-1b9c-45d6-881f-832a5e170766", + "metadata": {}, + "outputs": [], + "source": [ + "rates = list(get_rates_v2())\n", + "rates.sort()" + ] + }, + { + "cell_type": "markdown", + "id": "f6ff9655", + "metadata": {}, + "source": [ + "# OOP 1: Classes\n", + "\n", + "- Creating new types using classes\n", + "- Types have specific attributes and methods (special functions)\n", + "- Using new types (classes), we can create object instances of those types\n", + "- class creation and instantiation syntax: \n", + "```python\n", + "class Person:\n", + " # some code\n", + "p1 = Person() # object instantiation using constructor\n", + "p2 = Person() # object instantiation using constructor\n", + "```\n", + "- attribute / method access syntax:\n", + "```python\n", + "p1.fname = \"...\" # attribute initialization\n", + "p1.lname = \"...\" # attribute initialization\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "0593b013", + "metadata": {}, + "source": [ + "#### PythonTutor example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a99b7194", + "metadata": {}, + "outputs": [], + "source": [ + "p1 = {\"fname\": \"Bob\", \"lname\": \"Baker\"}\n", + "\n", + "p2 = dict()\n", + "p2[\"fname\"] = \"Cindy\"\n", + "p2[\"lname\"] = \"Cooper\"\n", + "\n", + "p3 = {\"Fname\": \"Alice\", \"lname\": \"Anderson\"}\n", + "\n", + "# TODO: Let's define a Person class" + ] + }, + { + "cell_type": "markdown", + "id": "6427a25c", + "metadata": {}, + "source": [ + "### Let's create a `Dog` class." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5154fd9e", + "metadata": {}, + "outputs": [], + "source": [ + "# eventually we will learn how to write code inside a class" + ] + }, + { + "cell_type": "markdown", + "id": "7d3a0c2a", + "metadata": {}, + "source": [ + "### Let's create `Dog` object instances and add attributes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "db5558da", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0600c5b8", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "93b2e3a9", + "metadata": {}, + "source": [ + "### Let's define a `speak` function that will make the `Dog` bark.\n", + "- Algorithm / pseudocode steps:\n", + " 1. puppies bark thrice (age < 2)\n", + " 2. dogs bark once" + ] + }, + { + "cell_type": "markdown", + "id": "1084ec49", + "metadata": {}, + "source": [ + "### `f-strings`\n", + "\n", + "- aka formatted string literals\n", + "- easier and quicker way of formatting `str` than `str.format(...)` method\n", + "\n", + "- Syntax: \n", + "```python\n", + "f\"{} ...\"\n", + "```\n", + "- inside `{}` you can specify a variable or even call a function or a method" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1592767d", + "metadata": {}, + "outputs": [], + "source": [ + "def speak(dog):\n", + " if dog.age < 2:\n", + " pass\n", + " else:\n", + " pass" + ] + }, + { + "cell_type": "markdown", + "id": "b3cbd57e", + "metadata": {}, + "source": [ + "### Let's invoke `speak` for dog1 and dog2." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f9fa462", + "metadata": {}, + "outputs": [], + "source": [ + "speak(dog1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "61b30ab8", + "metadata": {}, + "outputs": [], + "source": [ + "speak(dog2)" + ] + }, + { + "cell_type": "markdown", + "id": "38c31465", + "metadata": {}, + "source": [ + "### How can we standardize the attribute initialization to avoid bugs?\n", + "\n", + "- Eventually we will learn about how to define methods inside the class, which will include `__init__` method.\n", + "- For now, let's define an `init` function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2682bdb7", + "metadata": {}, + "outputs": [], + "source": [ + "def init(???):\n", + " pass" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4cc6c3a1", + "metadata": {}, + "outputs": [], + "source": [ + "dog2 = Dog()\n", + "init(???)\n", + "speak(dog2)" + ] + }, + { + "cell_type": "markdown", + "id": "144913e6", + "metadata": {}, + "source": [ + "### What if there are two `speak` functions? Let's define a Cat class and corresponding `speak` function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eb5d407e", + "metadata": {}, + "outputs": [], + "source": [ + "class Cat:\n", + " pass\n", + "\n", + "cat1 = Cat()\n", + "\n", + "def speak(cat):\n", + " \"\"\"\n", + " Cats meow!\n", + " \"\"\"\n", + " print(\"meow!\")" + ] + }, + { + "cell_type": "markdown", + "id": "ab8106b8", + "metadata": {}, + "source": [ + "### What will be the output of the below function calls?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "747a8cc1", + "metadata": {}, + "outputs": [], + "source": [ + "speak(dog1)\n", + "speak(dog2)\n", + "speak(cat1)" + ] + }, + { + "cell_type": "markdown", + "id": "94dae812", + "metadata": {}, + "source": [ + "### We lost the previous definition of the `speak` function because the second `def speak` rebound the same global name. What if `speak` were a method instead?" + ] + }, + { + "cell_type": "markdown", + "id": "51c27380", + "metadata": {}, + "source": [ + "### **IMPORTANT**: it is not recommended to redefine the same `class`. This is shown only for example purposes. You must always go back to the original cell and update the definition there." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d311c1fb", + "metadata": {}, + "outputs": [], + "source": [ + "class Dog:\n", + " pass # eventually we will learn how to write code inside a class\n", + "\n", + "# Regular function that accepts an object instance of the new type\n", + "def speak(dog):\n", + " \"\"\"\n", + " Puppies (age < 2) bark thrice, whereas dogs bark once.\n", + " \"\"\"\n", + " if dog.age < 2:\n", + " #print(dog.name + \": bark bark bark!\")\n", + " print(f\"{dog.name}: bark bark bark!\")\n", + " else:\n", + " #print(dog.name + \": bark!\")\n", + " print(f\"{dog.name}: bark!\")\n", + " \n", + "# Regular function that accepts an object instance of the new type along with attribute values\n", + "def init(dog, name, how_old):\n", + " dog.name = name\n", + " dog.age = how_old\n", + " \n", + "class Cat:\n", + " pass\n", + "\n", + "def speak(cat):\n", + " \"\"\"\n", + " Cats meow!\n", + " \"\"\"\n", + " print(\"meow!\")\n", + " \n", + "# Let's create object instances\n", + "dog1 = Dog()\n", + "init(dog1, \"Jimmy\", 1)\n", + "\n", + "dog2 = Dog()\n", + "init(dog2, \"Buster\", 10)\n", + "\n", + "cat1 = Cat()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "62165284", + "metadata": {}, + "outputs": [], + "source": [ + "# speak now 
is a method, so we need to use . attribute operator for invocation\n", + "speak(dog1)\n", + "speak(dog2)\n", + "speak(cat1)" + ] + }, + { + "cell_type": "markdown", + "id": "809c208e", + "metadata": {}, + "source": [ + "### Type-based dispatch" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f930cc92", + "metadata": {}, + "outputs": [], + "source": [ + "animals = [dog1, dog2, cat1]\n", + "\n", + "for animal in animals:\n", + " print(type(animal))" + ] + }, + { + "cell_type": "markdown", + "id": "44b76bbb", + "metadata": {}, + "source": [ + "#### Even though `type` output displays additional details, in essence the type is just the name of the class: `Dog`, `Cat`, etc." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fc89bc34", + "metadata": {}, + "outputs": [], + "source": [ + "type(dog1) == Dog" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c1dff233", + "metadata": {}, + "outputs": [], + "source": [ + "type(cat1) == Cat" + ] + }, + { + "cell_type": "markdown", + "id": "c9311d53", + "metadata": {}, + "source": [ + "#### Let's invoke speak for all animals." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f1f6f851", + "metadata": {}, + "outputs": [], + "source": [ + "# v1: bad version\n", + "for animal in animals:\n", + " if type(animal) == Dog:\n", + " Dog.speak(animal)\n", + " elif type(animal) == Cat:\n", + " Cat.speak(animal)\n", + " # this conditional will keep growing as we add more and \n", + " # more animal classes!" 
+ ] + }, + { + "cell_type": "markdown", + "id": "fc2d9ce4", + "metadata": {}, + "source": [ + "#### Here is a slightly better version" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8d69a30", + "metadata": {}, + "outputs": [], + "source": [ + "for animal in animals:\n", + " type(animal).speak(animal)" + ] + }, + { + "cell_type": "markdown", + "id": "187b4671", + "metadata": {}, + "source": [ + "### Method invocation (most commonly used syntax)\n", + "\n", + "Notice how `animal` is written twice in `type(animal).speak(animal)`; that repetition is redundant. There is a better way to invoke methods.\n", + "\n", + "- Syntax: `obj_ref.method()`\n", + "- `obj_ref` itself will be the first argument to the method." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8d5f3794", + "metadata": {}, + "outputs": [], + "source": [ + "for animal in animals:\n", + " # this is equivalent to type(animal).speak(animal)" + ] + }, + { + "cell_type": "markdown", + "id": "3c7ddcd5", + "metadata": {}, + "source": [ + "#### Let's try passing an argument to the `speak` method." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b55d4a90", + "metadata": {}, + "outputs": [], + "source": [ + "dog1.speak(\"hello\")\n", + "# Observe how TypeError says 1 positional argument expected" + ] + }, + { + "cell_type": "markdown", + "id": "c201d284", + "metadata": {}, + "source": [ + "## `self`\n", + "\n", + "- dedicated special variable that refers to the current object instance (aka receiver) inside a class\n", + "- attribute access inside the class **must** always use `self.<attribute>` syntax" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "50ce0774", + "metadata": {}, + "outputs": [], + "source": [ + "class Dog:\n", + " # regular method\n", + " def init(dog, name, how_old): \n", + " dog.name = name\n", + " dog.age = how_old\n", + " \n", + " # regular method\n", + " def speak(dog):\n", + " \"\"\"\n", + " Puppies (age < 2) bark thrice, whereas dogs bark once.\n", + " \"\"\"\n", + " if dog.age < 2:\n", + " #print(dog.name + \": bark bark bark!\")\n", + " print(f\"{dog.name}: bark bark bark!\")\n", + " else:\n", + " #print(dog.name + \": bark!\")\n", + " print(f\"{dog.name}: bark!\")\n", + "\n", + "# Let's create Dog object instances\n", + "dog1 = Dog() \n", + "dog1.init(\"Jimmy\", 1)\n", + "\n", + "dog2 = Dog()\n", + "dog2.init(\"Buster\", 10)\n", + "\n", + "# Invoke speak for dog1 and dog2\n", + "dog1.speak()\n", + "dog2.speak()" + ] + }, + { + "cell_type": "markdown", + "id": "f801c756", + "metadata": {}, + "source": [ + "# OOP: Special Methods\n", + "\n", + "\"Special methods\" is a technical term referring to methods that get called automatically. In Python, they usually begin and end with double underscores.\n", + "- **Note:** you could define a regular method with `__<method>__`." 
+ ] + }, + { + "cell_type": "markdown", + "id": "67b222d8", + "metadata": {}, + "source": [ + "### `__init__` special method (aka Constructor)\n", + "\n", + "- automatically invoked when creating an object instance\n", + "- only one possible constructor in Python" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c71681c9", + "metadata": {}, + "outputs": [], + "source": [ + "# This is the correct and final version of Dog class\n", + "class Dog:\n", + " # regular method\n", + " def init(self, name, how_old): \n", + " self.name = name\n", + " self.age = how_old\n", + " \n", + " # regular method\n", + " def speak(self):\n", + " \"\"\"\n", + " Puppies (age < 2) bark thrice, whereas dogs bark once.\n", + " \"\"\"\n", + " if self.age < 2:\n", + " #print(dog.name + \": bark bark bark!\")\n", + " print(f\"{self.name}: bark bark bark!\")\n", + " else:\n", + " #print(dog.name + \": bark!\")\n", + " print(f\"{self.name}: bark!\")\n", + "\n", + "\n", + "# Let's create Dog object instances\n", + "dog1 = Dog() \n", + "dog1.init(\"Jimmy\", 1)\n", + "\n", + "dog2 = Dog()\n", + "dog2.init(\"Buster\", 10)\n", + "\n", + "# Invoke speak for dog1 and dog2\n", + "dog1.speak()\n", + "dog2.speak()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} -- GitLab