diff --git a/lecture_material/05-oop1/in_class_demo_lec2.ipynb b/lecture_material/05-oop1/in_class_demo_lec2.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..53f96f226f206c006c04c483fd94a9b13c5979ad --- /dev/null +++ b/lecture_material/05-oop1/in_class_demo_lec2.ipynb @@ -0,0 +1,1224 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "id": "3ad30ce8-1eca-4633-bc69-9ccbe0180e8e", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "52e6c912-27c8-4aa2-a3c7-780cc70299a6", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_16573/3756477020.py:1: DtypeWarning: Columns (22,23,24,26,27,28,29,30,31,32,33,38,43,44) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " df = pd.read_csv(\"wi.csv\")\n" + ] + } + ], + "source": [ + "df = pd.read_csv(\"wi.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "b069d5d4-4e22-4321-a1db-e47e4d46afd9", + "metadata": {}, + "outputs": [], + "source": [ + "df1 = df" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "4d6fd21d-fa12-4eac-a58b-e92344e492ee", + "metadata": {}, + "outputs": [], + "source": [ + "df = 123" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "7ccc7e53-cae0-46af-88e5-77ffffc6195b", + "metadata": {}, + "outputs": [], + "source": [ + "df1 = None" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "05a3e388-839a-497d-ac60-e1478f802d70", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_16573/3756477020.py:1: DtypeWarning: Columns (22,23,24,26,27,28,29,30,31,32,33,38,43,44) have mixed types. 
Specify dtype option on import or set low_memory=False.\n", + " df = pd.read_csv(\"wi.csv\")\n" + ] + } + ], + "source": [ + "df = pd.read_csv(\"wi.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "65431158-8919-402c-8dcd-d850ff2f1891", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>activity_year</th>\n", + " <th>lei</th>\n", + " <th>derived_msa-md</th>\n", + " <th>state_code</th>\n", + " <th>county_code</th>\n", + " <th>census_tract</th>\n", + " <th>conforming_loan_limit</th>\n", + " <th>derived_loan_product_type</th>\n", + " <th>derived_dwelling_category</th>\n", + " <th>derived_ethnicity</th>\n", + " <th>...</th>\n", + " <th>denial_reason-2</th>\n", + " <th>denial_reason-3</th>\n", + " <th>denial_reason-4</th>\n", + " <th>tract_population</th>\n", + " <th>tract_minority_population_percent</th>\n", + " <th>ffiec_msa_md_median_family_income</th>\n", + " <th>tract_to_msa_income_percentage</th>\n", + " <th>tract_owner_occupied_units</th>\n", + " <th>tract_one_to_four_family_homes</th>\n", + " <th>tract_median_age_of_housing_units</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>2020</td>\n", + " <td>549300FX7K8PTEQUU487</td>\n", + " <td>31540</td>\n", + " <td>WI</td>\n", + " <td>55025.0</td>\n", + " <td>5.502500e+10</td>\n", + " <td>C</td>\n", + " <td>Conventional:First Lien</td>\n", + " <td>Single Family (1-4 Units):Site-Built</td>\n", + " <td>Not Hispanic or Latino</td>\n", + " <td>...</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " 
<td>NaN</td>\n", + " <td>3572</td>\n", + " <td>41.15</td>\n", + " <td>96600</td>\n", + " <td>64</td>\n", + " <td>812</td>\n", + " <td>910</td>\n", + " <td>45</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>2020</td>\n", + " <td>549300FX7K8PTEQUU487</td>\n", + " <td>99999</td>\n", + " <td>WI</td>\n", + " <td>55013.0</td>\n", + " <td>5.501397e+10</td>\n", + " <td>C</td>\n", + " <td>Conventional:First Lien</td>\n", + " <td>Single Family (1-4 Units):Site-Built</td>\n", + " <td>Not Hispanic or Latino</td>\n", + " <td>...</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>2333</td>\n", + " <td>9.90</td>\n", + " <td>68000</td>\n", + " <td>87</td>\n", + " <td>1000</td>\n", + " <td>2717</td>\n", + " <td>34</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>2 rows × 99 columns</p>\n", + "</div>" + ], + "text/plain": [ + " activity_year lei derived_msa-md state_code \\\n", + "0 2020 549300FX7K8PTEQUU487 31540 WI \n", + "1 2020 549300FX7K8PTEQUU487 99999 WI \n", + "\n", + " county_code census_tract conforming_loan_limit derived_loan_product_type \\\n", + "0 55025.0 5.502500e+10 C Conventional:First Lien \n", + "1 55013.0 5.501397e+10 C Conventional:First Lien \n", + "\n", + " derived_dwelling_category derived_ethnicity ... \\\n", + "0 Single Family (1-4 Units):Site-Built Not Hispanic or Latino ... \n", + "1 Single Family (1-4 Units):Site-Built Not Hispanic or Latino ... 
\n", + "\n", + " denial_reason-2 denial_reason-3 denial_reason-4 tract_population \\\n", + "0 NaN NaN NaN 3572 \n", + "1 NaN NaN NaN 2333 \n", + "\n", + " tract_minority_population_percent ffiec_msa_md_median_family_income \\\n", + "0 41.15 96600 \n", + "1 9.90 68000 \n", + "\n", + " tract_to_msa_income_percentage tract_owner_occupied_units \\\n", + "0 64 812 \n", + "1 87 1000 \n", + "\n", + " tract_one_to_four_family_homes tract_median_age_of_housing_units \n", + "0 910 45 \n", + "1 2717 34 \n", + "\n", + "[2 rows x 99 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "d5190089-62f8-4c82-a125-e5d761f5c1a6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>activity_year</th>\n", + " <th>lei</th>\n", + " <th>derived_msa-md</th>\n", + " <th>state_code</th>\n", + " <th>county_code</th>\n", + " <th>census_tract</th>\n", + " <th>conforming_loan_limit</th>\n", + " <th>derived_loan_product_type</th>\n", + " <th>derived_dwelling_category</th>\n", + " <th>derived_ethnicity</th>\n", + " <th>...</th>\n", + " <th>denial_reason-2</th>\n", + " <th>denial_reason-3</th>\n", + " <th>denial_reason-4</th>\n", + " <th>tract_population</th>\n", + " <th>tract_minority_population_percent</th>\n", + " <th>ffiec_msa_md_median_family_income</th>\n", + " <th>tract_to_msa_income_percentage</th>\n", + " <th>tract_owner_occupied_units</th>\n", + " <th>tract_one_to_four_family_homes</th>\n", + " 
<th>tract_median_age_of_housing_units</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>468269</th>\n", + " <td>2020</td>\n", + " <td>549300FX7K8PTEQUU487</td>\n", + " <td>36780</td>\n", + " <td>WI</td>\n", + " <td>55139.0</td>\n", + " <td>5.513900e+10</td>\n", + " <td>C</td>\n", + " <td>Conventional:Subordinate Lien</td>\n", + " <td>Single Family (1-4 Units):Site-Built</td>\n", + " <td>Not Hispanic or Latino</td>\n", + " <td>...</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>4886</td>\n", + " <td>15.04</td>\n", + " <td>80800</td>\n", + " <td>102</td>\n", + " <td>1099</td>\n", + " <td>1563</td>\n", + " <td>36</td>\n", + " </tr>\n", + " <tr>\n", + " <th>468270</th>\n", + " <td>2020</td>\n", + " <td>549300FX7K8PTEQUU487</td>\n", + " <td>33340</td>\n", + " <td>WI</td>\n", + " <td>55079.0</td>\n", + " <td>5.507902e+10</td>\n", + " <td>C</td>\n", + " <td>Conventional:First Lien</td>\n", + " <td>Single Family (1-4 Units):Site-Built</td>\n", + " <td>Hispanic or Latino</td>\n", + " <td>...</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>6961</td>\n", + " <td>28.13</td>\n", + " <td>83800</td>\n", + " <td>93</td>\n", + " <td>1732</td>\n", + " <td>2138</td>\n", + " <td>50</td>\n", + " </tr>\n", + " <tr>\n", + " <th>468271</th>\n", + " <td>2020</td>\n", + " <td>549300FX7K8PTEQUU487</td>\n", + " <td>33460</td>\n", + " <td>WI</td>\n", + " <td>55109.0</td>\n", + " <td>5.510912e+10</td>\n", + " <td>C</td>\n", + " <td>Conventional:Subordinate Lien</td>\n", + " <td>Single Family (1-4 Units):Site-Built</td>\n", + " <td>Hispanic or Latino</td>\n", + " <td>...</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>9057</td>\n", + " <td>6.41</td>\n", + " <td>97300</td>\n", + " <td>100</td>\n", + " <td>2448</td>\n", + " <td>3368</td>\n", + " <td>17</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>3 rows × 99 columns</p>\n", + "</div>" + ], + "text/plain": [ + 
" activity_year lei derived_msa-md state_code \\\n", + "468269 2020 549300FX7K8PTEQUU487 36780 WI \n", + "468270 2020 549300FX7K8PTEQUU487 33340 WI \n", + "468271 2020 549300FX7K8PTEQUU487 33460 WI \n", + "\n", + " county_code census_tract conforming_loan_limit \\\n", + "468269 55139.0 5.513900e+10 C \n", + "468270 55079.0 5.507902e+10 C \n", + "468271 55109.0 5.510912e+10 C \n", + "\n", + " derived_loan_product_type derived_dwelling_category \\\n", + "468269 Conventional:Subordinate Lien Single Family (1-4 Units):Site-Built \n", + "468270 Conventional:First Lien Single Family (1-4 Units):Site-Built \n", + "468271 Conventional:Subordinate Lien Single Family (1-4 Units):Site-Built \n", + "\n", + " derived_ethnicity ... denial_reason-2 denial_reason-3 \\\n", + "468269 Not Hispanic or Latino ... NaN NaN \n", + "468270 Hispanic or Latino ... NaN NaN \n", + "468271 Hispanic or Latino ... NaN NaN \n", + "\n", + " denial_reason-4 tract_population tract_minority_population_percent \\\n", + "468269 NaN 4886 15.04 \n", + "468270 NaN 6961 28.13 \n", + "468271 NaN 9057 6.41 \n", + "\n", + " ffiec_msa_md_median_family_income tract_to_msa_income_percentage \\\n", + "468269 80800 102 \n", + "468270 83800 93 \n", + "468271 97300 100 \n", + "\n", + " tract_owner_occupied_units tract_one_to_four_family_homes \\\n", + "468269 1099 1563 \n", + "468270 1732 2138 \n", + "468271 2448 3368 \n", + "\n", + " tract_median_age_of_housing_units \n", + "468269 36 \n", + "468270 50 \n", + "468271 17 \n", + "\n", + "[3 rows x 99 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.tail(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "850faeeb-116f-4c56-aa61-05bcde7719fa", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['activity_year', 'lei', 'derived_msa-md', 'state_code', 'county_code',\n", + " 'census_tract', 'conforming_loan_limit', 'derived_loan_product_type',\n", + " 
'derived_dwelling_category', 'derived_ethnicity', 'derived_race',\n", + " 'derived_sex', 'action_taken', 'purchaser_type', 'preapproval',\n", + " 'loan_type', 'loan_purpose', 'lien_status', 'reverse_mortgage',\n", + " 'open-end_line_of_credit', 'business_or_commercial_purpose',\n", + " 'loan_amount', 'loan_to_value_ratio', 'interest_rate', 'rate_spread',\n", + " 'hoepa_status', 'total_loan_costs', 'total_points_and_fees',\n", + " 'origination_charges', 'discount_points', 'lender_credits', 'loan_term',\n", + " 'prepayment_penalty_term', 'intro_rate_period', 'negative_amortization',\n", + " 'interest_only_payment', 'balloon_payment',\n", + " 'other_nonamortizing_features', 'property_value', 'construction_method',\n", + " 'occupancy_type', 'manufactured_home_secured_property_type',\n", + " 'manufactured_home_land_property_interest', 'total_units',\n", + " 'multifamily_affordable_units', 'income', 'debt_to_income_ratio',\n", + " 'applicant_credit_score_type', 'co-applicant_credit_score_type',\n", + " 'applicant_ethnicity-1', 'applicant_ethnicity-2',\n", + " 'applicant_ethnicity-3', 'applicant_ethnicity-4',\n", + " 'applicant_ethnicity-5', 'co-applicant_ethnicity-1',\n", + " 'co-applicant_ethnicity-2', 'co-applicant_ethnicity-3',\n", + " 'co-applicant_ethnicity-4', 'co-applicant_ethnicity-5',\n", + " 'applicant_ethnicity_observed', 'co-applicant_ethnicity_observed',\n", + " 'applicant_race-1', 'applicant_race-2', 'applicant_race-3',\n", + " 'applicant_race-4', 'applicant_race-5', 'co-applicant_race-1',\n", + " 'co-applicant_race-2', 'co-applicant_race-3', 'co-applicant_race-4',\n", + " 'co-applicant_race-5', 'applicant_race_observed',\n", + " 'co-applicant_race_observed', 'applicant_sex', 'co-applicant_sex',\n", + " 'applicant_sex_observed', 'co-applicant_sex_observed', 'applicant_age',\n", + " 'co-applicant_age', 'applicant_age_above_62',\n", + " 'co-applicant_age_above_62', 'submission_of_application',\n", + " 'initially_payable_to_institution', 'aus-1', 'aus-2', 
'aus-3', 'aus-4',\n", + " 'aus-5', 'denial_reason-1', 'denial_reason-2', 'denial_reason-3',\n", + " 'denial_reason-4', 'tract_population',\n", + " 'tract_minority_population_percent',\n", + " 'ffiec_msa_md_median_family_income', 'tract_to_msa_income_percentage',\n", + " 'tract_owner_occupied_units', 'tract_one_to_four_family_homes',\n", + " 'tract_median_age_of_housing_units'],\n", + " dtype='object')" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "ceec5d46-ff31-462b-a0cb-94f62f67e4cc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 NaN\n", + "1 3.0\n", + "2 NaN\n", + "3 3.75\n", + "4 2.5\n", + " ... \n", + "468267 2.25\n", + "468268 2.5\n", + "468269 4.25\n", + "468270 NaN\n", + "468271 NaN\n", + "Name: interest_rate, Length: 468272, dtype: object" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"interest_rate\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "26c1afa9-79cd-4096-a490-f9080f3fe02b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "interest_rate\n", + "Exempt 37959\n", + "3.0 22584\n", + "2.75 22480\n", + "3.25 21343\n", + "2.875 21201\n", + " ... 
\n", + "3.023 1\n", + "2.632 1\n", + "3.345 1\n", + "3.364 1\n", + "2.32 1\n", + "Name: count, Length: 2080, dtype: int64" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"interest_rate\"].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "a0af4324-50a1-4ae3-87a6-6eba608ff1cd", + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "Unable to parse string \"Exempt\" at position 1505", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32mlib.pyx:2368\u001b[0m, in \u001b[0;36mpandas._libs.lib.maybe_convert_numeric\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: Unable to parse string \"Exempt\"", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[15], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_numeric\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43minterest_rate\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/core/tools/numeric.py:222\u001b[0m, in \u001b[0;36mto_numeric\u001b[0;34m(arg, errors, downcast, dtype_backend)\u001b[0m\n\u001b[1;32m 220\u001b[0m coerce_numeric \u001b[38;5;241m=\u001b[39m errors \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m (\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mraise\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 221\u001b[0m 
\u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 222\u001b[0m values, new_mask \u001b[38;5;241m=\u001b[39m \u001b[43mlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmaybe_convert_numeric\u001b[49m\u001b[43m(\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# type: ignore[call-overload] # noqa: E501\u001b[39;49;00m\n\u001b[1;32m 223\u001b[0m \u001b[43m \u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 224\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mset\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 225\u001b[0m \u001b[43m \u001b[49m\u001b[43mcoerce_numeric\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcoerce_numeric\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 226\u001b[0m \u001b[43m \u001b[49m\u001b[43mconvert_to_masked_nullable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype_backend\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mis\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mno_default\u001b[49m\n\u001b[1;32m 227\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43misinstance\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mvalues_dtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mStringDtype\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 228\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 229\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mValueError\u001b[39;00m, \u001b[38;5;167;01mTypeError\u001b[39;00m):\n\u001b[1;32m 230\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m errors \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mraise\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n", + "File \u001b[0;32mlib.pyx:2410\u001b[0m, in \u001b[0;36mpandas._libs.lib.maybe_convert_numeric\u001b[0;34m()\u001b[0m\n", + 
"\u001b[0;31mValueError\u001b[0m: Unable to parse string \"Exempt\" at position 1505" + ] + } + ], + "source": [ + "pd.to_numeric(df[\"interest_rate\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "5be93ff0-c7c8-438a-b140-6dfa9624a067", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 NaN\n", + "1 3.00\n", + "2 NaN\n", + "3 3.75\n", + "4 2.50\n", + " ... \n", + "468267 2.25\n", + "468268 2.50\n", + "468269 4.25\n", + "468270 NaN\n", + "468271 NaN\n", + "Name: interest_rate, Length: 468272, dtype: float64" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.to_numeric(df[\"interest_rate\"], errors= \"coerce\")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "689dc2c0-c051-49b8-aee2-ac8e6b5a575f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3.266264315063852" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.to_numeric(df[\"interest_rate\"], errors= \"coerce\").dropna().mean()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c11270c9-9a44-480c-8564-16c314940c4e", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_16573/264512408.py:2: DtypeWarning: Columns (22,23,24,26,27,28,29,30,31,32,33,38,43,44) have mixed types. 
def get_rates_v1(zip_path="wi.zip", csv_name="wi.csv", column="interest_rate"):
    """Return every parseable value of one CSV column inside a zip archive.

    The whole column is materialized in memory as a list; see the generator
    variant when only a single pass over the values is needed.

    Parameters
    ----------
    zip_path : str
        Path of the zip archive to read. Defaults to "wi.zip".
    csv_name : str
        Name of the CSV member inside the archive. Defaults to "wi.csv".
    column : str
        Header of the column to convert. Defaults to "interest_rate".

    Returns
    -------
    list of float
        The values that float() accepted, in file order.

    Raises
    ------
    FileNotFoundError
        If `zip_path` does not exist.
    KeyError
        If `csv_name` is not in the archive or `column` is not in the header.
    """
    rates = []
    # Context managers release the archive and the member even when the loop
    # raises something other than ValueError (e.g. KeyError on a bad column).
    with zipfile.ZipFile(zip_path) as zf, \
            zf.open(csv_name) as raw, \
            TextIOWrapper(raw) as text:
        # ZipFile.open() always yields bytes, hence the TextIOWrapper so that
        # csv receives str lines.
        for row in csv.DictReader(text):
            try:
                rates.append(float(row[column]))
            except ValueError:
                # Cells float() rejects (e.g. "Exempt" or an empty string)
                # are skipped on purpose.
                continue
    return rates
def get_rates_v2(zip_path="wi.zip", csv_name="wi.csv", column="interest_rate"):
    """Lazily yield every parseable value of one CSV column inside a zip archive.

    This is a generator function: calling it only builds the generator, and
    nothing below runs until the first next(). Values are produced one at a
    time, so the full column is never held in memory.

    Parameters
    ----------
    zip_path : str
        Path of the zip archive to read. Defaults to "wi.zip".
    csv_name : str
        Name of the CSV member inside the archive. Defaults to "wi.csv".
    column : str
        Header of the column to convert. Defaults to "interest_rate".

    Yields
    ------
    float
        Each value that float() accepted, in file order.

    Raises
    ------
    FileNotFoundError
        On first next(), if `zip_path` does not exist.
    KeyError
        If `csv_name` is not in the archive or `column` is not in the header.
    """
    # Deliberate trace for the demo: it appears on the first next(), not when
    # get_rates_v2() is called.
    print("Test")

    # `with` matters more here than in the list version: a consumer may stop
    # after a few next() calls, and generator.close() (or garbage collection)
    # then unwinds these managers. Close calls placed after the loop would
    # never run in that case.
    with zipfile.ZipFile(zip_path) as zf, \
            zf.open(csv_name) as raw, \
            TextIOWrapper(raw) as text:
        for row in csv.DictReader(text):
            try:
                rate = float(row[column])
            except ValueError:
                # Cells float() rejects (e.g. "Exempt" or an empty string)
                # are skipped on purpose.
                continue
            # Yield outside the try so a ValueError thrown into the generator
            # by the consumer is not silently swallowed.
            yield rate
+ { + "data": { + "text/plain": [ + "2.75" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "next(rates)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "8ee28948-1a2f-431b-988b-d11f626f5844", + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "object of type 'generator' has no len()", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[22], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mrates\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mTypeError\u001b[0m: object of type 'generator' has no len()" + ] + } + ], + "source": [ + "len(rates)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "5a9eac32-d40f-4bf4-881b-3442488a3ff8", + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "'generator' object is not subscriptable", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[23], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mrates\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\n", + "\u001b[0;31mTypeError\u001b[0m: 'generator' object is not subscriptable" + ] + } + ], + "source": [ + "rates[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "4cf8a3e1-6705-4762-8982-19ce6c1072cf", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test\n" + ] + }, + { + "data": { + "text/plain": [ + "3.266264315063054" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "rates = get_rates_v2()\n", + "total = 0\n", + "count = 0\n", + "\n", + "for rate in rates:\n", + " total += rate\n", + " count += 1\n", + "total/count" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "c6da348e-7975-4306-a338-e536325b6394", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test\n" + ] + } + ], + "source": [ + "rates = get_rates_v2()\n", + "\n", + "L = list(rates)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "0e0f6133-08b2-4210-8ea4-ddf16e20ad6e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "324658" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(L)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "9ce88f36-f50a-4e61-94dd-348922d1add8", + "metadata": {}, + "outputs": [], + "source": [ + "# L" + ] + }, + { + "cell_type": "markdown", + "id": "f14e7d46-94d7-424f-95d1-f6c9ff4bef70", + "metadata": {}, + "source": [ + "# OOP 1" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "65f96fc3-46a6-4e01-b1de-20b701c5727e", + "metadata": {}, + "outputs": [], + "source": [ + "class Dog:\n", + " pass\n", + "\n", + " def speak(d):\n", + " if d.age < 3:\n", + " print(f\"{d.name} barks: woof!woof!woof!\")\n", + " else:\n", + " print(f\"{d.name} barks: woof!\")\n", + "\n", + "def init(dog, name, how_old):\n", + " dog.name = name\n", + " dog.age = how_old\n", + " \n", + " \n", + "dog1 = Dog()\n", + "# # type(dog1)\n", + "# dog1.name = \"Fido\"\n", + "# dog1.age = 2\n", + "\n", + "init(dog1, \"Fido\", 2)\n", + "\n", + "dog2= Dog()\n", + "# dog2.name = \"Sam\"\n", + "# dog2.age = 5\n", + "init(dog2, \"Sam\", 5)\n", + "\n", + "# speak(dog1)\n", + "# speak(dog2)\n", + "pets = [dog1, dog2]\n", + "\n", + "# for pet in pets:\n", + "# speak(pet)" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + 
"id": "c8d2fe89-8b97-416a-a273-62ff0a0131b7", + "metadata": {}, + "outputs": [], + "source": [ + "class Cat:\n", + " pass\n", + "\n", + " def speak(cat):\n", + " print(\"meow\")\n", + "\n", + "cat = Cat()" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "9fade852-7da2-433a-8d35-ec12831d98dd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "meow\n", + "meow\n", + "meow\n" + ] + } + ], + "source": [ + "pets = [dog1, dog2, cat]\n", + "\n", + "for pet in pets:\n", + " Cat.speak(pet)" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "439fdc25-112d-4156-9559-fe08faa54f2a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(dog1) == Dog" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "7fdf917e-ea86-4652-9926-0dd412539e96", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fido barks: woof!woof!woof!\n", + "Sam barks: woof!\n", + "meow\n" + ] + } + ], + "source": [ + "#bad version\n", + "\n", + "for pet in pets:\n", + " if type(pet) == Dog:\n", + " Dog.speak(pet)\n", + " elif type(pet) == Cat:\n", + " Cat.speak(pet)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "f585fdce-03ca-43ef-828b-29b661dcaac9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fido barks: woof!woof!woof!\n", + "Sam barks: woof!\n", + "meow\n" + ] + } + ], + "source": [ + "#better version\n", + "\n", + "for pet in pets:\n", + " type(pet).speak(pet)" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "2f833df0-be23-4d4f-b5be-56b66f6b4150", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fido barks: woof!woof!woof!\n", + "Sam barks: woof!\n", + "meow\n" + ] + } + 
], + "source": [ + "#best version\n", + "for pet in pets:\n", + " pet.speak() #type(pet).speak(pet)" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "d8c37f76-ad91-4487-8832-1eef17d8c545", + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "Dog.speak() takes 1 positional argument but 2 were given", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[54], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m pet \u001b[38;5;129;01min\u001b[39;00m pets:\n\u001b[0;32m----> 2\u001b[0m \u001b[43mpet\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mspeak\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mHi\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m#type(pet).speak(pet)\u001b[39;00m\n", + "\u001b[0;31mTypeError\u001b[0m: Dog.speak() takes 1 positional argument but 2 were given" + ] + } + ], + "source": [ + "for pet in pets:\n", + " pet.speak(\"Hi\") #type(pet).speak(pet)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "60bab56c-1f4c-4923-9173-044be5c36ed1", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}