From f6f2843ccfde4ced815a92bb7577dac106cc4f2c Mon Sep 17 00:00:00 2001
From: TYLER CARAZA-HARTER <tharter@cs544-tharter.cs.wisc.edu>
Date: Fri, 28 Feb 2025 12:36:33 -0600
Subject: [PATCH] lec demos

---
 lec/15-sql/lec1.ipynb | 1287 +++++++++++++++++++++++++++++++++++
 lec/15-sql/lec2.ipynb | 1505 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 2792 insertions(+)
 create mode 100644 lec/15-sql/lec1.ipynb
 create mode 100644 lec/15-sql/lec2.ipynb

diff --git a/lec/15-sql/lec1.ipynb b/lec/15-sql/lec1.ipynb
new file mode 100644
index 0000000..6c084b4
--- /dev/null
+++ b/lec/15-sql/lec1.ipynb
@@ -0,0 +1,1287 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "df568295-31af-4fde-b402-8adecdf57f13",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sqlalchemy import create_engine, text\n",
+    "engine = create_engine(\"mysql+mysqlconnector://root:abc@127.0.0.1:3306/cs544\")\n",
+    "conn = engine.connect()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "8cabbb35-3d75-44ee-886c-6aa871a01d68",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[]"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "list(conn.execute(text(\"\"\"\n",
+    "    show tables\n",
+    "\"\"\")))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "b68ba375-c9c9-4677-9d9d-310d1927a276",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<sqlalchemy.engine.cursor.CursorResult at 0x765a7c3ddb00>"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# table: users\n",
+    "# columns: id, name, phone\n",
+    "# name is required\n",
+    "# id uniquely identifies row\n",
+    "conn.execute(text(\"\"\"\n",
+    "    create table users (\n",
+    "        id int,\n",
+    "        name text NOT NULL,\n",
+    "        phone text,\n",
+    "        PRIMARY KEY (id)\n",
+    "    )\n",
+    "\"\"\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "00a5b5b9-8e91-4d90-99ad-e85a1756ea88",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<sqlalchemy.engine.cursor.CursorResult at 0x765a714c6820>"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "conn.execute(text(\"\"\"\n",
+    "    create table accounts (\n",
+    "        user_id int,\n",
+    "        name text NOT NULL,\n",
+    "        amount int NOT NULL,\n",
+    "        FOREIGN KEY (user_id) references users(id)\n",
+    "    )\n",
+    "\"\"\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "9739d58c-8004-4844-a609-7ed95bdbf9aa",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[('accounts',), ('users',)]"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "list(conn.execute(text(\"\"\"\n",
+    "    show tables\n",
+    "\"\"\")))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "1fc2171c-9f09-4b40-b5e0-fcb84870ec7d",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<sqlalchemy.engine.cursor.CursorResult at 0x765a714c6f90>"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "conn.execute(text(\"\"\"\n",
+    "    INSERT INTO users (id, name) VALUES (1, \"tyler\")\n",
+    "\"\"\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "a96a9978-0927-4886-bd72-225150f9a5a2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# conn.execute(text(\"\"\"\n",
+    "#     INSERT INTO users (id, name) VALUES (1, \"tyler\")\n",
+    "# \"\"\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "0dbc816b-4f66-4b19-bcb6-e1b5212ef469",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[(1, 'tyler', None)]"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "list(conn.execute(text(\"\"\"\n",
+    "    SELECT *\n",
+    "    FROM users\n",
+    "\"\"\")))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "45c24702-0285-43fe-ae0e-b9d01adc2a37",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "conn.commit()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "2a18a93d-20c1-4f02-875a-e6154ad7aef5",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<sqlalchemy.engine.cursor.CursorResult at 0x765a842ece50>"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "conn.execute(text(\"\"\"\n",
+    "    INSERT INTO accounts (user_id, name, amount)\n",
+    "    VALUES (1, \"A\", 10), (1, \"B\", 20)\n",
+    "\"\"\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "753e807c-51d8-4190-9e8b-92e95bc8030b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "conn.commit()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "ca9c9379-ba53-42f0-9e11-eed9921adc01",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# this would break an invariant, so it's not allowed!\n",
+    "# foreign keys are still referencing user id 1\n",
+    "#\n",
+    "# conn.execute(text(\"\"\"\n",
+    "#     DELETE FROM users WHERE id = 1;\n",
+    "# \"\"\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "75ae1cff-684a-4c04-9ce3-f619edea898c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "de914434-5eb7-465b-aec4-8bec6953b623",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "url = \"https://raw.githubusercontent.com/cfpb/api/master/resources/datasets/hmda/code_sheets/\"\n",
+    "df = pd.read_csv(url + \"action_taken.csv\")\n",
+    "df.to_sql(\"actions\", conn, index=False, if_exists=\"replace\")\n",
+    "df = pd.read_csv(url + \"loan_type.csv\")\n",
+    "df.to_sql(\"loan_types\", conn, index=False, if_exists=\"replace\")\n",
+    "df = pd.read_csv(url + \"loan_purpose.csv\")\n",
+    "df.to_sql(\"purposes\", conn, index=False, if_exists=\"replace\")\n",
+    "conn.commit()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "id": "4c24c47e-8f03-4b84-9647-6fd1559a4b0b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pyarrow as pa\n",
+    "import pyarrow.csv, pyarrow.parquet\n",
+    "\n",
+    "t = pa.parquet.read_table(\n",
+    "    \"loans.parquet\", \n",
+    "    columns=[\"lei\", \"action_taken\", \"loan_type\",\n",
+    "             \"loan_amount\", \"interest_rate\", \"loan_purpose\", \"income\"\n",
+    "            ]\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "id": "a0f419e7-b0d7-4bf1-94ad-4c4ca961ada6",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "447367"
+      ]
+     },
+     "execution_count": 26,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "t.to_pandas().to_sql(\"loans\", conn, index=False,\n",
+    "                     if_exists=\"replace\", chunksize=10_000)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "id": "96846d26-3aa8-414a-a925-eafa9fe60f50",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "conn.commit()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "24e3be3e-5296-44fa-a850-c1e0df34cd38",
+   "metadata": {},
+   "source": [
+    "# Transactions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "id": "791ad3aa-0a41-4c5c-b1ee-6d9b5b95e023",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "remaining: -2\n",
+      "rollback!\n"
+     ]
+    }
+   ],
+   "source": [
+    "conn.execute(text(\"\"\"\n",
+    "update accounts set amount = amount + 5 where name = 'B'\n",
+    "\"\"\"))\n",
+    "conn.execute(text(\"\"\"\n",
+    "update accounts set amount = amount - 5 where name = 'A'\n",
+    "\"\"\"))\n",
+    "\n",
+    "# invariant: account cannot go negative\n",
+    "remaining_amount = list(conn.execute(text(\n",
+    "    \"select amount from accounts where name = 'A'\"\n",
+    ")))[0][0]\n",
+    "print(\"remaining:\", remaining_amount)\n",
+    "if remaining_amount >= 0:\n",
+    "    print(\"commit!\")\n",
+    "    conn.commit()\n",
+    "else:\n",
+    "    print(\"rollback!\")\n",
+    "    conn.rollback()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "10f7d983-69fc-4800-b529-b7f19e0cdb73",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# conn.rollback() or conn.commit()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "49392cee-e7b9-4ce0-9ce3-961965443b3d",
+   "metadata": {},
+   "source": [
+    "# Analyze/Query the Data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "id": "ed5f92fc-ac76-4307-bd0c-6b714ed5a699",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>id</th>\n",
+       "      <th>action_taken</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>Loan originated</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2</td>\n",
+       "      <td>Application approved but not accepted</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>3</td>\n",
+       "      <td>Application denied by financial institution</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>4</td>\n",
+       "      <td>Application withdrawn by applicant</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>5</td>\n",
+       "      <td>File closed for incompleteness</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>6</td>\n",
+       "      <td>Loan purchased by the institution</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>7</td>\n",
+       "      <td>Preapproval request denied by financial instit...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>8</td>\n",
+       "      <td>Preapproval request approved but not accepted</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   id                                       action_taken\n",
+       "0   1                                    Loan originated\n",
+       "1   2              Application approved but not accepted\n",
+       "2   3        Application denied by financial institution\n",
+       "3   4                 Application withdrawn by applicant\n",
+       "4   5                     File closed for incompleteness\n",
+       "5   6                  Loan purchased by the institution\n",
+       "6   7  Preapproval request denied by financial instit...\n",
+       "7   8      Preapproval request approved but not accepted"
+      ]
+     },
+     "execution_count": 38,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# what are all the possible actions?  Practice SELECT/FROM.\n",
+    "pd.read_sql(\"\"\"\n",
+    "SELECT *\n",
+    "FROM actions\n",
+    "\"\"\", conn)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "id": "8349be60-02f4-43bb-9cf2-568bc70d2c75",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>lei</th>\n",
+       "      <th>action_taken</th>\n",
+       "      <th>loan_type</th>\n",
+       "      <th>loan_amount</th>\n",
+       "      <th>interest_rate</th>\n",
+       "      <th>loan_purpose</th>\n",
+       "      <th>income</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>54930034MNPILHP25H80</td>\n",
+       "      <td>6</td>\n",
+       "      <td>1</td>\n",
+       "      <td>305000.0</td>\n",
+       "      <td>3.875</td>\n",
+       "      <td>1</td>\n",
+       "      <td>108.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>54930034MNPILHP25H80</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>65000.0</td>\n",
+       "      <td>NA</td>\n",
+       "      <td>1</td>\n",
+       "      <td>103.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>54930034MNPILHP25H80</td>\n",
+       "      <td>6</td>\n",
+       "      <td>1</td>\n",
+       "      <td>75000.0</td>\n",
+       "      <td>3.25</td>\n",
+       "      <td>1</td>\n",
+       "      <td>146.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>54930034MNPILHP25H80</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>155000.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>32</td>\n",
+       "      <td>70.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>54930034MNPILHP25H80</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>305000.0</td>\n",
+       "      <td>3.25</td>\n",
+       "      <td>1</td>\n",
+       "      <td>71.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>54930034MNPILHP25H80</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>175000.0</td>\n",
+       "      <td>3.375</td>\n",
+       "      <td>1</td>\n",
+       "      <td>117.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>54930034MNPILHP25H80</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>575000.0</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>1</td>\n",
+       "      <td>180.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>54930034MNPILHP25H80</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>105000.0</td>\n",
+       "      <td>5.375</td>\n",
+       "      <td>1</td>\n",
+       "      <td>180.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>54930034MNPILHP25H80</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>85000.0</td>\n",
+       "      <td>3.375</td>\n",
+       "      <td>1</td>\n",
+       "      <td>136.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>549300FQ2SN6TRRGB032</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>405000.0</td>\n",
+       "      <td>Exempt</td>\n",
+       "      <td>1</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                    lei  action_taken  loan_type  loan_amount interest_rate  \\\n",
+       "0  54930034MNPILHP25H80             6          1     305000.0         3.875   \n",
+       "1  54930034MNPILHP25H80             4          1      65000.0            NA   \n",
+       "2  54930034MNPILHP25H80             6          1      75000.0          3.25   \n",
+       "3  54930034MNPILHP25H80             1          1     155000.0           4.0   \n",
+       "4  54930034MNPILHP25H80             1          1     305000.0          3.25   \n",
+       "5  54930034MNPILHP25H80             1          1     175000.0         3.375   \n",
+       "6  54930034MNPILHP25H80             1          1     575000.0           4.5   \n",
+       "7  54930034MNPILHP25H80             1          1     105000.0         5.375   \n",
+       "8  54930034MNPILHP25H80             1          1      85000.0         3.375   \n",
+       "9  549300FQ2SN6TRRGB032             1          1     405000.0        Exempt   \n",
+       "\n",
+       "   loan_purpose  income  \n",
+       "0             1   108.0  \n",
+       "1             1   103.0  \n",
+       "2             1   146.0  \n",
+       "3            32    70.0  \n",
+       "4             1    71.0  \n",
+       "5             1   117.0  \n",
+       "6             1   180.0  \n",
+       "7             1   180.0  \n",
+       "8             1   136.0  \n",
+       "9             1     NaN  "
+      ]
+     },
+     "execution_count": 39,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# what are the first 10 loans?  Practice LIMIT.\n",
+    "pd.read_sql(\"\"\"\n",
+    "SELECT *\n",
+    "FROM loans\n",
+    "LIMIT 10\n",
+    "\"\"\", conn)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f99ed83c-d50c-43f7-bf3c-7307bb30801b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# projection: choosing what columns (SELECT)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d2fe1388-b308-4f8c-8a63-c426c0f1b787",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# selection: filtering rows (WHERE)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "id": "0da82d35-5e7f-481b-a3fe-9d828b541794",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>interest_rate</th>\n",
+       "      <th>loan_thousands</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>3.875</td>\n",
+       "      <td>305.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>NA</td>\n",
+       "      <td>65.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>3.25</td>\n",
+       "      <td>75.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>4.0</td>\n",
+       "      <td>155.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>3.25</td>\n",
+       "      <td>305.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>3.375</td>\n",
+       "      <td>175.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>4.5</td>\n",
+       "      <td>575.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>5.375</td>\n",
+       "      <td>105.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>3.375</td>\n",
+       "      <td>85.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>Exempt</td>\n",
+       "      <td>405.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  interest_rate  loan_thousands\n",
+       "0         3.875           305.0\n",
+       "1            NA            65.0\n",
+       "2          3.25            75.0\n",
+       "3           4.0           155.0\n",
+       "4          3.25           305.0\n",
+       "5         3.375           175.0\n",
+       "6           4.5           575.0\n",
+       "7         5.375           105.0\n",
+       "8         3.375            85.0\n",
+       "9        Exempt           405.0"
+      ]
+     },
+     "execution_count": 42,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# what are the first 10 interest rates and loan amounts (in thousands)?  Practice SELECT.\n",
+    "pd.read_sql(\"\"\"\n",
+    "SELECT interest_rate, loan_amount / 1000 AS loan_thousands\n",
+    "FROM loans\n",
+    "LIMIT 10\n",
+    "\"\"\", conn)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "id": "4ee4b0cb-7671-4570-9e18-fcd4b8c0394c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>lei</th>\n",
+       "      <th>action_taken</th>\n",
+       "      <th>loan_type</th>\n",
+       "      <th>loan_amount</th>\n",
+       "      <th>interest_rate</th>\n",
+       "      <th>loan_purpose</th>\n",
+       "      <th>income</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>254900IER2H3R8YLBW04</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>105000.0</td>\n",
+       "      <td>2.875</td>\n",
+       "      <td>31</td>\n",
+       "      <td>1530000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>3Y4U8VZURTYWI1W2K376</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>7455000.0</td>\n",
+       "      <td>NA</td>\n",
+       "      <td>4</td>\n",
+       "      <td>94657029.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>549300CS1XP28EERR469</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>75000.0</td>\n",
+       "      <td>4.99</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2030000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>549300CS1XP28EERR469</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>205000.0</td>\n",
+       "      <td>3.75</td>\n",
+       "      <td>1</td>\n",
+       "      <td>7291000.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                    lei  action_taken  loan_type  loan_amount interest_rate  \\\n",
+       "0  254900IER2H3R8YLBW04             1          1     105000.0         2.875   \n",
+       "1  3Y4U8VZURTYWI1W2K376             3          1    7455000.0            NA   \n",
+       "2  549300CS1XP28EERR469             1          1      75000.0          4.99   \n",
+       "3  549300CS1XP28EERR469             1          1     205000.0          3.75   \n",
+       "\n",
+       "   loan_purpose      income  \n",
+       "0            31   1530000.0  \n",
+       "1             4  94657029.0  \n",
+       "2             4   2030000.0  \n",
+       "3             1   7291000.0  "
+      ]
+     },
+     "execution_count": 43,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# what are the loans for individuals with income over $1 million?  Practice WHERE.\n",
+    "pd.read_sql(\"\"\"\n",
+    "SELECT *\n",
+    "FROM loans\n",
+    "WHERE income > 1000000\n",
+    "\"\"\", conn)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 48,
+   "id": "d64a2a5e-e2d4-42a3-a894-9283160d2636",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>lei</th>\n",
+       "      <th>action_taken</th>\n",
+       "      <th>loan_type</th>\n",
+       "      <th>loan_amount</th>\n",
+       "      <th>interest_rate</th>\n",
+       "      <th>loan_purpose</th>\n",
+       "      <th>income</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>549300XWUSRVVOHPRY47</td>\n",
+       "      <td>6</td>\n",
+       "      <td>1</td>\n",
+       "      <td>264185000.0</td>\n",
+       "      <td>NA</td>\n",
+       "      <td>1</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>AD6GFRVSDT01YPT1CS68</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>74755000.0</td>\n",
+       "      <td>1.454</td>\n",
+       "      <td>1</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>AD6GFRVSDT01YPT1CS68</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>66005000.0</td>\n",
+       "      <td>NA</td>\n",
+       "      <td>1</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>YQI2CPR3Z44KAR0HG822</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>65005000.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>254900YA1AQXNM8QVZ06</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>63735000.0</td>\n",
+       "      <td>2.99</td>\n",
+       "      <td>2</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                    lei  action_taken  loan_type  loan_amount interest_rate  \\\n",
+       "0  549300XWUSRVVOHPRY47             6          1  264185000.0            NA   \n",
+       "1  AD6GFRVSDT01YPT1CS68             1          1   74755000.0         1.454   \n",
+       "2  AD6GFRVSDT01YPT1CS68             4          2   66005000.0            NA   \n",
+       "3  YQI2CPR3Z44KAR0HG822             1          1   65005000.0           3.0   \n",
+       "4  254900YA1AQXNM8QVZ06             1          2   63735000.0          2.99   \n",
+       "\n",
+       "   loan_purpose income  \n",
+       "0             1   None  \n",
+       "1             1   None  \n",
+       "2             1   None  \n",
+       "3             1   None  \n",
+       "4             2   None  "
+      ]
+     },
+     "execution_count": 48,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# what are the five biggest loans in terms of dollar amount?  Practice ORDER BY.\n",
+    "pd.read_sql(\"\"\"\n",
+    "SELECT *\n",
+    "FROM loans\n",
+    "ORDER BY loan_amount DESC\n",
+    "LIMIT 5\n",
+    "\"\"\", conn)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "id": "c01b796c-ded3-4e11-81e7-69d7660da9cb",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>action_taken</th>\n",
+       "      <th>loan_type</th>\n",
+       "      <th>lei</th>\n",
+       "      <th>loan_amount</th>\n",
+       "      <th>interest_rate</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Loan purchased by the institution</td>\n",
+       "      <td>Conventional</td>\n",
+       "      <td>549300XWUSRVVOHPRY47</td>\n",
+       "      <td>264185000.0</td>\n",
+       "      <td>NA</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Loan originated</td>\n",
+       "      <td>Conventional</td>\n",
+       "      <td>AD6GFRVSDT01YPT1CS68</td>\n",
+       "      <td>74755000.0</td>\n",
+       "      <td>1.454</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>Application withdrawn by applicant</td>\n",
+       "      <td>FHA-insured</td>\n",
+       "      <td>AD6GFRVSDT01YPT1CS68</td>\n",
+       "      <td>66005000.0</td>\n",
+       "      <td>NA</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Loan originated</td>\n",
+       "      <td>Conventional</td>\n",
+       "      <td>YQI2CPR3Z44KAR0HG822</td>\n",
+       "      <td>65005000.0</td>\n",
+       "      <td>3.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>Loan originated</td>\n",
+       "      <td>FHA-insured</td>\n",
+       "      <td>254900YA1AQXNM8QVZ06</td>\n",
+       "      <td>63735000.0</td>\n",
+       "      <td>2.99</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                         action_taken     loan_type                   lei  \\\n",
+       "0   Loan purchased by the institution  Conventional  549300XWUSRVVOHPRY47   \n",
+       "1                     Loan originated  Conventional  AD6GFRVSDT01YPT1CS68   \n",
+       "2  Application withdrawn by applicant   FHA-insured  AD6GFRVSDT01YPT1CS68   \n",
+       "3                     Loan originated  Conventional  YQI2CPR3Z44KAR0HG822   \n",
+       "4                     Loan originated   FHA-insured  254900YA1AQXNM8QVZ06   \n",
+       "\n",
+       "   loan_amount interest_rate  \n",
+       "0  264185000.0            NA  \n",
+       "1   74755000.0         1.454  \n",
+       "2   66005000.0            NA  \n",
+       "3   65005000.0           3.0  \n",
+       "4   63735000.0          2.99  "
+      ]
+     },
+     "execution_count": 57,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# what are the actions taken and types for those loans (show the text, not numbers)?  Practice INNER JOIN.\n",
+    "pd.read_sql(\"\"\"\n",
+    "SELECT actions.action_taken, loan_types.loan_type, loans.lei, loans.loan_amount, loans.interest_rate\n",
+    "FROM loans\n",
+    "INNER JOIN actions ON loans.action_taken = actions.id\n",
+    "INNER JOIN loan_types ON loans.loan_type = loan_types.id\n",
+    "ORDER BY loan_amount DESC\n",
+    "LIMIT 5\n",
+    "\"\"\", conn)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 62,
+   "id": "b7d4a687-70bf-46e7-a715-013977358d05",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>lei</th>\n",
+       "      <th>action_taken</th>\n",
+       "      <th>loan_type</th>\n",
+       "      <th>loan_amount</th>\n",
+       "      <th>interest_rate</th>\n",
+       "      <th>loan_purpose</th>\n",
+       "      <th>income</th>\n",
+       "      <th>id</th>\n",
+       "      <th>loan_purpose</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>None</td>\n",
+       "      <td>None</td>\n",
+       "      <td>None</td>\n",
+       "      <td>None</td>\n",
+       "      <td>None</td>\n",
+       "      <td>None</td>\n",
+       "      <td>None</td>\n",
+       "      <td>3</td>\n",
+       "      <td>Refinancing</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    lei action_taken loan_type loan_amount interest_rate loan_purpose income  \\\n",
+       "0  None         None      None        None          None         None   None   \n",
+       "\n",
+       "   id loan_purpose  \n",
+       "0   3  Refinancing  "
+      ]
+     },
+     "execution_count": 62,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# what is a loan_purpose that doesn't appear in the loans table?  Practice LEFT/RIGHT JOIN.\n",
+    "pd.read_sql(\"\"\"\n",
+    "SELECT *\n",
+    "FROM loans\n",
+    "RIGHT JOIN purposes ON loans.loan_purpose = purposes.id\n",
+    "WHERE loans.loan_purpose IS NULL\n",
+    "\"\"\", conn)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fc73517c-cf57-4a91-bb9d-2fbfc76544d5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# how many rows are in the table?  Practice COUNT(*)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e91feeee-8689-4f57-a991-f6ca3fee2a6d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# how many non-null values are in the income column?  Practice COUNT(column)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "12333532-0618-4ed4-b71b-260a4f35e581",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# what is the average interest rate for loans of type \"Conventional\"?  Practice AVG."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "23c00af3-e385-435a-8bf6-f5cfe64f02db",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# how many loans are there of each type?  Practice GROUP BY."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2400a6a4-7056-47e0-b202-3bbb85a77b2f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# which loan types appear at least 10,000 times?  Practice HAVING."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/lec/15-sql/lec2.ipynb b/lec/15-sql/lec2.ipynb
new file mode 100644
index 0000000..6fa2118
--- /dev/null
+++ b/lec/15-sql/lec2.ipynb
@@ -0,0 +1,1505 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "34c3d038-ece1-448a-8613-9f950fd70fb2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sqlalchemy import create_engine, text\n",
+    "engine = create_engine(\"mysql+mysqlconnector://root:abc@127.0.0.1:3306/cs544\")\n",
+    "conn = engine.connect()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "d8c59b65-c21d-4853-b54a-77364d8009ad",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[]"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "list(conn.execute(text(\"show tables\")))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "cad5f150-10c2-4d87-a45e-ac21622862db",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<sqlalchemy.engine.cursor.CursorResult at 0x7696940966d0>"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# table: users\n",
+    "# columns: id, name, phone\n",
+    "# name is required\n",
+    "# id uniquely identifies row\n",
+    "conn.execute(text(\"\"\"\n",
+    "    create table users (\n",
+    "        id int,\n",
+    "        name text NOT NULL,\n",
+    "        phone text,\n",
+    "        PRIMARY KEY (id)\n",
+    "    )\n",
+    "\"\"\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "7c53fd18-7db7-419a-a884-f5a73de25b6f",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<sqlalchemy.engine.cursor.CursorResult at 0x769694096cf0>"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "conn.execute(text(\"\"\"\n",
+    "    create table accounts (\n",
+    "        user_id int,\n",
+    "        name text NOT NULL,\n",
+    "        amount int NOT NULL,\n",
+    "        FOREIGN KEY (user_id) REFERENCES users (id)\n",
+    "    )\n",
+    "\"\"\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "7f694879-f48c-4336-89fb-03c2b423cdf2",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[('accounts',), ('users',)]"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "list(conn.execute(text(\"show tables\")))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "4a0f1e2c-0e17-41bb-ab9c-49c93351f18e",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<sqlalchemy.engine.cursor.CursorResult at 0x769694096970>"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "conn.execute(text(\"\"\"\n",
+    "INSERT INTO users (id, name) VALUES (1, \"tyler\")\n",
+    "\"\"\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "a2e34483-f7f0-4659-91db-d587f08de40b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# conn.execute(text(\"\"\"\n",
+    "# INSERT INTO users (id, name) VALUES (1, \"tyler\")\n",
+    "# \"\"\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "88ff233c-ff1b-4d4c-8060-28b216898734",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "conn.commit()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "7b93f8cb-bc35-49bd-be82-7c48af5d2f56",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<sqlalchemy.engine.cursor.CursorResult at 0x76969c5c8e50>"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "conn.execute(text(\"\"\"\n",
+    "INSERT INTO accounts (user_id, name, amount) VALUES (1, \"A\", 10), (1, \"B\", 20)\n",
+    "\"\"\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "4cd70a0c-ca3b-44bb-aacc-8dc95ad02d29",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "conn.commit()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "f77e36c7-0ee3-4dd8-80f5-10321c1c78cd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# conn.execute(text(\"\"\"\n",
+    "# DELETE FROM users WHERE id = 1\n",
+    "# \"\"\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "a0862124-c44a-42bb-a63d-d160dd1312f4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "url = \"https://raw.githubusercontent.com/cfpb/api/master/resources/datasets/hmda/code_sheets/\"\n",
+    "df = pd.read_csv(url + \"action_taken.csv\")\n",
+    "df.to_sql(\"actions\", conn, index=False, if_exists=\"replace\")\n",
+    "df = pd.read_csv(url + \"loan_type.csv\")\n",
+    "df.to_sql(\"loan_types\", conn, index=False, if_exists=\"replace\")\n",
+    "df = pd.read_csv(url + \"loan_purpose.csv\")\n",
+    "df.to_sql(\"purposes\", conn, index=False, if_exists=\"replace\")\n",
+    "conn.commit()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "3e6ac1cc-5f2b-4c43-a3e7-862713ff66f0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pyarrow as pa\n",
+    "import pyarrow.parquet\n",
+    "t = pa.parquet.read_table(\n",
+    "    \"loans.parquet\", \n",
+    "    columns=[\"lei\", \"action_taken\", \"loan_type\", \"loan_amount\",\n",
+    "             \"interest_rate\", \"loan_purpose\", \"income\"]\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "id": "504b747e-7b4c-4528-845a-32a03a9e2a8e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "t.to_pandas().to_sql(\"loans\", conn, index=False, if_exists=\"replace\", chunksize=10_000)\n",
+    "conn.commit()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1fc118ae-84c9-423b-8038-c0d44bc9a443",
+   "metadata": {},
+   "source": [
+    "# Transactions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "id": "4cb87a68-c82a-404e-bf43-1360753aba41",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "remaining: -1\n",
+      "rollback!\n"
+     ]
+    }
+   ],
+   "source": [
+    "conn.execute(text(\"\"\"\n",
+    "update accounts set amount = amount - 5 WHERE name = 'A'\n",
+    "\"\"\"))\n",
+    "\n",
+    "conn.execute(text(\"\"\"\n",
+    "update accounts set amount = amount + 5 WHERE name = 'B'\n",
+    "\"\"\"))\n",
+    "\n",
+    "remaining_amount = list(conn.execute(text(\"\"\"\n",
+    "select amount from accounts WHERE name = 'A'\n",
+    "\"\"\")))[0][0]\n",
+    "print(\"remaining:\", remaining_amount)\n",
+    "\n",
+    "if remaining_amount >= 0:\n",
+    "    print(\"commit!\")\n",
+    "    conn.commit()\n",
+    "else:\n",
+    "    print(\"rollback!\")\n",
+    "    conn.rollback()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "019705c1-82e6-4434-9359-0843d6b2d8c5",
+   "metadata": {},
+   "source": [
+    "# Analysis/SQL Queries"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "id": "35290388-dbfe-451f-94ba-d9ef8dee4e00",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>id</th>\n",
+       "      <th>action_taken</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>Loan originated</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2</td>\n",
+       "      <td>Application approved but not accepted</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>3</td>\n",
+       "      <td>Application denied by financial institution</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>4</td>\n",
+       "      <td>Application withdrawn by applicant</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>5</td>\n",
+       "      <td>File closed for incompleteness</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>6</td>\n",
+       "      <td>Loan purchased by the institution</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>7</td>\n",
+       "      <td>Preapproval request denied by financial instit...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>8</td>\n",
+       "      <td>Preapproval request approved but not accepted</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   id                                       action_taken\n",
+       "0   1                                    Loan originated\n",
+       "1   2              Application approved but not accepted\n",
+       "2   3        Application denied by financial institution\n",
+       "3   4                 Application withdrawn by applicant\n",
+       "4   5                     File closed for incompleteness\n",
+       "5   6                  Loan purchased by the institution\n",
+       "6   7  Preapproval request denied by financial instit...\n",
+       "7   8      Preapproval request approved but not accepted"
+      ]
+     },
+     "execution_count": 34,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# what are all the possible actions?  Practice SELECT/FROM.\n",
+    "pd.read_sql(\"\"\"\n",
+    "SELECT *\n",
+    "FROM actions\n",
+    "\"\"\", conn)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "id": "cdbedc10-8d92-4c1a-83aa-9c604e447d1e",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>lei</th>\n",
+       "      <th>action_taken</th>\n",
+       "      <th>loan_type</th>\n",
+       "      <th>loan_amount</th>\n",
+       "      <th>interest_rate</th>\n",
+       "      <th>loan_purpose</th>\n",
+       "      <th>income</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>54930034MNPILHP25H80</td>\n",
+       "      <td>6</td>\n",
+       "      <td>1</td>\n",
+       "      <td>305000.0</td>\n",
+       "      <td>3.875</td>\n",
+       "      <td>1</td>\n",
+       "      <td>108.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>54930034MNPILHP25H80</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>65000.0</td>\n",
+       "      <td>NA</td>\n",
+       "      <td>1</td>\n",
+       "      <td>103.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>54930034MNPILHP25H80</td>\n",
+       "      <td>6</td>\n",
+       "      <td>1</td>\n",
+       "      <td>75000.0</td>\n",
+       "      <td>3.25</td>\n",
+       "      <td>1</td>\n",
+       "      <td>146.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>54930034MNPILHP25H80</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>155000.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>32</td>\n",
+       "      <td>70.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>54930034MNPILHP25H80</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>305000.0</td>\n",
+       "      <td>3.25</td>\n",
+       "      <td>1</td>\n",
+       "      <td>71.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>54930034MNPILHP25H80</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>175000.0</td>\n",
+       "      <td>3.375</td>\n",
+       "      <td>1</td>\n",
+       "      <td>117.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>54930034MNPILHP25H80</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>575000.0</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>1</td>\n",
+       "      <td>180.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>54930034MNPILHP25H80</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>105000.0</td>\n",
+       "      <td>5.375</td>\n",
+       "      <td>1</td>\n",
+       "      <td>180.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>54930034MNPILHP25H80</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>85000.0</td>\n",
+       "      <td>3.375</td>\n",
+       "      <td>1</td>\n",
+       "      <td>136.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>549300FQ2SN6TRRGB032</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>405000.0</td>\n",
+       "      <td>Exempt</td>\n",
+       "      <td>1</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                    lei  action_taken  loan_type  loan_amount interest_rate  \\\n",
+       "0  54930034MNPILHP25H80             6          1     305000.0         3.875   \n",
+       "1  54930034MNPILHP25H80             4          1      65000.0            NA   \n",
+       "2  54930034MNPILHP25H80             6          1      75000.0          3.25   \n",
+       "3  54930034MNPILHP25H80             1          1     155000.0           4.0   \n",
+       "4  54930034MNPILHP25H80             1          1     305000.0          3.25   \n",
+       "5  54930034MNPILHP25H80             1          1     175000.0         3.375   \n",
+       "6  54930034MNPILHP25H80             1          1     575000.0           4.5   \n",
+       "7  54930034MNPILHP25H80             1          1     105000.0         5.375   \n",
+       "8  54930034MNPILHP25H80             1          1      85000.0         3.375   \n",
+       "9  549300FQ2SN6TRRGB032             1          1     405000.0        Exempt   \n",
+       "\n",
+       "   loan_purpose  income  \n",
+       "0             1   108.0  \n",
+       "1             1   103.0  \n",
+       "2             1   146.0  \n",
+       "3            32    70.0  \n",
+       "4             1    71.0  \n",
+       "5             1   117.0  \n",
+       "6             1   180.0  \n",
+       "7             1   180.0  \n",
+       "8             1   136.0  \n",
+       "9             1     NaN  "
+      ]
+     },
+     "execution_count": 35,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# what are the first 10 loans?  Practice LIMIT.\n",
+    "pd.read_sql(\"\"\"\n",
+    "SELECT *\n",
+    "FROM loans\n",
+    "LIMIT 10\n",
+    "\"\"\", conn)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4cc63602-f5d1-4502-b955-54d504dfcb40",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# projection: choosing what columns (SELECT)\n",
+    "# selection: filtering rows (WHERE)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "id": "c0eb9c38-ff94-486a-b34e-e241d8f69d01",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>interest_rate</th>\n",
+       "      <th>amount_in_thousands</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>3.875</td>\n",
+       "      <td>305.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>NA</td>\n",
+       "      <td>65.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>3.25</td>\n",
+       "      <td>75.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>4.0</td>\n",
+       "      <td>155.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>3.25</td>\n",
+       "      <td>305.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>3.375</td>\n",
+       "      <td>175.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>4.5</td>\n",
+       "      <td>575.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>5.375</td>\n",
+       "      <td>105.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>3.375</td>\n",
+       "      <td>85.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>Exempt</td>\n",
+       "      <td>405.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  interest_rate  amount_in_thousands\n",
+       "0         3.875                305.0\n",
+       "1            NA                 65.0\n",
+       "2          3.25                 75.0\n",
+       "3           4.0                155.0\n",
+       "4          3.25                305.0\n",
+       "5         3.375                175.0\n",
+       "6           4.5                575.0\n",
+       "7         5.375                105.0\n",
+       "8         3.375                 85.0\n",
+       "9        Exempt                405.0"
+      ]
+     },
+     "execution_count": 38,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# what are the first 10 interest rates and loan amounts (in thousands)?  Practice SELECT.\n",
+    "pd.read_sql(\"\"\"\n",
+    "SELECT interest_rate, loan_amount / 1000 AS amount_in_thousands\n",
+    "FROM loans\n",
+    "LIMIT 10\n",
+    "\"\"\", conn)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "id": "29f2e695-fc92-412c-bc1c-5c06f4200d25",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>lei</th>\n",
+       "      <th>action_taken</th>\n",
+       "      <th>loan_type</th>\n",
+       "      <th>loan_amount</th>\n",
+       "      <th>interest_rate</th>\n",
+       "      <th>loan_purpose</th>\n",
+       "      <th>income</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>254900IER2H3R8YLBW04</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>105000.0</td>\n",
+       "      <td>2.875</td>\n",
+       "      <td>31</td>\n",
+       "      <td>1530000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>3Y4U8VZURTYWI1W2K376</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>7455000.0</td>\n",
+       "      <td>NA</td>\n",
+       "      <td>4</td>\n",
+       "      <td>94657029.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>549300CS1XP28EERR469</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>75000.0</td>\n",
+       "      <td>4.99</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2030000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>549300CS1XP28EERR469</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>205000.0</td>\n",
+       "      <td>3.75</td>\n",
+       "      <td>1</td>\n",
+       "      <td>7291000.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                    lei  action_taken  loan_type  loan_amount interest_rate  \\\n",
+       "0  254900IER2H3R8YLBW04             1          1     105000.0         2.875   \n",
+       "1  3Y4U8VZURTYWI1W2K376             3          1    7455000.0            NA   \n",
+       "2  549300CS1XP28EERR469             1          1      75000.0          4.99   \n",
+       "3  549300CS1XP28EERR469             1          1     205000.0          3.75   \n",
+       "\n",
+       "   loan_purpose      income  \n",
+       "0            31   1530000.0  \n",
+       "1             4  94657029.0  \n",
+       "2             4   2030000.0  \n",
+       "3             1   7291000.0  "
+      ]
+     },
+     "execution_count": 39,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# what are the loans for individuals with income over $1 million?  Practice WHERE.\n",
+    "pd.read_sql(\"\"\"\n",
+    "SELECT *\n",
+    "FROM loans\n",
+    "WHERE income > 1000000\n",
+    "\"\"\", conn)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "id": "b0808413-c09d-4d5f-9c03-b7f5747d3205",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>lei</th>\n",
+       "      <th>action_taken</th>\n",
+       "      <th>loan_type</th>\n",
+       "      <th>loan_amount</th>\n",
+       "      <th>interest_rate</th>\n",
+       "      <th>loan_purpose</th>\n",
+       "      <th>income</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>549300XWUSRVVOHPRY47</td>\n",
+       "      <td>6</td>\n",
+       "      <td>1</td>\n",
+       "      <td>264185000.0</td>\n",
+       "      <td>NA</td>\n",
+       "      <td>1</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>AD6GFRVSDT01YPT1CS68</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>74755000.0</td>\n",
+       "      <td>1.454</td>\n",
+       "      <td>1</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>AD6GFRVSDT01YPT1CS68</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>66005000.0</td>\n",
+       "      <td>NA</td>\n",
+       "      <td>1</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>YQI2CPR3Z44KAR0HG822</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>65005000.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>254900YA1AQXNM8QVZ06</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>63735000.0</td>\n",
+       "      <td>2.99</td>\n",
+       "      <td>2</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                    lei  action_taken  loan_type  loan_amount interest_rate  \\\n",
+       "0  549300XWUSRVVOHPRY47             6          1  264185000.0            NA   \n",
+       "1  AD6GFRVSDT01YPT1CS68             1          1   74755000.0         1.454   \n",
+       "2  AD6GFRVSDT01YPT1CS68             4          2   66005000.0            NA   \n",
+       "3  YQI2CPR3Z44KAR0HG822             1          1   65005000.0           3.0   \n",
+       "4  254900YA1AQXNM8QVZ06             1          2   63735000.0          2.99   \n",
+       "\n",
+       "   loan_purpose income  \n",
+       "0             1   None  \n",
+       "1             1   None  \n",
+       "2             1   None  \n",
+       "3             1   None  \n",
+       "4             2   None  "
+      ]
+     },
+     "execution_count": 41,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# what are the five biggest loans in terms of dollar amount?  Practice ORDER BY.\n",
+    "pd.read_sql(\"\"\"\n",
+    "SELECT *\n",
+    "FROM loans\n",
+    "ORDER BY loan_amount DESC\n",
+    "LIMIT 5\n",
+    "\"\"\", conn)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 50,
+   "id": "77bc2433-5cdb-4134-a75a-6ddbf5e34661",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>action_taken</th>\n",
+       "      <th>loan_type</th>\n",
+       "      <th>lei</th>\n",
+       "      <th>action_taken</th>\n",
+       "      <th>loan_type</th>\n",
+       "      <th>loan_amount</th>\n",
+       "      <th>interest_rate</th>\n",
+       "      <th>loan_purpose</th>\n",
+       "      <th>income</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Loan purchased by the institution</td>\n",
+       "      <td>Conventional</td>\n",
+       "      <td>549300XWUSRVVOHPRY47</td>\n",
+       "      <td>6</td>\n",
+       "      <td>1</td>\n",
+       "      <td>264185000.0</td>\n",
+       "      <td>NA</td>\n",
+       "      <td>1</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Loan originated</td>\n",
+       "      <td>Conventional</td>\n",
+       "      <td>AD6GFRVSDT01YPT1CS68</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>74755000.0</td>\n",
+       "      <td>1.454</td>\n",
+       "      <td>1</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>Application withdrawn by applicant</td>\n",
+       "      <td>FHA-insured</td>\n",
+       "      <td>AD6GFRVSDT01YPT1CS68</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>66005000.0</td>\n",
+       "      <td>NA</td>\n",
+       "      <td>1</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Loan originated</td>\n",
+       "      <td>Conventional</td>\n",
+       "      <td>YQI2CPR3Z44KAR0HG822</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>65005000.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>Loan originated</td>\n",
+       "      <td>FHA-insured</td>\n",
+       "      <td>254900YA1AQXNM8QVZ06</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>63735000.0</td>\n",
+       "      <td>2.99</td>\n",
+       "      <td>2</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                         action_taken     loan_type                   lei  \\\n",
+       "0   Loan purchased by the institution  Conventional  549300XWUSRVVOHPRY47   \n",
+       "1                     Loan originated  Conventional  AD6GFRVSDT01YPT1CS68   \n",
+       "2  Application withdrawn by applicant   FHA-insured  AD6GFRVSDT01YPT1CS68   \n",
+       "3                     Loan originated  Conventional  YQI2CPR3Z44KAR0HG822   \n",
+       "4                     Loan originated   FHA-insured  254900YA1AQXNM8QVZ06   \n",
+       "\n",
+       "   action_taken  loan_type  loan_amount interest_rate  loan_purpose income  \n",
+       "0             6          1  264185000.0            NA             1   None  \n",
+       "1             1          1   74755000.0         1.454             1   None  \n",
+       "2             4          2   66005000.0            NA             1   None  \n",
+       "3             1          1   65005000.0           3.0             1   None  \n",
+       "4             1          2   63735000.0          2.99             2   None  "
+      ]
+     },
+     "execution_count": 50,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# what are the actions taken and types for those loans (show the text, not numbers)?  Practice INNER JOIN.\n",
+    "pd.read_sql(\"\"\"\n",
+    "SELECT actions.action_taken, loan_types.loan_type, loans.*\n",
+    "FROM loans\n",
+    "INNER JOIN actions ON loans.action_taken = actions.id\n",
+    "INNER JOIN loan_types ON loans.loan_type = loan_types.id\n",
+    "ORDER BY loan_amount DESC\n",
+    "LIMIT 5\n",
+    "\"\"\", conn)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "id": "fe7e1b59-644c-4b12-8c1e-38dce7fdd53b",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>lei</th>\n",
+       "      <th>action_taken</th>\n",
+       "      <th>loan_type</th>\n",
+       "      <th>loan_amount</th>\n",
+       "      <th>interest_rate</th>\n",
+       "      <th>loan_purpose</th>\n",
+       "      <th>income</th>\n",
+       "      <th>id</th>\n",
+       "      <th>loan_purpose</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>None</td>\n",
+       "      <td>None</td>\n",
+       "      <td>None</td>\n",
+       "      <td>None</td>\n",
+       "      <td>None</td>\n",
+       "      <td>None</td>\n",
+       "      <td>None</td>\n",
+       "      <td>3</td>\n",
+       "      <td>Refinancing</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    lei action_taken loan_type loan_amount interest_rate loan_purpose income  \\\n",
+       "0  None         None      None        None          None         None   None   \n",
+       "\n",
+       "   id loan_purpose  \n",
+       "0   3  Refinancing  "
+      ]
+     },
+     "execution_count": 53,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# what is a loan_purpose that doesn't appear in the loans table?  Practice LEFT/RIGHT JOIN.\n",
+    "pd.read_sql(\"\"\"\n",
+    "SELECT *\n",
+    "FROM loans\n",
+    "RIGHT JOIN purposes ON loans.loan_purpose = purposes.id\n",
+    "WHERE loans.loan_purpose IS NULL\n",
+    "\"\"\", conn)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "id": "b0ce8f29-49ef-4e1c-9f78-e42e4242f7b6",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>COUNT(*)</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>447367</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   COUNT(*)\n",
+       "0    447367"
+      ]
+     },
+     "execution_count": 54,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# how many rows are in the table?  Practice COUNT(*).\n",
+    "pd.read_sql(\"\"\"\n",
+    "SELECT COUNT(*)\n",
+    "FROM loans\n",
+    "\"\"\", conn)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 55,
+   "id": "17652759-6909-47e7-86d9-efd91c374cf3",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>COUNT(income)</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>399948</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   COUNT(income)\n",
+       "0         399948"
+      ]
+     },
+     "execution_count": 55,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# how many non-null values are in the income column?  Practice COUNT(column).\n",
+    "pd.read_sql(\"\"\"\n",
+    "SELECT COUNT(income)\n",
+    "FROM loans\n",
+    "\"\"\", conn)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 56,
+   "id": "6c029b7b-5b29-4be9-be5b-83ee246e5f30",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>AVG(interest_rate)</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>2.21657</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   AVG(interest_rate)\n",
+       "0             2.21657"
+      ]
+     },
+     "execution_count": 56,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# what is the average interest rate for loans of type \"Conventional\"?  Practice AVG.\n",
+    "pd.read_sql(\"\"\"\n",
+    "SELECT AVG(interest_rate)\n",
+    "FROM loans\n",
+    "INNER JOIN loan_types ON loans.loan_type = loan_types.id\n",
+    "WHERE loan_types.loan_type = \"Conventional\"\n",
+    "\"\"\", conn)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "id": "40d65b3d-ed6f-4318-8e95-b03dcc668d01",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>loan_type</th>\n",
+       "      <th>COUNT(*)</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Conventional</td>\n",
+       "      <td>389217</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>VA-guaranteed</td>\n",
+       "      <td>24551</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>FHA-insured</td>\n",
+       "      <td>30496</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>FSA/RHS-guaranteed</td>\n",
+       "      <td>3103</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "            loan_type  COUNT(*)\n",
+       "0        Conventional    389217\n",
+       "1       VA-guaranteed     24551\n",
+       "2         FHA-insured     30496\n",
+       "3  FSA/RHS-guaranteed      3103"
+      ]
+     },
+     "execution_count": 58,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# how many loans are there of each type?  Practice GROUP BY.\n",
+    "pd.read_sql(\"\"\"\n",
+    "SELECT loan_types.loan_type, COUNT(*)\n",
+    "FROM loans\n",
+    "INNER JOIN loan_types ON loans.loan_type = loan_types.id\n",
+    "GROUP BY loan_types.loan_type\n",
+    "\"\"\", conn)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 59,
+   "id": "c1bf9134-2694-44e4-9a38-bcb5b51fa2e8",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>loan_type</th>\n",
+       "      <th>count</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Conventional</td>\n",
+       "      <td>389217</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>VA-guaranteed</td>\n",
+       "      <td>24551</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>FHA-insured</td>\n",
+       "      <td>30496</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       loan_type   count\n",
+       "0   Conventional  389217\n",
+       "1  VA-guaranteed   24551\n",
+       "2    FHA-insured   30496"
+      ]
+     },
+     "execution_count": 59,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# which loan types appear at least 10,000 times?  Practice HAVING.\n",
+    "# how many loans are there of each type?  Practice GROUP BY.\n",
+    "pd.read_sql(\"\"\"\n",
+    "SELECT loan_types.loan_type, COUNT(*) AS count\n",
+    "FROM loans\n",
+    "INNER JOIN loan_types ON loans.loan_type = loan_types.idGROUP BY loan_types.loan_type\n",
+    "HAVING count >= 10000\n",
+    "\"\"\", conn)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
-- 
GitLab