From 9869b90e97d9a34ced6f07fefc0306580d787192 Mon Sep 17 00:00:00 2001
From: TYLER CARAZA-HARTER <tharter@cs544-tharter.cs.wisc.edu>
Date: Fri, 21 Mar 2025 15:25:54 -0500
Subject: [PATCH] lec demos

---
 lec/26-cassandra/src/lec1.ipynb |  401 +++++++++++
 lec/26-cassandra/src/lec2.ipynb |  583 ++++++++++++++++
 lec/27-cassandra/hash.ipynb     |  261 +++++++
 lec/27-cassandra/lec.ipynb      | 1166 +++++++++++++++++++++++++++++++
 p6/Dockerfile.cassandra         |    4 +-
 5 files changed, 2413 insertions(+), 2 deletions(-)
 create mode 100644 lec/26-cassandra/src/lec1.ipynb
 create mode 100644 lec/26-cassandra/src/lec2.ipynb
 create mode 100644 lec/27-cassandra/hash.ipynb
 create mode 100644 lec/27-cassandra/lec.ipynb

diff --git a/lec/26-cassandra/src/lec1.ipynb b/lec/26-cassandra/src/lec1.ipynb
new file mode 100644
index 0000000..7fc65ca
--- /dev/null
+++ b/lec/26-cassandra/src/lec1.ipynb
@@ -0,0 +1,401 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "0ff1c81b-9867-4f98-b227-c89871ff04bf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from cassandra.cluster import Cluster\n",
+    "cluster = Cluster([\"p6-db-1\", \"p6-db-2\", \"p6-db-3\"])\n",
+    "cass = cluster.connect()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "3d807c68-ac2a-4fe3-8153-919ea4c0f302",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<cassandra.cluster.ResultSet at 0x76c2245a83d0>"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cass.execute(\"use banking\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "93a16129-d410-4bce-9034-4c792a74188b",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<cassandra.cluster.ResultSet at 0x76c21c122500>"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cass.execute(\"\"\"\n",
+    "CREATE TABLE loans (\n",
+    "    bank_id INT,\n",
+    "    bank_name text STATIC,\n",
+    "    loan_id UUID,\n",
+    "    amount int,\n",
+    "    state text,\n",
+    "    PRIMARY KEY ((bank_id), amount, loan_id)\n",
+    ") WITH CLUSTERING ORDER BY (amount DESC)\n",
+    "\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "ec0308f4-5132-4e1e-a24e-579e06cc3105",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CREATE TABLE banking.loans (\n",
+      "    bank_id int,\n",
+      "    amount int,\n",
+      "    loan_id uuid,\n",
+      "    bank_name text static,\n",
+      "    state text,\n",
+      "    PRIMARY KEY (bank_id, amount, loan_id)\n",
+      ") WITH CLUSTERING ORDER BY (amount DESC, loan_id ASC)\n",
+      "    AND additional_write_policy = '99p'\n",
+      "    AND allow_auto_snapshot = true\n",
+      "    AND bloom_filter_fp_chance = 0.01\n",
+      "    AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}\n",
+      "    AND cdc = false\n",
+      "    AND comment = ''\n",
+      "    AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}\n",
+      "    AND compression = {'chunk_length_in_kb': '16', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}\n",
+      "    AND memtable = 'default'\n",
+      "    AND crc_check_chance = 1.0\n",
+      "    AND default_time_to_live = 0\n",
+      "    AND extensions = {}\n",
+      "    AND gc_grace_seconds = 864000\n",
+      "    AND incremental_backups = true\n",
+      "    AND max_index_interval = 2048\n",
+      "    AND memtable_flush_period_in_ms = 0\n",
+      "    AND min_index_interval = 128\n",
+      "    AND read_repair = 'BLOCKING'\n",
+      "    AND speculative_retry = '99p';\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(cass.execute(\"DESCRIBE TABLE loans\").one().create_statement)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "7350e0c3-95b7-404e-b6f6-a469eac7169a",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<cassandra.cluster.ResultSet at 0x76c21cd73160>"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# INSERT in Cassandra is an UPSERT: update or insert\n",
+    "cass.execute(\"\"\"\n",
+    "INSERT INTO loans (bank_id, bank_name)\n",
+    "VALUES (544, 'test2')\n",
+    "\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "f10be20a-15bb-4092-9a08-272698012813",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>bank_id</th>\n",
+       "      <th>amount</th>\n",
+       "      <th>loan_id</th>\n",
+       "      <th>bank_name</th>\n",
+       "      <th>state</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>544</td>\n",
+       "      <td>None</td>\n",
+       "      <td>None</td>\n",
+       "      <td>test2</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   bank_id amount loan_id bank_name state\n",
+       "0      544   None    None     test2  None"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "pd.DataFrame(cass.execute(\"SELECT * FROM loans\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "be2f1f47-f4be-49b5-90cf-8a9e0d69024a",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<cassandra.cluster.ResultSet at 0x76c21c2aceb0>"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cass.execute(\"\"\"\n",
+    "INSERT INTO loans (bank_id, bank_name, loan_id, amount)\n",
+    "VALUES (544, 'test2', UUID(), 300)\n",
+    "\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "498c868d-41ee-4bc5-bb8d-d8a4e10b4464",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>bank_id</th>\n",
+       "      <th>amount</th>\n",
+       "      <th>loan_id</th>\n",
+       "      <th>bank_name</th>\n",
+       "      <th>state</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>544</td>\n",
+       "      <td>300</td>\n",
+       "      <td>9e912c13-91ad-4d9c-b29c-0ac48d6231fe</td>\n",
+       "      <td>test2</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   bank_id  amount                               loan_id bank_name state\n",
+       "0      544     300  9e912c13-91ad-4d9c-b29c-0ac48d6231fe     test2  None"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.DataFrame(cass.execute(\"SELECT * FROM loans\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "714bedc8-6a48-4aef-b706-cd30c274473b",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<cassandra.cluster.ResultSet at 0x76c21d7a3910>"
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# NOW() and UUID() both generate UUIDs, which are supposed to be universally unique.\n",
+    "# NOW() uses MAC addresses and timestamps to guarantee uniqueness.\n",
+    "cass.execute(\"\"\"\n",
+    "INSERT INTO loans (bank_id, bank_name, loan_id, amount, state)\n",
+    "VALUES (544, 'mybank', NOW(), 400, 'wi')\n",
+    "\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "id": "7af36f0e-5db8-424d-9b08-1e8aacc2dacf",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>bank_id</th>\n",
+       "      <th>amount</th>\n",
+       "      <th>loan_id</th>\n",
+       "      <th>bank_name</th>\n",
+       "      <th>state</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>544</td>\n",
+       "      <td>400</td>\n",
+       "      <td>05601530-0662-11f0-9e42-b531eb6d9b34</td>\n",
+       "      <td>mybank</td>\n",
+       "      <td>wi</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>544</td>\n",
+       "      <td>300</td>\n",
+       "      <td>9e912c13-91ad-4d9c-b29c-0ac48d6231fe</td>\n",
+       "      <td>mybank</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   bank_id  amount                               loan_id bank_name state\n",
+       "0      544     400  05601530-0662-11f0-9e42-b531eb6d9b34    mybank    wi\n",
+       "1      544     300  9e912c13-91ad-4d9c-b29c-0ac48d6231fe    mybank  None"
+      ]
+     },
+     "execution_count": 23,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# the INSERT above added a new row AND upserted the partition's static column (bank_name)\n",
+    "pd.DataFrame(cass.execute(\"SELECT * FROM loans\"))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/lec/26-cassandra/src/lec2.ipynb b/lec/26-cassandra/src/lec2.ipynb
new file mode 100644
index 0000000..ea06f4d
--- /dev/null
+++ b/lec/26-cassandra/src/lec2.ipynb
@@ -0,0 +1,583 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "11035b24-6195-412f-af98-50e41ce8b3d0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from cassandra.cluster import Cluster\n",
+    "cluster = Cluster([\"p6-db-1\", \"p6-db-2\", \"p6-db-3\"])\n",
+    "cass = cluster.connect()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "480162ed-d618-4dde-bda6-03249f609a69",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<cassandra.cluster.ResultSet at 0x70c6d9b05330>"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cass.execute(\"use banking\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "ec630031-d518-4747-ac1f-8ec40aa43251",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<cassandra.cluster.ResultSet at 0x70c6d9b06500>"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cass.execute(\"\"\"\n",
+    "create table loans (\n",
+    "    bank_id int,\n",
+    "    bank_name text STATIC,\n",
+    "    loan_id UUID,\n",
+    "    amount int,\n",
+    "    state text,\n",
+    "    PRIMARY KEY ((bank_id), amount, loan_id)\n",
+    ") WITH CLUSTERING ORDER BY (amount DESC)\n",
+    "\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "286ead09-fbf5-491d-831e-fb0056edd134",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CREATE TABLE banking.loans (\n",
+      "    bank_id int,\n",
+      "    amount int,\n",
+      "    loan_id uuid,\n",
+      "    bank_name text static,\n",
+      "    state text,\n",
+      "    PRIMARY KEY (bank_id, amount, loan_id)\n",
+      ") WITH CLUSTERING ORDER BY (amount DESC, loan_id ASC)\n",
+      "    AND additional_write_policy = '99p'\n",
+      "    AND allow_auto_snapshot = true\n",
+      "    AND bloom_filter_fp_chance = 0.01\n",
+      "    AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}\n",
+      "    AND cdc = false\n",
+      "    AND comment = ''\n",
+      "    AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}\n",
+      "    AND compression = {'chunk_length_in_kb': '16', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}\n",
+      "    AND memtable = 'default'\n",
+      "    AND crc_check_chance = 1.0\n",
+      "    AND default_time_to_live = 0\n",
+      "    AND extensions = {}\n",
+      "    AND gc_grace_seconds = 864000\n",
+      "    AND incremental_backups = true\n",
+      "    AND max_index_interval = 2048\n",
+      "    AND memtable_flush_period_in_ms = 0\n",
+      "    AND min_index_interval = 128\n",
+      "    AND read_repair = 'BLOCKING'\n",
+      "    AND speculative_retry = '99p';\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(cass.execute(\"describe table loans\").one().create_statement)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "c4f6335b-6340-455e-921c-4b6ac2e1d67b",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<cassandra.cluster.ResultSet at 0x70c6d17263e0>"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cass.execute(\"\"\"\n",
+    "INSERT INTO loans (bank_id, bank_name)\n",
+    "VALUES (544, 'test')\n",
+    "\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "55c10edd-126d-4a10-98c8-708f2b337f99",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<cassandra.cluster.ResultSet at 0x70c6d9b056c0>"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# INSERT is actually UPSERT (insert or update)\n",
+    "cass.execute(\"\"\"\n",
+    "INSERT INTO loans (bank_id, bank_name)\n",
+    "VALUES (544, 'test2')\n",
+    "\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "99035291-59eb-4c00-99a1-9737d4e406d9",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>bank_id</th>\n",
+       "      <th>amount</th>\n",
+       "      <th>loan_id</th>\n",
+       "      <th>bank_name</th>\n",
+       "      <th>state</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>544</td>\n",
+       "      <td>None</td>\n",
+       "      <td>None</td>\n",
+       "      <td>test2</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   bank_id amount loan_id bank_name state\n",
+       "0      544   None    None     test2  None"
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "pd.DataFrame(cass.execute(\"SELECT * FROM loans\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "id": "896b8215-7282-4b73-9ad2-8f0761879253",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<cassandra.cluster.ResultSet at 0x70c6d1726830>"
+      ]
+     },
+     "execution_count": 24,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cass.execute(\"\"\"\n",
+    "INSERT INTO loans (bank_id, bank_name, loan_id, amount)\n",
+    "VALUES (544, 'test2', UUID(), 300)\n",
+    "\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "id": "b8e2a46a-6e86-49ef-9d1c-0da26eec0a53",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>bank_id</th>\n",
+       "      <th>amount</th>\n",
+       "      <th>loan_id</th>\n",
+       "      <th>bank_name</th>\n",
+       "      <th>state</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>544</td>\n",
+       "      <td>300</td>\n",
+       "      <td>01976a29-7d80-435f-ba6b-e22abc9d10f3</td>\n",
+       "      <td>test2</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   bank_id  amount                               loan_id bank_name state\n",
+       "0      544     300  01976a29-7d80-435f-ba6b-e22abc9d10f3     test2  None"
+      ]
+     },
+     "execution_count": 25,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.DataFrame(cass.execute(\"SELECT * FROM loans\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "id": "c87c69d1-4a9e-4065-9a95-8e47a08755c1",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<cassandra.cluster.ResultSet at 0x70c6d3f9eec0>"
+      ]
+     },
+     "execution_count": 32,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# NOW and UUID both generate UUIDs.\n",
+    "# NOW is stronger because it uses MAC addresses and timestamps\n",
+    "\n",
+    "# this is both an INSERT (inserting a row) and UPDATE (on the partition)\n",
+    "cass.execute(\"\"\"\n",
+    "INSERT INTO loans (bank_id, bank_name, loan_id, amount, state)\n",
+    "VALUES (544, 'mybank2', NOW(), 350, 'wi')\n",
+    "\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "id": "032cc612-c753-4bbe-bac3-ca5a8df6cd02",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>bank_id</th>\n",
+       "      <th>amount</th>\n",
+       "      <th>loan_id</th>\n",
+       "      <th>bank_name</th>\n",
+       "      <th>state</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>544</td>\n",
+       "      <td>400</td>\n",
+       "      <td>2b644550-0674-11f0-9e42-b531eb6d9b34</td>\n",
+       "      <td>mybank2</td>\n",
+       "      <td>wi</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>544</td>\n",
+       "      <td>350</td>\n",
+       "      <td>5c852cd0-0674-11f0-8b0a-b3bc8dc2bdb9</td>\n",
+       "      <td>mybank2</td>\n",
+       "      <td>wi</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>544</td>\n",
+       "      <td>300</td>\n",
+       "      <td>aebdb7c0-0673-11f0-8b0a-b3bc8dc2bdb9</td>\n",
+       "      <td>mybank2</td>\n",
+       "      <td>wi</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>544</td>\n",
+       "      <td>300</td>\n",
+       "      <td>f8c80870-0673-11f0-8acf-b5f913312dcb</td>\n",
+       "      <td>mybank2</td>\n",
+       "      <td>wi</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>544</td>\n",
+       "      <td>300</td>\n",
+       "      <td>01976a29-7d80-435f-ba6b-e22abc9d10f3</td>\n",
+       "      <td>mybank2</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   bank_id  amount                               loan_id bank_name state\n",
+       "0      544     400  2b644550-0674-11f0-9e42-b531eb6d9b34   mybank2    wi\n",
+       "1      544     350  5c852cd0-0674-11f0-8b0a-b3bc8dc2bdb9   mybank2    wi\n",
+       "2      544     300  aebdb7c0-0673-11f0-8b0a-b3bc8dc2bdb9   mybank2    wi\n",
+       "3      544     300  f8c80870-0673-11f0-8acf-b5f913312dcb   mybank2    wi\n",
+       "4      544     300  01976a29-7d80-435f-ba6b-e22abc9d10f3   mybank2  None"
+      ]
+     },
+     "execution_count": 33,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.DataFrame(cass.execute(\"SELECT * FROM loans\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "id": "d8142b23-c6c4-4b77-b5dd-8684c47c05c8",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<cassandra.cluster.ResultSet at 0x70c6ad556e30>"
+      ]
+     },
+     "execution_count": 34,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cass.execute(\"\"\"\n",
+    "INSERT INTO loans (bank_id, bank_name, loan_id, amount, state)\n",
+    "VALUES (999, 'uwcu', NOW(), 500, 'il')\n",
+    "\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "id": "b3b1ef23-db1a-43c8-b572-09a48f4aba41",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>bank_id</th>\n",
+       "      <th>amount</th>\n",
+       "      <th>loan_id</th>\n",
+       "      <th>bank_name</th>\n",
+       "      <th>state</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>544</td>\n",
+       "      <td>400</td>\n",
+       "      <td>2b644550-0674-11f0-9e42-b531eb6d9b34</td>\n",
+       "      <td>mybank2</td>\n",
+       "      <td>wi</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>544</td>\n",
+       "      <td>350</td>\n",
+       "      <td>5c852cd0-0674-11f0-8b0a-b3bc8dc2bdb9</td>\n",
+       "      <td>mybank2</td>\n",
+       "      <td>wi</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>544</td>\n",
+       "      <td>300</td>\n",
+       "      <td>aebdb7c0-0673-11f0-8b0a-b3bc8dc2bdb9</td>\n",
+       "      <td>mybank2</td>\n",
+       "      <td>wi</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>544</td>\n",
+       "      <td>300</td>\n",
+       "      <td>f8c80870-0673-11f0-8acf-b5f913312dcb</td>\n",
+       "      <td>mybank2</td>\n",
+       "      <td>wi</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>544</td>\n",
+       "      <td>300</td>\n",
+       "      <td>01976a29-7d80-435f-ba6b-e22abc9d10f3</td>\n",
+       "      <td>mybank2</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>999</td>\n",
+       "      <td>500</td>\n",
+       "      <td>78e4a9f0-0674-11f0-8acf-b5f913312dcb</td>\n",
+       "      <td>uwcu</td>\n",
+       "      <td>il</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   bank_id  amount                               loan_id bank_name state\n",
+       "0      544     400  2b644550-0674-11f0-9e42-b531eb6d9b34   mybank2    wi\n",
+       "1      544     350  5c852cd0-0674-11f0-8b0a-b3bc8dc2bdb9   mybank2    wi\n",
+       "2      544     300  aebdb7c0-0673-11f0-8b0a-b3bc8dc2bdb9   mybank2    wi\n",
+       "3      544     300  f8c80870-0673-11f0-8acf-b5f913312dcb   mybank2    wi\n",
+       "4      544     300  01976a29-7d80-435f-ba6b-e22abc9d10f3   mybank2  None\n",
+       "5      999     500  78e4a9f0-0674-11f0-8acf-b5f913312dcb      uwcu    il"
+      ]
+     },
+     "execution_count": 35,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.DataFrame(cass.execute(\"SELECT * FROM loans\"))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/lec/27-cassandra/hash.ipynb b/lec/27-cassandra/hash.ipynb
new file mode 100644
index 0000000..45cf799
--- /dev/null
+++ b/lec/27-cassandra/hash.ipynb
@@ -0,0 +1,261 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "19e1af7a-a39d-4382-9e7a-f720a23baba7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import string\n",
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "da2a5620-5326-4031-a63c-eb1677e9d92f",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'ABCDEFGHIJKLMNOPQRSTUVWXYZ'"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "string.ascii_uppercase"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "e594fb1d-c840-4a33-ad15-bb86b5053037",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>letter</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>A</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>B</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>C</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>D</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>E</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  letter\n",
+       "0      A\n",
+       "1      B\n",
+       "2      C\n",
+       "3      D\n",
+       "4      E"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df = pd.DataFrame({\"letter\": list(string.ascii_uppercase)})\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "3b9d113f-7d73-44ec-9df8-b78c1b64a999",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "3"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "hash(\"A\") % 4"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "85d5af92-2a7d-4588-881e-d6ae27d53174",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>letter</th>\n",
+       "      <th>partition-before</th>\n",
+       "      <th>partition-after</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>A</td>\n",
+       "      <td>3</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>B</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>C</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>D</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>E</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  letter  partition-before  partition-after\n",
+       "0      A                 3                3\n",
+       "1      B                 0                1\n",
+       "2      C                 2                1\n",
+       "3      D                 0                0\n",
+       "4      E                 1                1"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df[\"partition-before\"] = df[\"letter\"].map(lambda letter: hash(letter) % 4) # when we have 4 machines\n",
+    "df[\"partition-after\"] = df[\"letter\"].map(lambda letter: hash(letter) % 5) # when we have 5 machines\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "f9fb0213-6fa3-4b5d-8e8b-acf13929e5db",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.34615384615384615"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# in this run, ~35% of letters stayed on the same machine when we scaled from 4 to 5 machines; the rest (~65%) moved\n",
+    "float((df[\"partition-before\"] == df[\"partition-after\"]).astype(int).mean())"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/lec/27-cassandra/lec.ipynb b/lec/27-cassandra/lec.ipynb
new file mode 100644
index 0000000..cafed45
--- /dev/null
+++ b/lec/27-cassandra/lec.ipynb
@@ -0,0 +1,1166 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "0ff1c81b-9867-4f98-b227-c89871ff04bf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from cassandra.cluster import Cluster\n",
+    "cluster = Cluster([\"p6-db-1\", \"p6-db-2\", \"p6-db-3\"])\n",
+    "cass = cluster.connect()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "3d807c68-ac2a-4fe3-8153-919ea4c0f302",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<cassandra.cluster.ResultSet at 0x70d240bc3c40>"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cass.execute(\"use banking\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "d55396a4-33e6-43c7-842e-d8994c2f5acd",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<cassandra.cluster.ResultSet at 0x70d240bc0250>"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cass.execute(\"drop table if exists loans\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "93a16129-d410-4bce-9034-4c792a74188b",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<cassandra.cluster.ResultSet at 0x70d238dba320>"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cass.execute(\"\"\"\n",
+    "CREATE TABLE loans (\n",
+    "    bank_id INT,\n",
+    "    bank_name text STATIC,\n",
+    "    loan_id UUID,\n",
+    "    amount int,\n",
+    "    state text,\n",
+    "    PRIMARY KEY ((bank_id), amount, loan_id)\n",
+    ") WITH CLUSTERING ORDER BY (amount DESC)\n",
+    "\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "ec0308f4-5132-4e1e-a24e-579e06cc3105",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CREATE TABLE banking.loans (\n",
+      "    bank_id int,\n",
+      "    amount int,\n",
+      "    loan_id uuid,\n",
+      "    bank_name text static,\n",
+      "    state text,\n",
+      "    PRIMARY KEY (bank_id, amount, loan_id)\n",
+      ") WITH CLUSTERING ORDER BY (amount DESC, loan_id ASC)\n",
+      "    AND additional_write_policy = '99p'\n",
+      "    AND allow_auto_snapshot = true\n",
+      "    AND bloom_filter_fp_chance = 0.01\n",
+      "    AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}\n",
+      "    AND cdc = false\n",
+      "    AND comment = ''\n",
+      "    AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}\n",
+      "    AND compression = {'chunk_length_in_kb': '16', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}\n",
+      "    AND memtable = 'default'\n",
+      "    AND crc_check_chance = 1.0\n",
+      "    AND default_time_to_live = 0\n",
+      "    AND extensions = {}\n",
+      "    AND gc_grace_seconds = 864000\n",
+      "    AND incremental_backups = true\n",
+      "    AND max_index_interval = 2048\n",
+      "    AND memtable_flush_period_in_ms = 0\n",
+      "    AND min_index_interval = 128\n",
+      "    AND read_repair = 'BLOCKING'\n",
+      "    AND speculative_retry = '99p';\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(cass.execute(\"DESCRIBE TABLE loans\").one().create_statement)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "7350e0c3-95b7-404e-b6f6-a469eac7169a",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<cassandra.cluster.ResultSet at 0x70d238de1870>"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# INSERT in Cassandra is an UPSERT: update or insert\n",
+    "cass.execute(\"\"\"\n",
+    "INSERT INTO loans (bank_id, bank_name)\n",
+    "VALUES (544, 'test2')\n",
+    "\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "f10be20a-15bb-4092-9a08-272698012813",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>bank_id</th>\n",
+       "      <th>amount</th>\n",
+       "      <th>loan_id</th>\n",
+       "      <th>bank_name</th>\n",
+       "      <th>state</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>544</td>\n",
+       "      <td>None</td>\n",
+       "      <td>None</td>\n",
+       "      <td>test2</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   bank_id amount loan_id bank_name state\n",
+       "0      544   None    None     test2  None"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "pd.DataFrame(cass.execute(\"SELECT * FROM loans\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "be2f1f47-f4be-49b5-90cf-8a9e0d69024a",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<cassandra.cluster.ResultSet at 0x70d238de12d0>"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cass.execute(\"\"\"\n",
+    "INSERT INTO loans (bank_id, bank_name, loan_id, amount)\n",
+    "VALUES (544, 'test2', UUID(), 300)\n",
+    "\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "498c868d-41ee-4bc5-bb8d-d8a4e10b4464",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>bank_id</th>\n",
+       "      <th>amount</th>\n",
+       "      <th>loan_id</th>\n",
+       "      <th>bank_name</th>\n",
+       "      <th>state</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>544</td>\n",
+       "      <td>300</td>\n",
+       "      <td>3daed7cd-7e7a-4107-a11b-35ef10ffc035</td>\n",
+       "      <td>test2</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   bank_id  amount                               loan_id bank_name state\n",
+       "0      544     300  3daed7cd-7e7a-4107-a11b-35ef10ffc035     test2  None"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.DataFrame(cass.execute(\"SELECT * FROM loans\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "714bedc8-6a48-4aef-b706-cd30c274473b",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<cassandra.cluster.ResultSet at 0x70d238dba620>"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# NOW() and UUID() both generate UUIDs, which are supposed to be universally unique.\n",
+    "# NOW() uses MAC addrs and timestamps to guarantee uniqueness.\n",
+    "cass.execute(\"\"\"\n",
+    "INSERT INTO loans (bank_id, bank_name, loan_id, amount, state)\n",
+    "VALUES (544, 'mybank', NOW(), 400, 'wi')\n",
+    "\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "7af36f0e-5db8-424d-9b08-1e8aacc2dacf",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>bank_id</th>\n",
+       "      <th>amount</th>\n",
+       "      <th>loan_id</th>\n",
+       "      <th>bank_name</th>\n",
+       "      <th>state</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>544</td>\n",
+       "      <td>400</td>\n",
+       "      <td>794087c0-0683-11f0-8b0a-b3bc8dc2bdb9</td>\n",
+       "      <td>mybank</td>\n",
+       "      <td>wi</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>544</td>\n",
+       "      <td>300</td>\n",
+       "      <td>3daed7cd-7e7a-4107-a11b-35ef10ffc035</td>\n",
+       "      <td>mybank</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   bank_id  amount                               loan_id bank_name state\n",
+       "0      544     400  794087c0-0683-11f0-8b0a-b3bc8dc2bdb9    mybank    wi\n",
+       "1      544     300  3daed7cd-7e7a-4107-a11b-35ef10ffc035    mybank  None"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# the INSERT above added a new row, but acted as an UPSERT on the partition's static bank_name (test2 -> mybank)\n",
+    "pd.DataFrame(cass.execute(\"SELECT * FROM loans\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "33c033d2-3aa4-4b32-8248-7b96117c7099",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<cassandra.cluster.ResultSet at 0x70d240bc1210>"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cass.execute(\"\"\"\n",
+    "INSERT INTO loans (bank_id, bank_name, loan_id, amount, state)\n",
+    "VALUES (999, 'uwcu', NOW(), 500, 'il')\n",
+    "\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "c8153e23-2f92-40f8-b9c1-6241c365ea7c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>bank_id</th>\n",
+       "      <th>amount</th>\n",
+       "      <th>loan_id</th>\n",
+       "      <th>bank_name</th>\n",
+       "      <th>state</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>544</td>\n",
+       "      <td>400</td>\n",
+       "      <td>794087c0-0683-11f0-8b0a-b3bc8dc2bdb9</td>\n",
+       "      <td>mybank</td>\n",
+       "      <td>wi</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>544</td>\n",
+       "      <td>300</td>\n",
+       "      <td>3daed7cd-7e7a-4107-a11b-35ef10ffc035</td>\n",
+       "      <td>mybank</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>999</td>\n",
+       "      <td>500</td>\n",
+       "      <td>d7faddb0-0683-11f0-9e42-b531eb6d9b34</td>\n",
+       "      <td>uwcu</td>\n",
+       "      <td>il</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   bank_id  amount                               loan_id bank_name state\n",
+       "0      544     400  794087c0-0683-11f0-8b0a-b3bc8dc2bdb9    mybank    wi\n",
+       "1      544     300  3daed7cd-7e7a-4107-a11b-35ef10ffc035    mybank  None\n",
+       "2      999     500  d7faddb0-0683-11f0-9e42-b531eb6d9b34      uwcu    il"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.DataFrame(cass.execute(\"SELECT * FROM loans\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "25b33372-08ad-4d4a-9f8a-389883360b2d",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<cassandra.cluster.ResultSet at 0x70d240bc2fe0>"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# create a new user-defined type (UDT) for names\n",
+    "cass.execute(\"\"\"\n",
+    "CREATE TYPE FullName (\n",
+    "    first text,\n",
+    "    last  text\n",
+    ")\n",
+    "\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "c6b42f4a-eb53-477d-aff2-3bd72845749c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<cassandra.cluster.ResultSet at 0x70d207260d30>"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cass.execute(\"\"\"\n",
+    "ALTER TABLE loans ADD (name FullName);\n",
+    "\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "af6c7d02-5185-44ae-a0f2-c743feae1927",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>bank_id</th>\n",
+       "      <th>amount</th>\n",
+       "      <th>loan_id</th>\n",
+       "      <th>bank_name</th>\n",
+       "      <th>name</th>\n",
+       "      <th>state</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>544</td>\n",
+       "      <td>400</td>\n",
+       "      <td>794087c0-0683-11f0-8b0a-b3bc8dc2bdb9</td>\n",
+       "      <td>mybank</td>\n",
+       "      <td>None</td>\n",
+       "      <td>wi</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>544</td>\n",
+       "      <td>300</td>\n",
+       "      <td>3daed7cd-7e7a-4107-a11b-35ef10ffc035</td>\n",
+       "      <td>mybank</td>\n",
+       "      <td>None</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>999</td>\n",
+       "      <td>500</td>\n",
+       "      <td>d7faddb0-0683-11f0-9e42-b531eb6d9b34</td>\n",
+       "      <td>uwcu</td>\n",
+       "      <td>None</td>\n",
+       "      <td>il</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   bank_id  amount                               loan_id bank_name  name state\n",
+       "0      544     400  794087c0-0683-11f0-8b0a-b3bc8dc2bdb9    mybank  None    wi\n",
+       "1      544     300  3daed7cd-7e7a-4107-a11b-35ef10ffc035    mybank  None  None\n",
+       "2      999     500  d7faddb0-0683-11f0-9e42-b531eb6d9b34      uwcu  None    il"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Cassandra has sparse tables, so adding these nulls doesn't require modifying a bunch of rows\n",
+    "pd.DataFrame(cass.execute(\"SELECT * FROM loans\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "d0485f70-3919-437c-9e56-b22569c61cb0",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<cassandra.cluster.ResultSet at 0x70d2072cd780>"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cass.execute(\"\"\"\n",
+    "INSERT INTO loans (bank_id, bank_name, loan_id, amount, state, name)\n",
+    "VALUES (999, 'uwcu', NOW(), 500, 'il', {first: 'Tyler', last:'C'})\n",
+    "\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "ced5a9f4-2e8e-4956-9d08-a632bc4b0960",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>bank_id</th>\n",
+       "      <th>amount</th>\n",
+       "      <th>loan_id</th>\n",
+       "      <th>bank_name</th>\n",
+       "      <th>name</th>\n",
+       "      <th>state</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>544</td>\n",
+       "      <td>400</td>\n",
+       "      <td>794087c0-0683-11f0-8b0a-b3bc8dc2bdb9</td>\n",
+       "      <td>mybank</td>\n",
+       "      <td>None</td>\n",
+       "      <td>wi</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>544</td>\n",
+       "      <td>300</td>\n",
+       "      <td>3daed7cd-7e7a-4107-a11b-35ef10ffc035</td>\n",
+       "      <td>mybank</td>\n",
+       "      <td>None</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>999</td>\n",
+       "      <td>500</td>\n",
+       "      <td>d7faddb0-0683-11f0-9e42-b531eb6d9b34</td>\n",
+       "      <td>uwcu</td>\n",
+       "      <td>None</td>\n",
+       "      <td>il</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>999</td>\n",
+       "      <td>500</td>\n",
+       "      <td>5866b0a0-0684-11f0-8acf-b5f913312dcb</td>\n",
+       "      <td>uwcu</td>\n",
+       "      <td>(Tyler, C)</td>\n",
+       "      <td>il</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   bank_id  amount                               loan_id bank_name  \\\n",
+       "0      544     400  794087c0-0683-11f0-8b0a-b3bc8dc2bdb9    mybank   \n",
+       "1      544     300  3daed7cd-7e7a-4107-a11b-35ef10ffc035    mybank   \n",
+       "2      999     500  d7faddb0-0683-11f0-9e42-b531eb6d9b34      uwcu   \n",
+       "3      999     500  5866b0a0-0684-11f0-8acf-b5f913312dcb      uwcu   \n",
+       "\n",
+       "         name state  \n",
+       "0        None    wi  \n",
+       "1        None  None  \n",
+       "2        None    il  \n",
+       "3  (Tyler, C)    il  "
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.DataFrame(cass.execute(\"SELECT * FROM loans\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "id": "dd3051d3-efe0-4d15-abf8-2b8db5c429ce",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>name_first</th>\n",
+       "      <th>name_last</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>None</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>None</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>None</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Tyler</td>\n",
+       "      <td>C</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  name_first name_last\n",
+       "0       None      None\n",
+       "1       None      None\n",
+       "2       None      None\n",
+       "3      Tyler         C"
+      ]
+     },
+     "execution_count": 23,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.DataFrame(cass.execute(\"SELECT name.first, name.last FROM loans\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "id": "2bfc163c-994e-43f4-bbe3-291c0324e040",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>f</th>\n",
+       "      <th>l</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>None</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Tyler</td>\n",
+       "      <td>C</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       f     l\n",
+       "0   None  None\n",
+       "1  Tyler     C"
+      ]
+     },
+     "execution_count": 57,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.DataFrame(cass.execute(\"SELECT name.first AS f, name.last as l FROM loans WHERE bank_id = 999\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "id": "8fe52763-0c3d-40a3-98f3-9435e2401628",
+   "metadata": {},
+   "outputs": [],
+   "source": [
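+    "# prepared statement: the CQL is prepared once with cass.prepare(); values for the ? placeholders are supplied at execute time"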
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 67,
+   "id": "ee49b471-f7bb-4bf0-86d6-211c5f314fa2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "inst_stmt = cass.prepare(\"\"\"\n",
+    "INSERT INTO loans (bank_id, loan_id, amount, name)\n",
+    "VALUES (999, NOW(), ?, {first: ?, last: ?})\n",
+    "\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 68,
+   "id": "4a3f64c0-33cc-4b89-b170-3bb875ceaeaa",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<cassandra.cluster.ResultSet at 0x70d207261390>"
+      ]
+     },
+     "execution_count": 68,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cass.execute(inst_stmt, (345, 'Tyler', 'C'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 69,
+   "id": "027c3b62-4f17-4358-b5d9-ae22eed54a91",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>bank_id</th>\n",
+       "      <th>amount</th>\n",
+       "      <th>loan_id</th>\n",
+       "      <th>bank_name</th>\n",
+       "      <th>name</th>\n",
+       "      <th>state</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>544</td>\n",
+       "      <td>400</td>\n",
+       "      <td>794087c0-0683-11f0-8b0a-b3bc8dc2bdb9</td>\n",
+       "      <td>mybank</td>\n",
+       "      <td>None</td>\n",
+       "      <td>wi</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>544</td>\n",
+       "      <td>300</td>\n",
+       "      <td>3daed7cd-7e7a-4107-a11b-35ef10ffc035</td>\n",
+       "      <td>mybank</td>\n",
+       "      <td>None</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>999</td>\n",
+       "      <td>500</td>\n",
+       "      <td>d7faddb0-0683-11f0-9e42-b531eb6d9b34</td>\n",
+       "      <td>uwcu</td>\n",
+       "      <td>None</td>\n",
+       "      <td>il</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>999</td>\n",
+       "      <td>500</td>\n",
+       "      <td>5866b0a0-0684-11f0-8acf-b5f913312dcb</td>\n",
+       "      <td>uwcu</td>\n",
+       "      <td>(Tyler, C)</td>\n",
+       "      <td>il</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>999</td>\n",
+       "      <td>345</td>\n",
+       "      <td>857de4d0-0686-11f0-8acf-b5f913312dcb</td>\n",
+       "      <td>uwcu</td>\n",
+       "      <td>(Tyler, C)</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   bank_id  amount                               loan_id bank_name  \\\n",
+       "0      544     400  794087c0-0683-11f0-8b0a-b3bc8dc2bdb9    mybank   \n",
+       "1      544     300  3daed7cd-7e7a-4107-a11b-35ef10ffc035    mybank   \n",
+       "2      999     500  d7faddb0-0683-11f0-9e42-b531eb6d9b34      uwcu   \n",
+       "3      999     500  5866b0a0-0684-11f0-8acf-b5f913312dcb      uwcu   \n",
+       "4      999     345  857de4d0-0686-11f0-8acf-b5f913312dcb      uwcu   \n",
+       "\n",
+       "         name state  \n",
+       "0        None    wi  \n",
+       "1        None  None  \n",
+       "2        None    il  \n",
+       "3  (Tyler, C)    il  \n",
+       "4  (Tyler, C)  None  "
+      ]
+     },
+     "execution_count": 69,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.DataFrame(cass.execute(\"SELECT * FROM loans\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 72,
+   "id": "30d95055-3815-4411-bf0f-1138dcde89aa",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>bank_name</th>\n",
+       "      <th>system_avg_amount</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>mybank</td>\n",
+       "      <td>350</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>uwcu</td>\n",
+       "      <td>448</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  bank_name  system_avg_amount\n",
+       "0    mybank                350\n",
+       "1      uwcu                448"
+      ]
+     },
+     "execution_count": 72,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.DataFrame(cass.execute(\"\"\"\n",
+    "SELECT bank_name, AVG(amount)\n",
+    "FROM loans\n",
+    "GROUP BY bank_id\n",
+    "\"\"\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 73,
+   "id": "ea717e2a-1534-4866-a4ba-a10663af5cc5",
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "InvalidRequest",
+     "evalue": "Error from server: code=2200 [Invalid query] message=\"Group by is currently only supported on the columns of the PRIMARY KEY, got state\"",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mInvalidRequest\u001b[0m                            Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[73], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m pd\u001b[38;5;241m.\u001b[39mDataFrame(\u001b[43mcass\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\"\"\u001b[39;49m\n\u001b[1;32m      2\u001b[0m \u001b[38;5;124;43mSELECT state, AVG(amount)\u001b[39;49m\n\u001b[1;32m      3\u001b[0m \u001b[38;5;124;43mFROM loans\u001b[39;49m\n\u001b[1;32m      4\u001b[0m \u001b[38;5;124;43mGROUP BY state\u001b[39;49m\n\u001b[1;32m      5\u001b[0m \u001b[38;5;124;43m\"\"\"\u001b[39;49m\u001b[43m)\u001b[49m)\n",
+      "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/cassandra/cluster.py:2637\u001b[0m, in \u001b[0;36mcassandra.cluster.Session.execute\u001b[0;34m()\u001b[0m\n",
+      "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/cassandra/cluster.py:4920\u001b[0m, in \u001b[0;36mcassandra.cluster.ResponseFuture.result\u001b[0;34m()\u001b[0m\n",
+      "\u001b[0;31mInvalidRequest\u001b[0m: Error from server: code=2200 [Invalid query] message=\"Group by is currently only supported on the columns of the PRIMARY KEY, got state\""
+     ]
+    }
+   ],
+   "source": [
+    "# rows are distributed across nodes by partition key, so Cassandra only allows GROUP BY on PRIMARY KEY columns.\n",
+    "# grouping on another column (like state) would require a shuffle between nodes, which Cassandra doesn't support!\n",
+    "# TODO: write ETL job to get this into HDFS/Parquet+Spark\n",
+    "pd.DataFrame(cass.execute(\"\"\"\n",
+    "SELECT state, AVG(amount)\n",
+    "FROM loans\n",
+    "GROUP BY state\n",
+    "\"\"\"))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/p6/Dockerfile.cassandra b/p6/Dockerfile.cassandra
index a92d1a0..9dc822f 100644
--- a/p6/Dockerfile.cassandra
+++ b/p6/Dockerfile.cassandra
@@ -1,5 +1,5 @@
-FROM ubuntu:24.04
-RUN apt-get update; apt-get install -y wget curl openjdk-17-jdk python3-pip net-tools lsof vim unzip
+FROM ubuntu:22.04
+RUN apt-get update && apt-get install -y wget curl openjdk-17-jdk python3-pip iproute2
 
 # Python stuff
 RUN pip3 install numpy==2.1.3 pyspark==3.4.1 cassandra-driver==3.28.0 grpcio==1.58.0 grpcio-tools==1.58.0
-- 
GitLab