diff --git a/lecture_material/14-web-4-and-regex-1/web_4.py b/lecture_material/14-web-4/web_4.py similarity index 100% rename from lecture_material/14-web-4-and-regex-1/web_4.py rename to lecture_material/14-web-4/web_4.py diff --git a/lecture_material/14-web-4-and-regex-1/web_4_lec_001.py b/lecture_material/14-web-4/web_4_lec_001.py similarity index 100% rename from lecture_material/14-web-4-and-regex-1/web_4_lec_001.py rename to lecture_material/14-web-4/web_4_lec_001.py diff --git a/lecture_material/14-web-4-and-regex-1/web_4_lec_002.py b/lecture_material/14-web-4/web_4_lec_002.py similarity index 100% rename from lecture_material/14-web-4-and-regex-1/web_4_lec_002.py rename to lecture_material/14-web-4/web_4_lec_002.py diff --git a/lecture_material/14-web-4-and-regex-1/14-web-4-and-regex-1.pdf b/lecture_material/15-regex/15-regex.pdf similarity index 100% rename from lecture_material/14-web-4-and-regex-1/14-web-4-and-regex-1.pdf rename to lecture_material/15-regex/15-regex.pdf diff --git a/lecture_material/14-web-4-and-regex-1/14-web-4-and-regex-1.pptx b/lecture_material/15-regex/15-regex.pptx similarity index 100% rename from lecture_material/14-web-4-and-regex-1/14-web-4-and-regex-1.pptx rename to lecture_material/15-regex/15-regex.pptx diff --git a/lecture_material/14-web-4-and-regex-1/regex_1.ipynb b/lecture_material/15-regex/regex.ipynb similarity index 92% rename from lecture_material/14-web-4-and-regex-1/regex_1.ipynb rename to lecture_material/15-regex/regex.ipynb index 5b836e41a9c474fce19fae01502363065f70dc31..34105575ab8e8e2c41b106e14432b20ec24d21df 100644 --- a/lecture_material/14-web-4-and-regex-1/regex_1.ipynb +++ b/lecture_material/15-regex/regex.ipynb @@ -10,7 +10,7 @@ "id": "e60c1c48", "metadata": {}, "source": [ - "# Regex 1\n", + "# Regex\n", "\n", "## Reading\n", "\n", @@ -128,7 +128,9 @@ "outputs": [], "source": [ "#import statements\n", - "import re" + "import re\n", + "from subprocess import check_output\n", + "import pandas as pd" ] }, { 
@@ -1404,6 +1406,1188 @@ "source": [ "In CS <b>320</b>, there are <b>40</b> lectures, <b>10</b> quizzes, <b>3</b> exams, <b>6</b> projects, and <b>1000</b> things to learn. CS <b>320</b> is awesome!" ] + }, + { + "cell_type": "markdown", + "id": "d53219fc-977e-41af-b6c6-c7ab7170ee13", + "metadata": {}, + "source": [ + "### Git log example" + ] + }, + { + "cell_type": "markdown", + "id": "aaf31f0b-b3d4-40a3-a597-55f18c1d9373", + "metadata": {}, + "source": [ + "#### Run `git log` as a shell command" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "23004baa-f037-4494-af50-de321f149f44", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33mcommit d53c732e0da3ad0cd625c25e02703face75511ee\u001b[m\u001b[33m (\u001b[m\u001b[1;36mHEAD -> \u001b[m\u001b[1;32mmain\u001b[m\u001b[33m, \u001b[m\u001b[1;31morigin/main\u001b[m\u001b[33m, \u001b[m\u001b[1;31morigin/HEAD\u001b[m\u001b[33m)\u001b[m\n", + "Author: gsingh58 <gurmail-singh@wisc.edu>\n", + "Date: Mon Mar 18 20:43:56 2024 -0500\n", + "\n", + " zip folders added to lec17\n", + "\n", + "\u001b[33mcommit 0eba23b48835a617b53191de5aa8f2709517bb7a\u001b[m\n", + "Author: gsingh58 <gurmail-singh@wisc.edu>\n", + "Date: Mon Mar 18 20:36:44 2024 -0500\n", + "\n", + " lec16 and lec17 updated\n", + "\n", + "\u001b[33mcommit 844963cd59c769ef379b0272140b554aee6d9e60\u001b[m\n", + "Author: JINLANG WANG <jwang2775@wisc.edu>\n", + "Date: Thu Mar 14 13:00:40 2024 -0500\n", + "\n", + " lab11\n", + "\n", + "\u001b[33mcommit 31882131674f49276c66a6764113d5347721f239\u001b[m\n", + "Author: gsingh58 <gurmail-singh@wisc.edu>\n", + "Date: Thu Mar 14 06:39:42 2024 -0500\n", + "\n", + " number of lectures updated in regex_2.ipynb\n", + "\n", + "\u001b[33mcommit 08b3243a9ed2a67cdcfcbbb080910f8c86716712\u001b[m\n", + "Author: gsingh58 <gurmail-singh@wisc.edu>\n", + "Date: Wed Mar 13 10:05:24 2024 -0500\n", + "\n", + " lec14&15 updatd\n", + "\n", + "\u001b[33mcommit 
f5b5fff2409b01c2e12f3a7baae131e9a23d964b\u001b[m\n", + "Author: gsingh58 <gurmail-singh@wisc.edu>\n", + "Date: Wed Mar 13 09:39:09 2024 -0500\n", + "\n", + " regex_2_lec_002 renamed as regex_1_lec_002\n", + "\n", + "\u001b[33mcommit e017cc00d97679786d97c469733117c016e3ba9b\u001b[m\n", + "Author: gsingh58 <gurmail-singh@wisc.edu>\n", + "Date: Wed Mar 13 09:34:43 2024 -0500\n", + "\n", + " web_4_lec_002 added\n", + "\n", + "\u001b[33mcommit 7897fec9ecf701e61b2ed2713fabe72e726ffd7c\u001b[m\n", + "Author: gsingh58 <gurmail-singh@wisc.edu>\n", + "Date: Tue Mar 12 05:51:48 2024 -0500\n", + "\n", + " lec14 and lec15 added\n", + "\n", + "\u001b[33mcommit c13cfb21e69230b393ef2051027d029322c12cac\u001b[m\n", + "Author: JINLANG WANG <jwang2775@wisc.edu>\n", + "Date: Mon Mar 11 10:36:36 2024 -0500\n", + "\n", + " Update file README.md\n", + "\n", + "\u001b[33mcommit 95bfde13c281a6eb28f0cd98f6dc393762732d4c\u001b[m\n", + "Author: JINLANG WANG <jwang2775@wisc.edu>\n", + "Date: Sat Mar 9 18:45:08 2024 -0600\n", + "\n", + " lab10\n", + "\n", + "\u001b[33mcommit 90111df9e72309597c67da15bb61de78bff126e7\u001b[m\n", + "Author: JINLANG WANG <jwang2775@wisc.edu>\n", + "Date: Sat Mar 9 18:43:47 2024 -0600\n", + "\n", + " lab9\n", + "\n", + "\u001b[33mcommit c401ade096b0dd3e1178f46d067b2fbb98d499f3\u001b[m\n", + "Author: JINLANG WANG <jwang2775@wisc.edu>\n", + "Date: Sat Mar 9 18:43:20 2024 -0600\n", + "\n", + " Update 2 files\n", + " \n", + " - /Labs/Lab9/EDGAR.md\n", + " - /Labs/Lab9/README.md\n", + "\n", + "\u001b[33mcommit c99c65b5efdaa91f8fce8fadffc41c4747e0a3a0\u001b[m\n", + "Author: gsingh58 <gurmail-singh@wisc.edu>\n", + "Date: Thu Mar 7 08:36:02 2024 -0600\n", + "\n", + " lec13 updated\n", + "\n", + "\u001b[33mcommit df5877233cc57005e3003c19c0dbf89aafc53804\u001b[m\n", + "Author: gsingh58 <gurmail-singh@wisc.edu>\n", + "Date: Thu Mar 7 07:02:16 2024 -0600\n", + "\n", + " lec12 & 13 updated\n", + "\n", + "\u001b[33mcommit 26470de563bbe3bc65a73718aee1988ba64e8601\u001b[m\n", + 
"Author: gsingh58 <gurmail-singh@wisc.edu>\n", + "Date: Tue Mar 5 05:22:26 2024 -0600\n", + "\n", + " lec12 and 13: ipynb files added\n", + "\n", + "\u001b[33mcommit 406a4231cd865f81653eb97f7d5f61136360f9e5\u001b[m\n", + "Author: JINLANG WANG <jwang2775@wisc.edu>\n", + "Date: Sun Mar 3 17:41:42 2024 -0600\n", + "\n", + " Update file README.md\n", + "\n", + "\u001b[33mcommit 64745e91caeed8149cb33be037b795f88b4ddd39\u001b[m\n", + "Author: gsingh58 <gurmail-singh@wisc.edu>\n", + "Date: Sun Mar 3 05:03:29 2024 -0600\n", + "\n", + " lec12 and lec13 updated\n", + "\n", + "\u001b[33mcommit 42568d125bb87a10c178ec1d4640e467f2d826b0\u001b[m\n", + "Author: JINLANG WANG <jwang2775@wisc.edu>\n", + "Date: Wed Feb 28 22:31:11 2024 -0600\n", + "\n", + " lab8\n", + "\n", + "\u001b[33mcommit e00ff203a601993ec07c23420ff223707228e820\u001b[m\n", + "Author: JINLANG WANG <jwang2775@wisc.edu>\n", + "Date: Wed Feb 28 22:26:10 2024 -0600\n", + "\n", + " lab6\n", + "\n", + "\u001b[33mcommit 408cff34905add795fb9cce796913372b184a60b\u001b[m\n", + "Author: JINLANG WANG <jwang2775@wisc.edu>\n", + "Date: Wed Feb 28 22:25:47 2024 -0600\n", + "\n", + " add lab7\n", + "\n", + "\u001b[33mcommit dd5c119d6d72ee039a3eb10608d2f57f56933d72\u001b[m\n", + "Author: JINLANG WANG <jwang2775@wisc.edu>\n", + "Date: Wed Feb 28 22:22:15 2024 -0600\n", + "\n", + " add lab6\n", + "\n", + "\u001b[33mcommit 27ce693c05fe8beae4bd2b15715454c6c3c3f3f7\u001b[m\n", + "Author: gsingh58 <gurmail-singh@wisc.edu>\n", + "Date: Tue Feb 27 00:24:06 2024 -0600\n", + "\n", + " lec11 updated\n", + "\n", + "\u001b[33mcommit 8fda6507879ad4a220675d7af1dcc4f2dc6eb8cb\u001b[m\n", + "Author: gsingh58 <gurmail-singh@cs.wisc.edu>\n", + "Date: Thu Feb 22 16:57:25 2024 -0600\n", + "\n", + " redundant html files deleted\n", + "\n", + "\u001b[33mcommit 3693ad5b1f9ff5e2ed5af15baac3579715d26c9d\u001b[m\n", + "Author: gsingh58 <gurmail-singh@wisc.edu>\n", + "Date: Thu Feb 22 03:45:27 2024 -0600\n", + "\n", + " lec10 updated\n", + "\n", + 
"\u001b[33mcommit 6b7ac80ca513020f4ac43898001896aeec597179\u001b[m\n", + "Author: gsingh58 <gurmail-singh@wisc.edu>\n", + "Date: Thu Feb 22 01:48:04 2024 -0600\n", + "\n", + " lec9 updated again\n", + "\n", + "\u001b[33mcommit 3c0d23f6d2568da03b4ad8032e912cf2899af829\u001b[m\n", + "Author: JINLANG WANG <jwang2775@wisc.edu>\n", + "Date: Wed Feb 21 15:07:18 2024 -0600\n", + "\n", + " add screenshot req\n", + "\n", + "\u001b[33mcommit 5ca95af225907454cf9ffc50d6fadd2dae8838a1\u001b[m\n", + "Author: gsingh58 <gurmail-singh@cs.wisc.edu>\n", + "Date: Tue Feb 20 13:36:44 2024 -0600\n", + "\n", + " lec9 solution updated\n", + "\n", + "\u001b[33mcommit eb9f52e5c56b09918c76c25d431fae6e4f0aca59\u001b[m\n", + "Author: JINLANG WANG <jwang2775@wisc.edu>\n", + "Date: Tue Feb 20 09:32:16 2024 -0600\n", + "\n", + " add lab4 req\n", + "\n", + "\u001b[33mcommit 5caf51ade3733e981b801edd007155334dd70a29\u001b[m\n", + "Author: gsingh58 <gurmail-singh@wisc.edu>\n", + "Date: Tue Feb 20 07:37:59 2024 -0600\n", + "\n", + " Lec9 updated HTML removed\n", + "\n", + "\u001b[33mcommit e8915e7e2c1daef12cf968949c7641afa11c0758\u001b[m\n", + "Author: gsingh58 <gurmail-singh@wisc.edu>\n", + "Date: Tue Feb 20 07:33:57 2024 -0600\n", + "\n", + " lec9 updated\n", + "\n", + "\u001b[33mcommit 15b3e1d7c9ea971f161b2fb4df4f253a45588fe8\u001b[m\n", + "Author: gsingh58 <gurmail-singh@cs.wisc.edu>\n", + "Date: Thu Feb 15 13:45:20 2024 -0600\n", + "\n", + " a starter.ipynb file deleted\n", + "\n", + "\u001b[33mcommit 13ab0a06e5fa6ad7918b3564ab92d8e4a1914be1\u001b[m\n", + "Author: gsingh58 <gurmail-singh@cs.wisc.edu>\n", + "Date: Thu Feb 15 13:44:18 2024 -0600\n", + "\n", + " lec8 slides added\n", + "\n", + "\u001b[33mcommit faf0945797857eff185318c4146fdd00b57acd87\u001b[m\n", + "Author: JINLANG WANG <jwang2775@wisc.edu>\n", + "Date: Wed Feb 14 15:37:26 2024 -0600\n", + "\n", + " add lab5\n", + "\n", + "\u001b[33mcommit 19cd94a871641d13e77c58b03bfa50b53645473d\u001b[m\n", + "Author: JINLANG WANG 
<jwang2775@wisc.edu>\n", + "Date: Tue Feb 13 10:35:16 2024 -0600\n", + "\n", + " add exams\n", + "\n", + "\u001b[33mcommit 960fccecbdc787ecbe01fbe805e2d58255a47e35\u001b[m\n", + "Author: GURMAIL SINGH <gurmail.singh@wisc.edu>\n", + "Date: Tue Feb 13 03:49:40 2024 -0600\n", + "\n", + " Add new directory\n", + "\n", + "\u001b[33mcommit aa18ba4a4daa4c07a794b95aa647d33cbc4b21fc\u001b[m\n", + "Author: GURMAIL SINGH <gurmail.singh@wisc.edu>\n", + "Date: Tue Feb 13 03:48:21 2024 -0600\n", + "\n", + " Add new directory\n", + "\n", + "\u001b[33mcommit bf9845e5336eb0db7f309b55665c7bc46035bbf4\u001b[m\n", + "Author: GURMAIL SINGH <gurmail.singh@wisc.edu>\n", + "Date: Tue Feb 13 03:48:08 2024 -0600\n", + "\n", + " Add new directory\n", + "\n", + "\u001b[33mcommit 64fd3ae4f7677738f7fa88f75434674fb5b673fb\u001b[m\n", + "Author: GURMAIL SINGH <gurmail.singh@wisc.edu>\n", + "Date: Tue Feb 13 03:47:08 2024 -0600\n", + "\n", + " Add new directory\n", + "\n", + "\u001b[33mcommit 7daae2c37e007f641988e96e8252281a926089dd\u001b[m\n", + "Author: gsingh58 <gurmail-singh@wisc.edu>\n", + "Date: Tue Feb 13 03:40:01 2024 -0600\n", + "\n", + " worksheet add\n", + "\n", + "\u001b[33mcommit 489b6fba8db75a0b2ca55af0e774d21bdcf53447\u001b[m\n", + "Author: gsingh58 <gurmail-singh@wisc.edu>\n", + "Date: Tue Feb 13 03:23:22 2024 -0600\n", + "\n", + " readings moved\n", + "\n", + "\u001b[33mcommit 84ec381168bf2faefc4013883ecad4041ca97dd7\u001b[m\n", + "Author: gsingh58 <gurmail-singh@wisc.edu>\n", + "Date: Tue Feb 13 02:45:35 2024 -0600\n", + "\n", + " lec7 and 8 updated\n", + "\n", + "\u001b[33mcommit 4eb6f5c315386e467751a7636dbfc3ecce49aa40\u001b[m\n", + "Author: JINLANG WANG <jwang2775@wisc.edu>\n", + "Date: Thu Feb 8 14:19:12 2024 -0600\n", + "\n", + " add lab4\n", + "\n", + "\u001b[33mcommit ffce38e0a69d68e43186e37a74f9c5db7838d57a\u001b[m\n", + "Author: gsingh58 <gurmail-singh@wisc.edu>\n", + "Date: Thu Feb 8 08:14:20 2024 -0600\n", + "\n", + " lec6 updated\n", + "\n", + "\u001b[33mcommit 
0436dacdd2a05927ee0cdc6b4f21c6af8bfd0f7f\u001b[m\n", + "Author: gsingh58 <gurmail-singh@cs.wisc.edu>\n", + "Date: Tue Feb 6 16:21:28 2024 -0600\n", + "\n", + " in_class_demo_lec2\n", + "\n", + "\u001b[33mcommit a5926f4b1152a2d1851a0c0d46e10337ec2e3307\u001b[m\n", + "Author: JINLANG WANG <jwang2775@wisc.edu>\n", + "Date: Tue Feb 6 13:20:11 2024 -0600\n", + "\n", + " update lab2\n", + "\n", + "\u001b[33mcommit e46ee7eb2210f5f4dceae060fe876f65bfaf9291\u001b[m\n", + "Author: gsingh58 <gurmail-singh@cs.wisc.edu>\n", + "Date: Tue Feb 6 11:23:25 2024 -0600\n", + "\n", + " wi.zip uploaded\n", + "\n", + "\u001b[33mcommit a58c5160b1df3fde7bb3a61efe0fb488d48a4f1e\u001b[m\n", + "Author: gsingh58 <gurmail-singh@wisc.edu>\n", + "Date: Tue Feb 6 08:16:41 2024 -0600\n", + "\n", + " lec5 notes updated\n", + "\n", + "\u001b[33mcommit aa58287550760afedc5d42c6d6a7418e256fc216\u001b[m\n", + "Author: gsingh58 <gurmail-singh@wisc.edu>\n", + "Date: Tue Feb 6 07:30:46 2024 -0600\n", + "\n", + " lec5 slides updated\n", + "\n", + "\u001b[33mcommit 775fe85700e8ea29573f379b762337214b0672a1\u001b[m\n", + "Author: JINLANG WANG <jwang2775@wisc.edu>\n", + "Date: Mon Feb 5 17:17:27 2024 -0600\n", + "\n", + " update lab2\n", + "\n", + "\u001b[33mcommit 513506b35bef83e8c9bdfd9101f3dd7e492f6630\u001b[m\n", + "Author: JINLANG WANG <jwang2775@wisc.edu>\n", + "Date: Sun Feb 4 19:34:38 2024 -0600\n", + "\n", + " add lab3\n", + "\n", + "\u001b[33mcommit a171f55cba1a8ff24be8235638b36394974f5f4e\u001b[m\n", + "Author: JINLANG WANG <jwang2775@wisc.edu>\n", + "Date: Sun Feb 4 19:30:18 2024 -0600\n", + "\n", + " Update file README.md\n", + "\n", + "\u001b[33mcommit c205681624d12c40cda836a9aa7bfecdfb9e1b5e\u001b[m\n", + "Author: JINLANG WANG <jwang2775@wisc.edu>\n", + "Date: Sun Feb 4 19:29:13 2024 -0600\n", + "\n", + " Update file README.md\n", + "\n", + "\u001b[33mcommit 1961c8e880f7b8bea9fe276fe5cfeaf070c8fda3\u001b[m\n", + "Author: JINLANG WANG <jwang2775@wisc.edu>\n", + "Date: Sun Feb 4 19:27:30 2024 
-0600\n", + "\n", + " update lab2\n", + "\n", + "\u001b[33mcommit d6f156b6680afce23e971fa02c7550b7ed1c8464\u001b[m\n", + "Author: JINLANG WANG <jwang2775@wisc.edu>\n", + "Date: Sun Feb 4 19:27:00 2024 -0600\n", + "\n", + " update lab2\n", + "\n", + "\u001b[33mcommit 9933dd1f67182d29af40d794604474a942ac8d05\u001b[m\n", + "Author: JINLANG WANG <jwang2775@wisc.edu>\n", + "Date: Sun Feb 4 19:13:10 2024 -0600\n", + "\n", + " add lab2\n", + "\n", + "\u001b[33mcommit ad8795c898efc2630a667e90db4fb3b198a1f281\u001b[m\n", + "Author: gsingh58 <gurmail-singh@wisc.edu>\n", + "Date: Sun Feb 4 07:52:55 2024 -0600\n", + "\n", + " redundant repos deleted\n", + "\n", + "\u001b[33mcommit 0c4c4e75d8efdcb5aaf5d0ef39e27b7dc7d3baf1\u001b[m\n", + "Author: gsingh58 <gurmail-singh@wisc.edu>\n", + "Date: Thu Feb 1 07:17:47 2024 -0600\n", + "\n", + " lec4 updated\n", + "\n", + "\u001b[33mcommit 78d0d8c28355c33713d8dc3a9cb271c9dc13b9fb\u001b[m\n", + "Author: gsingh58 <gurmail-singh@wisc.edu>\n", + "Date: Thu Feb 1 07:16:55 2024 -0600\n", + "\n", + " few more files added\n", + "\n", + "\u001b[33mcommit e15b0eae22fc127b87d155207ccf93687075429b\u001b[m\n", + "Author: JINLANG WANG <jwang2775@wisc.edu>\n", + "Date: Wed Jan 31 09:35:12 2024 -0600\n", + "\n", + " update lab1\n", + "\n", + "\u001b[33mcommit f6bf4ed07310ec20bdc81b07a7b84e931abc1d52\u001b[m\n", + "Author: JINLANG WANG <jwang2775@wisc.edu>\n", + "Date: Tue Jan 30 10:03:13 2024 -0600\n", + "\n", + " Update lab1.md\n", + "\n", + "\u001b[33mcommit 6ab88b7c35171871a9824fbde72b4886990744fc\u001b[m\n", + "Author: gsingh58 <gurmail-singh@wisc.edu>\n", + "Date: Tue Jan 30 08:18:02 2024 -0600\n", + "\n", + " a typo removed\n", + "\n", + "\u001b[33mcommit eb8dbdafb79b044801352602a23cce59e0cf2f5b\u001b[m\n", + "Author: gsingh58 <gurmail-singh@wisc.edu>\n", + "Date: Tue Jan 30 07:55:27 2024 -0600\n", + "\n", + " slides name changed\n", + "\n", + "\u001b[33mcommit acaeff6953d1faf55deac9ec23605368d8423407\u001b[m\n", + "Author: gsingh58 
<gurmail-singh@wisc.edu>\n", + "Date: Tue Jan 30 07:45:22 2024 -0600\n", + "\n", + " html file added\n", + "\n", + "\u001b[33mcommit d8ef60dc6f2e014366c4878c4811019b2704da84\u001b[m\n", + "Author: gsingh58 <gurmail-singh@wisc.edu>\n", + "Date: Tue Jan 30 07:37:33 2024 -0600\n", + "\n", + " lec3 updated\n", + "\n", + "\u001b[33mcommit d7f3849918a34536c532fa2dbad09199dd06ed15\u001b[m\n", + "Author: JINLANG WANG <jwang2775@wisc.edu>\n", + "Date: Sat Jan 27 17:26:22 2024 -0600\n", + "\n", + " add lab1\n", + "\n", + "\u001b[33mcommit 845fad19e5e68b7ba37b8d9814d0d7391d28475d\u001b[m\n", + "Author: JINLANG WANG <jwang2775@wisc.edu>\n", + "Date: Sat Jan 27 17:24:03 2024 -0600\n", + "\n", + " add lab1\n", + "\n", + "\u001b[33mcommit 3e1e4395110c0bd26b3f8cd638cd2987585e84db\u001b[m\n", + "Author: JINLANG WANG <jwang2775@wisc.edu>\n", + "Date: Sat Jan 27 17:23:06 2024 -0600\n", + "\n", + " add lab1\n", + "\n", + "\u001b[33mcommit 34bcc1d6d1451a19515690ee2ccdbfd2b2bfd9c5\u001b[m\n", + "Author: JINLANG WANG <jwang2775@wisc.edu>\n", + "Date: Sat Jan 27 17:22:04 2024 -0600\n", + "\n", + " add lab1\n", + "\n", + "\u001b[33mcommit 8199778a4ca97efeac99f1a091f2f2038d3eabd2\u001b[m\n", + "Author: GURMAIL SINGH <gurmail.singh@wisc.edu>\n", + "Date: Fri Jan 26 21:33:46 2024 -0600\n", + "\n", + " Add new directory\n", + "\n", + "\u001b[33mcommit 75866c80ee6a563bc31cd2182bdafd7371d01f62\u001b[m\n", + "Author: GURMAIL SINGH <gurmail.singh@wisc.edu>\n", + "Date: Fri Jan 26 21:33:36 2024 -0600\n", + "\n", + " Add new directory\n", + "\n", + "\u001b[33mcommit 8cd6029682fbe3253e1b045dc5c2523f99946e7e\u001b[m\n", + "Author: GURMAIL SINGH <gurmail.singh@wisc.edu>\n", + "Date: Fri Jan 26 21:33:26 2024 -0600\n", + "\n", + " Add new directory\n", + "\n", + "\u001b[33mcommit 99b66918e05eef2a193fee51f6159bf1268d361b\u001b[m\n", + "Author: GURMAIL SINGH <gurmail.singh@wisc.edu>\n", + "Date: Fri Jan 26 21:33:17 2024 -0600\n", + "\n", + " Add new directory\n", + "\n", + "\u001b[33mcommit 
63c27755fdb1c938a8c55a9152970cba77f08ff9\u001b[m\n", + "Author: GURMAIL SINGH <gurmail.singh@wisc.edu>\n", + "Date: Fri Jan 26 21:27:17 2024 -0600\n", + "\n", + " Add new directory\n", + "\n", + "\u001b[33mcommit 5ab60fd384fd1d3934b864d8d74358491c4c1cbc\u001b[m\n", + "Author: GURMAIL SINGH <gurmail.singh@wisc.edu>\n", + "Date: Fri Jan 26 21:26:12 2024 -0600\n", + "\n", + " Add new directory\n", + "\n", + "\u001b[33mcommit 66e8e7233ecfd7e2493c07c5ff8e6006d20bb24c\u001b[m\n", + "Author: GURMAIL SINGH <gurmail.singh@wisc.edu>\n", + "Date: Fri Jan 26 21:15:22 2024 -0600\n", + "\n", + " Add new directory\n", + "\n", + "\u001b[33mcommit ad08df7aa2e0bc2396dfe580c76f8b386b0248ff\u001b[m\n", + "Author: GURMAIL SINGH <gurmail.singh@wisc.edu>\n", + "Date: Fri Jan 26 20:25:44 2024 -0600\n", + "\n", + " Add new directory\n", + "\n", + "\u001b[33mcommit 86f9157696f95c31e4d914b4e89fb5974f537e87\u001b[m\n", + "Author: GURMAIL SINGH <gurmail.singh@wisc.edu>\n", + "Date: Fri Jan 26 20:25:29 2024 -0600\n", + "\n", + " Add new directory\n", + "\n", + "\u001b[33mcommit fe4faf3439ab3b8caab1748762d83891187f54e9\u001b[m\n", + "Author: GURMAIL SINGH <gurmail.singh@wisc.edu>\n", + "Date: Fri Jan 26 20:14:28 2024 -0600\n", + "\n", + " Add new directory\n", + "\n", + "\u001b[33mcommit bfb93b54fb43fc18d6dacd22865416f32027c0bb\u001b[m\n", + "Author: GURMAIL SINGH <gurmail.singh@wisc.edu>\n", + "Date: Fri Jan 26 20:14:06 2024 -0600\n", + "\n", + " Add new directory\n", + "\n", + "\u001b[33mcommit ed7d967ede5422f9f3e2eeba85a105ffa2d03db0\u001b[m\n", + "Author: gsingh58 <gurmail-singh@wisc.edu>\n", + "Date: Thu Jan 25 08:10:04 2024 -0600\n", + "\n", + " lec2 py file added\n", + "\n", + "\u001b[33mcommit 81f38731851c3b844e0d4855f3ccfbb259579d2f\u001b[m\n", + "Author: gsingh58 <gurmail-singh@wisc.edu>\n", + "Date: Thu Jan 25 07:38:47 2024 -0600\n", + "\n", + " lec2 update\n", + "\n", + "\u001b[33mcommit 92279f04219c9d9fdcf504f10e3cb7f41b9a9c3a\u001b[m\n", + "Author: GURMAIL SINGH 
<gurmail.singh@wisc.edu>\n", + "Date: Tue Jan 23 04:23:37 2024 -0600\n", + "\n", + " Configure SAST in `.gitlab-ci.yml`, creating this file if it does not already exist\n", + "\n", + "\u001b[33mcommit 4721ddd9ae732b4ca058962aa3df2eb1614f45b0\u001b[m\n", + "Author: GURMAIL SINGH <gurmail.singh@wisc.edu>\n", + "Date: Tue Jan 23 04:23:36 2024 -0600\n", + "\n", + " Initial commit\n" + ] + } + ], + "source": [ + "!git log" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "927369bc-21f0-4e72-9133-36df6f0563dc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "commit d53c732e0da3ad0cd625c25e02703face75511ee\n", + "Author: gsingh58 <gurmail-singh@wisc.edu>\n", + "Date: Mon Mar 18 20:43:56 2024 -0500\n", + "\n", + " zip folders added to lec17\n", + "\n", + "commit 0eba23b48835a617b53191de5aa8f2709517bb7a\n", + "Author: gsingh58 <gurmail-singh@wisc.edu>\n", + "Date: Mon Mar 18 20:36:44 2024 -0500\n", + "\n", + " lec16 and lec17 updated\n", + "\n", + "commit 844963cd59c769ef379b0272140b554aee6d9e60\n", + "Author: JINLANG WANG <jwang2775@wisc.edu>\n", + "Date: Thu Mar 14 13:00:40 2024 -0500\n", + "\n", + " lab11\n", + "\n", + "commit 31882131674f49276c66a6764113d53\n" + ] + } + ], + "source": [ + "git_log_output = str(check_output([\"git\", \"log\"]), encoding=\"utf-8\")\n", + "print(git_log_output[:500])" + ] + }, + { + "cell_type": "markdown", + "id": "a2bbc9ca-ceb7-42ec-9c26-a2852e403787", + "metadata": {}, + "source": [ + "#### GOAL: find all the commit numbers" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "e02fd638-6aa4-46ce-88a9-c2cc7445e7de", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['d53c732e0da3ad0cd625c25e02703face75511ee',\n", + " '0eba23b48835a617b53191de5aa8f2709517bb7a',\n", + " '844963cd59c769ef379b0272140b554aee6d9e60',\n", + " '31882131674f49276c66a6764113d5347721f239',\n", + " '08b3243a9ed2a67cdcfcbbb080910f8c86716712',\n", + " 
'f5b5fff2409b01c2e12f3a7baae131e9a23d964b',\n", + " 'e017cc00d97679786d97c469733117c016e3ba9b',\n", + " '7897fec9ecf701e61b2ed2713fabe72e726ffd7c',\n", + " 'c13cfb21e69230b393ef2051027d029322c12cac',\n", + " '95bfde13c281a6eb28f0cd98f6dc393762732d4c']" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "commits = re.findall(r\"[0-9a-f]{40}\", git_log_output)\n", + "# recent 10 commit numbers\n", + "commits[:10]" + ] + }, + { + "cell_type": "markdown", + "id": "08155a24-8193-4ed4-b15b-92a50da7dc61", + "metadata": {}, + "source": [ + "#### What days of the week does the team push things into this repo?" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "0bfc004b-7ed4-4396-976b-ec9aa4376320", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "commit d53c732e0da3ad0cd625c25e02703face75511ee\n", + "Author: gsingh58 <gurmail-singh@wisc.edu>\n", + "Date: Mon Mar 18 20:43:56 2024 -0500\n", + "\n", + " zip folders added to lec17\n", + "\n", + "commit 0eba23b48835a617b53191de5aa8f2709517bb7a\n", + "Author: gsingh58 <gurmail-singh@wisc.edu>\n", + "Date: Mon Mar 18 20:36:44 2024 -0500\n", + "\n", + " lec16 and lec17 updated\n", + "\n", + "commit 844963cd59c769ef379b0272140b554aee6d9e60\n", + "Author: JINLANG WANG <jwang2775@wisc.edu>\n", + "Date: Thu Mar 14 13:00:40 2024 -0500\n", + "\n", + " lab11\n", + "\n", + "commit 31882131674f49276c66a6764113d53\n" + ] + } + ], + "source": [ + "print(git_log_output[:500])" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "b3ce06a0-d6b6-43d2-aabe-66e0adf831d9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Mon',\n", + " 'Mon',\n", + " 'Thu',\n", + " 'Thu',\n", + " 'Wed',\n", + " 'Wed',\n", + " 'Wed',\n", + " 'Tue',\n", + " 'Mon',\n", + " 'Sat',\n", + " 'Sat',\n", + " 'Sat',\n", + " 'Thu',\n", + " 'Thu',\n", + " 'Tue',\n", + " 'Sun',\n", + " 'Sun',\n", + " 'Wed',\n", + 
" 'Wed',\n", + " 'Wed',\n", + " 'Wed',\n", + " 'Tue',\n", + " 'Thu',\n", + " 'Thu',\n", + " 'Thu',\n", + " 'Wed',\n", + " 'Tue',\n", + " 'Tue',\n", + " 'Tue',\n", + " 'Tue',\n", + " 'Thu',\n", + " 'Thu',\n", + " 'Wed',\n", + " 'Tue',\n", + " 'Tue',\n", + " 'Tue',\n", + " 'Tue',\n", + " 'Tue',\n", + " 'Tue',\n", + " 'Tue',\n", + " 'Tue',\n", + " 'Thu',\n", + " 'Thu',\n", + " 'Tue',\n", + " 'Tue',\n", + " 'Tue',\n", + " 'Tue',\n", + " 'Tue',\n", + " 'Mon',\n", + " 'Sun',\n", + " 'Sun',\n", + " 'Sun',\n", + " 'Sun',\n", + " 'Sun',\n", + " 'Sun',\n", + " 'Sun',\n", + " 'Thu',\n", + " 'Thu',\n", + " 'Wed',\n", + " 'Tue',\n", + " 'Tue',\n", + " 'Tue',\n", + " 'Tue',\n", + " 'Tue',\n", + " 'Sat',\n", + " 'Sat',\n", + " 'Sat',\n", + " 'Sat',\n", + " 'Fri',\n", + " 'Fri',\n", + " 'Fri',\n", + " 'Fri',\n", + " 'Fri',\n", + " 'Fri',\n", + " 'Fri',\n", + " 'Fri',\n", + " 'Fri',\n", + " 'Fri',\n", + " 'Fri',\n", + " 'Thu',\n", + " 'Thu',\n", + " 'Tue',\n", + " 'Tue']" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "days = re.findall(r\"Date:\\s+(\\w+)\", git_log_output)\n", + "days" + ] + }, + { + "cell_type": "markdown", + "id": "9b4edce3-9b91-4acb-8408-a8d1e6c350a3", + "metadata": {}, + "source": [ + "#### Count unique days" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "8fd158b5-dfbb-4ede-8c6d-7c61427f9bf7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Tue 27\n", + "Thu 15\n", + "Fri 11\n", + "Wed 10\n", + "Sun 9\n", + "Sat 7\n", + "Mon 4\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "day_counts = pd.Series(days).value_counts()\n", + "day_counts" + ] + }, + { + "cell_type": "markdown", + "id": "de22ffd2-0c5c-4040-979a-f44011055f48", + "metadata": {}, + "source": [ + "#### Sort by day of the week" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": 
"70e98a92-02e6-489e-9406-2404755eed40", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Mon 4\n", + "Tue 27\n", + "Wed 10\n", + "Thu 15\n", + "Fri 11\n", + "Sun 9\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sorted_day_counts = day_counts.loc[[\"Mon\", \"Tue\", \"Wed\", \"Thu\", \"Fri\", \"Sun\"]]\n", + "sorted_day_counts" + ] + }, + { + "cell_type": "markdown", + "id": "d2fccecd-7e1a-446c-86ce-368427825fd0", + "metadata": {}, + "source": [ + "#### Create a bar plot" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "0ec72c6e-59ed-4b26-8b33-5690c27088dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 0, 'Days of the week')" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjIAAAHBCAYAAABzIlFzAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuNSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/xnp5ZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAxPklEQVR4nO3deVhWdeL//9eNKOICiguLkuCS+1apuaTgjpqZTmM2TS7llJmlppZtqJlo06jVOFo2ajUtk1nZJp9yAff8qqlZSmo4UeKSCCgkoJzfH/28r+5Y5NYbzv3G5+O6znV5lvu+X5y5J16c8z7nOCzLsgQAAGAgH7sDAAAAXCmKDAAAMBZFBgAAGIsiAwAAjEWRAQAAxqLIAAAAY1FkAACAsSgyAADAWL52Byht+fn5OnbsmKpXry6Hw2F3HAAAUAKWZens2bMKCwuTj0/Rx13KfZE5duyYwsPD7Y4BAACuQEpKiurXr1/k+nJfZKpXry7ptx0REBBgcxoAAFASmZmZCg8Pd/4eL0q5LzKXTicFBARQZAAAMMzlhoUw2BcAABiLIgMAAIxFkQEAAMaiyAAAAGNRZAAAgLEoMgAAwFgUGQAAYCyKDAAAMBZFBgAAGIsiAwAAjEWRAQAAxqLIAAAAY1FkAACAsSgyAADAWL52B8C1LeLxz+yOUCJH5w60OwIAoBAckQEAAMaiyAAAAGNRZAAAgLEoMgAAwFgUGQAAYCyKDAAAMBZFBgAAGIsiAwAAjEWRAQAAxqLIAAAAY1FkAACAsSgyAADAWBQZAABgLIoMAAAwFkUGAAAYiyIDAACMRZEBAADGosgAAABjUWQAAICxKDIAAMBYFBkAAGAsW4tMXFycOnTooOrVq6tu3boaMmSIkpKSXLaJioqSw+FwmR544AGbEgMAAG9ia5FJTEzU+PHjtX37dn355ZfKy8tT3759lZWV5bLd2LFjlZqa6pyef/55mxIDAABv4mvnh8fHx7vMr1ixQnXr1tWuXbvUvXt35/IqVaooJCSkrOMBAAAv51VjZDIyMiRJQUFBLsvfeust1a5dW61atdL06
dOVnZ1d5Hvk5OQoMzPTZQIAAOWTrUdkfi8/P18TJ05U165d1apVK+fyu+66Sw0aNFBYWJj27dunxx57TElJSfrggw8KfZ+4uDjNnDmzrGIDAAAbOSzLsuwOIUnjxo3TmjVrtHnzZtWvX7/I7davX69evXrp8OHDatSoUYH1OTk5ysnJcc5nZmYqPDxcGRkZCggIKJXsuHIRj39md4QSOTp3oN0RAOCakpmZqcDAwMv+/vaKIzIPPfSQPv30U23cuLHYEiNJnTp1kqQii4yfn5/8/PxKJScAAPAuthYZy7I0YcIEffjhh0pISFBkZORlX7Nnzx5JUmhoaCmnAwAA3s7WIjN+/Hi9/fbbWr16tapXr67jx49LkgIDA+Xv768jR47o7bff1oABA1SrVi3t27dPkyZNUvfu3dWmTRs7owMAAC9ga5FZvHixpN9uevd7y5cv16hRo1SpUiWtXbtWCxcuVFZWlsLDwzVs2DA99dRTNqQFAADexvZTS8UJDw9XYmJiGaUBAACm8ar7yAAAALiDIgMAAIxFkQEAAMaiyAAAAGNRZAAAgLEoMgAAwFgUGQAAYCyKDAAAMBZFBgAAGIsiAwAAjEWRAQAAxqLIAAAAY1FkAACAsSgyAADAWBQZAABgLIoMAAAwFkUGAAAYiyIDAACMRZEBAADGosgAAABjUWQAAICxKDIAAMBYFBkAAGAsigwAADAWRQYAABiLIgMAAIxFkQEAAMaiyAAAAGNRZAAAgLEoMgAAwFgUGQAAYCyKDAAAMBZFBgAAGIsiAwAAjEWRAQAAxqLIAAAAY1FkAACAsSgyAADAWBQZAABgLIoMAAAwFkUGAAAYiyIDAACMRZEBAADGosgAAABjUWQAAICxKDIAAMBYFBkAAGAsigwAADAWRQYAABiLIgMAAIxFkQEAAMaiyAAAAGNRZAAAgLEoMgAAwFi2Fpm4uDh16NBB1atXV926dTVkyBAlJSW5bHP+/HmNHz9etWrVUrVq1TRs2DCdOHHCpsQAAMCb2FpkEhMTNX78eG3fvl1ffvml8vLy1LdvX2VlZTm3mTRpkj755BOtXLlSiYmJOnbsmIYOHWpjagAA4C187fzw+Ph4l/kVK1aobt262rVrl7p3766MjAz9+9//1ttvv62ePXtKkpYvX67mzZtr+/btuvnmm+2IDQAAvIRXjZHJyMiQJAUFBUmSdu3apby8PPXu3du5TbNmzXTddddp27ZttmQEAADew9YjMr+Xn5+viRMnqmvXrmrVqpUk6fjx46pUqZJq1Kjhsm1wcLCOHz9e6Pvk5OQoJyfHOZ+ZmVlqmQEAgL285ojM+PHjtX//fr377rtX9T5xcXEKDAx0TuHh4R5KCAAAvI1XFJmHHnpIn376qTZs2KD69es7l4eEhCg3N1fp6eku2584cUIhISGFvtf06dOVkZHhnFJSUkozOgAAsJGtRcayLD300EP68MMPtX79ekVGRrqsv/HGG1WxYkWtW7fOuSwpKUk//vijOnfuXOh7+vn5KSAgwGUCAADlk61jZMaPH6+3335bq1evVvXq1Z3jXgIDA+Xv76/AwEDde++9mjx5soKCghQQEKAJEyaoc+fOXLEEAADsLTKLFy+WJEVFRbksX758uUaNGiVJWrBggXx8fDRs2DDl5OSoX79++te//lXGSQEAgDeytchYlnXZbSpXrqxFixZp0aJFZZAIAACYxCsG+wIAAFwJigwAADAWRQYAABiLIgMAAIxFkQEAAMaiyAAAAGNRZAAAgLEoMgAAwFgUGQAAYCyKDAAAMBZFBgAAGIsiAwAAjEWRAQAAxqLIAAAAY1FkAACAsSgyAADAWBQZAABgLIoMAAAwlttFJiUlRT/99JNzfseOHZo4caJeffVVjwYDAAC4HLeLzF133aUNGzZIko4fP64+ffpox44devLJJzVr1iyPBwQAACiK20Vm//796tixoyTpvffeU6tWrbR161a99dZbWrFihafzAQAAF
MntIpOXlyc/Pz9J0tq1azV48GBJUrNmzZSamurZdAAAAMVwu8i0bNlSS5Ys0aZNm/Tll1+qf//+kqRjx46pVq1aHg8IAABQFLeLzLx58/TKK68oKipKI0aMUNu2bSVJH3/8sfOUEwAAQFnwdfcFUVFR+uWXX5SZmamaNWs6l//tb39T1apVPRoOAACgOG4fkenZs6fOnj3rUmIkKSgoSMOHD/dYMAAAgMtxu8gkJCQoNze3wPLz589r06ZNHgkFAABQEiU+tbRv3z7nv7/77jsdP37cOX/x4kXFx8erXr16nk0HAABQjBIXmXbt2snhcMjhcKhnz54F1vv7++vll1/2aDgAAIDilLjIJCcny7IsNWzYUDt27FCdOnWc6ypVqqS6deuqQoUKpRISAACgMCUuMg0aNJAk5efnl1oYAAAAd7h9+bUkHTp0SBs2bNDJkycLFJtnnnnGI8EAAAAux+0is3TpUo0bN061a9dWSEiIHA6Hc53D4aDIAACAMuN2kZk9e7aee+45PfbYY6WRBwAAoMTcvo/MmTNndMcdd5RGFgAAALe4XWTuuOMOffHFF6WRBQAAwC1un1pq3Lixnn76aW3fvl2tW7dWxYoVXdY//PDDHgsHAABQHIdlWZY7L4iMjCz6zRwO/fDDD1cdypMyMzMVGBiojIwMBQQE2B0HfxDx+Gd2RyiRo3MH2h0BAK4pJf397fYRmeTk5KsKBgAA4Cluj5EBAADwFm4fkRkzZkyx65ctW3bFYQAAANzhdpE5c+aMy3xeXp7279+v9PT0Qh8mCQAAUFrcLjIffvhhgWX5+fkaN26cGjVq5JFQAAAAJeGRMTI+Pj6aPHmyFixY4Im3AwAAKBGPDfY9cuSILly44Km3AwAAuCy3Ty1NnjzZZd6yLKWmpuqzzz7TyJEjPRYMAADgctwuMl9//bXLvI+Pj+rUqaN//OMfl72iCQAAwJPcLjIbNmwojRwAAABuc7vIXHLq1CklJSVJkpo2bao6dep4LBQAAEBJuD3YNysrS2PGjFFoaKi6d++u7t27KywsTPfee6+ys7NLIyMAAECh3C4ykydPVmJioj755BOlp6crPT1dq1evVmJioh599NHSyAgAAFAot08trVq1Su+//76ioqKcywYMGCB/f3/9+c9/1uLFiz2ZDwAAoEhuH5HJzs5WcHBwgeV169bl1BIAAChTbheZzp07KzY2VufPn3cu+/XXXzVz5kx17tzZo+EAAACK4/appRdffFH9+vVT/fr11bZtW0nS3r17VblyZf3f//2fxwMCAAAUxe0jMq1atdKhQ4cUFxendu3aqV27dpo7d64OHTqkli1buvVeGzdu1K233qqwsDA5HA599NFHLutHjRolh8PhMvXv39/dyAAAoJy6ovvIVKlSRWPHjr3qD8/KylLbtm01ZswYDR06tNBt+vfvr+XLlzvn/fz8rvpzAQBA+eB2kYmLi1NwcHCBxxEsW7ZMp06d0mOPPVbi94qJiVFMTEyx2/j5+SkkJMTdmAAA4Brg9qmlV155Rc2aNSuwvGXLllqyZIlHQv1eQkKC6tatq6ZNm2rcuHE6ffp0sdvn5OQoMzPTZQIAAOWT20dkjh8/rtDQ0ALL69Spo9TUVI+EuqR///4aOnSoIiMjdeTIET3xxBOKiYnRtm3bVKFChUJfExcXp5kzZ3o0B4BrS8Tjn9kd4bKOzh1odwTAK7hdZMLDw7VlyxZFRka6LN+yZYvCwsI8FkyS7rzzTue/W7durTZt2qhRo0ZKSEhQr169Cn3N9OnTNXnyZOd8ZmamwsPDPZoLAAB4B7eLzNixYzVx4kTl5eWpZ8+ekqR169Zp2rRppf6IgoYNG6p27do6fPhwkUXGz8+PAcEAAFwj3C4yU6dO1enTp/Xggw8qNzdXklS5cmU99thjmj59uscD/t5PP/2k06dPF3pqCwAAXHvcLjIOh0Pz5s3T008/rQMHDsjf319NmjS5oqMg586d0+HDh
53zycnJ2rNnj4KCghQUFKSZM2dq2LBhCgkJ0ZEjRzRt2jQ1btxY/fr1c/uzAABA+XNF95GRpGrVqqlDhw5X9eE7d+5UdHS0c/7S2JaRI0dq8eLF2rdvn15//XWlp6crLCxMffv21bPPPsupIwAAIOkqiownREVFybKsItfzyAMAAFAct+8jAwAA4C0oMgAAwFhuF5mNGzfqwoULBZZfuHBBGzdu9EgoAACAknC7yERHRystLa3A8oyMDJeBuwAAAKXN7SJjWZYcDkeB5adPn1bVqlU9EgoAAKAkSnzV0tChQyX9dh+ZUaNGuVwCffHiRe3bt09dunTxfEIAAIAilLjIBAYGSvrtiEz16tXl7+/vXFepUiXdfPPNGjt2rOcTAgAAFKHERWb58uWSpIiICE2ZMoXTSAAAwHZu3xAvNja2NHIAAAC4rURF5oYbbtC6detUs2ZNtW/fvtDBvpfs3r3bY+EAAACKU6Iic9tttzkH9w4ZMqQ08wAAAJRYiYrM708ncWoJAAB4i6t6aOS5c+eUn5/vsiwgIOCqAgEAAJSU2zfES05O1sCBA1W1alUFBgaqZs2aqlmzpmrUqKGaNWuWRkYAAIBCuX1E5u6775ZlWVq2bJmCg4OLHfgLAABQmtwuMnv37tWuXbvUtGnT0sgDAABQYm6fWurQoYNSUlJKIwsAAIBb3D4i89prr+mBBx7Qzz//rFatWqlixYou69u0aeOxcAAAAMVxu8icOnVKR44c0ejRo53LHA6H86nYFy9e9GhAAACAorhdZMaMGaP27dvrnXfeYbAvAACwldtF5n//+58+/vhjNW7cuDTyAAAAlJjbg3179uypvXv3lkYWAAAAt7h9RObWW2/VpEmT9M0336h169YFBvsOHjzYY+EAAACK43aReeCBByRJs2bNKrCOwb4AAKAsuV1k/vhsJQAAALu4PUYGAADAW1zR06//3//7f9qwYYNOnjxZ4AjN/PnzPRIMAADgctwuMnPmzNFTTz2lpk2bFriPDPeUAQAAZcntIvPiiy9q2bJlGjVqVCnEAQAAKDm3x8j4+Pioa9eupZEFAADALW4XmUmTJmnRokWlkQUAAMAtbp9amjJligYOHKhGjRqpRYsWBW6I98EHH3gsHAAAQHHcLjIPP/ywNmzYoOjoaNWqVYsBvgAAwDZuF5nXX39dq1at0sCBA0sjDwAAQIm5PUYmKChIjRo1Ko0sAAAAbnG7yMyYMUOxsbHKzs4ujTwAAAAl5vappZdeeklHjhxRcHCwIiIiCgz23b17t8fCAQAAFMftIjNkyJBSiAEAAOA+t4tMbGxsaeQAAABw2xU9NFKSdu3apQMHDkiSWrZsqfbt23ssFAAAQEm4XWROnjypO++8UwkJCapRo4YkKT09XdHR0Xr33XdVp04dT2cEAAAolNtXLU2YMEFnz57Vt99+q7S0NKWlpWn//v3KzMzUww8/XBoZAQAACuX2EZn4+HitXbtWzZs3dy5r0aKFFi1apL59+3o0HAAAQHHcPiKTn59f4JJrSapYsaLy8/M9EgoAAKAk3C4yPXv21COPPKJjx445l/3888+aNGmSevXq5dFwAAAAxXH71NI///lPDR48WBEREQoPD5ckpaSkqFWrVvrPf/7j8YAAADNFPP6Z3RFK5Ohcnh1oMreLTHh4uHbv3q21a9fq4MGDkqTmzZurd+/eHg8HAABQnCu6j4zD4VCfPn3Up08fT+cBAAAosRKPkVm/fr1atGihzMzMAusyMjLUsmVLbdq0yaPhAAAAilPiIrNw4UKNHTtWAQEBBdYFBgbq/vvv1/z58z0aDgAAoDglLjJ79+5V//79i1zft29f7dq1yyOhAAAASqLERebEiROF3j/mEl9fX506dcojoQAAAEqixEWmXr162r9/f5Hr9+3bp9DQUI+EAgAAKIkSF5kBAwbo6aef1vnz5wus+/XXXxUbG6tBgwZ5NBwAAEBxSlxknnrqKaWlpen666/X8
88/r9WrV2v16tWaN2+emjZtqrS0ND355JNuffjGjRt16623KiwsTA6HQx999JHLesuy9Mwzzyg0NFT+/v7q3bu3Dh065NZnAACA8qvERSY4OFhbt25Vq1atNH36dN1+++26/fbb9cQTT6hVq1bavHmzgoOD3frwrKwstW3bVosWLSp0/fPPP6+XXnpJS5Ys0VdffaWqVauqX79+hR4VAgAA1x63bojXoEEDff755zpz5owOHz4sy7LUpEkT1axZ84o+PCYmRjExMYWusyxLCxcu1FNPPaXbbrtNkvTGG28oODhYH330ke68884r+kwAAFB+XNGdfWvWrKkOHTp4OouL5ORkHT9+3OXRB4GBgerUqZO2bdtWZJHJyclRTk6Oc76wG/gBAIDy4YqKTFk4fvy4JBU4XRUcHOxcV5i4uDjNnDmzVLMB3siEB/TxcD4AnlbiMTKmmD59ujIyMpxTSkqK3ZEAAEAp8doiExISIum3G/H93okTJ5zrCuPn56eAgACXCQAAlE9eW2QiIyMVEhKidevWOZdlZmbqq6++UufOnW1MBgAAvIWtY2TOnTunw4cPO+eTk5O1Z88eBQUF6brrrtPEiRM1e/ZsNWnSRJGRkXr66acVFhamIUOG2BcaAAB4DVuLzM6dOxUdHe2cnzx5siRp5MiRWrFihaZNm6asrCz97W9/U3p6urp166b4+HhVrlzZrsgAAMCL2FpkoqKiZFlWkesdDodmzZqlWbNmlWEqAABgCq8dIwMAAHA5FBkAAGAsigwAADAWRQYAABiLIgMAAIxFkQEAAMby2odGAgCA3/BQ2KJxRAYAABiLIgMAAIxFkQEAAMaiyAAAAGNRZAAAgLEoMgAAwFgUGQAAYCyKDAAAMBZFBgAAGIsiAwAAjEWRAQAAxqLIAAAAY1FkAACAsSgyAADAWBQZAABgLIoMAAAwFkUGAAAYiyIDAACMRZEBAADGosgAAABjUWQAAICxKDIAAMBYFBkAAGAsigwAADAWRQYAABiLIgMAAIxFkQEAAMaiyAAAAGNRZAAAgLEoMgAAwFgUGQAAYCyKDAAAMBZFBgAAGIsiAwAAjEWRAQAAxqLIAAAAY1FkAACAsSgyAADAWBQZAABgLIoMAAAwFkUGAAAYiyIDAACMRZEBAADGosgAAABjUWQAAICxKDIAAMBYFBkAAGAsry4yM2bMkMPhcJmaNWtmdywAAOAlfO0OcDktW7bU2rVrnfO+vl4fGQAAlBGvbwW+vr4KCQmxOwYAAPBCXn1qSZIOHTqksLAwNWzYUH/5y1/0448/Frt9Tk6OMjMzXSYAAFA+eXWR6dSpk1asWKH4+HgtXrxYycnJuuWWW3T27NkiXxMXF6fAwEDnFB4eXoaJAQBAWfLqIhMTE6M77rhDbdq0Ub9+/fT5558rPT1d7733XpGvmT59ujIyMpxTSkpKGSYGAABlyevHyPxejRo1dP311+vw4cNFbuPn5yc/P78yTAUAAOzi1Udk/ujcuXM6cuSIQkND7Y4CAAC8gFcXmSlTpigxMVFHjx7V1q1bdfvtt6tChQoaMWKE3dEAAIAX8OpTSz/99JNGjBih06dPq06dOurWrZu2b9+uOnXq2B0NAAB4Aa8uMu+++67dEQAAgBfz6lNLAAAAxaHIAAAAY1FkAACAsSgyAADAWBQZAABgLIoMAAAwlldffu3NIh7/zO4Il3V07kC7IwAAUKo4IgMAAIxFkQEAAMaiyAAAAGNRZAAAgLEoMgAAwFgUGQAAYCyKDAAAMBZFBgAAGIsiAwAAjEWRAQAAxqLIAAAAY1FkAACAsSgyAADAWBQZAABgLIoMAAAwFkUGAAAYiyIDAACMRZEBAADGosgAAABjUWQAAICxKDIAAMBYFBkAAGAsigwAADAWRQYAABiLIgMAAIxFkQEAAMaiyAAAAGNRZAAAgLEoMgAAwFgUGQAAYCyKDAAAMBZFBgAAGIsiAwAAjEWRAQAAxqLIAAAAY1FkA
ACAsSgyAADAWBQZAABgLIoMAAAwFkUGAAAYiyIDAACMRZEBAADGosgAAABjUWQAAICxKDIAAMBYFBkAAGAsigwAADCWEUVm0aJFioiIUOXKldWpUyft2LHD7kgAAMALeH2R+e9//6vJkycrNjZWu3fvVtu2bdWvXz+dPHnS7mgAAMBmXl9k5s+fr7Fjx2r06NFq0aKFlixZoipVqmjZsmV2RwMAADbztTtAcXJzc7Vr1y5Nnz7duczHx0e9e/fWtm3bCn1NTk6OcnJynPMZGRmSpMzMTI9my8/J9uj7lQZP/8ylwYT9KLEvPcWE/SixLz3FhP0osS89xdP78dL7WZZV/IaWF/v5558tSdbWrVtdlk+dOtXq2LFjoa+JjY21JDExMTExMTGVgyklJaXYruDVR2SuxPTp0zV58mTnfH5+vtLS0lSrVi05HA4bkxUtMzNT4eHhSklJUUBAgN1xjMa+9Bz2pWewHz2Hfek5JuxLy7J09uxZhYWFFbudVxeZ2rVrq0KFCjpx4oTL8hMnTigkJKTQ1/j5+cnPz89lWY0aNUorokcFBAR47RfKNOxLz2Ffegb70XPYl57j7fsyMDDwstt49WDfSpUq6cYbb9S6deucy/Lz87Vu3Tp17tzZxmQAAMAbePURGUmaPHmyRo4cqZtuukkdO3bUwoULlZWVpdGjR9sdDQAA2Mzri8zw4cN16tQpPfPMMzp+/LjatWun+Ph4BQcH2x3NY/z8/BQbG1vglBjcx770HPalZ7AfPYd96TnlaV86LOty1zUBAAB4J68eIwMAAFAcigwAADAWRQYAABiLIgMAAIxFkQEAAMby+suvgct58803tWTJEiUnJ2vbtm1q0KCBFi5cqMjISN122212x8M1Jjo6utjHoaxfv74M0wDlH0UGRlu8eLGeeeYZTZw4Uc8995wuXrwo6bfHUixcuJAicxnuPK3Wm29j7k3atWvnMp+Xl6c9e/Zo//79GjlypD2hgP9fbm6uTp48qfz8fJfl1113nU2Jrh73kfEC5fGLVVZatGihOXPmaMiQIapevbr27t2rhg0bav/+/YqKitIvv/xid0Sv5uPjU+KHqV4qibgyM2bM0Llz5/TCCy/YHcXrffzxx4qJiVHFihX18ccfF7vt4MGDyyiV2Q4dOqQxY8Zo69atLssty5LD4TD6/98ckbFRef5ilZXk5GS1b9++wHI/Pz9lZWXZkMgsGzZscP776NGjevzxxzVq1Cjns8y2bdum119/XXFxcXZFLDfuvvtudezYkSJTAkOGDNHx48dVt25dDRkypMjt+O9kyY0aNUq+vr769NNPFRoaWuI/YExAkbFRef5ilZXIyEjt2bNHDRo0cFkeHx+v5s2b25TKHD169HD+e9asWZo/f75GjBjhXDZ48GC1bt1ar776KqdFrtK2bdtUuXJlu2MY4fdHp/94pBpXZs+ePdq1a5eaNWtmdxSPo8jYqDx/scrK5MmTNX78eJ0/f16WZWnHjh165513FBcXp9dee83ueEbZtm2blixZUmD5TTfdpPvuu8+GRGYaOnSoy7xlWUpNTdXOnTv19NNP25TKTHl5eerfv7+WLFmiJk2a2B3HaC1atCi3p9opMjYqz1+ssnLffffJ399fTz31lLKzs3XXXXcpLCxML774ou6880674xklPDxcS5cu1fPPP++y/LXXXlN4eLhNqcwTGBjoMu/j46OmTZtq1qxZ6tu3r02pzFSxYkXt27fP7hjlwrx58zRt2jTNmTNHrVu3VsWKFV3WmzyYn8G+Nlq/fr2eeuqpcvnFskN2drbOnTununXr2h3FSJ9//rmGDRumxo0bq1OnTpKkHTt26NChQ1q1apUGDBhgc0JciyZNmiQ/Pz/NnTvX7ihG8/H57bZxfxzCUB7GZFJkbFSev1gwU0pKihYvXqyDBw9Kkpo3b64HHniAIzJXgKsRPWPChAl644031KRJE914442qWrWqy/r58+fblMwsiYmJx
a7//Xg501BkbFSev1hlJTIysthB0j/88EMZpgGk77//Xvfeey9XI16lH374QREREerVq1eR2zgcDm4wCMbI2ImicvUmTpzoMp+Xl6evv/5a8fHxmjp1qj2hDLZp0ya98sor+uGHH7Ry5UrVq1dPb775piIjI9WtWze74xlh9OjRXI3oAU2aNFFqaqrzFgHDhw/XSy+9pODgYJuTmWnjxo3Fru/evXsZJfE8iozN0tPT9e9//1sHDhyQJLVs2VJjxowpMGAQhXvkkUcKXb5o0SLt3LmzjNOYbdWqVfrrX/+qv/zlL9q9e7dycnIkSRkZGZozZ44+//xzmxOagasRPeOPJwvWrFnDvaGuQlRUVIFlvy/ZJh8p5KGRNtq5c6caNWqkBQsWKC0tTWlpaZo/f74aNWqk3bt32x3PaDExMVq1apXdMYwye/ZsLVmyREuXLnUZeN61a1e+j27gasTSwSiIq3PmzBmX6eTJk4qPj1eHDh30xRdf2B3vqnBExkaTJk3S4MGDtXTpUvn6/vY/xYULF3Tfffdp4sSJlz0UiKK9//77CgoKsjuGUZKSkgo9vBwYGKj09PSyD2SQ3z+zqjxf5lqWHA5HgdNynKa7coUd5e/Tp48qVaqkyZMna9euXTak8gyKjI127tzpUmIkydfXV9OmTdNNN91kYzLvN2vWLD366KPq1q2by3/cLMvS8ePHderUKf3rX/+yMaF5QkJCdPjwYUVERLgs37x5sxo2bGhPKEPUqFGjwPfwj4NUGezrHsuyNGrUKPn5+UmSzp8/rwceeKDAVUsffPCBHfHKjeDgYCUlJdkd46pQZGwUEBCgH3/8scC59JSUFFWvXt2mVGaYOXOmHnjgAd12220uv0B8fHxUp04dRUVFMUbBTWPHjtUjjzyiZcuWyeFw6NixY9q2bZumTJnCHWkv4/fPrIJn/PGRGHfffbdNScqHP95Y8NIdp+fOnVvgie2m4fJrGz388MP68MMP9cILL6hLly6SpC1btmjq1KkaNmyYFi5caG9AL+bj4+N8qByuTnJysiIjI2VZlubMmaO4uDhlZ2dL+u3hm1OmTNGzzz5rc0rvN2vWLE2ZMkVVqlSxOwpQwKUn3f/xV/7NN9+sZcuWGf2HH0XGRrm5uZo6daqWLFmiCxcuyLIsVapUSePGjdPcuXOdh1RRkI+Pj06cOKE6derYHcV4Pj4+atCggaKjoxUdHa2oqCidPXtW586dU4sWLVStWjW7IxqhQoUKSk1NpVzDK/3vf/9zmb909Lo8PMiUIuMFsrOzdeTIEUlSo0aN+IuuBHx8fBQYGHjZwX9paWlllMhcCQkJzumrr75Sbm6uGjZsqJ49e6pnz56Kiori3h0lwFFCeKNt27bp9OnTGjRokHPZG2+8odjYWGVlZWnIkCF6+eWXjf7DmSJjgzFjxpRou2XLlpVyEnP5+Pho4cKFl73fzh/Ps6N458+f19atW53FZseOHcrLy1OzZs307bff2h3Pq3GUEN4oJiZGUVFReuyxxyRJ33zzjW644QaNGjVKzZs319///nfdf//9mjFjhr1BrwJFxgaXDuW3b9++2HsjfPjhh2WYyiz89Vu6cnNztWXLFq1Zs0avvPKKzp07x9U2l8FRQnij0NBQffLJJ84rYZ988kklJiZq8+bNkqSVK1cqNjZW3333nZ0xrwpXLdlg3Lhxeuedd5ScnKzRo0fr7rvv5p4nbuJ+Ep6Vm5ur7du3a8OGDc5TTOHh4erevbv++c9/8jiNEpo5cyZ35YZXOXPmjMup4cTERMXExDjnO3TooJSUFDuieQxHZGySk5OjDz74QMuWLdPWrVs1cOBA3Xvvverbty+/pEuAIzKe07NnT3311VeKjIxUjx49dMstt6hHjx4KDQ21O5pR+E7CGzVo0EBvvvmmunfvrtzcXNWoUUOffPKJ8z5H33zzjXr06GH0kUIeUWATPz8/jRgxQl9++aW+++47tWzZUg8++KAiIiJ07
tw5u+N5vfz8fH5heMimTZtUq1Yt9ezZU7169VKfPn0oMVeAP0DgjQYMGKDHH39cmzZt0vTp01WlShXdcsstzvX79u1To0aNbEx49SgyXuD31/czDgFlLT09Xa+++qqqVKmiefPmKSwsTK1bt9ZDDz2k999/X6dOnbI7ohE4uA1v9Oyzz8rX11c9evTQ0qVLtXTpUlWqVMm5ftmyZerbt6+NCa8ep5Zs8vtTS5s3b9agQYM0evRo9e/fXz4+9EvY5+zZs9q8ebNzvMzevXvVpEkT7d+/3+5oAK5QRkaGqlWrpgoVKrgsT0tLU7Vq1VzKjWkY7GuDBx98UO+++67Cw8M1ZswYvfPOO6pdu7bdsQBJUtWqVRUUFKSgoCDVrFlTvr6+OnDggN2xAFyFogahl4cLTTgiYwMfHx9dd911at++fbHn1XkYGspCfn6+du7cqYSEBG3YsEFbtmxRVlaW6tWr57zbb3R0tBo0aGB3VAAogCMyNrjnnnsYGAivUaNGDWVlZSkkJETR0dFasGCBoqKijB8ACODawBEZ4Br3yiuvKDo6Wtdff73dUQDAbRQZAABgLC6PAQAAxqLIAAAAY1FkAACAsSgyALzOwYMHdfPNN6ty5cpq165diV+XkJAgh8Oh9PT0UstWFsrLzwGUBYoMUA6NGjVKDodDDodDFStWVHBwsPr06aNly5YpPz/f7niXFRsbq6pVqyopKUnr1q0rdJuoqChNnDixbIMB8DoUGaCc6t+/v1JTU3X06FGtWbNG0dHReuSRRzRo0CBduHDB7njFOnLkiLp166YGDRqoVq1adscB4MUoMkA55efnp5CQENWrV0833HCDnnjiCa1evVpr1qzRihUrnNvNnz9frVu3VtWqVRUeHq4HH3zQ+QT2rKwsBQQE6P3333d5748++khVq1bV2bNnlZubq4ceekihoaGqXLmyGjRooLi4uCJz5efna9asWapfv778/PzUrl07xcfHO9c7HA7t2rVLs2bNksPh0IwZMwq8x6hRo5SYmKgXX3zReeTp6NGjzvW7du3STTfdpCpVqqhLly5KSkpyef3q1at1ww03qHLlymrYsKFmzpxZZLnbv3+/fHx8nA/PTEtLk4+Pj+68807nNrNnz1a3bt1cXhMTE6Nq1aopODhYf/3rX/XLL7+47IO4uDhFRkbK399fbdu2LbCPfy87O1sxMTHq2rUrp5uAP7IAlDsjR460brvttkLXtW3b1oqJiXHOL1iwwFq/fr2VnJxsrVu3zmratKk1btw45/qxY8daAwYMcHmPwYMHW/fcc49lWZb197//3QoPD7c2btxoHT161Nq0aZP19ttvF5lt/vz5VkBAgPXOO+9YBw8etKZNm2ZVrFjR+v777y3LsqzU1FSrZcuW1qOPPmqlpqZaZ8+eLfAe6enpVufOna2xY8daqampVmpqqnXhwgVrw4YNliSrU6dOVkJCgvXtt99at9xyi9WlSxfnazdu3GgFBARYK1assI4cOWJ98cUXVkREhDVjxoxC8+bn51u1a9e2Vq5caVmWZX300UdW7dq1rZCQEOc2vXv3tp588knLsizrzJkzVp06dazp06dbBw4csHbv3m316dPHio6Odm4/e/Zsq1mzZlZ8fLx15MgRa/ny5Zafn5+VkJBgWZbl/DnOnDljnTlzxurSpYvVt29fKysrq8j9ClyrKDJAOVRckRk+fLjVvHnzIl+7cuVKq1atWs75r776yqpQoYJ17Ngxy7Is68SJE5avr6/zl+6ECROsnj17Wvn5+SXKFhYWZj333HMuyzp06GA9+OCDzvm2bdtasbGxxb5Pjx49rEceecRl2aUCsHbtWueyzz77zJJk/frrr5ZlWVavXr2sOXPmuLzuzTfftEJDQ4v8rKFDh1rjx4+3LMuyJk6caE2dOtWqWbOmdeDAASs3N9eqUqWK9cUXX1iWZVnPPvus1bdvX5fXp6SkWJKspKQk6/z581aVKlWsrVu3umxz7
733WiNGjHD5OQ4cOGC1adPGGjZsmJWTk1Ps/gCuVTxrCbjGWJbl8qyvtWvXKi4uTgcPHlRmZqYuXLig8+fPKzs7W1WqVFHHjh3VsmVLvf7663r88cf1n//8Rw0aNFD37t0l/Xaap0+fPmratKn69++vQYMGqW/fvoV+dmZmpo4dO6auXbu6LO/atav27t3rsZ+xTZs2zn+HhoZKkk6ePKnrrrtOe/fu1ZYtW/Tcc885t7l48aLLz/xHPXr00KuvvipJSkxM1Jw5c/T9998rISFBaWlpysvLc/5Me/fu1YYNG1StWrUC73PkyBHl5eUpOztbffr0cVmXm5ur9u3buyzr06ePOnbsqP/+97+qUKHCFe4NoHyjyADXmAMHDigyMlKSdPToUQ0aNEjjxo3Tc889p6CgIG3evFn33nuvcnNznb/U77vvPi1atEiPP/64li9frtGjRzvL0A033KDk5GStWbNGa9eu1Z///Gf17t272DEfpa1ixYrOf1/KeelqrXPnzmnmzJkaOnRogddVrly50Pe7dIXUoUOH9N1336lbt246ePCgEhISdObMGed4nEvvf+utt2revHkF3ic0NFT79++XJH322WeqV6+ey3o/Pz+X+YEDB2rVqlX67rvv1Lp165L++MA1hSIDXEPWr1+vb775RpMmTZL026DY/Px8/eMf/5CPz29j/997770Cr7v77rs1bdo0vfTSS/ruu+80cuRIl/UBAQEaPny4hg8frj/96U/q37+/0tLSFBQUVGC7sLAwbdmyRT169HAu37Jlizp27OjWz1KpUiVdvHjRrddIvxWvpKQkNW7cuMSvad26tWrWrKnZs2erXbt2qlatmqKiojRv3jydOXNGUVFRLu+/atUqRUREyNe34H9iW7RoIT8/P/34448u+6Awc+fOVbVq1dSrVy8lJCSoRYsWJc4MXCsoMkA5lZOTo+PHj+vixYs6ceKE4uPjFRcXp0GDBumee+6RJDVu3Fh5eXl6+eWXdeutt2rLli1asmRJgfeqWbOmhg4dqqlTp6pv376qX7++c938+fMVGhqq9u3by8fHRytXrlRISIhq1KhRaK6pU6cqNjZWjRo1Urt27bR8+XLt2bNHb731lls/X0REhL766isdPXpU1apVK1CaivLMM89o0KBBuu666/SnP/1JPj4+2rt3r/bv36/Zs2cX+hqHw6Hu3bvrrbfe0pQpUyT9dvoqJydH69at0+TJk53bjh8/XkuXLtWIESM0bdo0BQUF6fDhw3r33Xf12muvqXr16poyZYomTZqk/Px8devWTRkZGdqyZYsCAgIKlMQXXnhBFy9eVM+ePZWQkKBmzZq5tZ+Acs/uQToAPG/kyJGWJEuS5evra9WpU8fq3bu3tWzZMuvixYsu286fP98KDQ21/P39rX79+llvvPGG84qZ31u3bp0lyXrvvfdclr/66qtWu3btrKpVq1oBAQFWr169rN27dxeZ7eLFi9aMGTOsevXqWRUrVrTatm1rrVmzxmWbkgz2TUpKsm6++WbL39/fkmQlJye7XO1zyddff+1cf0l8fLzVpUsXy9/f3woICLA6duxovfrqq8V+3oIFCyxJLllvu+02y9fXt8CVVd9//711++23WzVq1LD8/f2tZs2aWRMnTnQOiM7Pz7cWLlxoNW3a1KpYsaJVp04dq1+/flZiYqJlWVahP8eECROs0NBQKykpqdicwLXGYVmWZVuLAmCMN998U5MmTdKxY8dUqVIlu+MAgCROLQG4jOzsbKWmpmru3Lm6//77KTEAvAp39gVQrOeff17NmjVTSEiIpk+fbnccAHDBqSUAAGAsjsgAAABjUWQAAICxKDIAAMBYFBkAAGAsigwAADAWRQYAABiLIgMAAIxFkQEAAMaiyAAAAGP9f7aBa/Gd6CQ2AAAAAElFTkSuQmCC", + "text/plain": [ + "<Figure size 640x480 with 1 Axes>" + ] + 
}, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ax = sorted_day_counts.plot.bar()\n", + "ax.set_ylabel(\"Commit counts\")\n", + "ax.set_xlabel(\"Days of the week\")" + ] + }, + { + "cell_type": "markdown", + "id": "0a73668f-ed23-4eb1-9c78-519e5a2b48cb", + "metadata": {}, + "source": [ + "#### Find all commit authors names." + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "4aab2ca9-f459-4aad-8177-2be1185b9564", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'gsingh58'" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "authors = re.findall(r\"Author:\\s+(.+?)\\s*<\", git_log_output)\n", + "authors[0]" + ] + }, + { + "cell_type": "markdown", + "id": "6fca3840-b836-4555-9e0d-cd8e9b730ea2", + "metadata": {}, + "source": [ + "#### `git log` from projects repo" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "eebd36f3-1d2d-485f-a86c-44dc7767b365", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "commit 129a4745b416e3f0be08795dca69d02d528fe893\n", + "Author: JINLANG WANG <jwang2775@wisc.edu>\n", + "Date: Mon Mar 11 10:37:16 2024 -0500\n", + "\n", + " Update file README.md\n", + "\n", + "commit 413d84dceb0f48e111b25d9f7765513181feb6d6\n", + "Author: JINLANG WANG <jwang2775@wisc.edu>\n", + "Date: Sat Mar 9 18:46:18 2024 -0600\n", + "\n", + " Update 2 files\n", + " \n", + " - /Labs/Lab10/README\n", + " - /Labs/Lab10/README.md\n", + "\n", + "commit bd2acf092cfeacdc994dac733300ab61a3373b26\n", + "Author: JINLANG WANG <jwang2775@wisc.edu>\n", + "Date: Sat Mar 9 18:45:42 2024 -0600\n", + "\n", + " lab10\n", + "\n", + "commit f84c2a89a44d374da385bb499738ec82b12b7965\n", + "Author: JINLANG WANG <jwang2775@wisc.edu>\n", + "Date: Sat Mar 9 18:42:28 2024 -0600\n", + "\n", + " Update file EDGAR.md\n", + "\n", + "commit 11b505faae9964182b99288210f54bae5ce3e211\n", + "Author: JINLANG 
WANG <jwang2775@wisc.edu>\n", + "Date: Sat Mar 9 18:41:35 2024 -0600\n", + "\n", + " Update file README.md\n", + "\n", + "commit 5f35a23cf70d24e627fef5fd89c0711cb144dbc4\n", + "Author: JINLANG WANG <jwang2775@wisc.edu>\n", + "Date: Sat Mar 9 18:41:06 2024 -0600\n", + "\n", + " lab9\n", + "\n", + "commit d481d4de35443a07812af9216d6883300207ae6\n" + ] + } + ], + "source": [ + "git_log_output = str(check_output([\"git\", \"log\"], cwd=\"../../projects-and-labs\"), encoding=\"utf-8\")\n", + "print(git_log_output[:1000])" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "234c0230-ccc5-44a5-b975-2e37b4637444", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[' P4 pipeline update',\n", + " ' P4 released',\n", + " ' fixing pipeline and adding backup mp3 tester file to ease confusion',\n", + " ' mp3 install help',\n", + " ' pipeline update for MP3',\n", + " ' p3 pipeline changes',\n", + " ' P4 pipeline setup',\n", + " ' updating MP3 tester.py and MP3 pipeline',\n", + " ' P3 released',\n", + " ' P3 released',\n", + " ' P2 key updated',\n", + " ' MP2 Update/Fix to the tester',\n", + " ' MP2 key fix + readme update',\n", + " ' mp1 readme updated',\n", + " ' P2 Release',\n", + " ' gitlab tutorial + mp1 release',\n", + " ' initial commit (P1)']" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "re.findall(r\".*[pP][1-6].*\", git_log_output)" + ] + }, + { + "cell_type": "markdown", + "id": "0801ffab-e486-4b56-a8f2-4520a8117e98", + "metadata": {}, + "source": [ + "### Emails example" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "7f232c86-1f9a-44f1-9266-a8c4920cdbef", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Gurmail [Instructor] - gsingh58(AT) cs.wisc.edu\n", + "Jinlang [Head TA] - jwang2775 (AT) wisc.edu\n", + "Elliot [TA] - eepickens (AT) cs.wisc.edu\n", + "Alex [TA] - aclinton (AT) 
wisc.edu\n", + "Bowman [TA] - bnbrown3 (AT) wisc.edu\n", + "Hafeez [TA] - aneesali (AT) wisc.edu\n", + "William [TA] - wycong (AT) wisc.edu\n", + "Someone [PM] - someone@wisc.edu\n", + "\n" + ] + } + ], + "source": [ + "s = \"\"\"\n", + "Gurmail [Instructor] - gsingh58(AT) cs.wisc.edu\n", + "Jinlang [Head TA] - jwang2775 (AT) wisc.edu\n", + "Elliot [TA] - eepickens (AT) cs.wisc.edu\n", + "Alex [TA] - aclinton (AT) wisc.edu\n", + "Bowman [TA] - bnbrown3 (AT) wisc.edu\n", + "Hafeez [TA] - aneesali (AT) wisc.edu\n", + "William [TA] - wycong (AT) wisc.edu\n", + "Someone [PM] - someone@wisc.edu\n", + "\"\"\"\n", + "print(s)" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "id": "e9ab8445-333a-4db9-a6a5-0f7a1c3d5b45", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('gsingh58(AT) cs.wisc.edu',\n", + " 'gsingh58',\n", + " '(AT)',\n", + " '(AT)',\n", + " 'cs.wisc.edu',\n", + " 'wisc.',\n", + " 'edu'),\n", + " ('jwang2775 (AT) wisc.edu',\n", + " 'jwang2775',\n", + " '(AT)',\n", + " '(AT)',\n", + " 'wisc.edu',\n", + " '',\n", + " 'edu'),\n", + " ('eepickens (AT) cs.wisc.edu',\n", + " 'eepickens',\n", + " '(AT)',\n", + " '(AT)',\n", + " 'cs.wisc.edu',\n", + " 'wisc.',\n", + " 'edu'),\n", + " ('aclinton (AT) wisc.edu', 'aclinton', '(AT)', '(AT)', 'wisc.edu', '', 'edu'),\n", + " ('bnbrown3 (AT) wisc.edu', 'bnbrown3', '(AT)', '(AT)', 'wisc.edu', '', 'edu'),\n", + " ('aneesali (AT) wisc.edu', 'aneesali', '(AT)', '(AT)', 'wisc.edu', '', 'edu'),\n", + " ('wycong (AT) wisc.edu', 'wycong', '(AT)', '(AT)', 'wisc.edu', '', 'edu'),\n", + " ('someone@wisc.edu', 'someone', '@', '', 'wisc.edu', '', 'edu')]" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "name = r\"\\w+\"\n", + "at = r\"@|([\\(\\[]?[Aa][Tt][\\)\\]]?)\"\n", + "domain = r\"\\w+\\.(\\w+\\.)?(edu|com|org|net|io|gov)\"\n", + "\n", + "full_regex = f\"(({name})\\s*({at})\\s*({domain}))\"\n", + "\n", + "re.findall(full_regex, s)" + 
] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "290b5fa7-b5ea-4e61-9d67-0862d65199ad", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "REGEX: ((\\w+)\\s*(@|([\\(\\[]?[Aa][Tt][\\)\\]]?))\\s*(\\w+\\.(\\w+\\.)?(edu|com|org|net|io|gov)))\n", + "gsingh58@cs.wisc.edu\n", + "jwang2775@wisc.edu\n", + "eepickens@cs.wisc.edu\n", + "aclinton@wisc.edu\n", + "bnbrown3@wisc.edu\n", + "aneesali@wisc.edu\n", + "wycong@wisc.edu\n", + "someone@wisc.edu\n" + ] + } + ], + "source": [ + "print(\"REGEX:\", full_regex)\n", + "for match in re.findall(full_regex, s):\n", + " print(match[1] + \"@\" + match[4])" + ] + }, + { + "cell_type": "markdown", + "id": "1d808a52-3312-4d25-8a18-b3cb49c341ed", + "metadata": {}, + "source": [ + "### Self-practice\n", + "\n", + "Q1: Which regex will NOT match \"123\"?\n", + "1. r\"\\d\\d\\d\"\n", + "2. r\"\\d{3}\"\n", + "3. r\"\\D\\D\\D\"\n", + "4. r\"...\"\n", + "\n", + "Q2: What will r\"^A\" match?\n", + "1. \"A\"\n", + "2. \"^A\"\n", + "3. \"BA\"\n", + "4. \"B\"\n", + "5. \"BB\"\n", + "\n", + "Q3: Which one can match \"HH\"?\n", + "1. r\"HA+H\"\n", + "2. r\"HA+?H\"\n", + "3. r\"H(A+)?H\"\n", + "\n", + "Q4: Which string(s) will match r\"^(ha)*$\"?\n", + "1. \"\"\n", + "2. \"hahah\"\n", + "3. \"that\"\n", + "4. \"HAHA\"\n", + "\n", + "Q5: What is the type of the following? re.findall(r\"(\\d) (\\w+)\", some_str)[0]\n", + "1. list\n", + "2. tuple\n", + "3. string\n", + "\n", + "Q6: What will it do?\n", + "```python\n", + "re.sub(r\"(\\d{3})-(\\d{3}-\\d{4})\",\n", + " r\"(\\g<1>) \\g<2>\",\n", + " \"608-123-4567\")\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "f15a75e9-b5a0-46ae-a84e-08fd345bcb71", + "metadata": {}, + "source": [ + "The answers to these questions can be found in self_practice.ipynb. You may want to try to answer these questions yourself and then verify your answers." 
+ ] } ], "metadata": { diff --git a/lecture_material/14-web-4-and-regex-1/regex_1_lec_001.ipynb b/lecture_material/15-regex/regex_lec_001.ipynb similarity index 98% rename from lecture_material/14-web-4-and-regex-1/regex_1_lec_001.ipynb rename to lecture_material/15-regex/regex_lec_001.ipynb index 5a44224e43a7102f13a9cce57cc6df52ef774ec6..594a3f8867eaf377d4c5e23bf1fd2a19528d420e 100644 --- a/lecture_material/14-web-4-and-regex-1/regex_1_lec_001.ipynb +++ b/lecture_material/15-regex/regex_lec_001.ipynb @@ -10,7 +10,7 @@ "id": "1b1bb64b", "metadata": {}, "source": [ - "# Regex 1\n", + "# Regex\n", "\n", "## Reading\n", "\n", @@ -103,7 +103,9 @@ "outputs": [], "source": [ "#import statements\n", - "import re" + "import re\n", + "from subprocess import check_output\n", + "import pandas as pd" ] }, { @@ -952,6 +954,315 @@ "source": [ "print(re.sub(r\"(\\d+)\", \"<b>\\g<1></b>\", msg))" ] + }, + { + "cell_type": "markdown", + "id": "c0a4d8c9-722b-490a-b913-f1d9dedb83c3", + "metadata": {}, + "source": [ + "In CS <b>320</b>, there are <b>28</b> lectures, <b>11</b> quizzes, <b>3</b> exams, <b>6</b> projects, and <b>1000</b> things to learn. CS <b>320</b> is awesome!" 
+ ] + }, + { + "cell_type": "markdown", + "id": "1cf71b2a-98fc-462e-a689-818f1f3a3317", + "metadata": {}, + "source": [ + "### Git log example" + ] + }, + { + "cell_type": "markdown", + "id": "eacdeac4-176e-4588-9250-651627a9df39", + "metadata": {}, + "source": [ + "#### Run `git log` as a shell command" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "76052b01-1363-46f8-8c6a-d7a21de893cc", + "metadata": {}, + "outputs": [], + "source": [ + "!git log" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8d2fe250-7333-4612-8ce1-be224d3826ab", + "metadata": {}, + "outputs": [], + "source": [ + "git_log_output = str(check_output([\"git\", \"log\"]), encoding=\"utf-8\")\n", + "print(git_log_output[:500])" + ] + }, + { + "cell_type": "markdown", + "id": "a122d61e-daed-469d-b68c-f0c4e5a9d0c1", + "metadata": {}, + "source": [ + "#### GOAL: find all the commit numbers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7f06f214-f6d1-49a5-a14e-f83fb4ba9a29", + "metadata": {}, + "outputs": [], + "source": [ + "commits = re.findall(r\"\", git_log_output)\n", + "# recent 10 commit numbers\n", + "commits[:10]" + ] + }, + { + "cell_type": "markdown", + "id": "1996d812-51c3-4d0f-aa0b-a867c6aec4d7", + "metadata": {}, + "source": [ + "#### What days of the week does the team push things into this repo?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eeaac309-3b8d-46ab-9072-a0f061007277", + "metadata": {}, + "outputs": [], + "source": [ + "print(git_log_output[:500])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6f358c85-9abf-4dfe-9d0a-9f9914093dbf", + "metadata": {}, + "outputs": [], + "source": [ + "days = re.findall(r\"\", git_log_output)\n", + "days" + ] + }, + { + "cell_type": "markdown", + "id": "5e4ab9be-020d-4274-83e7-7564f45c4917", + "metadata": {}, + "source": [ + "#### Count unique days" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "768dd6c5-ce85-4d43-b930-6e0911d8d95c", + "metadata": {}, + "outputs": [], + "source": [ + "day_counts = pd.Series(days).value_counts()\n", + "day_counts" + ] + }, + { + "cell_type": "markdown", + "id": "96ee87eb-0eeb-4360-b74a-0f77e4085157", + "metadata": {}, + "source": [ + "#### Sort by day of the week" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7c0b9e19-5986-446b-9280-25b4d5d1c0e5", + "metadata": {}, + "outputs": [], + "source": [ + "sorted_day_counts = day_counts.loc[[\"Mon\", \"Tue\", \"Wed\", \"Thu\", \"Fri\", \"Sun\"]]\n", + "sorted_day_counts" + ] + }, + { + "cell_type": "markdown", + "id": "02b735e7-562a-48de-ab1c-70a36ed4afa6", + "metadata": {}, + "source": [ + "#### Create a bar plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9826d060-dc83-4949-bf79-58d77ad0bf39", + "metadata": {}, + "outputs": [], + "source": [ + "ax = sorted_day_counts.plot.bar()\n", + "ax.set_ylabel(\"Commit counts\")\n", + "ax.set_xlabel(\"Days of the week\")" + ] + }, + { + "cell_type": "markdown", + "id": "1649e14e-c867-46be-a1bc-cc4eab80ff1a", + "metadata": {}, + "source": [ + "#### Find all commit authors names." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2105c891-c00b-4e00-b61f-109933ea9f83", + "metadata": {}, + "outputs": [], + "source": [ + "authors = re.findall(r\"\", git_log_output)\n", + "authors[0]" + ] + }, + { + "cell_type": "markdown", + "id": "a19905c2-c296-458d-8d6c-8563439f1621", + "metadata": {}, + "source": [ + "#### `git log` from projects repo" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1317631c-e695-4158-a83c-d3e3edfc15f6", + "metadata": {}, + "outputs": [], + "source": [ + "git_log_output = str(check_output([\"git\", \"log\"], cwd=\"../../projects-and-labs\"), encoding=\"utf-8\")\n", + "print(git_log_output[:1000])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4cf0f370-c8c5-4ff0-979f-d55f2a3bbaef", + "metadata": {}, + "outputs": [], + "source": [ + "re.findall(r\"\", git_log_output)" + ] + }, + { + "cell_type": "markdown", + "id": "d3199138-3104-44dd-a3ca-9f775817dad8", + "metadata": {}, + "source": [ + "### Emails example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d6e6dae7-a374-40b1-a044-9091c51b60d2", + "metadata": {}, + "outputs": [], + "source": [ + "s = \"\"\"\n", + "Gurmail [Instructor] - gsingh58(AT) cs.wisc.edu\n", + "Jinlang [Head TA] - jwang2775 (AT) wisc.edu\n", + "Elliot [TA] - eepickens (AT) cs.wisc.edu\n", + "Alex [TA] - aclinton (AT) wisc.edu\n", + "Bowman [TA] - bnbrown3 (AT) wisc.edu\n", + "Hafeez [TA] - aneesali (AT) wisc.edu\n", + "William [TA] - wycong (AT) wisc.edu\n", + "\"\"\"\n", + "print(s)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "df1c2095-5a58-4896-a3f5-e5e6a28fc4b6", + "metadata": {}, + "outputs": [], + "source": [ + "name = r\"\\w+\"\n", + "at = r\"@|([\\(\\[]?[Aa][Tt][\\)\\]]?)\"\n", + "domain = r\"\\w+\\.(\\w+\\.)?(edu|com|org|net|io|gov)\"\n", + "\n", + "full_regex = f\"(({name})\\s*({at})\\s*({domain}))\"\n", + "\n", + "re.findall(full_regex, s)" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "id": "2a778af2-0bbe-44e3-85f3-c868543ddc76", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"REGEX:\", full_regex)\n", + "for match in re.findall(full_regex, s):\n", + " print(match[1] + \"@\" + match[4])" + ] + }, + { + "cell_type": "markdown", + "id": "a1d55737-24a0-40bb-bedd-7125a921e7e7", + "metadata": {}, + "source": [ + "### Self-practice\n", + "\n", + "Q1: Which regex will NOT match \"123\"?\n", + "1. r\"\\d\\d\\d\"\n", + "2. r\"\\d{3}\"\n", + "3. r\"\\D\\D\\D\"\n", + "4. r\"...\"\n", + "\n", + "Q2: What will r\"^A\" match?\n", + "1. \"A\"\n", + "2. \"^A\"\n", + "3. \"BA\"\n", + "4. \"B\"\n", + "5. \"BB\"\n", + "\n", + "Q3: Which one can match \"HH\"?\n", + "1. r\"HA+H\"\n", + "2. r\"HA+?H\"\n", + "3. r\"H(A+)?H\"\n", + "\n", + "Q4: Which string(s) will match r\"^(ha)*$\"?\n", + "1. \"\"\n", + "2. \"hahah\"\n", + "3. \"that\"\n", + "4. \"HAHA\"\n", + "\n", + "Q5: What is the type of the following? re.findall(r\"(\\d) (\\w+)\", some_str)[0]\n", + "1. list\n", + "2. tuple\n", + "3. string\n", + "\n", + "Q6: What will it do?\n", + "```python\n", + "re.sub(r\"(\\d{3})-(\\d{3}-\\d{4})\",\n", + " r\"(\\g<1>) \\g<2>\",\n", + " \"608-123-4567\")\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a76c9298-2460-4572-8656-e197ca1636ca", + "metadata": {}, + "outputs": [], + "source": [ + "# The answers to these questions can be found in self_practice.ipynb. You may want to try to answer these questions yourself and then verify your answers." 
+ ] } ], "metadata": { diff --git a/lecture_material/14-web-4-and-regex-1/regex_1_lec_002.ipynb b/lecture_material/15-regex/regex_lec_002.ipynb similarity index 98% rename from lecture_material/14-web-4-and-regex-1/regex_1_lec_002.ipynb rename to lecture_material/15-regex/regex_lec_002.ipynb index 5a44224e43a7102f13a9cce57cc6df52ef774ec6..594a3f8867eaf377d4c5e23bf1fd2a19528d420e 100644 --- a/lecture_material/14-web-4-and-regex-1/regex_1_lec_002.ipynb +++ b/lecture_material/15-regex/regex_lec_002.ipynb @@ -10,7 +10,7 @@ "id": "1b1bb64b", "metadata": {}, "source": [ - "# Regex 1\n", + "# Regex\n", "\n", "## Reading\n", "\n", @@ -103,7 +103,9 @@ "outputs": [], "source": [ "#import statements\n", - "import re" + "import re\n", + "from subprocess import check_output\n", + "import pandas as pd" ] }, { @@ -952,6 +954,315 @@ "source": [ "print(re.sub(r\"(\\d+)\", \"<b>\\g<1></b>\", msg))" ] + }, + { + "cell_type": "markdown", + "id": "c0a4d8c9-722b-490a-b913-f1d9dedb83c3", + "metadata": {}, + "source": [ + "In CS <b>320</b>, there are <b>28</b> lectures, <b>11</b> quizzes, <b>3</b> exams, <b>6</b> projects, and <b>1000</b> things to learn. CS <b>320</b> is awesome!" 
+ ] + }, + { + "cell_type": "markdown", + "id": "1cf71b2a-98fc-462e-a689-818f1f3a3317", + "metadata": {}, + "source": [ + "### Git log example" + ] + }, + { + "cell_type": "markdown", + "id": "eacdeac4-176e-4588-9250-651627a9df39", + "metadata": {}, + "source": [ + "#### Run `git log` as a shell command" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "76052b01-1363-46f8-8c6a-d7a21de893cc", + "metadata": {}, + "outputs": [], + "source": [ + "!git log" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8d2fe250-7333-4612-8ce1-be224d3826ab", + "metadata": {}, + "outputs": [], + "source": [ + "git_log_output = str(check_output([\"git\", \"log\"]), encoding=\"utf-8\")\n", + "print(git_log_output[:500])" + ] + }, + { + "cell_type": "markdown", + "id": "a122d61e-daed-469d-b68c-f0c4e5a9d0c1", + "metadata": {}, + "source": [ + "#### GOAL: find all the commit numbers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7f06f214-f6d1-49a5-a14e-f83fb4ba9a29", + "metadata": {}, + "outputs": [], + "source": [ + "commits = re.findall(r\"\", git_log_output)\n", + "# recent 10 commit numbers\n", + "commits[:10]" + ] + }, + { + "cell_type": "markdown", + "id": "1996d812-51c3-4d0f-aa0b-a867c6aec4d7", + "metadata": {}, + "source": [ + "#### What days of the week does the team push things into this repo?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eeaac309-3b8d-46ab-9072-a0f061007277", + "metadata": {}, + "outputs": [], + "source": [ + "print(git_log_output[:500])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6f358c85-9abf-4dfe-9d0a-9f9914093dbf", + "metadata": {}, + "outputs": [], + "source": [ + "days = re.findall(r\"\", git_log_output)\n", + "days" + ] + }, + { + "cell_type": "markdown", + "id": "5e4ab9be-020d-4274-83e7-7564f45c4917", + "metadata": {}, + "source": [ + "#### Count unique days" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "768dd6c5-ce85-4d43-b930-6e0911d8d95c", + "metadata": {}, + "outputs": [], + "source": [ + "day_counts = pd.Series(days).value_counts()\n", + "day_counts" + ] + }, + { + "cell_type": "markdown", + "id": "96ee87eb-0eeb-4360-b74a-0f77e4085157", + "metadata": {}, + "source": [ + "#### Sort by day of the week" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7c0b9e19-5986-446b-9280-25b4d5d1c0e5", + "metadata": {}, + "outputs": [], + "source": [ + "sorted_day_counts = day_counts.loc[[\"Mon\", \"Tue\", \"Wed\", \"Thu\", \"Fri\", \"Sun\"]]\n", + "sorted_day_counts" + ] + }, + { + "cell_type": "markdown", + "id": "02b735e7-562a-48de-ab1c-70a36ed4afa6", + "metadata": {}, + "source": [ + "#### Create a bar plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9826d060-dc83-4949-bf79-58d77ad0bf39", + "metadata": {}, + "outputs": [], + "source": [ + "ax = sorted_day_counts.plot.bar()\n", + "ax.set_ylabel(\"Commit counts\")\n", + "ax.set_xlabel(\"Days of the week\")" + ] + }, + { + "cell_type": "markdown", + "id": "1649e14e-c867-46be-a1bc-cc4eab80ff1a", + "metadata": {}, + "source": [ + "#### Find all commit authors names." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2105c891-c00b-4e00-b61f-109933ea9f83", + "metadata": {}, + "outputs": [], + "source": [ + "authors = re.findall(r\"\", git_log_output)\n", + "authors[0]" + ] + }, + { + "cell_type": "markdown", + "id": "a19905c2-c296-458d-8d6c-8563439f1621", + "metadata": {}, + "source": [ + "#### `git log` from projects repo" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1317631c-e695-4158-a83c-d3e3edfc15f6", + "metadata": {}, + "outputs": [], + "source": [ + "git_log_output = str(check_output([\"git\", \"log\"], cwd=\"../../projects-and-labs\"), encoding=\"utf-8\")\n", + "print(git_log_output[:1000])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4cf0f370-c8c5-4ff0-979f-d55f2a3bbaef", + "metadata": {}, + "outputs": [], + "source": [ + "re.findall(r\"\", git_log_output)" + ] + }, + { + "cell_type": "markdown", + "id": "d3199138-3104-44dd-a3ca-9f775817dad8", + "metadata": {}, + "source": [ + "### Emails example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d6e6dae7-a374-40b1-a044-9091c51b60d2", + "metadata": {}, + "outputs": [], + "source": [ + "s = \"\"\"\n", + "Gurmail [Instructor] - gsingh58(AT) cs.wisc.edu\n", + "Jinlang [Head TA] - jwang2775 (AT) wisc.edu\n", + "Elliot [TA] - eepickens (AT) cs.wisc.edu\n", + "Alex [TA] - aclinton (AT) wisc.edu\n", + "Bowman [TA] - bnbrown3 (AT) wisc.edu\n", + "Hafeez [TA] - aneesali (AT) wisc.edu\n", + "William [TA] - wycong (AT) wisc.edu\n", + "\"\"\"\n", + "print(s)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "df1c2095-5a58-4896-a3f5-e5e6a28fc4b6", + "metadata": {}, + "outputs": [], + "source": [ + "name = r\"\\w+\"\n", + "at = r\"@|([\\(\\[]?[Aa][Tt][\\)\\]]?)\"\n", + "domain = r\"\\w+\\.(\\w+\\.)?(edu|com|org|net|io|gov)\"\n", + "\n", + "full_regex = f\"(({name})\\s*({at})\\s*({domain}))\"\n", + "\n", + "re.findall(full_regex, s)" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "id": "2a778af2-0bbe-44e3-85f3-c868543ddc76", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"REGEX:\", full_regex)\n", + "for match in re.findall(full_regex, s):\n", + " print(match[1] + \"@\" + match[4])" + ] + }, + { + "cell_type": "markdown", + "id": "a1d55737-24a0-40bb-bedd-7125a921e7e7", + "metadata": {}, + "source": [ + "### Self-practice\n", + "\n", + "Q1: Which regex will NOT match \"123\"?\n", + "1. r\"\\d\\d\\d\"\n", + "2. r\"\\d{3}\"\n", + "3. r\"\\D\\D\\D\"\n", + "4. r\"...\"\n", + "\n", + "Q2: What will r\"^A\" match?\n", + "1. \"A\"\n", + "2. \"^A\"\n", + "3. \"BA\"\n", + "4. \"B\"\n", + "5. \"BB\"\n", + "\n", + "Q3: Which one can match \"HH\"?\n", + "1. r\"HA+H\"\n", + "2. r\"HA+?H\"\n", + "3. r\"H(A+)?H\"\n", + "\n", + "Q4: Which string(s) will match r\"^(ha)*$\"?\n", + "1. \"\"\n", + "2. \"hahah\"\n", + "3. \"that\"\n", + "4. \"HAHA\"\n", + "\n", + "Q5: What is the type of the following? re.findall(r\"(\\d) (\\w+)\", some_str)[0]\n", + "1. list\n", + "2. tuple\n", + "3. string\n", + "\n", + "Q6: What will it do?\n", + "```python\n", + "re.sub(r\"(\\d{3})-(\\d{3}-\\d{4})\",\n", + " r\"(\\g<1>) \\g<2>\",\n", + " \"608-123-4567\")\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a76c9298-2460-4572-8656-e197ca1636ca", + "metadata": {}, + "outputs": [], + "source": [ + "# The answers to these questions can be found in self_practice.ipynb. You may want to try to answer these questions yourself and then verify your answers." 
+ ] } ], "metadata": { diff --git a/lecture_material/15-regex_2/self_practice.ipynb b/lecture_material/15-regex/self_practice.ipynb similarity index 100% rename from lecture_material/15-regex_2/self_practice.ipynb rename to lecture_material/15-regex/self_practice.ipynb diff --git a/lecture_material/15-regex_2/regex_2.ipynb b/lecture_material/15-regex_2/regex_2.ipynb deleted file mode 100644 index 623d72461d7b75a77c129d08ff070d364c0d74bb..0000000000000000000000000000000000000000 --- a/lecture_material/15-regex_2/regex_2.ipynb +++ /dev/null @@ -1,1960 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "e60c1c48", - "metadata": {}, - "source": [ - "# Regex 2" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "0dba68b0", - "metadata": {}, - "outputs": [], - "source": [ - "#import statements\n", - "import re\n", - "from subprocess import check_output\n", - "import pandas as pd" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "b97c8008-8e39-4a9f-89a1-0d1ddbb1ac01", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "A DAG is a directed graph without cycles. A tree is a DAG where every node has one parent (except the root, which has none). To learn more, visit www.example.com or call 1-608-123-4567. :) ¯\\_(ツ)_/¯\n", - "1-608-123-4567\n", - "a-bcd-efg-hijg (not a phone number)\n", - "1-608-123-456 (not a phone number)\n", - "608-123-4567\n", - "123-4567\n", - "1-123-4567 (not a phone number)\n", - "\n", - "In CS 320, there are 11 quizzes, 6 projects, 28 lectures, and 1000 things to learn. CS 320 is awesome!\n", - "In CS 320, there are 11 quizzes, 6 projects,\n", - "28 lectures, and 1000 things to learn. 
CS 320 is awesome!\n" - ] - } - ], - "source": [ - "# Example strings\n", - "# from DS100 book...\n", - "def reg(regex, text):\n", - " \"\"\"\n", - " Prints the string with the regex match highlighted.\n", - " \"\"\"\n", - " print(re.sub(f'({regex})', r'\\033[1;30;43m\\1\\033[m', text))\n", - "s1 = \" \".join([\"A DAG is a directed graph without cycles.\",\n", - " \"A tree is a DAG where every node has one parent (except the root, which has none).\",\n", - " \"To learn more, visit www.example.com or call 1-608-123-4567. :) ¯\\_(ツ)_/¯\"])\n", - "print(s1)\n", - "\n", - "s2 = \"\"\"1-608-123-4567\n", - "a-bcd-efg-hijg (not a phone number)\n", - "1-608-123-456 (not a phone number)\n", - "608-123-4567\n", - "123-4567\n", - "1-123-4567 (not a phone number)\n", - "\"\"\"\n", - "print(s2)\n", - "\n", - "s3 = \"In CS 320, there are 11 quizzes, 6 projects, 28 lectures, and 1000 things to learn. CS 320 is awesome!\"\n", - "print(s3)\n", - "\n", - "s4 = \"\"\"In CS 320, there are 11 quizzes, 6 projects,\n", - "28 lectures, and 1000 things to learn. CS 320 is awesome!\"\"\"\n", - "print(s4)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "924069c5-82be-4423-b659-2beee8e226be", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "A DAG is a directed graph without cycles. A tree is a DAG where every node has one parent (except the root, which has none). To learn more, visit www.example.com or call 1-608-123-4567. 
:) ¯\\_(ツ)_/¯\n" - ] - } - ], - "source": [ - "print(s1)" - ] - }, - { - "cell_type": "markdown", - "id": "6a2eff34", - "metadata": {}, - "source": [ - "### Regex is case sensitive\n", - "\n", - "### Character classes\n", - "\n", - "- Character classes can be mentioned within `[...]`\n", - "- `^` means `NOT` of a character class\n", - "- `-` enables us to mention range of characters, for example `[A-Z]`\n", - "- `|` enables us to perform `OR`\n", - "\n", - "### Metacharacters\n", - "\n", - "- predefined character classes\n", - " - `\\d` => digits\n", - " - `\\s` => whitespace (space, tab, newline)\n", - " - `\\w` => \"word\" characters (digits, letters, underscores, etc) --- helpful for variable name matches and whole word matches (as it doesn't match whitespace --- `\\s`)\n", - " - `.` => wildcard: anything except newline\n", - "- capitalized version of character classes mean `NOT`, for example `\\D` => everything except digits\n", - "\n", - "### REPETITION\n", - "\n", - "- `<character>{<num matches>}` - for example: `w{3}`\n", - "- matches cannot overlap\n", - "\n", - "### Variable length repitition operators\n", - "\n", - "- `*` => 0 or more (greedy: match as many characters as possible)\n", - "- `+` => 1 or more (greedy: match as many characters as possible)\n", - "- `?` => 0 or 1\n", - "- `*?` => 0 or more (non-greedy: match as few characters as possible)\n", - "- `+?` => 1 or more (non-greedy: match as few characters as possible)\n", - "\n", - "#### Find everything inside of parentheses." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "11f75d92-f215-47f4-8804-5e5727d3be55", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[1;30;43mA DAG is a directed graph without cycles. A tree is a DAG where every node has one parent (except the root, which has none). To learn more, visit www.example.com or call 1-608-123-4567. 
:) ¯\\_(ツ)_/¯\u001b[m\u001b[1;30;43m\u001b[m\n" - ] - } - ], - "source": [ - "# this doesn't work\n", - "# it captures everything because () have special meaning (coming up)\n", - "reg(r\"(.*)\", s1)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "b488460e", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "A DAG is a directed graph without cycles. A tree is a DAG where every node has one parent \u001b[1;30;43m(except the root, which has none). To learn more, visit www.example.com or call 1-608-123-4567. :) ¯\\_(ツ)\u001b[m_/¯\n" - ] - } - ], - "source": [ - "# How can we change this to not use special meaning of ()?\n", - "# * is greedy: match as many characters as possible\n", - "reg(r\"\\(.*\\)\", s1)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "42155b54-d27b-4418-81a1-c005c508d738", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "A DAG is a directed graph without cycles. A tree is a DAG where every node has one parent \u001b[1;30;43m(except the root, which has none)\u001b[m. To learn more, visit www.example.com or call 1-608-123-4567. :) ¯\\_\u001b[1;30;43m(ツ)\u001b[m_/¯\n" - ] - } - ], - "source": [ - "# non-greedy: stop at the first possible spot instead of the last possible spot\n", - "reg(r\"\\(.*?\\)\", s1)" - ] - }, - { - "cell_type": "markdown", - "id": "0fd70cfd", - "metadata": {}, - "source": [ - "### Anchor characters\n", - "- `^` => start of string\n", - " - `^` is overloaded --- what was the other usage?\n", - "- `$` => end of string\n", - "\n", - "#### Find everything in the first sentence." 
- ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "40fed5db", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[1;30;43mA DAG is a directed graph without cycles.\u001b[m\u001b[1;30;43m A tree is a DAG where every node has one parent (except the root, which has none).\u001b[m\u001b[1;30;43m To learn more, visit www.\u001b[m\u001b[1;30;43mexample.\u001b[m\u001b[1;30;43mcom or call 1-608-123-4567.\u001b[m :) ¯\\_(ツ)_/¯\n" - ] - } - ], - "source": [ - "# doesn't work because remember regex finds all possible matches\n", - "# so it matches every single sentence \n", - "# (even though we are doing non-greedy match)\n", - "reg(r\".*?\\.\", s1)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "7e97abd8", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[1;30;43mA DAG is a directed graph without cycles.\u001b[m A tree is a DAG where every node has one parent (except the root, which has none). To learn more, visit www.example.com or call 1-608-123-4567. :) ¯\\_(ツ)_/¯\n" - ] - } - ], - "source": [ - "reg(r\"^.*?\\.\", s1)" - ] - }, - { - "cell_type": "markdown", - "id": "f66a4651", - "metadata": {}, - "source": [ - "#### Find everything in the first two sentences." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "cfa8353e-4402-4f64-b3f8-35e73b2d7a68", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[1;30;43mA DAG is a directed graph without cycles. A tree is a DAG where every node has one parent (except the root, which has none).\u001b[m To learn more, visit www.example.com or call 1-608-123-4567. :) ¯\\_(ツ)_/¯\n" - ] - } - ], - "source": [ - "reg(r\"^(.*?\\.){2}\", s1)" - ] - }, - { - "cell_type": "markdown", - "id": "76570acd", - "metadata": {}, - "source": [ - "#### Find last \"word\" in the sentence." 
- ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "f35a6c81-47b5-48eb-8357-888fe832f85b", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "A DAG is a directed graph without cycles. A tree is a DAG where every node has one parent (except the root, which has none). To learn more, visit www.example.com or call 1-608-123-4567. :) \u001b[1;30;43m¯\\_(ツ)_/¯\u001b[m\n" - ] - } - ], - "source": [ - "reg(r\"\\S+$\", s1)" - ] - }, - { - "cell_type": "markdown", - "id": "4b25fb66", - "metadata": {}, - "source": [ - "### Case study: find all phone numbers." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "8ecbeaf0", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1-608-123-4567\n", - "a-bcd-efg-hijg (not a phone number)\n", - "1-608-123-456 (not a phone number)\n", - "608-123-4567\n", - "123-4567\n", - "1-123-4567 (not a phone number)\n", - "\n" - ] - } - ], - "source": [ - "print(s2)\n", - "# The country code (1) in the front is optional\n", - "# The area code (608) is also optional\n", - "# Doesn't make sense to match country code without area code though!" 
- ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "88a01725-790c-4f21-b334-e3d31bed24b5", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[1;30;43m1-608-123-4567\u001b[m\n", - "a-bcd-efg-hijg (not a phone number)\n", - "1-608-123-456 (not a phone number)\n", - "608-123-4567\n", - "123-4567\n", - "1-123-4567 (not a phone number)\n", - "\n" - ] - } - ], - "source": [ - "# Full US phone numbers\n", - "reg(r\"\\d-\\d{3}-\\d{3}-\\d{4}\", s2)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "ea8ce0be-98f8-4f9c-bcc0-1d8a0bb183a8", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[1;30;43m1-608-123-4567\u001b[m\n", - "a-bcd-efg-hijg (not a phone number)\n", - "1-608-123-456 (not a phone number)\n", - "\u001b[1;30;43m608-123-4567\u001b[m\n", - "123-4567\n", - "1-123-4567 (not a phone number)\n", - "\n" - ] - } - ], - "source": [ - "# The country code (1) in the front is optional\n", - "reg(r\"(\\d-)?\\d{3}-\\d{3}-\\d{4}\", s2)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "5befb23a-848f-44d4-aaac-6b21ac0bbf43", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[1;30;43m1-608-123-4567\u001b[m\n", - "a-bcd-efg-hijg (not a phone number)\n", - "1-608-123-456 (not a phone number)\n", - "\u001b[1;30;43m608-123-4567\u001b[m\n", - "\u001b[1;30;43m123-4567\u001b[m\n", - "\u001b[1;30;43m1-123-4567\u001b[m (not a phone number)\n", - "\n" - ] - } - ], - "source": [ - "# The area code (608) is also optional\n", - "# Doesn't make sense to have country code without area code though!\n", - "reg(r\"(\\d-)?(\\d{3}-)?\\d{3}-\\d{4}\", s2)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "34164fd1-a422-45a0-8e11-54199ca77120", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - 
"\u001b[1;30;43m1-608-123-4567\u001b[m\n", - "a-bcd-efg-hijg (not a phone number)\n", - "1-608-123-456 (not a phone number)\n", - "\u001b[1;30;43m608-123-4567\u001b[m\n", - "\u001b[1;30;43m123-4567\u001b[m\n", - "1-\u001b[1;30;43m123-4567\u001b[m (not a phone number)\n", - "\n" - ] - } - ], - "source": [ - "# This is good enough for 320 quizzes/tests\n", - "# But clearly, the last match is not correct\n", - "reg(r\"((\\d-)?\\d{3}-)?\\d{3}-\\d{4}\", s2)" - ] - }, - { - "cell_type": "markdown", - "id": "8a2ee4e2", - "metadata": {}, - "source": [ - "Regex documentation link: https://docs.python.org/3/library/re.html." - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "694a585b-b5a7-4a6f-a0f1-60521f7dfc47", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[1;30;43m1-608-123-4567\u001b[m\n", - "a-bcd-efg-hijg (not a phone number)\n", - "1-608-123-456 (not a phone number)\n", - "\u001b[1;30;43m608-123-4567\u001b[m\n", - "\u001b[1;30;43m123-4567\u001b[m\n", - "1-123-4567 (not a phone number)\n", - "\n" - ] - } - ], - "source": [ - "# BONUS: negative lookbehind (I won't test this)\n", - "reg(r\"(?<!\\d\\-)((\\d-)?\\d{3}-)?\\d{3}-\\d{4}\", s2)" - ] - }, - { - "cell_type": "markdown", - "id": "3973350b", - "metadata": {}, - "source": [ - "There is also a negative lookahead. For example, how to avoid matching \"1-608-123-456\" in \"1-608-123-4569999\". You can explore this if you are interested." 
- ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "4988d765", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[1;30;43m608-123-4569\u001b[m999\n" - ] - } - ], - "source": [ - "reg(r\"(?<!\\d\\-)((\\d-)?\\d{3}-)?\\d{3}-\\d{4}\", \"608-123-4569999\")" - ] - }, - { - "cell_type": "markdown", - "id": "b02ae9e0", - "metadata": {}, - "source": [ - "### Testing your regex\n", - "- you could use `reg(...)` function\n", - "- another useful resource: https://regex101.com/" - ] - }, - { - "cell_type": "markdown", - "id": "4a973271", - "metadata": {}, - "source": [ - "### `re` module\n", - "- `re.findall(<PATTERN>, <SEARCH STRING>)`: regular expression matches\n", - " - returns a list of strings \n", - "- `re.sub(<PATTERN>, <REPLACEMENT>, <SEARCH STRING>)`: regular expression match + substitution\n", - " - returns a new string with the substitutions (remember strings are immutable)" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "73ec525f", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "In CS 320,\tthere are 28 lectures, 11 quizzes, 3 exams,\t6 projects, and 1000 things to learn. CS 320 is awesome!\n" - ] - } - ], - "source": [ - "msg = \"In CS 320,\\tthere are 28 lectures, 11 quizzes, 3 exams,\\t6 projects, and 1000 things to learn. CS 320 is awesome!\"\n", - "print(msg)" - ] - }, - { - "cell_type": "markdown", - "id": "34998a5e", - "metadata": {}, - "source": [ - "#### Find all digits." 
- ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "7f42c25a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['320', '28', '11', '3', '6', '1000', '320']" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "re.findall(r\"\\d+\", msg)" - ] - }, - { - "cell_type": "markdown", - "id": "70b2f488", - "metadata": {}, - "source": [ - "### Groups\n", - "- we can capture matches using `()` => this is the special meaning of `()`\n", - "- returns a list of tuples, where length of the tuple will be number of groups\n", - "\n", - "#### Find all digits and the word that comes after that." - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "5309adee", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[('28', 'lectures'),\n", - " ('11', 'quizzes'),\n", - " ('3', 'exams'),\n", - " ('6', 'projects'),\n", - " ('1000', 'things'),\n", - " ('320', 'is')]" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "matches = re.findall(r\"(\\d+) (\\w+)\", msg)\n", - "matches" - ] - }, - { - "cell_type": "markdown", - "id": "c138ff9a", - "metadata": {}, - "source": [ - "#### Goal: make a dict (course component => count, like \"projects\" => 6)" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "6654bd71", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'lectures': 28,\n", - " 'quizzes': 11,\n", - " 'exams': 3,\n", - " 'projects': 6,\n", - " 'things': 1000,\n", - " 'is': 320}" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "course_dict = {}\n", - "for count, component in matches:\n", - " course_dict[component] = int(count)\n", - "course_dict" - ] - }, - { - "cell_type": "markdown", - "id": "c4b6b505", - "metadata": {}, - "source": [ - "### Unlike matches, groups can overlap\n", - "\n", - "#### 
Find and group all digits and the word that comes after that." - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "491c3460", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[('28 lectures', '28', 'lectures'),\n", - " ('11 quizzes', '11', 'quizzes'),\n", - " ('3 exams', '3', 'exams'),\n", - " ('6 projects', '6', 'projects'),\n", - " ('1000 things', '1000', 'things'),\n", - " ('320 is', '320', 'is')]" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "re.findall(r\"((\\d+) (\\w+))\", msg)" - ] - }, - { - "cell_type": "markdown", - "id": "d2227e69", - "metadata": {}, - "source": [ - "#### Substitute all digits with \"###\"." - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "6d1fede1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'In CS ###,\\tthere are ### lectures, ### quizzes, ### exams,\\t### projects, and ### things to learn. CS ### is awesome!'" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "re.sub(r\"\\d+\", \"###\", msg)" - ] - }, - { - "cell_type": "markdown", - "id": "9d531122", - "metadata": {}, - "source": [ - "#### Goal: normalize whitespace (everything will be a single space)" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "4becbe70", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "In CS 320,\tthere are 28 lectures, 11 quizzes, 3 exams,\t6 projects, and 1000 things to learn. CS 320 is awesome!\n" - ] - } - ], - "source": [ - "print(msg)" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "72a6eb42", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'In CS 320, there are 28 lectures, 11 quizzes, 3 exams, 6 projects, and 1000 things to learn. 
CS 320 is awesome!'" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "re.sub(r\"\\s+\", \" \", msg)" - ] - }, - { - "cell_type": "markdown", - "id": "6faf33fd", - "metadata": {}, - "source": [ - "### How to use groups is substitution?\n", - "- `\\g<N>` gives you the result of the N'th grouping.\n", - "\n", - "#### Substitute all course component counts with HTML bold tags." - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "8df577fd", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "In CS <b>320</b>,\tthere are <b>28</b> lectures, <b>11</b> quizzes, <b>3</b> exams,\t<b>6</b> projects, and <b>1000</b> things to learn. CS <b>320</b> is awesome!\n" - ] - } - ], - "source": [ - "print(re.sub(r\"(\\d+)\", \"<b>\\g<1></b>\", msg))" - ] - }, - { - "cell_type": "markdown", - "id": "35a15a41", - "metadata": {}, - "source": [ - "In CS <b>320</b>, there are <b>28</b> lectures, <b>10</b> quizzes, <b>3</b> exams, <b>6</b> projects, and <b>1000</b> things to learn. CS <b>320</b> is awesome!" 
- ] - }, - { - "cell_type": "markdown", - "id": "bede932a", - "metadata": {}, - "source": [ - "### Git log example" - ] - }, - { - "cell_type": "markdown", - "id": "f816ab34", - "metadata": {}, - "source": [ - "#### Run `git log` as a shell command" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "f55eec9a", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[33mcommit f5b5fff2409b01c2e12f3a7baae131e9a23d964b\u001b[m\u001b[33m (\u001b[m\u001b[1;36mHEAD -> \u001b[m\u001b[1;32mmain\u001b[m\u001b[33m, \u001b[m\u001b[1;31morigin/main\u001b[m\u001b[33m, \u001b[m\u001b[1;31morigin/HEAD\u001b[m\u001b[33m)\u001b[m\n", - "Author: gsingh58 <gurmail-singh@wisc.edu>\n", - "Date: Wed Mar 13 09:39:09 2024 -0500\n", - "\n", - " regex_2_lec_002 renamed as regex_1_lec_002\n", - "\n", - "\u001b[33mcommit e017cc00d97679786d97c469733117c016e3ba9b\u001b[m\n", - "Author: gsingh58 <gurmail-singh@wisc.edu>\n", - "Date: Wed Mar 13 09:34:43 2024 -0500\n", - "\n", - " web_4_lec_002 added\n", - "\n", - "\u001b[33mcommit 7897fec9ecf701e61b2ed2713fabe72e726ffd7c\u001b[m\n", - "Author: gsingh58 <gurmail-singh@wisc.edu>\n", - "Date: Tue Mar 12 05:51:48 2024 -0500\n", - "\n", - " lec14 and lec15 added\n", - "\n", - "\u001b[33mcommit c13cfb21e69230b393ef2051027d029322c12cac\u001b[m\n", - "Author: JINLANG WANG <jwang2775@wisc.edu>\n", - "Date: Mon Mar 11 10:36:36 2024 -0500\n", - "\n", - " Update file README.md\n", - "\n", - "\u001b[33mcommit 95bfde13c281a6eb28f0cd98f6dc393762732d4c\u001b[m\n", - "Author: JINLANG WANG <jwang2775@wisc.edu>\n", - "Date: Sat Mar 9 18:45:08 2024 -0600\n", - "\n", - " lab10\n", - "\n", - "\u001b[33mcommit 90111df9e72309597c67da15bb61de78bff126e7\u001b[m\n", - "Author: JINLANG WANG <jwang2775@wisc.edu>\n", - "Date: Sat Mar 9 18:43:47 2024 -0600\n", - "\n", - " lab9\n", - "\n", - "\u001b[33mcommit c401ade096b0dd3e1178f46d067b2fbb98d499f3\u001b[m\n", - "Author: JINLANG WANG <jwang2775@wisc.edu>\n", - 
"Date: Sat Mar 9 18:43:20 2024 -0600\n", - "\n", - " Update 2 files\n", - " \n", - " - /Labs/Lab9/EDGAR.md\n", - " - /Labs/Lab9/README.md\n", - "\n", - "\u001b[33mcommit c99c65b5efdaa91f8fce8fadffc41c4747e0a3a0\u001b[m\n", - "Author: gsingh58 <gurmail-singh@wisc.edu>\n", - "Date: Thu Mar 7 08:36:02 2024 -0600\n", - "\n", - " lec13 updated\n", - "\n", - "\u001b[33mcommit df5877233cc57005e3003c19c0dbf89aafc53804\u001b[m\n", - "Author: gsingh58 <gurmail-singh@wisc.edu>\n", - "Date: Thu Mar 7 07:02:16 2024 -0600\n", - "\n", - " lec12 & 13 updated\n", - "\n", - "\u001b[33mcommit 26470de563bbe3bc65a73718aee1988ba64e8601\u001b[m\n", - "Author: gsingh58 <gurmail-singh@wisc.edu>\n", - "Date: Tue Mar 5 05:22:26 2024 -0600\n", - "\n", - " lec12 and 13: ipynb files added\n", - "\n", - "\u001b[33mcommit 406a4231cd865f81653eb97f7d5f61136360f9e5\u001b[m\n", - "Author: JINLANG WANG <jwang2775@wisc.edu>\n", - "Date: Sun Mar 3 17:41:42 2024 -0600\n", - "\n", - " Update file README.md\n", - "\n", - "\u001b[33mcommit 64745e91caeed8149cb33be037b795f88b4ddd39\u001b[m\n", - "Author: gsingh58 <gurmail-singh@wisc.edu>\n", - "Date: Sun Mar 3 05:03:29 2024 -0600\n", - "\n", - " lec12 and lec13 updated\n", - "\n", - "\u001b[33mcommit 42568d125bb87a10c178ec1d4640e467f2d826b0\u001b[m\n", - "Author: JINLANG WANG <jwang2775@wisc.edu>\n", - "Date: Wed Feb 28 22:31:11 2024 -0600\n", - "\n", - " lab8\n", - "\n", - "\u001b[33mcommit e00ff203a601993ec07c23420ff223707228e820\u001b[m\n", - "Author: JINLANG WANG <jwang2775@wisc.edu>\n", - "Date: Wed Feb 28 22:26:10 2024 -0600\n", - "\n", - " lab6\n", - "\n", - "\u001b[33mcommit 408cff34905add795fb9cce796913372b184a60b\u001b[m\n", - "Author: JINLANG WANG <jwang2775@wisc.edu>\n", - "Date: Wed Feb 28 22:25:47 2024 -0600\n", - "\n", - " add lab7\n", - "\n", - "\u001b[33mcommit dd5c119d6d72ee039a3eb10608d2f57f56933d72\u001b[m\n", - "Author: JINLANG WANG <jwang2775@wisc.edu>\n", - "Date: Wed Feb 28 22:22:15 2024 -0600\n", - "\n", - " add lab6\n", - "\n", - 
"\u001b[33mcommit 27ce693c05fe8beae4bd2b15715454c6c3c3f3f7\u001b[m\n", - "Author: gsingh58 <gurmail-singh@wisc.edu>\n", - "Date: Tue Feb 27 00:24:06 2024 -0600\n", - "\n", - " lec11 updated\n", - "\n", - "\u001b[33mcommit 8fda6507879ad4a220675d7af1dcc4f2dc6eb8cb\u001b[m\n", - "Author: gsingh58 <gurmail-singh@cs.wisc.edu>\n", - "Date: Thu Feb 22 16:57:25 2024 -0600\n", - "\n", - " redundant html files deleted\n", - "\n", - "\u001b[33mcommit 3693ad5b1f9ff5e2ed5af15baac3579715d26c9d\u001b[m\n", - "Author: gsingh58 <gurmail-singh@wisc.edu>\n", - "Date: Thu Feb 22 03:45:27 2024 -0600\n", - "\n", - " lec10 updated\n", - "\n", - "\u001b[33mcommit 6b7ac80ca513020f4ac43898001896aeec597179\u001b[m\n", - "Author: gsingh58 <gurmail-singh@wisc.edu>\n", - "Date: Thu Feb 22 01:48:04 2024 -0600\n", - "\n", - " lec9 updated again\n", - "\n", - "\u001b[33mcommit 3c0d23f6d2568da03b4ad8032e912cf2899af829\u001b[m\n", - "Author: JINLANG WANG <jwang2775@wisc.edu>\n", - "Date: Wed Feb 21 15:07:18 2024 -0600\n", - "\n", - " add screenshot req\n", - "\n", - "\u001b[33mcommit 5ca95af225907454cf9ffc50d6fadd2dae8838a1\u001b[m\n", - "Author: gsingh58 <gurmail-singh@cs.wisc.edu>\n", - "Date: Tue Feb 20 13:36:44 2024 -0600\n", - "\n", - " lec9 solution updated\n", - "\n", - "\u001b[33mcommit eb9f52e5c56b09918c76c25d431fae6e4f0aca59\u001b[m\n", - "Author: JINLANG WANG <jwang2775@wisc.edu>\n", - "Date: Tue Feb 20 09:32:16 2024 -0600\n", - "\n", - " add lab4 req\n", - "\n", - "\u001b[33mcommit 5caf51ade3733e981b801edd007155334dd70a29\u001b[m\n", - "Author: gsingh58 <gurmail-singh@wisc.edu>\n", - "Date: Tue Feb 20 07:37:59 2024 -0600\n", - "\n", - " Lec9 updated HTML removed\n", - "\n", - "\u001b[33mcommit e8915e7e2c1daef12cf968949c7641afa11c0758\u001b[m\n", - "Author: gsingh58 <gurmail-singh@wisc.edu>\n", - "Date: Tue Feb 20 07:33:57 2024 -0600\n", - "\n", - " lec9 updated\n", - "\n", - "\u001b[33mcommit 15b3e1d7c9ea971f161b2fb4df4f253a45588fe8\u001b[m\n", - "Author: gsingh58 
<gurmail-singh@cs.wisc.edu>\n", - "Date: Thu Feb 15 13:45:20 2024 -0600\n", - "\n", - " a starter.ipynb file deleted\n", - "\n", - "\u001b[33mcommit 13ab0a06e5fa6ad7918b3564ab92d8e4a1914be1\u001b[m\n", - "Author: gsingh58 <gurmail-singh@cs.wisc.edu>\n", - "Date: Thu Feb 15 13:44:18 2024 -0600\n", - "\n", - " lec8 slides added\n", - "\n", - "\u001b[33mcommit faf0945797857eff185318c4146fdd00b57acd87\u001b[m\n", - "Author: JINLANG WANG <jwang2775@wisc.edu>\n", - "Date: Wed Feb 14 15:37:26 2024 -0600\n", - "\n", - " add lab5\n", - "\n", - "\u001b[33mcommit 19cd94a871641d13e77c58b03bfa50b53645473d\u001b[m\n", - "Author: JINLANG WANG <jwang2775@wisc.edu>\n", - "Date: Tue Feb 13 10:35:16 2024 -0600\n", - "\n", - " add exams\n", - "\n", - "\u001b[33mcommit 960fccecbdc787ecbe01fbe805e2d58255a47e35\u001b[m\n", - "Author: GURMAIL SINGH <gurmail.singh@wisc.edu>\n", - "Date: Tue Feb 13 03:49:40 2024 -0600\n", - "\n", - " Add new directory\n", - "\n", - "\u001b[33mcommit aa18ba4a4daa4c07a794b95aa647d33cbc4b21fc\u001b[m\n", - "Author: GURMAIL SINGH <gurmail.singh@wisc.edu>\n", - "Date: Tue Feb 13 03:48:21 2024 -0600\n", - "\n", - " Add new directory\n", - "\n", - "\u001b[33mcommit bf9845e5336eb0db7f309b55665c7bc46035bbf4\u001b[m\n", - "Author: GURMAIL SINGH <gurmail.singh@wisc.edu>\n", - "Date: Tue Feb 13 03:48:08 2024 -0600\n", - "\n", - " Add new directory\n", - "\n", - "\u001b[33mcommit 64fd3ae4f7677738f7fa88f75434674fb5b673fb\u001b[m\n", - "Author: GURMAIL SINGH <gurmail.singh@wisc.edu>\n", - "Date: Tue Feb 13 03:47:08 2024 -0600\n", - "\n", - " Add new directory\n", - "\n", - "\u001b[33mcommit 7daae2c37e007f641988e96e8252281a926089dd\u001b[m\n", - "Author: gsingh58 <gurmail-singh@wisc.edu>\n", - "Date: Tue Feb 13 03:40:01 2024 -0600\n", - "\n", - " worksheet add\n", - "\n", - "\u001b[33mcommit 489b6fba8db75a0b2ca55af0e774d21bdcf53447\u001b[m\n", - "Author: gsingh58 <gurmail-singh@wisc.edu>\n", - "Date: Tue Feb 13 03:23:22 2024 -0600\n", - "\n", - " readings moved\n", - "\n", 
- "\u001b[33mcommit 84ec381168bf2faefc4013883ecad4041ca97dd7\u001b[m\n", - "Author: gsingh58 <gurmail-singh@wisc.edu>\n", - "Date: Tue Feb 13 02:45:35 2024 -0600\n", - "\n", - " lec7 and 8 updated\n", - "\n", - "\u001b[33mcommit 4eb6f5c315386e467751a7636dbfc3ecce49aa40\u001b[m\n", - "Author: JINLANG WANG <jwang2775@wisc.edu>\n", - "Date: Thu Feb 8 14:19:12 2024 -0600\n", - "\n", - " add lab4\n", - "\n", - "\u001b[33mcommit ffce38e0a69d68e43186e37a74f9c5db7838d57a\u001b[m\n", - "Author: gsingh58 <gurmail-singh@wisc.edu>\n", - "Date: Thu Feb 8 08:14:20 2024 -0600\n", - "\n", - " lec6 updated\n", - "\n", - "\u001b[33mcommit 0436dacdd2a05927ee0cdc6b4f21c6af8bfd0f7f\u001b[m\n", - "Author: gsingh58 <gurmail-singh@cs.wisc.edu>\n", - "Date: Tue Feb 6 16:21:28 2024 -0600\n", - "\n", - " in_class_demo_lec2\n", - "\n", - "\u001b[33mcommit a5926f4b1152a2d1851a0c0d46e10337ec2e3307\u001b[m\n", - "Author: JINLANG WANG <jwang2775@wisc.edu>\n", - "Date: Tue Feb 6 13:20:11 2024 -0600\n", - "\n", - " update lab2\n", - "\n", - "\u001b[33mcommit e46ee7eb2210f5f4dceae060fe876f65bfaf9291\u001b[m\n", - "Author: gsingh58 <gurmail-singh@cs.wisc.edu>\n", - "Date: Tue Feb 6 11:23:25 2024 -0600\n", - "\n", - " wi.zip uploaded\n", - "\n", - "\u001b[33mcommit a58c5160b1df3fde7bb3a61efe0fb488d48a4f1e\u001b[m\n", - "Author: gsingh58 <gurmail-singh@wisc.edu>\n", - "Date: Tue Feb 6 08:16:41 2024 -0600\n", - "\n", - " lec5 notes updated\n", - "\n", - "\u001b[33mcommit aa58287550760afedc5d42c6d6a7418e256fc216\u001b[m\n", - "Author: gsingh58 <gurmail-singh@wisc.edu>\n", - "Date: Tue Feb 6 07:30:46 2024 -0600\n", - "\n", - " lec5 slides updated\n", - "\n", - "\u001b[33mcommit 775fe85700e8ea29573f379b762337214b0672a1\u001b[m\n", - "Author: JINLANG WANG <jwang2775@wisc.edu>\n", - "Date: Mon Feb 5 17:17:27 2024 -0600\n", - "\n", - " update lab2\n", - "\n", - "\u001b[33mcommit 513506b35bef83e8c9bdfd9101f3dd7e492f6630\u001b[m\n", - "Author: JINLANG WANG <jwang2775@wisc.edu>\n", - "Date: Sun Feb 4 19:34:38 
2024 -0600\n", - "\n", - " add lab3\n", - "\n", - "\u001b[33mcommit a171f55cba1a8ff24be8235638b36394974f5f4e\u001b[m\n", - "Author: JINLANG WANG <jwang2775@wisc.edu>\n", - "Date: Sun Feb 4 19:30:18 2024 -0600\n", - "\n", - " Update file README.md\n", - "\n", - "\u001b[33mcommit c205681624d12c40cda836a9aa7bfecdfb9e1b5e\u001b[m\n", - "Author: JINLANG WANG <jwang2775@wisc.edu>\n", - "Date: Sun Feb 4 19:29:13 2024 -0600\n", - "\n", - " Update file README.md\n", - "\n", - "\u001b[33mcommit 1961c8e880f7b8bea9fe276fe5cfeaf070c8fda3\u001b[m\n", - "Author: JINLANG WANG <jwang2775@wisc.edu>\n", - "Date: Sun Feb 4 19:27:30 2024 -0600\n", - "\n", - " update lab2\n", - "\n", - "\u001b[33mcommit d6f156b6680afce23e971fa02c7550b7ed1c8464\u001b[m\n", - "Author: JINLANG WANG <jwang2775@wisc.edu>\n", - "Date: Sun Feb 4 19:27:00 2024 -0600\n", - "\n", - " update lab2\n", - "\n", - "\u001b[33mcommit 9933dd1f67182d29af40d794604474a942ac8d05\u001b[m\n", - "Author: JINLANG WANG <jwang2775@wisc.edu>\n", - "Date: Sun Feb 4 19:13:10 2024 -0600\n", - "\n", - " add lab2\n", - "\n", - "\u001b[33mcommit ad8795c898efc2630a667e90db4fb3b198a1f281\u001b[m\n", - "Author: gsingh58 <gurmail-singh@wisc.edu>\n", - "Date: Sun Feb 4 07:52:55 2024 -0600\n", - "\n", - " redundant repos deleted\n", - "\n", - "\u001b[33mcommit 0c4c4e75d8efdcb5aaf5d0ef39e27b7dc7d3baf1\u001b[m\n", - "Author: gsingh58 <gurmail-singh@wisc.edu>\n", - "Date: Thu Feb 1 07:17:47 2024 -0600\n", - "\n", - " lec4 updated\n", - "\n", - "\u001b[33mcommit 78d0d8c28355c33713d8dc3a9cb271c9dc13b9fb\u001b[m\n", - "Author: gsingh58 <gurmail-singh@wisc.edu>\n", - "Date: Thu Feb 1 07:16:55 2024 -0600\n", - "\n", - " few more files added\n", - "\n", - "\u001b[33mcommit e15b0eae22fc127b87d155207ccf93687075429b\u001b[m\n", - "Author: JINLANG WANG <jwang2775@wisc.edu>\n", - "Date: Wed Jan 31 09:35:12 2024 -0600\n", - "\n", - " update lab1\n", - "\n", - "\u001b[33mcommit f6bf4ed07310ec20bdc81b07a7b84e931abc1d52\u001b[m\n", - "Author: JINLANG WANG 
<jwang2775@wisc.edu>\n", - "Date: Tue Jan 30 10:03:13 2024 -0600\n", - "\n", - " Update lab1.md\n", - "\n", - "\u001b[33mcommit 6ab88b7c35171871a9824fbde72b4886990744fc\u001b[m\n", - "Author: gsingh58 <gurmail-singh@wisc.edu>\n", - "Date: Tue Jan 30 08:18:02 2024 -0600\n", - "\n", - " a typo removed\n", - "\n", - "\u001b[33mcommit eb8dbdafb79b044801352602a23cce59e0cf2f5b\u001b[m\n", - "Author: gsingh58 <gurmail-singh@wisc.edu>\n", - "Date: Tue Jan 30 07:55:27 2024 -0600\n", - "\n", - " slides name changed\n", - "\n", - "\u001b[33mcommit acaeff6953d1faf55deac9ec23605368d8423407\u001b[m\n", - "Author: gsingh58 <gurmail-singh@wisc.edu>\n", - "Date: Tue Jan 30 07:45:22 2024 -0600\n", - "\n", - " html file added\n", - "\n", - "\u001b[33mcommit d8ef60dc6f2e014366c4878c4811019b2704da84\u001b[m\n", - "Author: gsingh58 <gurmail-singh@wisc.edu>\n", - "Date: Tue Jan 30 07:37:33 2024 -0600\n", - "\n", - " lec3 updated\n", - "\n", - "\u001b[33mcommit d7f3849918a34536c532fa2dbad09199dd06ed15\u001b[m\n", - "Author: JINLANG WANG <jwang2775@wisc.edu>\n", - "Date: Sat Jan 27 17:26:22 2024 -0600\n", - "\n", - " add lab1\n", - "\n", - "\u001b[33mcommit 845fad19e5e68b7ba37b8d9814d0d7391d28475d\u001b[m\n", - "Author: JINLANG WANG <jwang2775@wisc.edu>\n", - "Date: Sat Jan 27 17:24:03 2024 -0600\n", - "\n", - " add lab1\n", - "\n", - "\u001b[33mcommit 3e1e4395110c0bd26b3f8cd638cd2987585e84db\u001b[m\n", - "Author: JINLANG WANG <jwang2775@wisc.edu>\n", - "Date: Sat Jan 27 17:23:06 2024 -0600\n", - "\n", - " add lab1\n", - "\n", - "\u001b[33mcommit 34bcc1d6d1451a19515690ee2ccdbfd2b2bfd9c5\u001b[m\n", - "Author: JINLANG WANG <jwang2775@wisc.edu>\n", - "Date: Sat Jan 27 17:22:04 2024 -0600\n", - "\n", - " add lab1\n", - "\n", - "\u001b[33mcommit 8199778a4ca97efeac99f1a091f2f2038d3eabd2\u001b[m\n", - "Author: GURMAIL SINGH <gurmail.singh@wisc.edu>\n", - "Date: Fri Jan 26 21:33:46 2024 -0600\n", - "\n", - " Add new directory\n", - "\n", - "\u001b[33mcommit 
75866c80ee6a563bc31cd2182bdafd7371d01f62\u001b[m\n", - "Author: GURMAIL SINGH <gurmail.singh@wisc.edu>\n", - "Date: Fri Jan 26 21:33:36 2024 -0600\n", - "\n", - " Add new directory\n", - "\n", - "\u001b[33mcommit 8cd6029682fbe3253e1b045dc5c2523f99946e7e\u001b[m\n", - "Author: GURMAIL SINGH <gurmail.singh@wisc.edu>\n", - "Date: Fri Jan 26 21:33:26 2024 -0600\n", - "\n", - " Add new directory\n", - "\n", - "\u001b[33mcommit 99b66918e05eef2a193fee51f6159bf1268d361b\u001b[m\n", - "Author: GURMAIL SINGH <gurmail.singh@wisc.edu>\n", - "Date: Fri Jan 26 21:33:17 2024 -0600\n", - "\n", - " Add new directory\n", - "\n", - "\u001b[33mcommit 63c27755fdb1c938a8c55a9152970cba77f08ff9\u001b[m\n", - "Author: GURMAIL SINGH <gurmail.singh@wisc.edu>\n", - "Date: Fri Jan 26 21:27:17 2024 -0600\n", - "\n", - " Add new directory\n", - "\n", - "\u001b[33mcommit 5ab60fd384fd1d3934b864d8d74358491c4c1cbc\u001b[m\n", - "Author: GURMAIL SINGH <gurmail.singh@wisc.edu>\n", - "Date: Fri Jan 26 21:26:12 2024 -0600\n", - "\n", - " Add new directory\n", - "\n", - "\u001b[33mcommit 66e8e7233ecfd7e2493c07c5ff8e6006d20bb24c\u001b[m\n", - "Author: GURMAIL SINGH <gurmail.singh@wisc.edu>\n", - "Date: Fri Jan 26 21:15:22 2024 -0600\n", - "\n", - " Add new directory\n", - "\n", - "\u001b[33mcommit ad08df7aa2e0bc2396dfe580c76f8b386b0248ff\u001b[m\n", - "Author: GURMAIL SINGH <gurmail.singh@wisc.edu>\n", - "Date: Fri Jan 26 20:25:44 2024 -0600\n", - "\n", - " Add new directory\n", - "\n", - "\u001b[33mcommit 86f9157696f95c31e4d914b4e89fb5974f537e87\u001b[m\n", - "Author: GURMAIL SINGH <gurmail.singh@wisc.edu>\n", - "Date: Fri Jan 26 20:25:29 2024 -0600\n", - "\n", - " Add new directory\n", - "\n", - "\u001b[33mcommit fe4faf3439ab3b8caab1748762d83891187f54e9\u001b[m\n", - "Author: GURMAIL SINGH <gurmail.singh@wisc.edu>\n", - "Date: Fri Jan 26 20:14:28 2024 -0600\n", - "\n", - " Add new directory\n", - "\n", - "\u001b[33mcommit bfb93b54fb43fc18d6dacd22865416f32027c0bb\u001b[m\n", - "Author: GURMAIL SINGH 
<gurmail.singh@wisc.edu>\n", - "Date: Fri Jan 26 20:14:06 2024 -0600\n", - "\n", - " Add new directory\n", - "\n", - "\u001b[33mcommit ed7d967ede5422f9f3e2eeba85a105ffa2d03db0\u001b[m\n", - "Author: gsingh58 <gurmail-singh@wisc.edu>\n", - "Date: Thu Jan 25 08:10:04 2024 -0600\n", - "\n", - " lec2 py file added\n", - "\n", - "\u001b[33mcommit 81f38731851c3b844e0d4855f3ccfbb259579d2f\u001b[m\n", - "Author: gsingh58 <gurmail-singh@wisc.edu>\n", - "Date: Thu Jan 25 07:38:47 2024 -0600\n", - "\n", - " lec2 update\n", - "\n", - "\u001b[33mcommit 92279f04219c9d9fdcf504f10e3cb7f41b9a9c3a\u001b[m\n", - "Author: GURMAIL SINGH <gurmail.singh@wisc.edu>\n", - "Date: Tue Jan 23 04:23:37 2024 -0600\n", - "\n", - " Configure SAST in `.gitlab-ci.yml`, creating this file if it does not already exist\n", - "\n", - "\u001b[33mcommit 4721ddd9ae732b4ca058962aa3df2eb1614f45b0\u001b[m\n", - "Author: GURMAIL SINGH <gurmail.singh@wisc.edu>\n", - "Date: Tue Jan 23 04:23:36 2024 -0600\n", - "\n", - " Initial commit\n" - ] - } - ], - "source": [ - "!git log" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "ad6cd0a1", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "commit f5b5fff2409b01c2e12f3a7baae131e9a23d964b\n", - "Author: gsingh58 <gurmail-singh@wisc.edu>\n", - "Date: Wed Mar 13 09:39:09 2024 -0500\n", - "\n", - " regex_2_lec_002 renamed as regex_1_lec_002\n", - "\n", - "commit e017cc00d97679786d97c469733117c016e3ba9b\n", - "Author: gsingh58 <gurmail-singh@wisc.edu>\n", - "Date: Wed Mar 13 09:34:43 2024 -0500\n", - "\n", - " web_4_lec_002 added\n", - "\n", - "commit 7897fec9ecf701e61b2ed2713fabe72e726ffd7c\n", - "Author: gsingh58 <gurmail-singh@wisc.edu>\n", - "Date: Tue Mar 12 05:51:48 2024 -0500\n", - "\n", - " lec14 and lec15 added\n", - "\n", - "commit c13\n" - ] - } - ], - "source": [ - "git_log_output = str(check_output([\"git\", \"log\"]), encoding=\"utf-8\")\n", - "print(git_log_output[:500])" - ] - }, - { - 
"cell_type": "markdown", - "id": "f550942e", - "metadata": {}, - "source": [ - "#### GOAL: find all the commit numbers" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "33489c0f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['f5b5fff2409b01c2e12f3a7baae131e9a23d964b',\n", - " 'e017cc00d97679786d97c469733117c016e3ba9b',\n", - " '7897fec9ecf701e61b2ed2713fabe72e726ffd7c',\n", - " 'c13cfb21e69230b393ef2051027d029322c12cac',\n", - " '95bfde13c281a6eb28f0cd98f6dc393762732d4c',\n", - " '90111df9e72309597c67da15bb61de78bff126e7',\n", - " 'c401ade096b0dd3e1178f46d067b2fbb98d499f3',\n", - " 'c99c65b5efdaa91f8fce8fadffc41c4747e0a3a0',\n", - " 'df5877233cc57005e3003c19c0dbf89aafc53804',\n", - " '26470de563bbe3bc65a73718aee1988ba64e8601']" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "commits = re.findall(r\"[0-9a-f]{40}\", git_log_output)\n", - "# recent 10 commit numbers\n", - "commits[:10]" - ] - }, - { - "cell_type": "markdown", - "id": "a2d1c384", - "metadata": {}, - "source": [ - "#### What days of the week does the team push things into this repo?" 
- ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "d2243734", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "commit f5b5fff2409b01c2e12f3a7baae131e9a23d964b\n", - "Author: gsingh58 <gurmail-singh@wisc.edu>\n", - "Date: Wed Mar 13 09:39:09 2024 -0500\n", - "\n", - " regex_2_lec_002 renamed as regex_1_lec_002\n", - "\n", - "commit e017cc00d97679786d97c469733117c016e3ba9b\n", - "Author: gsingh58 <gurmail-singh@wisc.edu>\n", - "Date: Wed Mar 13 09:34:43 2024 -0500\n", - "\n", - " web_4_lec_002 added\n", - "\n", - "commit 7897fec9ecf701e61b2ed2713fabe72e726ffd7c\n", - "Author: gsingh58 <gurmail-singh@wisc.edu>\n", - "Date: Tue Mar 12 05:51:48 2024 -0500\n", - "\n", - " lec14 and lec15 added\n", - "\n", - "commit c13\n" - ] - } - ], - "source": [ - "print(git_log_output[:500])" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "40198305", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['Wed',\n", - " 'Wed',\n", - " 'Tue',\n", - " 'Mon',\n", - " 'Sat',\n", - " 'Sat',\n", - " 'Sat',\n", - " 'Thu',\n", - " 'Thu',\n", - " 'Tue',\n", - " 'Sun',\n", - " 'Sun',\n", - " 'Wed',\n", - " 'Wed',\n", - " 'Wed',\n", - " 'Wed',\n", - " 'Tue',\n", - " 'Thu',\n", - " 'Thu',\n", - " 'Thu',\n", - " 'Wed',\n", - " 'Tue',\n", - " 'Tue',\n", - " 'Tue',\n", - " 'Tue',\n", - " 'Thu',\n", - " 'Thu',\n", - " 'Wed',\n", - " 'Tue',\n", - " 'Tue',\n", - " 'Tue',\n", - " 'Tue',\n", - " 'Tue',\n", - " 'Tue',\n", - " 'Tue',\n", - " 'Tue',\n", - " 'Thu',\n", - " 'Thu',\n", - " 'Tue',\n", - " 'Tue',\n", - " 'Tue',\n", - " 'Tue',\n", - " 'Tue',\n", - " 'Mon',\n", - " 'Sun',\n", - " 'Sun',\n", - " 'Sun',\n", - " 'Sun',\n", - " 'Sun',\n", - " 'Sun',\n", - " 'Sun',\n", - " 'Thu',\n", - " 'Thu',\n", - " 'Wed',\n", - " 'Tue',\n", - " 'Tue',\n", - " 'Tue',\n", - " 'Tue',\n", - " 'Tue',\n", - " 'Sat',\n", - " 'Sat',\n", - " 'Sat',\n", - " 'Sat',\n", - " 'Fri',\n", - " 'Fri',\n", - " 'Fri',\n", - " 
'Fri',\n", - " 'Fri',\n", - " 'Fri',\n", - " 'Fri',\n", - " 'Fri',\n", - " 'Fri',\n", - " 'Fri',\n", - " 'Fri',\n", - " 'Thu',\n", - " 'Thu',\n", - " 'Tue',\n", - " 'Tue']" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "days = re.findall(r\"Date:\\s+(\\w+)\", git_log_output)\n", - "days" - ] - }, - { - "cell_type": "markdown", - "id": "d04f0835", - "metadata": {}, - "source": [ - "#### Count unique days" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "24c3f4a0", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Tue 27\n", - "Thu 13\n", - "Fri 11\n", - "Wed 9\n", - "Sun 9\n", - "Sat 7\n", - "Mon 2\n", - "Name: count, dtype: int64" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "day_counts = pd.Series(days).value_counts()\n", - "day_counts" - ] - }, - { - "cell_type": "markdown", - "id": "7c2c6899", - "metadata": {}, - "source": [ - "#### Sort by day of the week" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "id": "cca2506c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Mon 2\n", - "Tue 27\n", - "Wed 9\n", - "Thu 13\n", - "Fri 11\n", - "Sun 9\n", - "Name: count, dtype: int64" - ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sorted_day_counts = day_counts.loc[[\"Mon\", \"Tue\", \"Wed\", \"Thu\", \"Fri\", \"Sun\"]]\n", - "sorted_day_counts" - ] - }, - { - "cell_type": "markdown", - "id": "c7f30c6a", - "metadata": {}, - "source": [ - "#### Create a bar plot" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "id": "18eb90d6", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Text(0.5, 0, 'Days of the week')" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAjIAAAHBCAYAAABzIlFzAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuNSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/xnp5ZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAxQklEQVR4nO3deVhWdeL//9eNKOICiguLkuCS+1ZpmabgjpqaTmM2TW45Zbaoqem0oGaiTaNW42jaqNW0TGZlm3zKBdz1q6ZmGanhRIlLIqAwAsr5/dHP++qORW694dxvfD6u61wXZ7nv+8WJ5MU573OOw7IsSwAAAAbysTsAAADA1aLIAAAAY1FkAACAsSgyAADAWBQZAABgLIoMAAAwFkUGAAAYiyIDAACM5Wt3gNKWn5+v48ePq3r16nI4HHbHAQAAJWBZls6dO6ewsDD5+BR93KXcF5njx48rPDzc7hgAAOAqpKSkqH79+kWuL/dFpnr16pJ+3REBAQE2pwEAACWRmZmp8PBw5+/xopT7InP5dFJAQABFBgAAw1xpWAiDfQEAgLEoMgAAwFgUGQAAYCyKDAAAMBZFBgAAGIsiAwAAjEWRAQAAxqLIAAAAY1FkAACAsSgyAADAWBQZAABgLIoMAAAwFkUGAAAYiyIDAACM5Wt3AFzfIqZ9ZneEEjk2t7/dEQAAheCIDAAAMBZFBgAAGIsiAwAAjEWRAQAAxqLIAAAAY1FkAACAsSgyAADAWBQZAABgLIoMAAAwFkUGAAAYiyIDAACMRZEBAADGosgAAABjUWQAAICxKDIAAMBYFBkAAGAsigwAADAWRQYAABiLIgMAAIxFkQEAAMaiyAAAAGPZWmTi4uLUoUMHVa9eXXXr1tXgwYOVlJTksk1UVJQcDofL9NBDD9mUGAAAeBNbi0xiYqLGjx+vHTt26Msvv1ReXp569+6trKwsl+3Gjh2r1NRU5/TCCy/YlBgAAHgTXzs/PD4+3mV+5cqVqlu3rvbs2aOuXbs6l1epUkUhISFlHQ8AAHg5rxojk5GRIUkKCgpyWf7WW2+pdu3aatWqlaZPn67s7Owi3yMnJ0eZmZkuEwAAKJ9sPSLzW/n5+ZowYYI6d+6sVq1aOZffe++9atCggcLCwnTgwAE9+eSTSkpK0gcffFDo+8TFxWnmzJllFRsAANjIYVmWZXcISRo3bpzWrl2rLVu2qH79+kVut2HDBvXo0UNHjhxRo0aNCqzPyclRTk6Ocz4zM1Ph4eHKyMhQQEBAqWTH1YuY9pndEUrk2Nz+dkcAgOtKZmamAgMDr/j72yuOyDzyyCP69NNPtWnTpmJLjCTdeuutklRkkfHz85Ofn1+p5AQAAN7F1iJjWZYeffRRffjhh0pISFBkZOQVX7Nv3z5JUmhoaCmnAwAA3s7WIjN+/Hi9/fbbWrNmjapXr64TJ05IkgIDA+Xv76+jR4/q7bffVr9+/VSrVi0dOHBAEydOVNeuXdWmTRs7owMAAC9ga5FZvHixpF9vevdbK1as0MiRI1WpUiWtW7dOCxcuVFZWlsLDwzV06FA9/fTTNqQFAADexvZTS8UJDw9XYmJiGaUBAACm8ar7yAAAALiDIgMAAIxFkQEAAMaiyAAAAGNRZAAAgLEoMgAAwFgUGQAAYCyKDAAAMBZFBgAAGIsiAwAAjEWRAQAAxqLIAAAAY1FkAACAsSgyAADAWBQZAABgLIoMAAAwFkUGAAAYiyIDAACMRZEBAADGosgAAABjUWQAAICxKDIAAMBYFBkAAGAsigwAADAWRQYAABiLIgMAAIxFkQEAAMaiyAAAAGNRZAAAgLEoMgAAwFgUGQAAYCyKDAAAMBZFBgAAGIsiAwAAjEWRAQAAxqLIAAAAY1FkAACAsSgyAADAWBQZAABgLIoMAAAwFkUGAAAYiyIDAACMRZEBAADGosgAAABjUWQAAICxKDIAAMBYFBkAAGAsigwAADAWRQYAABiLIgMAAIxFkQEAAMaiyAA
AAGNRZAAAgLEoMgAAwFi2Fpm4uDh16NBB1atXV926dTV48GAlJSW5bHPhwgWNHz9etWrVUrVq1TR06FCdPHnSpsQAAMCb2FpkEhMTNX78eO3YsUNffvml8vLy1Lt3b2VlZTm3mThxoj755BOtWrVKiYmJOn78uIYMGWJjagAA4C187fzw+Ph4l/mVK1eqbt262rNnj7p27aqMjAz961//0ttvv63u3btLklasWKHmzZtrx44duu222+yIDQAAvIRXjZHJyMiQJAUFBUmS9uzZo7y8PPXs2dO5TbNmzXTDDTdo+/bttmQEAADew9YjMr+Vn5+vCRMmqHPnzmrVqpUk6cSJE6pUqZJq1Kjhsm1wcLBOnDhR6Pvk5OQoJyfHOZ+ZmVlqmQEAgL285ojM+PHjdfDgQb377rvX9D5xcXEKDAx0TuHh4R5KCAAAvI1XFJlHHnlEn376qTZu3Kj69es7l4eEhCg3N1fp6eku2588eVIhISGFvtf06dOVkZHhnFJSUkozOgAAsJGtRcayLD3yyCP68MMPtWHDBkVGRrqsv/nmm1WxYkWtX7/euSwpKUk//vijOnXqVOh7+vn5KSAgwGUCAADlk61jZMaPH6+3335ba9asUfXq1Z3jXgIDA+Xv76/AwECNGTNGkyZNUlBQkAICAvToo4+qU6dOXLEEAADsLTKLFy+WJEVFRbksX7FihUaOHClJWrBggXx8fDR06FDl5OSoT58++uc//1nGSQEAgDeytchYlnXFbSpXrqxFixZp0aJFZZAIAACYxCsG+wIAAFwNigwAADAWRQYAABiLIgMAAIxFkQEAAMaiyAAAAGNRZAAAgLEoMgAAwFgUGQAAYCyKDAAAMBZFBgAAGIsiAwAAjEWRAQAAxqLIAAAAY1FkAACAsSgyAADAWBQZAABgLIoMAAAwlttFJiUlRT/99JNzfteuXZowYYKWLl3q0WAAAABX4naRuffee7Vx40ZJ0okTJ9SrVy/t2rVLTz31lGbNmuXxgAAAAEVxu8gcPHhQHTt2lCS99957atWqlbZt26a33npLK1eu9HQ+AACAIrldZPLy8uTn5ydJWrdunQYOHChJatasmVJTUz2bDgAAoBhuF5mWLVtqyZIl2rx5s7788kv17dtXknT8+HHVqlXL4wEBAACK4naRmTdvnl599VVFRUVp+PDhatu2rSTp448/dp5yAgAAKAu+7r4gKipKv/zyizIzM1WzZk3n8r/85S+qWrWqR8MBAAAUx+0jMt27d9e5c+dcSowkBQUFadiwYR4LBgAAcCVuF5mEhATl5uYWWH7hwgVt3rzZI6EAAABKosSnlg4cOOD8+ttvv9WJEyec85cuXVJ8fLzq1avn2XQAAADFKHGRadeunRwOhxwOh7p3715gvb+/v1555RWPhgMAAChOiYtMcnKyLMtSw4YNtWvXLtWpU8e5rlKlSqpbt64qVKhQKiEBAAAKU+Ii06BBA0lSfn5+qYUBAABwh9uXX0vS4cOHtXHjRp06dapAsXn22Wc9EgwAAOBK3C4yy5Yt07hx41S7dm2FhITI4XA41zkcDooMAAAoM24XmdmzZ+v555/Xk08+WRp5AAAASszt+8icPXtWd999d2lkAQAAcIvbRebuu+/WF198URpZAAAA3OL2qaXGjRvrmWee0Y4dO9S6dWtVrFjRZf1jjz3msXAAAADFcViWZbnzgsjIyKLfzOHQDz/8cM2hPCkzM1OBgYHKyMhQQECA3XHwOxHTPrM7Qokcm9vf7ggAcF0p6e9vt4/IJCcnX1MwAAAAT3F7jAwAAIC3cPuIzOjRo4tdv3z58qsOAwAA4A63i8zZs2dd5vPy8nTw4EGlp6cX+jBJAACA0uJ2kfnwww8LLMvPz9e4cePUqFEjj4QCAAAoCY+MkfHx8dGkSZO0YMECT7wdAABAiXhssO/Ro0d18eJFT70dAADAFbl9amnSpEku85ZlKTU1VZ999plGjBjhsWAAAABX4na
R+eqrr1zmfXx8VKdOHf3973+/4hVNAAAAnuR2kdm4cWNp5AAAAHCb20XmstOnTyspKUmS1LRpU9WpU8djoQAAAErC7cG+WVlZGj16tEJDQ9W1a1d17dpVYWFhGjNmjLKzs0sjIwAAQKHcLjKTJk1SYmKiPvnkE6Wnpys9PV1r1qxRYmKinnjiidLICAAAUCi3Ty2tXr1a77//vqKiopzL+vXrJ39/f/3xj3/U4sWLPZkPAACgSG4fkcnOzlZwcHCB5XXr1uXUEgAAKFNuF5lOnTopNjZWFy5ccC773//+p5kzZ6pTp04eDQcAAFAct08tvfTSS+rTp4/q16+vtm3bSpL279+vypUr6//+7/88HhAAAKAobh+RadWqlQ4fPqy4uDi1a9dO7dq109y5c3X48GG1bNnSrffatGmT7rzzToWFhcnhcOijjz5yWT9y5Eg5HA6XqW/fvu5GBgAA5dRV3UemSpUqGjt27DV/eFZWltq2bavRo0dryJAhhW7Tt29frVixwjnv5+d3zZ8LAADKB7eLTFxcnIKDgws8jmD58uU6ffq0nnzyyRK/V0xMjGJiYordxs/PTyEhIe7GBAAA1wG3Ty29+uqratasWYHlLVu21JIlSzwS6rcSEhJUt25dNW3aVOPGjdOZM2eK3T4nJ0eZmZkuEwAAKJ/cLjInTpxQaGhogeV16tRRamqqR0Jd1rdvX73xxhtav3695s2bp8TERMXExOjSpUtFviYuLk6BgYHOKTw83KOZAACA93D71FJ4eLi2bt2qyMhIl+Vbt25VWFiYx4JJ0j333OP8unXr1mrTpo0aNWqkhIQE9ejRo9DXTJ8+XZMmTXLOZ2ZmUmYAACin3C4yY8eO1YQJE5SXl6fu3btLktavX6+pU6eW+iMKGjZsqNq1a+vIkSNFFhk/Pz8GBAMAcJ1wu8hMmTJFZ86c0cMPP6zc3FxJUuXKlfXkk09q+vTpHg/4Wz/99JPOnDlT6KktAABw/XG7yDgcDs2bN0/PPPOMDh06JH9/fzVp0uSqjoKcP39eR44ccc4nJydr3759CgoKUlBQkGbOnKmhQ4cqJCRER48e1dSpU9W4cWP16dPH7c8CAADlz1XdR0aSqlWrpg4dOlzTh+/evVvR0dHO+ctjW0aMGKHFixfrwIEDev3115Wenq6wsDD17t1bzz33HKeOAACApGsoMp4QFRUly7KKXM8jDwAAQHHcvvwaAADAW1BkAACAsdwuMps2bdLFixcLLL948aI2bdrkkVAAAAAl4XaRiY6OVlpaWoHlGRkZLgN3AQAASpvbRcayLDkcjgLLz5w5o6pVq3okFAAAQEmU+KqlIUOGSPr1PjIjR450uQT60qVLOnDggG6//XbPJwQAAChCiYtMYGCgpF+PyFSvXl3+/v7OdZUqVdJtt92msWPHej4hAABAEUpcZFasWCFJioiI0OTJkzmNBAAAbOf2DfFiY2NLIwcAAIDbSlRkbrrpJq1fv141a9ZU+/btCx3se9nevXs9Fg4AAKA4JSoygwYNcg7uHTx4cGnmAQAAKLESFZnfnk7i1BIAAPAW1/TQyPPnzys/P99lWUBAwDUFAgAAKCm3b4iXnJys/v37q2rVqgoMDFTNmjVVs2ZN1ahRQzVr1iyNjAAAAIVy+4jMfffdJ8uytHz5cgUHBxc78BcAAKA0uV1k9u/frz179qhp06alkQcAAKDE3D611KFDB6WkpJRGFgAAALe4fUTmtdde00MPPaSff/5ZrVq1UsWKFV3Wt2nTxmPhAAAAiuN2kTl9+rSOHj2qUaNGOZc5HA7nU7EvXbrk0YAAAABFcbvIjB49Wu3bt9c777zDYF8A5VLEtM/sjnBFx+b2tzsC4BXcLjL//e9/9fHHH6tx48alkQcAAKDE3B7s2717d+3fv780sgAAALjF7SMyd955pyZOnKivv/5arVu3LjDYd+DAgR4LBwAAUBy3i8xDDz0kSZo1a1aBdQz2BQAAZcntIvP
7ZysBAADYxe0xMgAAAN7iqp5+/f/+3//Txo0bderUqQJHaObPn++RYAAAAFfidpGZM2eOnn76aTVt2rTAfWS4pwwAAChLbheZl156ScuXL9fIkSNLIQ4AAEDJuT1GxsfHR507dy6NLAAAAG5xu8hMnDhRixYtKo0sAAAAbnH71NLkyZPVv39/NWrUSC1atChwQ7wPPvjAY+EAAACK43aReeyxx7Rx40ZFR0erVq1aDPAFAAC2cbvIvP7661q9erX69+fJqwAAwF5uj5EJCgpSo0aNSiMLAACAW9wuMjNmzFBsbKyys7NLIw8AAECJuX1q6eWXX9bRo0cVHBysiIiIAoN99+7d67FwAAAAxXG7yAwePLgUYgAAALjP7SITGxtbGjkAAADcdlUPjZSkPXv26NChQ5Kkli1bqn379h4LBQAAUBJuF5lTp07pnnvuUUJCgmrUqCFJSk9PV3R0tN59913VqVPH0xkBAAAK5fZVS48++qjOnTunb775RmlpaUpLS9PBgweVmZmpxx57rDQyAgAAFMrtIzLx8fFat26dmjdv7lzWokULLVq0SL179/ZoOAAAgOK4fUQmPz+/wCXXklSxYkXl5+d7JBQAAEBJuF1kunfvrscff1zHjx93Lvv55581ceJE9ejRw6PhAAAAiuP2qaV//OMfGjhwoCIiIhQeHi5JSklJUatWrfTvf//b4wEBAGaKmPaZ3RFK5Nhcnh1oMreLTHh4uPbu3at169bpu+++kyQ1b95cPXv29Hg4AACA4lzVfWQcDod69eqlXr16eToPAABAiZV4jMyGDRvUokULZWZmFliXkZGhli1bavPmzR4NBwAAUJwSF5mFCxdq7NixCggIKLAuMDBQDz74oObPn+/RcAAAAMUpcZHZv3+/+vbtW+T63r17a8+ePR4JBQAAUBIlLjInT54s9P4xl/n6+ur06dMeCQUAAFASJS4y9erV08GDB4tcf+DAAYWGhnokFAAAQEmUuMj069dPzzzzjC5cuFBg3f/+9z/FxsZqwIABHg0HAABQnBIXmaefflppaWm68cYb9cILL2jNmjVas2aN5s2bp6ZNmyotLU1PPfWUWx++adMm3XnnnQoLC5PD4dBHH33kst6yLD377LMKDQ2Vv7+/evbsqcOHD7v1GQAAoPwqcZEJDg7Wtm3b1KpVK02fPl133XWX7rrrLv31r39Vq1attGXLFgUHB7v14VlZWWrbtq0WLVpU6PoXXnhBL7/8spYsWaKdO3eqatWq6tOnT6FHhQAAwPXHrRviNWjQQJ9//rnOnj2rI0eOyLIsNWnSRDVr1ryqD4+JiVFMTEyh6yzL0sKFC/X0009r0KBBkqQ33nhDwcHB+uijj3TPPfdc1WcCAIDy46ru7FuzZk116NDB01lcJCcn68SJEy6PPggMDNStt96q7du3F1lkcnJylJOT45wv7AZ+AACgfHD76ddl5cSJE5JU4HRVcHCwc11h4uLiFBgY6JwuP9gSAACUP15bZK7W9OnTlZGR4ZxSUlLsjgQAAEqJ1xaZkJAQSb/eiO+3Tp486VxXGD8/PwUEBLhMAACgfPLaIhMZGamQkBCtX7/euSwzM1M7d+5Up06dbEwGAAC8xVUN9vWU8+fP68iRI8755ORk7du3T0FBQbrhhhs0YcIEzZ49W02aNFFkZKSeeeYZhYWFafDgwfaFBgAAXsPWIrN7925FR0c75ydNmiRJGjFihFauXKmpU6cqKytLf/nLX5Senq4uXbooPj5elStXtisyAADwIrYWmaioKFmWVeR6h8OhWbNmadasWWWYCgAAmMJrx8gAAABcCUUGAAAYiyIDAACMRZEBAADGosgAAABjUWQAAICxbL38GoDnREz7zO4IV3Rsbn+7IwBG4v/vonFEBgAAGIsiAwAAjEWRAQAAxqLIAAAAY1FkAACAsSgyAADAWBQZAABgLIoMAAAwFkUGAAAYiyIDAACMRZEBAADGosgAAABjUWQAAIC
xKDIAAMBYFBkAAGAsigwAADAWRQYAABiLIgMAAIxFkQEAAMaiyAAAAGNRZAAAgLEoMgAAwFgUGQAAYCyKDAAAMBZFBgAAGIsiAwAAjEWRAQAAxqLIAAAAY1FkAACAsSgyAADAWBQZAABgLIoMAAAwFkUGAAAYiyIDAACMRZEBAADGosgAAABjUWQAAICxKDIAAMBYFBkAAGAsigwAADAWRQYAABiLIgMAAIxFkQEAAMaiyAAAAGNRZAAAgLEoMgAAwFgUGQAAYCyvLjIzZsyQw+FwmZo1a2Z3LAAA4CV87Q5wJS1bttS6deuc876+Xh8ZAACUEa9vBb6+vgoJCbE7BgAA8EJefWpJkg4fPqywsDA1bNhQf/rTn/Tjjz8Wu31OTo4yMzNdJgAAUD55dZG59dZbtXLlSsXHx2vx4sVKTk7WHXfcoXPnzhX5mri4OAUGBjqn8PDwMkwMAADKklcXmZiYGN19991q06aN+vTpo88//1zp6el67733inzN9OnTlZGR4ZxSUlLKMDEAAChLXj9G5rdq1KihG2+8UUeOHClyGz8/P/n5+ZVhKgAAYBevPiLze+fPn9fRo0cVGhpqdxQAAOAFvLrITJ48WYmJiTp27Ji2bdumu+66SxUqVNDw4cPtjgYAALyAV59a+umnnzR8+HCdOXNGderUUZcuXbRjxw7VqVPH7mgAAMALeHWReffdd+2OAAAAvJhXn1oCAAAoDkUGAAAYiyIDAACMRZEBAADGosgAAABjUWQAAICxKDIAAMBYFBkAAGAsigwAADAWRQYAABiLIgMAAIxFkQEAAMaiyAAAAGNRZAAAgLEoMgAAwFgUGQAAYCyKDAAAMBZFBgAAGIsiAwAAjEWRAQAAxqLIAAAAY1FkAACAsSgyAADAWBQZAABgLIoMAAAwFkUGAAAYiyIDAACMRZEBAADGosgAAABjUWQAAICxKDIAAMBYFBkAAGAsX7sDmCpi2md2R7iiY3P72x0BAIBSxREZAABgLIoMAAAwFkUGAAAYiyIDAACMRZEBAADGosgAAABjUWQAAICxKDIAAMBYFBkAAGAsigwAADAWRQYAABiLIgMAAIxFkQEAAMaiyAAAAGNRZAAAgLEoMgAAwFgUGQAAYCyKDAAAMBZFBgAAGIsiAwAAjEWRAQAAxjKiyCxatEgRERGqXLmybr31Vu3atcvuSAAAwAt4fZH5z3/+o0mTJik2NlZ79+5V27Zt1adPH506dcruaAAAwGZeX2Tmz5+vsWPHatSoUWrRooWWLFmiKlWqaPny5XZHAwAANvO1O0BxcnNztWfPHk2fPt25zMfHRz179tT27dsLfU1OTo5ycnKc8xkZGZKkzMxMj2bLz8n26PuVBk9/z6XBhP0osS89xYT9KLEvPcWE/SixLz3F0/vx8vtZllX8hpYX+/nnny1J1rZt21yWT5kyxerYsWOhr4mNjbUkMTExMTExMZWDKSUlpdiu4NVHZK7G9OnTNWnSJOd8fn6+0tLSVKtWLTkcDhuTFS0zM1Ph4eFKSUlRQECA3XGMxr70HPalZ7AfPYd96Tkm7EvLsnTu3DmFhYUVu51XF5natWurQoUKOnnypMvykydPKiQkpNDX+Pn5yc/Pz2VZjRo1SiuiRwUEBHjtD5Rp2Jeew770DPaj57AvPcfb92VgYOAVt/Hqwb6VKlXSzTffrPXr1zuX5efna/369erUqZONyQAAgDfw6iMykjRp0iSNGDFCt9xyizp27KiFCxcqKytLo0aNsjsaAACwmdcXmWHDhun06dN69tlndeLECbVr107x8fEKDg62O5rH+Pn5KTY2tsApMbiPfek57EvPYD96DvvSc8rTvnRY1pWuawIAAPBOXj1GBgAAoDgUGQAAYCyKDAAAMBZFBgAAGIsiAwAAjOX1l18DV/Lmm29qyZIlSk5O1vbt29WgQQMtXLhQkZGRGjRokN3xcJ2Jjo4u9nEoGzZsKMM0QPlHkYHRFi9erGeffVY
TJkzQ888/r0uXLkn69bEUCxcupMhcgTtPq/Xm25h7k3bt2rnM5+Xlad++fTp48KBGjBhhTyjg/5ebm6tTp04pPz/fZfkNN9xgU6Jrx31kvEB5/MEqKy1atNCcOXM0ePBgVa9eXfv371fDhg118OBBRUVF6ZdffrE7olfz8fEp8cNUL5dEXJ0ZM2bo/PnzevHFF+2O4vU+/vhjxcTEqGLFivr444+L3XbgwIFllMpshw8f1ujRo7Vt2zaX5ZZlyeFwGP3/N0dkbFSef7DKSnJystq3b19guZ+fn7KysmxIZJaNGzc6vz527JimTZumkSNHOp9ltn37dr3++uuKi4uzK2K5cd9996ljx44UmRIYPHiwTpw4obp162rw4MFFbse/kyU3cuRI+fr66tNPP1VoaGiJ/4AxAUXGRuX5B6usREZGat++fWrQoIHL8vj4eDVv3tymVObo1q2b8+tZs2Zp/vz5Gj58uHPZwIED1bp1ay1dupTTItdo+/btqly5st0xjPDbo9O/P1KNq7Nv3z7t2bNHzZo1szuKx1FkbFSef7DKyqRJkzR+/HhduHBBlmVp165deueddxQXF6fXXnvN7nhG2b59u5YsWVJg+S233KIHHnjAhkRmGjJkiMu8ZVlKTU3V7t279cwzz9iUykx5eXnq27evlixZoiZNmtgdx2gtWrQot6faKTI2Ks8/WGXlgQcekL+/v55++mllZ2fr3nvvVVhYmF566SXdc889dsczSnh4uJYtW6YXXnjBZflrr72m8PBwm1KZJzAw0GXex8dHTZs21axZs9S7d2+bUpmpYsWKOnDggN0xyoV58+Zp6tSpmjNnjlq3bq2KFSu6rDd5MD+DfW20YcMGPf300+XyB8sO2dnZOn/+vOrWrWt3FCN9/vnnGjp0qBo3bqxbb71VkrRr1y4dPnxYq1evVr9+/WxOiOvRxIkT5efnp7lz59odxWg+Pr/eNu73QxjKw5hMioyNyvMPFsyUkpKixYsX67vvvpMkNW/eXA899BBHZK4CVyN6xqOPPqo33nhDTZo00c0336yqVau6rJ8/f75NycySmJhY7PrfjpczDUXGRuX5B6usREZGFjtI+ocffijDNID0/fffa8yYMVyNeI1++OEHRUREqEePHkVu43A4uMEgGCNjJ4rKtZswYYLLfF5enr766ivFx8drypQp9oQy2ObNm/Xqq6/qhx9+0KpVq1SvXj29+eabioyMVJcuXeyOZ4RRo0ZxNaIHNGnSRKmpqc5bBAwbNkwvv/yygoODbU5mpk2bNhW7vmvXrmWUxPMoMjZLT0/Xv/71Lx06dEiS1LJlS40ePbrAgEEU7vHHHy90+aJFi7R79+4yTmO21atX689//rP+9Kc/ae/evcrJyZEkZWRkaM6cOfr8889tTmgGrkb0jN+fLFi7di33hroGUVFRBZb9tmSbfKSQh0baaPfu3WrUqJEWLFigtLQ0paWlaf78+WrUqJH27t1rdzyjxcTEaPXq1XbHMMrs2bO1ZMkSLVu2zGXgeefOnfl5dANXI5YORkFcm7Nnz7pMp06dUnx8vDp06KAvvvjC7njXhCMyNpo4caIGDhyoZcuWydf31/8UFy9e1AMPPKAJEyZc8VAgivb+++8rKCjI7hhGSUpKKvTwcmBgoNLT08s+kEF++8yq8nyZa1lyOBwFTstxmu7qFXaUv1evXqpUqZImTZqkPXv22JDKMygyNtq9e7dLiZEkX19fTZ06VbfccouNybzfrFmz9MQTT6hLly4u/7hZlqUTJ07o9OnT+uc//2ljQvOEhIToyJEjioiIcFm+ZcsWNWzY0J5QhqhRo0aBn8PfD1JlsK97LMvSyJEj5efnJ0m6cOGCHnrooQJXLX3wwQd2xCs3goODlZSUZHeMa0KRsVFAQIB+/PHHAufSU1JSVL16dZtSmWHmzJl66KGHNGjQIJdfID4+PqpTp46ioqIYo+CmsWPH6vHHH9fy5cvlcDh0/Phxbd+
+XZMnT+aOtFfw22dWwTN+/0iM++67z6Yk5cPvbyx4+Y7Tc+fOLfDEdtNw+bWNHnvsMX344Yd68cUXdfvtt0uStm7dqilTpmjo0KFauHChvQG9mI+Pj/Ohcrg2ycnJioyMlGVZmjNnjuLi4pSdnS3p14dvTp48Wc8995zNKb3frFmzNHnyZFWpUsXuKEABl590//tf+bfddpuWL19u9B9+FBkb5ebmasqUKVqyZIkuXrwoy7JUqVIljRs3TnPnznUeUkVBPj4+OnnypOrUqWN3FOP5+PioQYMGio6OVnR0tKKionTu3DmdP39eLVq0ULVq1eyOaIQKFSooNTWVcg2v9N///tdl/vLR6/LwIFOKjBfIzs7W0aNHJUmNGjXiL7oS8PHxUWBg4BUH/6WlpZVRInMlJCQ4p507dyo3N1cNGzZU9+7d1b17d0VFRXHvjhLgKCG80fbt23XmzBkNGDDAueyNN95QbGyssrKyNHjwYL3yyitG/+FMkbHB6NGjS7Td8uXLSzmJuXx8fLRw4cIr3m/n9+fZUbwLFy5o27ZtzmKza9cu5eXlqVmzZvrmm2/sjufVOEoIbxQTE6OoqCg9+eSTkqSvv/5aN910k0aOHKnmzZvrb3/7mx588EHNmDHD3qDXgCJjg8uH8tu3b1/svRE+/PDDMkxlFv76LV25ubnaunWr1q5dq1dffVXnz5/napsr4CghvFFoaKg++eQT55WwTz31lBITE7VlyxZJ0qpVqxQbG6tvv/3WzpjXhKuWbDBu3Di98847Sk5O1qhRo3TfffdxzxM3cT8Jz8rNzdWOHTu0ceNG5ymm8PBwde3aVf/4xz94nEYJzZw5k7tyw6ucPXvW5dRwYmKiYmJinPMdOnRQSkqKHdE8hiMyNsnJydEHH3yg5cuXa9u2berfv7/GjBmj3r1780u6BDgi4zndu3fXzp07FRkZqW7duumOO+5Qt27dFBoaanc0o/AzCW/UoEEDvfnmm+ratatyc3NVo0YNffLJJ877HH399dfq1q2b0UcKeUSBTfz8/DR8+HB9+eWX+vbbb9WyZUs9/PDDioiI0Pnz5+2O5/Xy8/P5heEhmzdvVq1atdS9e3f16NFDvXr1osRcBf4AgTfq16+fpk2bps2bN2v69OmqUqWK7rjjDuf6AwcOqFGjRjYmvHYUGS/w2+v7GYeAspaenq6lS5eqSpUqmjdvnsLCwtS6dWs98sgjev/993X69Gm7IxqBg9vwRs8995x8fX3VrVs3LVu2TMuWLVOlSpWc65cvX67evXvbmPDacWrJJr89tbRlyxYNGDBAo0aNUt++feXjQ7+Efc6dO6ctW7Y4x8vs379fTZo00cGDB+2OBuAqZWRkqFq1aqpQoYLL8rS0NFWrVs2l3JiGwb42ePjhh/Xuu+8qPDxco0eP1jvvvKPatWvbHQuQJFWtWlVBQUEKCgpSzZo15evrq0OHDtkdC8A1KGoQenm40IQjMjbw8fHRDTfcoPbt2xd7Xp2HoaEs5Ofna/fu3UpISNDGjRu1detWZWVlqV69es67/UZHR6tBgwZ2RwWAAjgiY4P777+fgYHwGjVq1FBWVpZCQkIUHR2tBQsWKCoqyvgBgACuDxyRAa5zr776qqKjo3XjjTfaHQUA3EaRAQAAxuLyGAAAYCyKDAAAMBZFBgAAGIsiA8DrfPfdd7rttttUuXJltWvXrsSvS0hIkMPhUHp6eqllKwvl5fsAygJFBiiHRo4cKYfDIYfDoYoVKyo4OFi9evXS8uXLlZ+fb3e8K4qNjVXVqlWVlJSk9evXF7pNVFSUJkyYULbBAHgdigxQTvXt21epqak6duyY1q5dq+joaD3++OMaMGCALl68aHe8Yh09elRdunRRgwYNVKtWLbvjAPBiFBmgnPLz81NISIjq1aunm266SX/961+1Zs0arV27VitXrnRuN3/+fLVu3VpVq1ZVeHi4Hn74YecT2LOyshQQEKD333/f5b0/+ugjVa1
aVefOnVNubq4eeeQRhYaGqnLlymrQoIHi4uKKzJWfn69Zs2apfv368vPzU7t27RQfH+9c73A4tGfPHs2aNUsOh0MzZswo8B4jR45UYmKiXnrpJeeRp2PHjjnX79mzR7fccouqVKmi22+/XUlJSS6vX7NmjW666SZVrlxZDRs21MyZM4ssdwcPHpSPj4/z4ZlpaWny8fHRPffc49xm9uzZ6tKli8trYmJiVK1aNQUHB+vPf/6zfvnlF5d9EBcXp8jISPn7+6tt27YF9vFvZWdnKyYmRp07d+Z0E/B7FoByZ8SIEdagQYMKXde2bVsrJibGOb9gwQJrw4YNVnJysrV+/XqradOm1rhx45zrx44da/Xr18/lPQYOHGjdf//9lmVZ1t/+9jcrPDzc2rRpk3Xs2DFr8+bN1ttvv11ktvnz51sBAQHWO++8Y3333XfW1KlTrYoVK1rff/+9ZVmWlZqaarVs2dJ64oknrNTUVOvcuXMF3iM9Pd3q1KmTNXbsWCs1NdVKTU21Ll68aG3cuNGSZN16661WQkKC9c0331h33HGHdfvttztfu2nTJisgIMBauXKldfToUeuLL76wIiIirBkzZhSaNz8/36pdu7a1atUqy7Is66OPPrJq165thYSEOLfp2bOn9dRTT1mWZVlnz5616tSpY02fPt06dOiQtXfvXqtXr15WdHS0c/vZs2dbzZo1s+Lj462jR49aK1assPz8/KyEhATLsizn93H27Fnr7Nmz1u2332717t3bysrKKnK/AtcrigxQDhVXZIYNG2Y1b968yNeuWrXKqlWrlnN+586dVoUKFazjx49blmVZJ0+etHx9fZ2/dB999FGre/fuVn5+fomyhYWFWc8//7zLsg4dOlgPP/ywc75t27ZWbGxsse/TrVs36/HHH3dZdrkArFu3zrnss88+syRZ//vf/yzLsqwePXpYc+bMcXndm2++aYWGhhb5WUOGDLHGjx9vWZZlTZgwwZoyZYpVs2ZN69ChQ1Zubq5VpUoV64svvrAsy7Kee+45q3fv3i6vT0lJsSRZSUlJ1oULF6wqVapY27Ztc9lmzJgx1vDhw12+j0OHDllt2rSxhg4dauXk5BS7P4DrFc9aAq4zlmW5POtr3bp1iouL03fffafMzExdvHhRFy5cUHZ2tqpUqaKOHTuqZcuWev311zVt2jT9+9//VoMGDdS1a1dJv57m6dWrl5o2baq+fftqwIAB6t27d6GfnZmZqePHj6tz584uyzt37qz9+/d77Hts06aN8+vQ0FBJ0qlTp3TDDTdo//792rp1q55//nnnNpcuXXL5nn+vW7duWrp0qSQpMTFRc+bM0ffff6+EhASlpaUpLy/P+T3t379fGzduVLVq1Qq8z9GjR5WXl6fs7Gz16tXLZV1ubq7at2/vsqxXr17q2LGj/vOf/6hChQpXuTeA8o0iA1xnDh06pMjISEnSsWPHNGDAAI0bN07PP/+8goKCtGXLFo0ZM0a5ubnOX+oPPPCAFi1apGnTpmnFihUaNWqUswzddNNNSk5O1tq1a7Vu3Tr98Y9/VM+ePYsd81HaKlas6Pz6cs7LV2udP39eM2fO1JAhQwq8rnLlyoW+3+UrpA4fPqxvv/1WXbp00XfffaeEhASdPXvWOR7n8vvfeeedmjdvXoH3CQ0N1cGDByVJn332merVq+ey3s/Pz2W+f//+Wr16tb799lu1bt26pN8+cF2hyADXkQ0bNujrr7/WxIkTJf06KDY/P19///vf5ePz69j/9957r8Dr7rvvPk2dOlUvv/yyvv32W40YMcJlfUBAgIYNG6Zhw4bpD3/4g/r27au0tDQFBQUV2C4sLExbt25Vt27dnMu3bt2qjh07uvW9VKpUSZcuXXLrNdKvxSspKUmNGzcu8Wtat26tmjVravbs2WrXrp2qVaumqKgozZs3T2fPnlVUVJTL+69evVoRERHy9S34T2yLFi3k5+enH3/80WUfFGbu3LmqVq2
aevTooYSEBLVo0aLEmYHrBUUGKKdycnJ04sQJXbp0SSdPnlR8fLzi4uI0YMAA3X///ZKkxo0bKy8vT6+88oruvPNObd26VUuWLCnwXjVr1tSQIUM0ZcoU9e7dW/Xr13eumz9/vkJDQ9W+fXv5+Pho1apVCgkJUY0aNQrNNWXKFMXGxqpRo0Zq166dVqxYoX379umtt95y6/uLiIjQzp07dezYMVWrVq1AaSrKs88+qwEDBuiGG27QH/7wB/n4+Gj//v06ePCgZs+eXehrHA6HunbtqrfeekuTJ0+W9Ovpq5ycHK1fv16TJk1ybjt+/HgtW7ZMw4cP19SpUxUUFKQjR47o3Xff1Wuvvabq1atr8uTJmjhxovLz89WlSxdlZGRo69atCggIKFASX3zxRV26dEndu3dXQkKCmjVr5tZ+Aso9uwfpAPC8ESNGWJIsSZavr69Vp04dq2fPntby5cutS5cuuWw7f/58KzQ01PL397f69OljvfHGG84rZn5r/fr1liTrvffec1m+dOlSq127dlbVqlWtgIAAq0ePHtbevXuLzHbp0iVrxowZVr169ayKFStabdu2tdauXeuyTUkG+yYlJVm33Xab5e/vb0mykpOTXa72ueyrr75yrr8sPj7euv322y1/f38rICDA6tixo7V06dJiP2/BggWWJJesgwYNsnx9fQtcWfX9999bd911l1WjRg3L39/fatasmTVhwgTngOj8/Hxr4cKFVtOmTa2KFStaderUsfr06WMlJiZalmUV+n08+uijVmhoqJWUlFRsTuB647Asy7KtRQEwxptvvqmJEyfq+PHjqlSpkt1xAEASp5YAXEF2drZSU1M1d+5cPfjgg5QYAF6FO/sCKNYLL7ygZs2aKSQkRNOnT7c7DgC44NQSAAAwFkdkAACAsSgyAADAWBQZAABgLIoMAAAwFkUGAAAYiyIDAACMRZEBAADGosgAAABjUWQAAICx/j/VYW3ZuJQ1KwAAAABJRU5ErkJggg==", - "text/plain": [ - "<Figure size 640x480 with 1 Axes>" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "ax = sorted_day_counts.plot.bar()\n", - "ax.set_ylabel(\"Commit counts\")\n", - "ax.set_xlabel(\"Days of the week\")" - ] - }, - { - "cell_type": "markdown", - "id": "42758038", - "metadata": {}, - "source": [ - "#### Find all commit authors names." 
- ] - }, - { - "cell_type": "code", - "execution_count": 35, - "id": "a87693e9", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'gsingh58'" - ] - }, - "execution_count": 35, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "authors = re.findall(r\"Author:\\s+(.+?)\\s*<\", git_log_output)\n", - "authors[0]" - ] - }, - { - "cell_type": "markdown", - "id": "cbc516d7", - "metadata": {}, - "source": [ - "#### `git log` from projects repo" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "id": "e2f85aa2", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "commit 129a4745b416e3f0be08795dca69d02d528fe893\n", - "Author: JINLANG WANG <jwang2775@wisc.edu>\n", - "Date: Mon Mar 11 10:37:16 2024 -0500\n", - "\n", - " Update file README.md\n", - "\n", - "commit 413d84dceb0f48e111b25d9f7765513181feb6d6\n", - "Author: JINLANG WANG <jwang2775@wisc.edu>\n", - "Date: Sat Mar 9 18:46:18 2024 -0600\n", - "\n", - " Update 2 files\n", - " \n", - " - /Labs/Lab10/README\n", - " - /Labs/Lab10/README.md\n", - "\n", - "commit bd2acf092cfeacdc994dac733300ab61a3373b26\n", - "Author: JINLANG WANG <jwang2775@wisc.edu>\n", - "Date: Sat Mar 9 18:45:42 2024 -0600\n", - "\n", - " lab10\n", - "\n", - "commit f84c2a89a44d374da385bb499738ec82b12b7965\n", - "Author: JINLANG WANG <jwang2775@wisc.edu>\n", - "Date: Sat Mar 9 18:42:28 2024 -0600\n", - "\n", - " Update file EDGAR.md\n", - "\n", - "commit 11b505faae9964182b99288210f54bae5ce3e211\n", - "Author: JINLANG WANG <jwang2775@wisc.edu>\n", - "Date: Sat Mar 9 18:41:35 2024 -0600\n", - "\n", - " Update file README.md\n", - "\n", - "commit 5f35a23cf70d24e627fef5fd89c0711cb144dbc4\n", - "Author: JINLANG WANG <jwang2775@wisc.edu>\n", - "Date: Sat Mar 9 18:41:06 2024 -0600\n", - "\n", - " lab9\n", - "\n", - "commit d481d4de35443a07812af9216d6883300207ae6\n" - ] - } - ], - "source": [ - "git_log_output = str(check_output([\"git\", \"log\"], 
cwd=\"../../projects-and-labs\"), encoding=\"utf-8\")\n", - "print(git_log_output[:1000])" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "59acc090", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[' P4 pipeline update',\n", - " ' P4 released',\n", - " ' fixing pipeline and adding backup mp3 tester file to ease confusion',\n", - " ' mp3 install help',\n", - " ' pipeline update for MP3',\n", - " ' p3 pipeline changes',\n", - " ' P4 pipeline setup',\n", - " ' updating MP3 tester.py and MP3 pipeline',\n", - " ' P3 released',\n", - " ' P3 released',\n", - " ' P2 key updated',\n", - " ' MP2 Update/Fix to the tester',\n", - " ' MP2 key fix + readme update',\n", - " ' mp1 readme updated',\n", - " ' P2 Release',\n", - " ' gitlab tutorial + mp1 release',\n", - " ' initial commit (P1)']" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "re.findall(r\".*[pP][1-6].*\", git_log_output)" - ] - }, - { - "cell_type": "markdown", - "id": "29085f57", - "metadata": {}, - "source": [ - "### Emails example" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "id": "25b005af", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Gurmail [Instructor] - gsingh58(AT) cs.wisc.edu\n", - "Jinlang [Head TA] - jwang2775 (AT) wisc.edu\n", - "Elliot [TA] - eepickens (AT) cs.wisc.edu\n", - "Alex [TA] - aclinton (AT) wisc.edu\n", - "Bowman [TA] - bnbrown3 (AT) wisc.edu\n", - "Hafeez [TA] - aneesali (AT) wisc.edu\n", - "William [TA] - wycong (AT) wisc.edu\n", - "Someone [PM] - someone@wisc.edu\n", - "\n" - ] - } - ], - "source": [ - "s = \"\"\"\n", - "Gurmail [Instructor] - gsingh58(AT) cs.wisc.edu\n", - "Jinlang [Head TA] - jwang2775 (AT) wisc.edu\n", - "Elliot [TA] - eepickens (AT) cs.wisc.edu\n", - "Alex [TA] - aclinton (AT) wisc.edu\n", - "Bowman [TA] - bnbrown3 (AT) wisc.edu\n", - "Hafeez [TA] - aneesali (AT) wisc.edu\n", - 
"William [TA] - wycong (AT) wisc.edu\n", - "Someone [PM] - someone@wisc.edu\n", - "\"\"\"\n", - "print(s)" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "id": "ea45c263", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[('gsingh58(AT) cs.wisc.edu',\n", - " 'gsingh58',\n", - " '(AT)',\n", - " '(AT)',\n", - " 'cs.wisc.edu',\n", - " 'wisc.',\n", - " 'edu'),\n", - " ('jwang2775 (AT) wisc.edu',\n", - " 'jwang2775',\n", - " '(AT)',\n", - " '(AT)',\n", - " 'wisc.edu',\n", - " '',\n", - " 'edu'),\n", - " ('eepickens (AT) cs.wisc.edu',\n", - " 'eepickens',\n", - " '(AT)',\n", - " '(AT)',\n", - " 'cs.wisc.edu',\n", - " 'wisc.',\n", - " 'edu'),\n", - " ('aclinton (AT) wisc.edu', 'aclinton', '(AT)', '(AT)', 'wisc.edu', '', 'edu'),\n", - " ('bnbrown3 (AT) wisc.edu', 'bnbrown3', '(AT)', '(AT)', 'wisc.edu', '', 'edu'),\n", - " ('aneesali (AT) wisc.edu', 'aneesali', '(AT)', '(AT)', 'wisc.edu', '', 'edu'),\n", - " ('wycong (AT) wisc.edu', 'wycong', '(AT)', '(AT)', 'wisc.edu', '', 'edu'),\n", - " ('someone@wisc.edu', 'someone', '@', '', 'wisc.edu', '', 'edu')]" - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "name = r\"\\w+\"\n", - "at = r\"@|([\\(\\[]?[Aa][Tt][\\)\\]]?)\"\n", - "domain = r\"\\w+\\.(\\w+\\.)?(edu|com|org|net|io|gov)\"\n", - "\n", - "full_regex = f\"(({name})\\s*({at})\\s*({domain}))\"\n", - "\n", - "re.findall(full_regex, s)" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "id": "7d04d86f", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "REGEX: ((\\w+)\\s*(@|([\\(\\[]?[Aa][Tt][\\)\\]]?))\\s*(\\w+\\.(\\w+\\.)?(edu|com|org|net|io|gov)))\n", - "gsingh58@cs.wisc.edu\n", - "jwang2775@wisc.edu\n", - "eepickens@cs.wisc.edu\n", - "aclinton@wisc.edu\n", - "bnbrown3@wisc.edu\n", - "aneesali@wisc.edu\n", - "wycong@wisc.edu\n", - "someone@wisc.edu\n" - ] - } - ], - "source": [ - "print(\"REGEX:\", full_regex)\n", 
- "for match in re.findall(full_regex, s):\n", - " print(match[1] + \"@\" + match[4])" - ] - }, - { - "cell_type": "markdown", - "id": "5aa7f04e", - "metadata": {}, - "source": [ - "### Self-practice\n", - "\n", - "Q1: Which regex will NOT match \"123\"\n", - "1. r\"\\d\\d\\d\"\n", - "2. r\"\\d{3}\"\n", - "3. r\"\\D\\D\\D\"\n", - "4. r\"...\"\n", - "\n", - "Q2: What will r\"^A\" match?\n", - "1. \"A\"\n", - "2. \"^A\"\n", - "3. \"BA\"\n", - "4. \"B\"\n", - "5. \"BB\"\n", - "\n", - "Q3: Which one can match \"HH\"?\n", - "1. r\"HA+H\"\n", - "2. r\"HA+?H\"\n", - "3. r\"H(A+)?H\"\n", - "\n", - "Q4: Which string(s) will match r\"^(ha)*$\"\n", - "1. \"\"\n", - "2. \"hahah\"\n", - "3. \"that\"\n", - "4. \"HAHA\"\n", - "\n", - "Q5: What is the type of the following?re.findall(r\"(\\d) (\\w+)\", some_str)[0]\n", - "1. list\n", - "2. tuple\n", - "3. string\n", - "\n", - "Q6: What will it do?\n", - "```python\n", - "re.sub(r\"(\\d{3})-(\\d{3}-\\d{4})\",\n", - " r\"(\\g<1>) \\g<2>\",\n", - " \"608-123-4567\")\n", - "```" - ] - }, - { - "cell_type": "markdown", - "id": "3ff199e4", - "metadata": {}, - "source": [ - "The answers of these questions can be found in self_practice.ipynb. You may want to try to answer these questions yourself and then verify your answers." 
- ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/lecture_material/15-regex_2/regex_2_lec_001.ipynb b/lecture_material/15-regex_2/regex_2_lec_001.ipynb deleted file mode 100644 index db06eb7fa32b923d38c746a1408a37396c9ee014..0000000000000000000000000000000000000000 --- a/lecture_material/15-regex_2/regex_2_lec_001.ipynb +++ /dev/null @@ -1,835 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "e60c1c48", - "metadata": {}, - "source": [ - "# Regex 2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0dba68b0", - "metadata": {}, - "outputs": [], - "source": [ - "#import statements\n", - "import re\n", - "from subprocess import check_output\n", - "import pandas as pd" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b97c8008-8e39-4a9f-89a1-0d1ddbb1ac01", - "metadata": {}, - "outputs": [], - "source": [ - "# Example strings\n", - "# from DS100 book...\n", - "def reg(regex, text):\n", - " \"\"\"\n", - " Prints the string with the regex match highlighted.\n", - " \"\"\"\n", - " print(re.sub(f'({regex})', r'\\033[1;30;43m\\1\\033[m', text))\n", - "s1 = \" \".join([\"A DAG is a directed graph without cycles.\",\n", - " \"A tree is a DAG where every node has one parent (except the root, which has none).\",\n", - " \"To learn more, visit www.example.com or call 1-608-123-4567. 
:) ¯\\_(ツ)_/¯\"])\n", - "print(s1)\n", - "\n", - "s2 = \"\"\"1-608-123-4567\n", - "a-bcd-efg-hijg (not a phone number)\n", - "1-608-123-456 (not a phone number)\n", - "608-123-4567\n", - "123-4567\n", - "1-123-4567 (not a phone number)\n", - "\"\"\"\n", - "print(s2)\n", - "\n", - "s3 = \"In CS 320, there are 11 quizzes, 6 projects, 28 lectures, and 1000 things to learn. CS 320 is awesome!\"\n", - "print(s3)\n", - "\n", - "s4 = \"\"\"In CS 320, there are 11 quizzes, 6 projects,\n", - "28 lectures, and 1000 things to learn. CS 320 is awesome!\"\"\"\n", - "print(s4)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "924069c5-82be-4423-b659-2beee8e226be", - "metadata": {}, - "outputs": [], - "source": [ - "print(s1)" - ] - }, - { - "cell_type": "markdown", - "id": "6a2eff34", - "metadata": {}, - "source": [ - "### Regex is case sensitive\n", - "\n", - "### Character classes\n", - "\n", - "- Character classes can be mentioned within `[...]`\n", - "- `^` means `NOT` of a character class\n", - "- `-` enables us to mention range of characters, for example `[A-Z]`\n", - "- `|` enables us to perform `OR`\n", - "\n", - "### Metacharacters\n", - "\n", - "- predefined character classes\n", - " - `\\d` => digits\n", - " - `\\s` => whitespace (space, tab, newline)\n", - " - `\\w` => \"word\" characters (digits, letters, underscores, etc) --- helpful for variable name matches and whole word matches (as it doesn't match whitespace --- `\\s`)\n", - " - `.` => wildcard: anything except newline\n", - "- capitalized version of character classes mean `NOT`, for example `\\D` => everything except digits\n", - "\n", - "### REPETITION\n", - "\n", - "- `<character>{<num matches>}` - for example: `w{3}`\n", - "- matches cannot overlap\n", - "\n", - "### Variable length repitition operators\n", - "\n", - "- `*` => 0 or more (greedy: match as many characters as possible)\n", - "- `+` => 1 or more (greedy: match as many characters as possible)\n", - "- `?` => 0 or 1\n", - 
"- `*?` => 0 or more (non-greedy: match as few characters as possible)\n", - "- `+?` => 1 or more (non-greedy: match as few characters as possible)\n", - "\n", - "#### Find everything inside of parentheses." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "11f75d92-f215-47f4-8804-5e5727d3be55", - "metadata": {}, - "outputs": [], - "source": [ - "# this doesn't work\n", - "# it captures everything because () have special meaning (coming up)\n", - "reg(r\"\", s1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b488460e", - "metadata": {}, - "outputs": [], - "source": [ - "# How can we change this to not use special meaning of ()?\n", - "# * is greedy: match as many characters as possible\n", - "reg(r\"(.*)\", s1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "42155b54-d27b-4418-81a1-c005c508d738", - "metadata": {}, - "outputs": [], - "source": [ - "# non-greedy: stop at the first possible spot instead of the last possible spot\n", - "reg(r\"\\(.*\\)\", s1)" - ] - }, - { - "cell_type": "markdown", - "id": "0fd70cfd", - "metadata": {}, - "source": [ - "### Anchor characters\n", - "- `^` => start of string\n", - " - `^` is overloaded --- what was the other usage?\n", - "- `$` => end of string\n", - "\n", - "#### Find everything in the first sentence." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "40fed5db", - "metadata": {}, - "outputs": [], - "source": [ - "# doesn't work because remember regex finds all possible matches\n", - "# so it matches every single sentence \n", - "# (even though we are doing non-greedy match)\n", - "reg(r\"\", s1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7e97abd8", - "metadata": {}, - "outputs": [], - "source": [ - "reg(r\".*?\\.\", s1)" - ] - }, - { - "cell_type": "markdown", - "id": "f66a4651", - "metadata": {}, - "source": [ - "#### Find everything in the first two sentences." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cfa8353e-4402-4f64-b3f8-35e73b2d7a68", - "metadata": {}, - "outputs": [], - "source": [ - "reg(r\"\", s1)" - ] - }, - { - "cell_type": "markdown", - "id": "76570acd", - "metadata": {}, - "source": [ - "#### Find last \"word\" in the sentence." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f35a6c81-47b5-48eb-8357-888fe832f85b", - "metadata": {}, - "outputs": [], - "source": [ - "reg(r\"\", s1)" - ] - }, - { - "cell_type": "markdown", - "id": "4b25fb66", - "metadata": {}, - "source": [ - "### Case study: find all phone numbers." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8ecbeaf0", - "metadata": {}, - "outputs": [], - "source": [ - "print(s2)\n", - "# The country code (1) in the front is optional\n", - "# The area code (608) is also optional\n", - "# Doesn't make sense to match country code without area code though!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "88a01725-790c-4f21-b334-e3d31bed24b5", - "metadata": {}, - "outputs": [], - "source": [ - "# Full US phone numbers\n", - "reg(r\"\", s2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ea8ce0be-98f8-4f9c-bcc0-1d8a0bb183a8", - "metadata": { - "collapsed": true, - "jupyter": { - "outputs_hidden": true - }, - "tags": [] - }, - "outputs": [], - "source": [ - "# The country code (1) in the front is optional\n", - "reg(r\"\", s2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5befb23a-848f-44d4-aaac-6b21ac0bbf43", - "metadata": {}, - "outputs": [], - "source": [ - "# The area code (608) is also optional\n", - "# Doesn't make sense to have country code without area code though!\n", - "reg(r\"\", s2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "34164fd1-a422-45a0-8e11-54199ca77120", - "metadata": {}, - "outputs": [], - "source": [ - "# This is good enough for 320 quizzes/tests\n", - "# But 
clearly, the last match is not correct\n", - "reg(r\"\", s2)" - ] - }, - { - "cell_type": "markdown", - "id": "8a2ee4e2", - "metadata": {}, - "source": [ - "Regex documentation link: https://docs.python.org/3/library/re.html." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "694a585b-b5a7-4a6f-a0f1-60521f7dfc47", - "metadata": {}, - "outputs": [], - "source": [ - "# BONUS: negative lookbehind (I won't test this)\n", - "reg(r\"(?<!\\d\\-)((\\d-)?\\d{3}-)?\\d{3}-\\d{4}\", s2)" - ] - }, - { - "cell_type": "markdown", - "id": "3973350b", - "metadata": {}, - "source": [ - "There is also a negative lookahead. For example, how to avoid matching \"1-608-123-456\" in \"1-608-123-4569999\". You can explore this if you are interested." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4988d765", - "metadata": {}, - "outputs": [], - "source": [ - "reg(r\"(?<!\\d\\-)((\\d-)?\\d{3}-)?\\d{3}-\\d{4}\", \"608-123-4569999\")" - ] - }, - { - "cell_type": "markdown", - "id": "b02ae9e0", - "metadata": {}, - "source": [ - "### Testing your regex\n", - "- you could use `reg(...)` function\n", - "- another useful resource: https://regex101.com/" - ] - }, - { - "cell_type": "markdown", - "id": "4a973271", - "metadata": {}, - "source": [ - "### `re` module\n", - "- `re.findall(<PATTERN>, <SEARCH STRING>)`: regular expression matches\n", - " - returns a list of strings \n", - "- `re.sub(<PATTERN>, <REPLACEMENT>, <SEARCH STRING>)`: regular expression match + substitution\n", - " - returns a new string with the substitutions (remember strings are immutable)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "73ec525f", - "metadata": {}, - "outputs": [], - "source": [ - "msg = \"In CS 320,\\tthere are 28 lectures, 11 quizzes, 3 exams,\\t6 projects, and 1000 things to learn. CS 320 is awesome!\"\n", - "print(msg)" - ] - }, - { - "cell_type": "markdown", - "id": "34998a5e", - "metadata": {}, - "source": [ - "#### Find all digits." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7f42c25a", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "70b2f488", - "metadata": {}, - "source": [ - "### Groups\n", - "- we can capture matches using `()` => this is the special meaning of `()`\n", - "- returns a list of tuples, where length of the tuple will be number of groups\n", - "\n", - "#### Find all digits and the word that comes after that." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5309adee", - "metadata": {}, - "outputs": [], - "source": [ - "matches = re.findall(r\"\", msg)\n", - "matches" - ] - }, - { - "cell_type": "markdown", - "id": "bc6a982c", - "metadata": {}, - "source": [ - "#### Goal: make a dict (course component => count, like \"projects\" => 6)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c7f1a028", - "metadata": {}, - "outputs": [], - "source": [ - "course_dict = {}\n", - "for count, component in matches:\n", - " course_dict[component] = int(count)\n", - "course_dict" - ] - }, - { - "cell_type": "markdown", - "id": "c4b6b505", - "metadata": {}, - "source": [ - "### Unlike matches, groups can overlap\n", - "\n", - "#### Find and group all digits and the word that comes after that." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "491c3460", - "metadata": {}, - "outputs": [], - "source": [ - "re.findall(r\"(\\d+) (\\w+)\", msg)" - ] - }, - { - "cell_type": "markdown", - "id": "d2227e69", - "metadata": {}, - "source": [ - "#### Substitute all digits with \"###\"." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6d1fede1", - "metadata": {}, - "outputs": [], - "source": [ - "re.sub(r\"\", , msg)" - ] - }, - { - "cell_type": "markdown", - "id": "9d531122", - "metadata": {}, - "source": [ - "#### Goal: normalize whitespace (everything will be a single space)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4becbe70", - "metadata": {}, - "outputs": [], - "source": [ - "print(msg)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "72a6eb42", - "metadata": {}, - "outputs": [], - "source": [ - "re.sub(r\"\", , msg)" - ] - }, - { - "cell_type": "markdown", - "id": "6faf33fd", - "metadata": {}, - "source": [ - "### How to use groups is substitution?\n", - "- `\\g<N>` gives you the result of the N'th grouping.\n", - "\n", - "#### Substitute all course component counts with HTML bold tags." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8df577fd", - "metadata": {}, - "outputs": [], - "source": [ - "print(re.sub(r\"(\\d+)\", \"<b></b>\", msg))" - ] - }, - { - "cell_type": "markdown", - "id": "35a15a41", - "metadata": {}, - "source": [ - "In CS <b>320</b>, there are <b>28</b> lectures, <b>11</b> quizzes, <b>3</b> exams, <b>6</b> projects, and <b>1000</b> things to learn. CS <b>320</b> is awesome!" 
- ] - }, - { - "cell_type": "markdown", - "id": "6b299526", - "metadata": {}, - "source": [ - "### Git log example" - ] - }, - { - "cell_type": "markdown", - "id": "a9b7261c", - "metadata": {}, - "source": [ - "#### Run `git log` as a shell command" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "10e459b4", - "metadata": {}, - "outputs": [], - "source": [ - "!git log" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ef440fed", - "metadata": {}, - "outputs": [], - "source": [ - "git_log_output = str(check_output([\"git\", \"log\"]), encoding=\"utf-8\")\n", - "print(git_log_output[:500])" - ] - }, - { - "cell_type": "markdown", - "id": "5c154b46", - "metadata": {}, - "source": [ - "#### GOAL: find all the commit numbers" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5ea954bf", - "metadata": {}, - "outputs": [], - "source": [ - "commits = re.findall(r\"\", git_log_output)\n", - "# recent 10 commit numbers\n", - "commits[:10]" - ] - }, - { - "cell_type": "markdown", - "id": "bc485b5f", - "metadata": {}, - "source": [ - "#### What days of the week does the team push things into this repo?" 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "57353c44", - "metadata": {}, - "outputs": [], - "source": [ - "print(git_log_output[:500])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d1ea6f59", - "metadata": {}, - "outputs": [], - "source": [ - "days = re.findall(r\"\", git_log_output)\n", - "days" - ] - }, - { - "cell_type": "markdown", - "id": "2c7efb55", - "metadata": {}, - "source": [ - "#### Count unique days" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3c2d7207", - "metadata": {}, - "outputs": [], - "source": [ - "day_counts = pd.Series(days).value_counts()\n", - "day_counts" - ] - }, - { - "cell_type": "markdown", - "id": "7317ca35", - "metadata": {}, - "source": [ - "#### Sort by day of the week" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d5c5c58c", - "metadata": {}, - "outputs": [], - "source": [ - "sorted_day_counts = day_counts.loc[[\"Mon\", \"Tue\", \"Wed\", \"Thu\", \"Fri\", \"Sun\"]]\n", - "sorted_day_counts" - ] - }, - { - "cell_type": "markdown", - "id": "745afee5", - "metadata": {}, - "source": [ - "#### Create a bar plot" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c7bb8f6f", - "metadata": {}, - "outputs": [], - "source": [ - "ax = sorted_day_counts.plot.bar()\n", - "ax.set_ylabel(\"Commit counts\")\n", - "ax.set_xlabel(\"Days of the week\")" - ] - }, - { - "cell_type": "markdown", - "id": "ecfc71e6", - "metadata": {}, - "source": [ - "#### Find all commit authors names." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6153035a", - "metadata": {}, - "outputs": [], - "source": [ - "authors = re.findall(r\"\", git_log_output)\n", - "authors[0]" - ] - }, - { - "cell_type": "markdown", - "id": "3fa201fb", - "metadata": {}, - "source": [ - "#### `git log` from projects repo" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e200a8b0", - "metadata": {}, - "outputs": [], - "source": [ - "git_log_output = str(check_output([\"git\", \"log\"], cwd=\"../../projects-and-labs\"), encoding=\"utf-8\")\n", - "print(git_log_output[:1000])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "053b2607", - "metadata": {}, - "outputs": [], - "source": [ - "re.findall(r\"\", git_log_output)" - ] - }, - { - "cell_type": "markdown", - "id": "3ce53c79", - "metadata": {}, - "source": [ - "### Emails example" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1968c0ff", - "metadata": {}, - "outputs": [], - "source": [ - "s = \"\"\"\n", - "Gurmail [Instructor] - gsingh58(AT) cs.wisc.edu\n", - "Jinlang [Head TA] - jwang2775 (AT) wisc.edu\n", - "Elliot [TA] - eepickens (AT) cs.wisc.edu\n", - "Alex [TA] - aclinton (AT) wisc.edu\n", - "Bowman [TA] - bnbrown3 (AT) wisc.edu\n", - "Hafeez [TA] - aneesali (AT) wisc.edu\n", - "William [TA] - wycong (AT) wisc.edu\n", - "\"\"\"\n", - "print(s)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5fbfdf12", - "metadata": {}, - "outputs": [], - "source": [ - "name = r\"\\w+\"\n", - "at = r\"@|([\\(\\[]?[Aa][Tt][\\)\\]]?)\"\n", - "domain = r\"\\w+\\.(\\w+\\.)?(edu|com|org|net|io|gov)\"\n", - "\n", - "full_regex = f\"(({name})\\s*({at})\\s*({domain}))\"\n", - "\n", - "re.findall(full_regex, s)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2257dbf1", - "metadata": {}, - "outputs": [], - "source": [ - "print(\"REGEX:\", full_regex)\n", - "for match in re.findall(full_regex, s):\n", - " 
print(match[1] + \"@\" + match[4])" - ] - }, - { - "cell_type": "markdown", - "id": "16c6c169", - "metadata": {}, - "source": [ - "### Self-practice\n", - "\n", - "Q1: Which regex will NOT match \"123\"\n", - "1. r\"\\d\\d\\d\"\n", - "2. r\"\\d{3}\"\n", - "3. r\"\\D\\D\\D\"\n", - "4. r\"...\"\n", - "\n", - "Q2: What will r\"^A\" match?\n", - "1. \"A\"\n", - "2. \"^A\"\n", - "3. \"BA\"\n", - "4. \"B\"\n", - "5. \"BB\"\n", - "\n", - "Q3: Which one can match \"HH\"?\n", - "1. r\"HA+H\"\n", - "2. r\"HA+?H\"\n", - "3. r\"H(A+)?H\"\n", - "\n", - "Q4: Which string(s) will match r\"^(ha)*$\"\n", - "1. \"\"\n", - "2. \"hahah\"\n", - "3. \"that\"\n", - "4. \"HAHA\"\n", - "\n", - "Q5: What is the type of the following?re.findall(r\"(\\d) (\\w+)\", some_str)[0]\n", - "1. list\n", - "2. tuple\n", - "3. string\n", - "\n", - "Q6: What will it do?\n", - "```python\n", - "re.sub(r\"(\\d{3})-(\\d{3}-\\d{4})\",\n", - " r\"(\\g<1>) \\g<2>\",\n", - " \"608-123-4567\")\n", - "```" - ] - }, - { - "cell_type": "markdown", - "id": "f1184ba1", - "metadata": {}, - "source": [ - "The answers of these questions can be found in self_practice.ipynb. You may want to try to answer these questions yourself and then verify your answers." 
- ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/lecture_material/15-regex_2/regex_2_lec_002.ipynb b/lecture_material/15-regex_2/regex_2_lec_002.ipynb deleted file mode 100644 index db06eb7fa32b923d38c746a1408a37396c9ee014..0000000000000000000000000000000000000000 --- a/lecture_material/15-regex_2/regex_2_lec_002.ipynb +++ /dev/null @@ -1,835 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "e60c1c48", - "metadata": {}, - "source": [ - "# Regex 2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0dba68b0", - "metadata": {}, - "outputs": [], - "source": [ - "#import statements\n", - "import re\n", - "from subprocess import check_output\n", - "import pandas as pd" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b97c8008-8e39-4a9f-89a1-0d1ddbb1ac01", - "metadata": {}, - "outputs": [], - "source": [ - "# Example strings\n", - "# from DS100 book...\n", - "def reg(regex, text):\n", - " \"\"\"\n", - " Prints the string with the regex match highlighted.\n", - " \"\"\"\n", - " print(re.sub(f'({regex})', r'\\033[1;30;43m\\1\\033[m', text))\n", - "s1 = \" \".join([\"A DAG is a directed graph without cycles.\",\n", - " \"A tree is a DAG where every node has one parent (except the root, which has none).\",\n", - " \"To learn more, visit www.example.com or call 1-608-123-4567. 
:) ¯\\_(ツ)_/¯\"])\n", - "print(s1)\n", - "\n", - "s2 = \"\"\"1-608-123-4567\n", - "a-bcd-efg-hijg (not a phone number)\n", - "1-608-123-456 (not a phone number)\n", - "608-123-4567\n", - "123-4567\n", - "1-123-4567 (not a phone number)\n", - "\"\"\"\n", - "print(s2)\n", - "\n", - "s3 = \"In CS 320, there are 11 quizzes, 6 projects, 28 lectures, and 1000 things to learn. CS 320 is awesome!\"\n", - "print(s3)\n", - "\n", - "s4 = \"\"\"In CS 320, there are 11 quizzes, 6 projects,\n", - "28 lectures, and 1000 things to learn. CS 320 is awesome!\"\"\"\n", - "print(s4)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "924069c5-82be-4423-b659-2beee8e226be", - "metadata": {}, - "outputs": [], - "source": [ - "print(s1)" - ] - }, - { - "cell_type": "markdown", - "id": "6a2eff34", - "metadata": {}, - "source": [ - "### Regex is case sensitive\n", - "\n", - "### Character classes\n", - "\n", - "- Character classes can be mentioned within `[...]`\n", - "- `^` means `NOT` of a character class\n", - "- `-` enables us to mention range of characters, for example `[A-Z]`\n", - "- `|` enables us to perform `OR`\n", - "\n", - "### Metacharacters\n", - "\n", - "- predefined character classes\n", - " - `\\d` => digits\n", - " - `\\s` => whitespace (space, tab, newline)\n", - " - `\\w` => \"word\" characters (digits, letters, underscores, etc) --- helpful for variable name matches and whole word matches (as it doesn't match whitespace --- `\\s`)\n", - " - `.` => wildcard: anything except newline\n", - "- capitalized version of character classes mean `NOT`, for example `\\D` => everything except digits\n", - "\n", - "### REPETITION\n", - "\n", - "- `<character>{<num matches>}` - for example: `w{3}`\n", - "- matches cannot overlap\n", - "\n", - "### Variable length repitition operators\n", - "\n", - "- `*` => 0 or more (greedy: match as many characters as possible)\n", - "- `+` => 1 or more (greedy: match as many characters as possible)\n", - "- `?` => 0 or 1\n", - 
"- `*?` => 0 or more (non-greedy: match as few characters as possible)\n", - "- `+?` => 1 or more (non-greedy: match as few characters as possible)\n", - "\n", - "#### Find everything inside of parentheses." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "11f75d92-f215-47f4-8804-5e5727d3be55", - "metadata": {}, - "outputs": [], - "source": [ - "# this doesn't work\n", - "# it captures everything because () have special meaning (coming up)\n", - "reg(r\"\", s1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b488460e", - "metadata": {}, - "outputs": [], - "source": [ - "# How can we change this to not use special meaning of ()?\n", - "# * is greedy: match as many characters as possible\n", - "reg(r\"(.*)\", s1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "42155b54-d27b-4418-81a1-c005c508d738", - "metadata": {}, - "outputs": [], - "source": [ - "# non-greedy: stop at the first possible spot instead of the last possible spot\n", - "reg(r\"\\(.*\\)\", s1)" - ] - }, - { - "cell_type": "markdown", - "id": "0fd70cfd", - "metadata": {}, - "source": [ - "### Anchor characters\n", - "- `^` => start of string\n", - " - `^` is overloaded --- what was the other usage?\n", - "- `$` => end of string\n", - "\n", - "#### Find everything in the first sentence." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "40fed5db", - "metadata": {}, - "outputs": [], - "source": [ - "# doesn't work because remember regex finds all possible matches\n", - "# so it matches every single sentence \n", - "# (even though we are doing non-greedy match)\n", - "reg(r\"\", s1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7e97abd8", - "metadata": {}, - "outputs": [], - "source": [ - "reg(r\".*?\\.\", s1)" - ] - }, - { - "cell_type": "markdown", - "id": "f66a4651", - "metadata": {}, - "source": [ - "#### Find everything in the first two sentences." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cfa8353e-4402-4f64-b3f8-35e73b2d7a68", - "metadata": {}, - "outputs": [], - "source": [ - "reg(r\"\", s1)" - ] - }, - { - "cell_type": "markdown", - "id": "76570acd", - "metadata": {}, - "source": [ - "#### Find last \"word\" in the sentence." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f35a6c81-47b5-48eb-8357-888fe832f85b", - "metadata": {}, - "outputs": [], - "source": [ - "reg(r\"\", s1)" - ] - }, - { - "cell_type": "markdown", - "id": "4b25fb66", - "metadata": {}, - "source": [ - "### Case study: find all phone numbers." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8ecbeaf0", - "metadata": {}, - "outputs": [], - "source": [ - "print(s2)\n", - "# The country code (1) in the front is optional\n", - "# The area code (608) is also optional\n", - "# Doesn't make sense to match country code without area code though!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "88a01725-790c-4f21-b334-e3d31bed24b5", - "metadata": {}, - "outputs": [], - "source": [ - "# Full US phone numbers\n", - "reg(r\"\", s2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ea8ce0be-98f8-4f9c-bcc0-1d8a0bb183a8", - "metadata": { - "collapsed": true, - "jupyter": { - "outputs_hidden": true - }, - "tags": [] - }, - "outputs": [], - "source": [ - "# The country code (1) in the front is optional\n", - "reg(r\"\", s2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5befb23a-848f-44d4-aaac-6b21ac0bbf43", - "metadata": {}, - "outputs": [], - "source": [ - "# The area code (608) is also optional\n", - "# Doesn't make sense to have country code without area code though!\n", - "reg(r\"\", s2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "34164fd1-a422-45a0-8e11-54199ca77120", - "metadata": {}, - "outputs": [], - "source": [ - "# This is good enough for 320 quizzes/tests\n", - "# But 
clearly, the last match is not correct\n", - "reg(r\"\", s2)" - ] - }, - { - "cell_type": "markdown", - "id": "8a2ee4e2", - "metadata": {}, - "source": [ - "Regex documentation link: https://docs.python.org/3/library/re.html." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "694a585b-b5a7-4a6f-a0f1-60521f7dfc47", - "metadata": {}, - "outputs": [], - "source": [ - "# BONUS: negative lookbehind (I won't test this)\n", - "reg(r\"(?<!\\d\\-)((\\d-)?\\d{3}-)?\\d{3}-\\d{4}\", s2)" - ] - }, - { - "cell_type": "markdown", - "id": "3973350b", - "metadata": {}, - "source": [ - "There is also a negative lookahead. For example, how to avoid matching \"1-608-123-456\" in \"1-608-123-4569999\". You can explore this if you are interested." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4988d765", - "metadata": {}, - "outputs": [], - "source": [ - "reg(r\"(?<!\\d\\-)((\\d-)?\\d{3}-)?\\d{3}-\\d{4}\", \"608-123-4569999\")" - ] - }, - { - "cell_type": "markdown", - "id": "b02ae9e0", - "metadata": {}, - "source": [ - "### Testing your regex\n", - "- you could use `reg(...)` function\n", - "- another useful resource: https://regex101.com/" - ] - }, - { - "cell_type": "markdown", - "id": "4a973271", - "metadata": {}, - "source": [ - "### `re` module\n", - "- `re.findall(<PATTERN>, <SEARCH STRING>)`: regular expression matches\n", - " - returns a list of strings \n", - "- `re.sub(<PATTERN>, <REPLACEMENT>, <SEARCH STRING>)`: regular expression match + substitution\n", - " - returns a new string with the substitutions (remember strings are immutable)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "73ec525f", - "metadata": {}, - "outputs": [], - "source": [ - "msg = \"In CS 320,\\tthere are 28 lectures, 11 quizzes, 3 exams,\\t6 projects, and 1000 things to learn. CS 320 is awesome!\"\n", - "print(msg)" - ] - }, - { - "cell_type": "markdown", - "id": "34998a5e", - "metadata": {}, - "source": [ - "#### Find all digits." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7f42c25a", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "70b2f488", - "metadata": {}, - "source": [ - "### Groups\n", - "- we can capture matches using `()` => this is the special meaning of `()`\n", - "- returns a list of tuples, where length of the tuple will be number of groups\n", - "\n", - "#### Find all digits and the word that comes after that." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5309adee", - "metadata": {}, - "outputs": [], - "source": [ - "matches = re.findall(r\"\", msg)\n", - "matches" - ] - }, - { - "cell_type": "markdown", - "id": "bc6a982c", - "metadata": {}, - "source": [ - "#### Goal: make a dict (course component => count, like \"projects\" => 6)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c7f1a028", - "metadata": {}, - "outputs": [], - "source": [ - "course_dict = {}\n", - "for count, component in matches:\n", - " course_dict[component] = int(count)\n", - "course_dict" - ] - }, - { - "cell_type": "markdown", - "id": "c4b6b505", - "metadata": {}, - "source": [ - "### Unlike matches, groups can overlap\n", - "\n", - "#### Find and group all digits and the word that comes after that." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "491c3460", - "metadata": {}, - "outputs": [], - "source": [ - "re.findall(r\"(\\d+) (\\w+)\", msg)" - ] - }, - { - "cell_type": "markdown", - "id": "d2227e69", - "metadata": {}, - "source": [ - "#### Substitute all digits with \"###\"." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6d1fede1", - "metadata": {}, - "outputs": [], - "source": [ - "re.sub(r\"\", , msg)" - ] - }, - { - "cell_type": "markdown", - "id": "9d531122", - "metadata": {}, - "source": [ - "#### Goal: normalize whitespace (everything will be a single space)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4becbe70", - "metadata": {}, - "outputs": [], - "source": [ - "print(msg)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "72a6eb42", - "metadata": {}, - "outputs": [], - "source": [ - "re.sub(r\"\", , msg)" - ] - }, - { - "cell_type": "markdown", - "id": "6faf33fd", - "metadata": {}, - "source": [ - "### How to use groups in substitution?\n", - "- `\\g<N>` gives you the result of the N'th grouping.\n", - "\n", - "#### Substitute all course component counts with HTML bold tags." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8df577fd", - "metadata": {}, - "outputs": [], - "source": [ - "print(re.sub(r\"(\\d+)\", \"<b></b>\", msg))" - ] - }, - { - "cell_type": "markdown", - "id": "35a15a41", - "metadata": {}, - "source": [ - "In CS <b>320</b>, there are <b>28</b> lectures, <b>11</b> quizzes, <b>3</b> exams, <b>6</b> projects, and <b>1000</b> things to learn. CS <b>320</b> is awesome!"
- ] - }, - { - "cell_type": "markdown", - "id": "6b299526", - "metadata": {}, - "source": [ - "### Git log example" - ] - }, - { - "cell_type": "markdown", - "id": "a9b7261c", - "metadata": {}, - "source": [ - "#### Run `git log` as a shell command" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "10e459b4", - "metadata": {}, - "outputs": [], - "source": [ - "!git log" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ef440fed", - "metadata": {}, - "outputs": [], - "source": [ - "git_log_output = str(check_output([\"git\", \"log\"]), encoding=\"utf-8\")\n", - "print(git_log_output[:500])" - ] - }, - { - "cell_type": "markdown", - "id": "5c154b46", - "metadata": {}, - "source": [ - "#### GOAL: find all the commit numbers" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5ea954bf", - "metadata": {}, - "outputs": [], - "source": [ - "commits = re.findall(r\"\", git_log_output)\n", - "# recent 10 commit numbers\n", - "commits[:10]" - ] - }, - { - "cell_type": "markdown", - "id": "bc485b5f", - "metadata": {}, - "source": [ - "#### What days of the week does the team push things into this repo?" 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "57353c44", - "metadata": {}, - "outputs": [], - "source": [ - "print(git_log_output[:500])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d1ea6f59", - "metadata": {}, - "outputs": [], - "source": [ - "days = re.findall(r\"\", git_log_output)\n", - "days" - ] - }, - { - "cell_type": "markdown", - "id": "2c7efb55", - "metadata": {}, - "source": [ - "#### Count unique days" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3c2d7207", - "metadata": {}, - "outputs": [], - "source": [ - "day_counts = pd.Series(days).value_counts()\n", - "day_counts" - ] - }, - { - "cell_type": "markdown", - "id": "7317ca35", - "metadata": {}, - "source": [ - "#### Sort by day of the week" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d5c5c58c", - "metadata": {}, - "outputs": [], - "source": [ - "sorted_day_counts = day_counts.loc[[\"Mon\", \"Tue\", \"Wed\", \"Thu\", \"Fri\", \"Sun\"]]\n", - "sorted_day_counts" - ] - }, - { - "cell_type": "markdown", - "id": "745afee5", - "metadata": {}, - "source": [ - "#### Create a bar plot" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c7bb8f6f", - "metadata": {}, - "outputs": [], - "source": [ - "ax = sorted_day_counts.plot.bar()\n", - "ax.set_ylabel(\"Commit counts\")\n", - "ax.set_xlabel(\"Days of the week\")" - ] - }, - { - "cell_type": "markdown", - "id": "ecfc71e6", - "metadata": {}, - "source": [ - "#### Find all commit authors names." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6153035a", - "metadata": {}, - "outputs": [], - "source": [ - "authors = re.findall(r\"\", git_log_output)\n", - "authors[0]" - ] - }, - { - "cell_type": "markdown", - "id": "3fa201fb", - "metadata": {}, - "source": [ - "#### `git log` from projects repo" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e200a8b0", - "metadata": {}, - "outputs": [], - "source": [ - "git_log_output = str(check_output([\"git\", \"log\"], cwd=\"../../projects-and-labs\"), encoding=\"utf-8\")\n", - "print(git_log_output[:1000])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "053b2607", - "metadata": {}, - "outputs": [], - "source": [ - "re.findall(r\"\", git_log_output)" - ] - }, - { - "cell_type": "markdown", - "id": "3ce53c79", - "metadata": {}, - "source": [ - "### Emails example" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1968c0ff", - "metadata": {}, - "outputs": [], - "source": [ - "s = \"\"\"\n", - "Gurmail [Instructor] - gsingh58(AT) cs.wisc.edu\n", - "Jinlang [Head TA] - jwang2775 (AT) wisc.edu\n", - "Elliot [TA] - eepickens (AT) cs.wisc.edu\n", - "Alex [TA] - aclinton (AT) wisc.edu\n", - "Bowman [TA] - bnbrown3 (AT) wisc.edu\n", - "Hafeez [TA] - aneesali (AT) wisc.edu\n", - "William [TA] - wycong (AT) wisc.edu\n", - "\"\"\"\n", - "print(s)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5fbfdf12", - "metadata": {}, - "outputs": [], - "source": [ - "name = r\"\\w+\"\n", - "at = r\"@|([\\(\\[]?[Aa][Tt][\\)\\]]?)\"\n", - "domain = r\"\\w+\\.(\\w+\\.)?(edu|com|org|net|io|gov)\"\n", - "\n", - "full_regex = f\"(({name})\\s*({at})\\s*({domain}))\"\n", - "\n", - "re.findall(full_regex, s)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2257dbf1", - "metadata": {}, - "outputs": [], - "source": [ - "print(\"REGEX:\", full_regex)\n", - "for match in re.findall(full_regex, s):\n", - " 
print(match[1] + \"@\" + match[4])" - ] - }, - { - "cell_type": "markdown", - "id": "16c6c169", - "metadata": {}, - "source": [ - "### Self-practice\n", - "\n", - "Q1: Which regex will NOT match \"123\"?\n", - "1. r\"\\d\\d\\d\"\n", - "2. r\"\\d{3}\"\n", - "3. r\"\\D\\D\\D\"\n", - "4. r\"...\"\n", - "\n", - "Q2: What will r\"^A\" match?\n", - "1. \"A\"\n", - "2. \"^A\"\n", - "3. \"BA\"\n", - "4. \"B\"\n", - "5. \"BB\"\n", - "\n", - "Q3: Which one can match \"HH\"?\n", - "1. r\"HA+H\"\n", - "2. r\"HA+?H\"\n", - "3. r\"H(A+)?H\"\n", - "\n", - "Q4: Which string(s) will match r\"^(ha)*$\"?\n", - "1. \"\"\n", - "2. \"hahah\"\n", - "3. \"that\"\n", - "4. \"HAHA\"\n", - "\n", - "Q5: What is the type of the following? re.findall(r\"(\\d) (\\w+)\", some_str)[0]\n", - "1. list\n", - "2. tuple\n", - "3. string\n", - "\n", - "Q6: What will it do?\n", - "```python\n", - "re.sub(r\"(\\d{3})-(\\d{3}-\\d{4})\",\n", - " r\"(\\g<1>) \\g<2>\",\n", - " \"608-123-4567\")\n", - "```" - ] - }, - { - "cell_type": "markdown", - "id": "f1184ba1", - "metadata": {}, - "source": [ - "The answers to these questions can be found in self_practice.ipynb. You may want to try to answer these questions yourself and then verify your answers." - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -}