lec13 modified

d418d2ca · gsingh58 · 2c730788 · d418d2ca · d418d2ca
Commit d418d2ca authored 8 months ago by gsingh58
--- a/lecture_material/13-Web_3/13-web.ipynb
+++ b/lecture_material/13-Web_3/13-web.ipynb
@@ -35,9 +35,9 @@
    "        - then separate argument-value pair by \"=\"\n",
    "        - use \"&\" as delimiter between two argument-value pairs\n",
    "    - examples: \n",
-    "        - http://34.123.132.20:5000/add?x=10&y=20\n",
-    "        - http://34.123.132.20:5000/survey?major=CS\n",
-    "        - http://34.123.132.20:5000/survey?major=Mechanical_Engineering"
+    "        - http://35.226.223.87:5000/add?x=10&y=20\n",
+    "        - http://35.226.223.87:5000/survey?major=CS\n",
+    "        - http://35.226.223.87:5000/survey?major=Mechanical_Engineering"
   ]
  },
  {
@@ -47,7 +47,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "base_url = \"http://34.123.132.20:5000/\""
+    "base_url = \"http://35.226.223.87:5000/\""
   ]
  },
  {
@@ -93,7 +93,7 @@
    {
     "data": {
      "text/plain": [
-       "True"
+       "False"
      ]
     },
     "execution_count": 4,
@@ -222,7 +222,7 @@
    {
     "data": {
      "text/plain": [
-       "0.5712421394829712"
+       "np.float64(0.5712421394829712)"
      ]
     },
     "execution_count": 7,
@@ -256,7 +256,7 @@
    {
     "data": {
      "text/plain": [
-       "0.00042033045869994034"
+       "np.float64(0.00042033045869994034)"
      ]
     },
     "execution_count": 8,
@@ -286,7 +286,7 @@
    {
     "data": {
      "text/plain": [
-       "0.02820356890423392"
+       "np.float64(0.02820356890423392)"
      ]
     },
     "execution_count": 9,
@@ -305,6 +305,14 @@
    "_, pvalue = stats.fisher_exact(df)\n",
    "pvalue"
   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d63d0930-3560-45bf-9c2c-3a9f14175040",
+   "metadata": {},
+   "outputs": [],
+   "source": []
  }
 ],
 "metadata": {
@@ -323,7 +331,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.8.10"
+   "version": "3.10.12"
  }
 },
 "nbformat": 4,

 %% Cell type:markdown id:cf313adf tags:

 # Web 3: More Flask and A/B testing

 %% Cell type:code id:d55e4bb4-9f29-4f4f-bba6-05054718259b tags:

 ``` python
 import requests
 import time
 import urllib.robotparser

 import pandas as pd
 # new import statement: requires pip3 install scipy
 from scipy import stats
 ```

 %% Cell type:markdown id:725234ef-a02b-47da-bfec-aaa16e852a58 tags:

 - `flask.request.args`: enables us to get the arguments passed as part of the URL
    - How do we pass arguments?
        - at the end of the URL, add a "?"
        - then separate argument-value pair by "="
        - use "&" as delimiter between two argument-value pairs
    - examples:
-        - http://34.123.132.20:5000/add?x=10&y=20
-        - http://34.123.132.20:5000/survey?major=CS
-        - http://34.123.132.20:5000/survey?major=Mechanical_Engineering
+        - http://35.226.223.87:5000/add?x=10&y=20
+        - http://35.226.223.87:5000/survey?major=CS
+        - http://35.226.223.87:5000/survey?major=Mechanical_Engineering

 %% Cell type:code id:8241e51c tags:

 ``` python
-base_url = "http://34.123.132.20:5000/"
+base_url = "http://35.226.223.87:5000/"
 ```

 %% Cell type:markdown id:23ba100b tags:

 ### `urllib.robotparser`

 - Documentation: https://docs.python.org/3/library/urllib.robotparser.html

 %% Cell type:code id:379c3ae5-7344-45b1-88c3-b35f0bd8eb5b tags:

 ``` python
 # base_url already ends with "/", so paths are appended without a leading
 # slash. Otherwise the URL contains "//", and the path that can_fetch checks
 # ("//slow") is not the one that is actually requested later ("/slow").
 rp = urllib.robotparser.RobotFileParser()
 rp.set_url(base_url + "robots.txt")
 rp.read()  # downloads and parses the site's robots.txt
 rp.can_fetch("cs320bot", base_url + "slow")
 ```

 %% Output

    True

 %% Cell type:code id:2e3fb01c-4281-4cbf-8828-98e04d27d09a tags:

 ``` python
 rp.can_fetch("cs320bot", base_url + "never")
 ```

 %% Output

-    True
+    False

 %% Cell type:code id:6cc81b85 tags:

 ``` python
 def friendly_get(url):
    """Fetch `url` politely: check robots.txt first and back off on HTTP 429.

    Raises an Exception when `rp` says "cs320bot" may not fetch `url`.
    Otherwise keeps requesting until the status is not 429, sleeping for the
    number of seconds given by the "Retry-After" header (1 if it is absent)
    between attempts, then returns the response.
    """
    allowed = rp.can_fetch("cs320bot", url)
    if not allowed:
        raise Exception("you're not supposed to visit that page")
    response = requests.get(url)
    # 429 means "too many requests": wait as instructed, then try again
    while response.status_code == 429:
        delay = int(response.headers.get("Retry-After", 1))
        print(f"sleep {delay}")
        time.sleep(delay)
        response = requests.get(url)
    response.raise_for_status()  # raise an exception on an error status
    return response

 friendly_get(base_url + "slow").text
 ```

 %% Output

    'welcome!'

 %% Cell type:markdown id:1dbd2fad-1bf5-437f-9ebb-6694ba860e27 tags:

 # A/B testing

 %% Cell type:code id:6922661b-af1b-41d5-bcb0-f2759e890f3e tags:

 ``` python
 df = pd.DataFrame({
    "click":    {"A": 50, "B": 55},
    "no-click": {"A": 50, "B": 45}
 })
 df
 # Which has the higher CTR A or B?
 ```

 %% Output

       click  no-click
    A     50        50
    B     55        45

 %% Cell type:code id:31b0de1b-f7fb-42ee-a087-3d884cc31590 tags:

 ``` python
 _, pvalue = stats.fisher_exact(df)
 pvalue
 # no evidence that A and B are different because pvalue is not less than 5%
 ```

 %% Output

-    0.5712421394829712
+    np.float64(0.5712421394829712)

 %% Cell type:markdown id:5dde0ee6-b03e-4bbf-b896-1794a2bff610 tags:

 ### Two situations when pvalue will be lower than significance threshold

 1. Sample size is the same, but skew is very heavy --- unlikely to have that by chance
 2. Sample size is large, but skew is small

 %% Cell type:code id:b8719b9e-f366-47a0-9d96-50b4fdd8fb27 tags:

 ``` python
 # Scenario 1:
 # Sample size is the same, but skew is very heavy ---
 # unlikely to have that by chance

 df = pd.DataFrame({
    "click":    {"A": 50, "B": 75},
    "no-click": {"A": 50, "B": 25}
 })
 _, pvalue = stats.fisher_exact(df)
 pvalue
 ```

 %% Output

-    0.00042033045869994034
+    np.float64(0.00042033045869994034)

 %% Cell type:code id:72a18c43-0f07-4e7f-bb88-89a43563f295 tags:

 ``` python
 # Scenario 2:
 # Sample size is large, but skew is small

 df = pd.DataFrame({
    "click":    {"A": 500, "B": 550},
    "no-click": {"A": 500, "B": 450}
 })
 _, pvalue = stats.fisher_exact(df)
 pvalue
 ```

 %% Output

-    0.02820356890423392
+    np.float64(0.02820356890423392)
+
+%% Cell type:code id:d63d0930-3560-45bf-9c2c-3a9f14175040 tags:
+
+``` python
+```

--- a/lecture_material/13-Web_3/13-web_001.ipynb
+++ b/lecture_material/13-Web_3/13-web_001.ipynb
@@ -34,9 +34,9 @@
    "        - then separate argument-value pair by \"=\"\n",
    "        - use \"&\" as delimiter between two argument-value pairs\n",
    "    - examples: \n",
-    "        - http://34.123.132.20:5000/add?x=10&y=20\n",
-    "        - http://34.123.132.20:5000/survey?major=CS\n",
-    "        - http://34.123.132.20:5000/survey?major=Mechanical_Engineering"
+    "        - http://35.226.223.87:5000/add?x=10&y=20\n",
+    "        - http://35.226.223.87:5000/survey?major=CS\n",
+    "        - http://35.226.223.87:5000/survey?major=Mechanical_Engineering"
   ]
  },
  {
@@ -46,7 +46,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "base_url = \"http://34.123.132.20:5000/\""
+    "base_url = \"http://35.226.223.87:5000/\""
   ]
  },
  {
@@ -196,7 +196,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.8.10"
+   "version": "3.10.12"
  }
 },
 "nbformat": 4,

 %% Cell type:markdown id:cf313adf tags:

 # Web 3: More Flask and A/B testing

 %% Cell type:code id:d55e4bb4-9f29-4f4f-bba6-05054718259b tags:

 ``` python
 import requests
 import time

 import pandas as pd
 # new import statement: requires pip3 install scipy
 from scipy import stats
 ```

 %% Cell type:markdown id:a5f26f67-000a-4d68-8033-3e46023b8196 tags:

 - `flask.request.args`: enables us to get the arguments passed as part of the URL
    - How do we pass arguments?
        - at the end of the URL, add a "?"
        - then separate argument-value pair by "="
        - use "&" as delimiter between two argument-value pairs
    - examples:
-        - http://34.123.132.20:5000/add?x=10&y=20
-        - http://34.123.132.20:5000/survey?major=CS
-        - http://34.123.132.20:5000/survey?major=Mechanical_Engineering
+        - http://35.226.223.87:5000/add?x=10&y=20
+        - http://35.226.223.87:5000/survey?major=CS
+        - http://35.226.223.87:5000/survey?major=Mechanical_Engineering

 %% Cell type:code id:8241e51c tags:

 ``` python
-base_url = "http://34.123.132.20:5000/"
+base_url = "http://35.226.223.87:5000/"
 ```

 %% Cell type:markdown id:23ba100b tags:

 ### `urllib.robotparser`

 - Documentation: https://docs.python.org/3/library/urllib.robotparser.html

 %% Cell type:code id:379c3ae5-7344-45b1-88c3-b35f0bd8eb5b tags:

 ``` python
 ```

 %% Cell type:code id:2e3fb01c-4281-4cbf-8828-98e04d27d09a tags:

 ``` python
 ```

 %% Cell type:code id:0eb41e31-490e-419b-af15-db66e0dd6fd1 tags:

 ``` python
 def friendly_get(url):
    """Fetch `url`, backing off whenever the server answers with HTTP 429.

    Keeps requesting until the status is not 429, sleeping for the number of
    seconds given by the "Retry-After" header (1 if it is absent) between
    attempts, then returns the response.
    """
    response = requests.get(url)
    # 429 means "too many requests": wait as instructed, then try again
    while response.status_code == 429:
        delay = int(response.headers.get("Retry-After", 1))
        print(f"sleep {delay}")
        time.sleep(delay)
        response = requests.get(url)
    response.raise_for_status()  # raise an exception on an error status
    return response

 friendly_get(base_url + "slow").text
 ```

 %% Cell type:markdown id:fcfa857a-10e7-4e74-adcc-9c4d384e9986 tags:

 #  A/B testing

 %% Cell type:code id:d4839880-5642-49ce-b1cf-938ddf409229 tags:

 ``` python
 df = pd.DataFrame({
    "click":    {"A": 50, "B": 55},
    "no-click": {"A": 50, "B": 45}
 })
 df
 # Which has the higher CTR A or B?
 ```

 %% Cell type:code id:341e9430-d7c2-4cba-a2f1-b5fb95a1efa6 tags:

 ``` python
 _, pvalue = stats.fisher_exact(df)
 pvalue
 # no evidence that A and B are different because pvalue is not less than 5%
 ```

 %% Cell type:markdown id:3fa3b996-ef70-4492-8221-dbe597af8541 tags:

 ### Two situations when pvalue will be lower than significance threshold

 1. Sample size is the same, but skew is very heavy --- unlikely to have that by chance
 2. Sample size is large, but skew is small

 %% Cell type:code id:245ac3cf-a7b4-4bd4-81d8-caf7fc4897fe tags:

 ``` python
 # Scenario 1:
 # Sample size is the same, but skew is very heavy ---
 # unlikely to have that by chance

 df = pd.DataFrame({
    "click":    {"A": 50, "B": 75},
    "no-click": {"A": 50, "B": 25}
 })
 _, pvalue = stats.fisher_exact(df)
 pvalue
 ```

 %% Cell type:code id:e3a1b056-3315-494d-9667-3bfa5ebfa863 tags:

 ``` python
 # Scenario 2:
 # Sample size is large, but skew is small

 df = pd.DataFrame({
    "click":    {"A": 500, "B": 550},
    "no-click": {"A": 500, "B": 450}
 })
 _, pvalue = stats.fisher_exact(df)
 pvalue
 ```