# Web 3: More Flask

In [1]:
import requests
import time
import urllib.robotparser

### Rate-limited webpage parsing

- `requests` module:
    - `resp = requests.get(<URL>)` method: enables us to send HTTP GET request
    - `resp.status_code`: status code of the response
    - `resp.text`: `str` text content of the response
    - `resp.headers`: `dict` content of response headers

In [2]:
base_url = "http://34.123.132.20:5000/"

### `urllib.robotparser`

- Documentation: https://docs.python.org/3/library/urllib.robotparser.html

In [3]:
rp = urllib.robotparser.RobotFileParser()
rp.set_url(base_url + "/robots.txt")
rp.read()
rp.can_fetch("cs320bot", base_url + "/slow")

True

In [4]:
rp.can_fetch("cs320bot", base_url + "/never")

True

In [5]:
def friendly_get(url):
    if not rp.can_fetch("cs320bot", url):
        raise Exception("you're not supposed to visit that page")
    while True:
        resp = requests.get(url)
        if resp.status_code == 429:
            seconds = int(resp.headers.get("Retry-After", 1))
            print(f"sleep {seconds}")
            time.sleep(seconds)
            continue
        resp.raise_for_status() # raise exception if not 200
        return resp
    
friendly_get(base_url + "/slow").text

'welcome!'