Skip to content
Snippets Groups Projects
Commit 26470de5 authored by gsingh58's avatar gsingh58
Browse files

lec12 and 13: ipynb files added

parent 406a4231
No related branches found
No related tags found
No related merge requests found
Pipeline #787521 passed
Showing with 958 additions and 0 deletions
%% Cell type:markdown id:cf313adf tags:
# Web 2: Flask
%% Cell type:code id:d55e4bb4-9f29-4f4f-bba6-05054718259b tags:
``` python
import requests
import time
import urllib.robotparser
```
%% Cell type:markdown id:527600aa tags:
### Rate-limited webpage parsing
- `requests` module:
- `resp = requests.get(<URL>)` method: enables us to send HTTP GET request
- `resp.status_code`: status code of the response
- `resp.text`: `str` text content of the response
- `resp.headers`: `dict` content of response headers
%% Cell type:code id:8241e51c tags:
``` python
# No trailing slash: every caller appends a path that already starts with "/"
# (e.g. base_url + "/slow"), so a trailing slash here would yield "//slow".
base_url = "http://34.123.132.20:5000"
```
%% Cell type:code id:6cc81b85 tags:
``` python
def friendly_get(url, timeout=10):
    """Fetch url with HTTP GET, backing off politely when rate-limited.

    On a 429 response, sleep for the number of seconds given in the
    Retry-After header (1 second if the header is absent or unparsable)
    and try again.

    Args:
        url: address to request.
        timeout: seconds to wait for the server before giving up, so a
            stalled server cannot hang the caller forever.

    Returns:
        The requests response object of the first non-429 reply.

    Raises:
        requests.HTTPError: the final reply had a 4xx/5xx status.
    """
    while True:
        resp = requests.get(url, timeout=timeout)
        if resp.status_code != 429:
            resp.raise_for_status()
            return resp
        try:
            # Retry-After may also be an HTTP-date; int() rejects that form.
            seconds = max(0, int(resp.headers.get("Retry-After", 1)))
        except ValueError:
            seconds = 1
        print(f"sleep {seconds}")
        time.sleep(seconds)


friendly_get(base_url + "/slow").text
```
%% Output
'welcome!'
import flask # requires installation if not already installed - pip3 install flask
import time
import json
app = flask.Flask("my application") # name of the web application can be anything
major_counts = {}
last_visit = 0 # TODO: dict of visit times, for each IP
# TODO: create a slow page
# GOAL: don't let people visit this more often than once per 3s
# flask.request.remote_addr: enables us to take action based on the IP address from
# which we receive the request
# Time of the most recent accepted /slow visit, keyed by client IP address.
# NOTE: grows by one entry per distinct client and is never pruned.
_slow_last_visit = {}


@app.route("/slow")
def slow():
    """Serve the /slow page at most once per 3 seconds for each client IP.

    Returns:
        "welcome!" when the caller's previous accepted visit was more than
        3 seconds ago (or there was none); otherwise a 429 response whose
        Retry-After header tells the client to wait 3 seconds.
    """
    client = flask.request.remote_addr
    print("VISITOR", client)
    now = time.time()
    previous = _slow_last_visit.get(client)
    # Track visits per IP so one busy client cannot lock out everyone else.
    if previous is None or now - previous > 3:
        _slow_last_visit[client] = now
        return "welcome!"
    return flask.Response("<b>go away</b>",
                          status=429,
                          headers={"Retry-After": "3"})
# TODO: write code for creating a page for time.html
# TEMPLATE semi-static / semi-dynamic
@app.route("/time.html")
def clock():
    """Serve time.html with its REPLACE_ME placeholder set to the current time.

    The template is read from the working directory on every request and
    the placeholder is replaced with time.time() rendered as a string.
    """
    with open("time.html") as template_file:
        template = template_file.read()
    timestamp = str(time.time())
    return template.replace("REPLACE_ME", timestamp)
# TODO: create a dynamic page ha.html
# DYNAMIC
@app.route("/ha.html")
def laugh():
    """Return a page generated in code: the text "ha " repeated 1000 times."""
    return "".join("ha " for _ in range(1000))
# STATIC
# @ operator is called a "decorator"
@app.route("/")
def home():
    """Serve the site root by returning the contents of index.html unchanged."""
    with open("index.html") as page:
        return page.read()
if __name__ == "__main__":
    # Debug mode enables the Werkzeug interactive debugger, which lets anyone
    # who can reach the server execute arbitrary code. Keep it off while
    # listening on all interfaces (0.0.0.0); enable it only on localhost.
    app.run(host="0.0.0.0", debug=False, threaded=False)
# app.run never returns, so don't define functions
# after this (the def lines will never be reached)
%% Cell type:markdown id:cf313adf tags:
# Web 2: Flask
%% Cell type:code id:d55e4bb4-9f29-4f4f-bba6-05054718259b tags:
``` python
import requests
import time
```
%% Cell type:markdown id:527600aa tags:
### Rate-limited webpage parsing
- `requests` module:
- `resp = requests.get(<URL>)` method: enables us to send HTTP GET request
- `resp.status_code`: status code of the response
- `resp.text`: `str` text content of the response
- `resp.headers`: `dict` content of response headers
%% Cell type:code id:8241e51c tags:
``` python
# No trailing slash: every caller appends a path that already starts with "/"
# (e.g. base_url + "/slow"), so a trailing slash here would yield "//slow".
base_url = "http://34.123.132.20:5000"
```
%% Cell type:code id:6cc81b85 tags:
``` python
def friendly_get(url):
    """Send an HTTP GET to url and return the response.

    Raises requests.HTTPError when the reply has a 4xx/5xx status.
    """
    # A single pass suffices: the request either raises or returns.
    response = requests.get(url)
    response.raise_for_status()
    return response


friendly_get(base_url + "/slow").text
```
import flask # requires installation if not already installed - pip3 install flask
import time
import json
app = flask.Flask("my application") # name of the web application can be anything
major_counts = {}
last_visit = 0 # TODO: dict of visit times, for each IP
# TODO: create a slow page
# GOAL: don't let people visit this more often than once per 3s
# flask.request.remote_addr: enables us to take action based on the IP address from
# which we receive the request
# TODO: write code for creating a page for time.html
# TEMPLATE semi-static / semi-dynamic
# TODO: create a dynamic page ha.html
# DYNAMIC
# STATIC
# @ operator is called a "decorator"
# STATIC
# @ operator is called a "decorator"
@app.route("/")
def home():
    """Serve the site root by returning the contents of index.html unchanged."""
    with open("index.html") as page:
        return page.read()
if __name__ == "__main__":
    # Debug mode enables the Werkzeug interactive debugger, which lets anyone
    # who can reach the server execute arbitrary code. Keep it off while
    # listening on all interfaces (0.0.0.0); enable it only on localhost.
    app.run(host="0.0.0.0", debug=False, threaded=False)
# app.run never returns, so don't define functions
# after this (the def lines will never be reached)
%% Cell type:markdown id:cf313adf tags:
# Web 2: Flask
%% Cell type:code id:d55e4bb4-9f29-4f4f-bba6-05054718259b tags:
``` python
import requests
import time
```
%% Cell type:markdown id:527600aa tags:
### Rate-limited webpage parsing
- `requests` module:
- `resp = requests.get(<URL>)` method: enables us to send HTTP GET request
- `resp.status_code`: status code of the response
- `resp.text`: `str` text content of the response
- `resp.headers`: `dict` content of response headers
%% Cell type:code id:8241e51c tags:
``` python
# No trailing slash: every caller appends a path that already starts with "/"
# (e.g. base_url + "/slow"), so a trailing slash here would yield "//slow".
base_url = "http://34.123.132.20:5000"
```
%% Cell type:code id:6cc81b85 tags:
``` python
def friendly_get(url):
    """Send an HTTP GET to url and return the response.

    Raises requests.HTTPError when the reply has a 4xx/5xx status.
    """
    # A single pass suffices: the request either raises or returns.
    response = requests.get(url)
    response.raise_for_status()
    return response


friendly_get(base_url + "/slow").text
```
import flask # requires installation if not already installed - pip3 install flask
import time
import json
app = flask.Flask("my application") # name of the web application can be anything
major_counts = {}
last_visit = 0 # TODO: dict of visit times, for each IP
# TODO: create a slow page
# GOAL: don't let people visit this more often than once per 3s
# flask.request.remote_addr: enables us to take action based on the IP address from
# which we receive the request
# TODO: write code for creating a page for time.html
# TEMPLATE semi-static / semi-dynamic
# TODO: create a dynamic page ha.html
# DYNAMIC
# STATIC
# @ operator is called a "decorator"
# STATIC
# @ operator is called a "decorator"
@app.route("/")
def home():
    """Serve the site root by returning the contents of index.html unchanged."""
    with open("index.html") as page:
        return page.read()
if __name__ == "__main__":
    # Debug mode enables the Werkzeug interactive debugger, which lets anyone
    # who can reach the server execute arbitrary code. Keep it off while
    # listening on all interfaces (0.0.0.0); enable it only on localhost.
    app.run(host="0.0.0.0", debug=False, threaded=False)
# app.run never returns, so don't define functions
# after this (the def lines will never be reached)
%% Cell type:markdown id:cf313adf tags:
# Web 3: More Flask
%% Cell type:code id:d55e4bb4-9f29-4f4f-bba6-05054718259b tags:
``` python
import requests
import time
import urllib.robotparser
```
%% Cell type:markdown id:527600aa tags:
### Rate-limited webpage parsing
- `requests` module:
- `resp = requests.get(<URL>)` method: enables us to send HTTP GET request
- `resp.status_code`: status code of the response
- `resp.text`: `str` text content of the response
- `resp.headers`: `dict` content of response headers
%% Cell type:code id:8241e51c tags:
``` python
# No trailing slash: every caller appends a path that already starts with "/"
# (e.g. base_url + "/never"). With a trailing slash the path becomes
# "//never", which no longer matches a robots.txt rule for "/never".
base_url = "http://34.123.132.20:5000"
```
%% Cell type:markdown id:23ba100b tags:
### `urllib.robotparser`
- Documentation: https://docs.python.org/3/library/urllib.robotparser.html
%% Cell type:code id:379c3ae5-7344-45b1-88c3-b35f0bd8eb5b tags:
``` python
# Build a parser for the site's robots.txt rules.
rp = urllib.robotparser.RobotFileParser()
rp.set_url(base_url + "/robots.txt")
# Download the rules file from the URL set above and parse it.
rp.read()
# Ask whether a crawler identifying itself as "cs320bot" may fetch /slow.
rp.can_fetch("cs320bot", base_url + "/slow")
```
%% Output
True
%% Cell type:code id:2e3fb01c-4281-4cbf-8828-98e04d27d09a tags:
``` python
# Ask the robots.txt parser whether the "cs320bot" user agent may fetch /never.
rp.can_fetch("cs320bot", base_url + "/never")
```
%% Output
True
%% Cell type:code id:6cc81b85 tags:
``` python
def friendly_get(url, timeout=10):
    """Fetch url with HTTP GET while respecting robots.txt and rate limits.

    The request is refused up front when the robots.txt rules loaded into
    rp disallow url for the "cs320bot" user agent. On a 429 response, sleep
    for the number of seconds given in the Retry-After header (1 second if
    the header is absent or unparsable) and try again.

    Args:
        url: address to request.
        timeout: seconds to wait for the server before giving up, so a
            stalled server cannot hang the caller forever.

    Returns:
        The requests response object of the first non-429 reply.

    Raises:
        PermissionError: robots.txt disallows fetching url.
        requests.HTTPError: the final reply had a 4xx/5xx status.
    """
    if not rp.can_fetch("cs320bot", url):
        # PermissionError is still an Exception, so existing handlers work.
        raise PermissionError("you're not supposed to visit that page")
    while True:
        resp = requests.get(url, timeout=timeout)
        if resp.status_code != 429:
            resp.raise_for_status()
            return resp
        try:
            # Retry-After may also be an HTTP-date; int() rejects that form.
            seconds = max(0, int(resp.headers.get("Retry-After", 1)))
        except ValueError:
            seconds = 1
        print(f"sleep {seconds}")
        time.sleep(seconds)


friendly_get(base_url + "/slow").text
```
%% Output
'welcome!'
import flask # requires installation if not already installed - pip3 install flask
import time
import json
app = flask.Flask("my application") # name of the web application can be anything
major_counts = {}
last_visit = 0 # TODO: dict of visit times, for each IP
# TODO: create a survey page
# flask.request.args: enables us to get the arguments passed as part of the URL
@app.route("/survey")
def survey():
    """Record one vote for the major given in the query string.

    Reads the "major" URL argument (e.g. /survey?major=CS), defaulting to
    "unknown" when it is absent, increments that major's tally in the
    module-level major_counts dict, and returns all tallies as JSON text.
    """
    major = flask.request.args.get("major", "unknown")
    major_counts[major] = major_counts.get(major, 0) + 1
    return "MAJORS: \n" + json.dumps(major_counts)
# TODO: create an add page
@app.route("/add")
def adder():
    """Add the numeric URL arguments x and y (e.g. /add?x=1&y=2).

    Returns:
        "<x> + <y> = <sum>" on success; a prompt when x or y is missing;
        a 400 response when either value is not a valid number.
    """
    args = flask.request.args
    try:
        x = float(args["x"])
        y = float(args["y"])
    except KeyError:
        return "Please specify x and y."
    except ValueError:
        # Previously non-numeric input escaped as an unhandled 500 error.
        return "x and y must be numbers.", 400
    return f"{x} + {y} = {x+y}"
# TODO: create a never page
@app.route("/never")
def never():
    """Return the fixed text of the page that robots.txt asks crawlers to avoid."""
    message = "humans only, no bots allowed!"
    return message
# TODO: create a robots.txt page
# flask.Response: enables us to create a response object instance
# Arguments: str representing response, headers dict representing metadata
@app.route("/robots.txt")
def bot_rules():
    """Serve robots.txt as plain text: all user agents are disallowed from /never."""
    rules = "\n".join(["User-Agent: *", "Disallow: /never", ""])
    return flask.Response(rules, headers={"Content-Type": "text/plain"})
# TODO: create a slow page
# GOAL: don't let people visit this more often than once per 3s
# flask.request.remote_addr: enables us to take action based on the IP address from
# which we receive the request
# Time of the most recent accepted /slow visit, keyed by client IP address.
# NOTE: grows by one entry per distinct client and is never pruned.
_slow_last_visit = {}


@app.route("/slow")
def slow():
    """Serve the /slow page at most once per 3 seconds for each client IP.

    Returns:
        "welcome!" when the caller's previous accepted visit was more than
        3 seconds ago (or there was none); otherwise a 429 response whose
        Retry-After header tells the client to wait 3 seconds.
    """
    client = flask.request.remote_addr
    print("VISITOR", client)
    now = time.time()
    previous = _slow_last_visit.get(client)
    # Track visits per IP so one busy client cannot lock out everyone else.
    if previous is None or now - previous > 3:
        _slow_last_visit[client] = now
        return "welcome!"
    return flask.Response("<b>go away</b>",
                          status=429,
                          headers={"Retry-After": "3"})
# TODO: write code for creating a page for time.html
# TEMPLATE semi-static / semi-dynamic
@app.route("/time.html")
def clock():
    """Serve time.html with its REPLACE_ME placeholder set to the current time.

    The template is read from the working directory on every request and
    the placeholder is replaced with time.time() rendered as a string.
    """
    with open("time.html") as template_file:
        template = template_file.read()
    timestamp = str(time.time())
    return template.replace("REPLACE_ME", timestamp)
# TODO: create a dynamic page ha.html
# DYNAMIC
@app.route("/ha.html")
def laugh():
    """Return a page generated in code: the text "ha " repeated 1000 times."""
    return "".join("ha " for _ in range(1000))
# STATIC
# @ operator is called a "decorator"
@app.route("/")
def home():
    """Serve the site root by returning the contents of index.html unchanged."""
    with open("index.html") as page:
        return page.read()
if __name__ == "__main__":
    # Debug mode enables the Werkzeug interactive debugger, which lets anyone
    # who can reach the server execute arbitrary code. Keep it off while
    # listening on all interfaces (0.0.0.0); enable it only on localhost.
    app.run(host="0.0.0.0", debug=False, threaded=False)
# app.run never returns, so don't define functions
# after this (the def lines will never be reached)
%% Cell type:markdown id:cf313adf tags:
# Web 3: More Flask
%% Cell type:code id:d55e4bb4-9f29-4f4f-bba6-05054718259b tags:
``` python
import requests
import time
```
%% Cell type:markdown id:527600aa tags:
### Rate-limited webpage parsing
- `requests` module:
- `resp = requests.get(<URL>)` method: enables us to send HTTP GET request
- `resp.status_code`: status code of the response
- `resp.text`: `str` text content of the response
- `resp.headers`: `dict` content of response headers
%% Cell type:code id:8241e51c tags:
``` python
# No trailing slash: every caller appends a path that already starts with "/"
# (e.g. base_url + "/slow"), so a trailing slash here would yield "//slow".
base_url = "http://34.123.132.20:5000"
```
%% Cell type:markdown id:23ba100b tags:
### `urllib.robotparser`
- Documentation: https://docs.python.org/3/library/urllib.robotparser.html
%% Cell type:code id:379c3ae5-7344-45b1-88c3-b35f0bd8eb5b tags:
``` python
```
%% Cell type:code id:2e3fb01c-4281-4cbf-8828-98e04d27d09a tags:
``` python
```
%% Cell type:code id:6cc81b85 tags:
``` python
def friendly_get(url):
    """Send an HTTP GET to url and return the response.

    Raises requests.HTTPError when the reply has a 4xx/5xx status.
    """
    # A single pass suffices: the request either raises or returns.
    response = requests.get(url)
    response.raise_for_status()
    return response


friendly_get(base_url + "/slow").text
```
import flask # requires installation if not already installed - pip3 install flask
import time
import json
app = flask.Flask("my application") # name of the web application can be anything
major_counts = {}
last_visit = 0 # TODO: dict of visit times, for each IP
# TODO: create a survey page
# flask.request.args: enables us to get the arguments passed as part of the URL
# TODO: create an add page
# TODO: create a never page
# TODO: create a robots.txt page
# flask.Response: enables us to create a response object instance
# Arguments: str representing response, headers dict representing metadata
# TODO: create a slow page
# GOAL: don't let people visit this more often than once per 3s
# flask.request.remote_addr: enables us to take action based on the IP address from
# which we receive the request
# TODO: write code for creating a page for time.html
# TEMPLATE semi-static / semi-dynamic
# TODO: create a dynamic page ha.html
# DYNAMIC
# STATIC
# @ operator is called a "decorator"
# STATIC
# @ operator is called a "decorator"
@app.route("/")
def home():
    """Serve the site root by returning the contents of index.html unchanged."""
    with open("index.html") as page:
        return page.read()
if __name__ == "__main__":
    # Debug mode enables the Werkzeug interactive debugger, which lets anyone
    # who can reach the server execute arbitrary code. Keep it off while
    # listening on all interfaces (0.0.0.0); enable it only on localhost.
    app.run(host="0.0.0.0", debug=False, threaded=False)
# app.run never returns, so don't define functions
# after this (the def lines will never be reached)
%% Cell type:markdown id:cf313adf tags:
# Web 3: More Flask
%% Cell type:code id:d55e4bb4-9f29-4f4f-bba6-05054718259b tags:
``` python
import requests
import time
```
%% Cell type:markdown id:527600aa tags:
### Rate-limited webpage parsing
- `requests` module:
- `resp = requests.get(<URL>)` method: enables us to send HTTP GET request
- `resp.status_code`: status code of the response
- `resp.text`: `str` text content of the response
- `resp.headers`: `dict` content of response headers
%% Cell type:code id:8241e51c tags:
``` python
# No trailing slash: every caller appends a path that already starts with "/"
# (e.g. base_url + "/slow"), so a trailing slash here would yield "//slow".
base_url = "http://34.123.132.20:5000"
```
%% Cell type:markdown id:23ba100b tags:
### `urllib.robotparser`
- Documentation: https://docs.python.org/3/library/urllib.robotparser.html
%% Cell type:code id:379c3ae5-7344-45b1-88c3-b35f0bd8eb5b tags:
``` python
```
%% Cell type:code id:2e3fb01c-4281-4cbf-8828-98e04d27d09a tags:
``` python
```
%% Cell type:code id:6cc81b85 tags:
``` python
def friendly_get(url):
    """Send an HTTP GET to url and return the response.

    Raises requests.HTTPError when the reply has a 4xx/5xx status.
    """
    # A single pass suffices: the request either raises or returns.
    response = requests.get(url)
    response.raise_for_status()
    return response


friendly_get(base_url + "/slow").text
```
import flask # requires installation if not already installed - pip3 install flask
import time
import json
app = flask.Flask("my application") # name of the web application can be anything
major_counts = {}
last_visit = 0 # TODO: dict of visit times, for each IP
# TODO: create a survey page
# flask.request.args: enables us to get the arguments passed as part of the URL
# TODO: create an add page
# TODO: create a never page
# TODO: create a robots.txt page
# flask.Response: enables us to create a response object instance
# Arguments: str representing response, headers dict representing metadata
# TODO: create a slow page
# GOAL: don't let people visit this more often than once per 3s
# flask.request.remote_addr: enables us to take action based on the IP address from
# which we receive the request
# TODO: write code for creating a page for time.html
# TEMPLATE semi-static / semi-dynamic
# TODO: create a dynamic page ha.html
# DYNAMIC
# STATIC
# @ operator is called a "decorator"
# STATIC
# @ operator is called a "decorator"
@app.route("/")
def home():
    """Serve the site root by returning the contents of index.html unchanged."""
    with open("index.html") as page:
        return page.read()
if __name__ == "__main__":
    # Debug mode enables the Werkzeug interactive debugger, which lets anyone
    # who can reach the server execute arbitrary code. Keep it off while
    # listening on all interfaces (0.0.0.0); enable it only on localhost.
    app.run(host="0.0.0.0", debug=False, threaded=False)
# app.run never returns, so don't define functions
# after this (the def lines will never be reached)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment