Skip to content
Snippets Groups Projects
Commit 6af3f2d8 authored by TYLER CARAZA-HARTER
Browse files

lec1

parent 322ed7d3
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id:34b260b6-a504-4cce-a9b3-a3566a8dbeb5 tags:
``` python
import random
import requests
import pandas as pd
# Download the list of station IDs, one per line.  The timeout keeps the
# notebook from hanging forever if the server stops responding.
r = requests.get(
    "https://pages.cs.wisc.edu/~harter/cs544/data/wi-stations/stations.txt",
    timeout=30,
)
r.raise_for_status()
# splitlines() handles both "\n" and "\r\n" endings, so a stray "\r" can never
# end up inside a station ID (the IDs are later spliced into download URLs).
stations = r.text.strip().splitlines()
# Keep 10 randomly chosen stations for the experiment.
stations = random.sample(stations, k=10)
# Build a skewed workload of 100 requests: the first sampled station gets 30%
# of the requests, the second 20%, and the remaining 8 share the other 50%
# evenly.  The weights list has exactly one entry per sampled station.
workload = random.choices(stations, k=100, weights=[0.3, 0.2] + [0.5 / 8] * 8)
```
%% Cell type:code id:c65f907c-049f-4110-a5ca-75772e76fdcf tags:
``` python
import numpy as np

# Quantile warm-up: for an even-length sample the 0.5 quantile (the median)
# falls between the two middle values, here 2 and 4.
np.quantile([1, 2, 4, 5], q=0.5)
```
%% Output
np.float64(3.0)
%% Cell type:code id:e913b0fa-138c-4fb1-99df-738e79b7b5ad tags:
``` python
# Display the generated request sequence as one space-separated string of station IDs.
" ".join(workload)
```
%% Output
'US1WIIW0014 US1WIPC0020 USC00478805 US1WIBR0019 US1WIPC0020 US1WIBR0019 USC00474391 US1WIPC0020 USC00470062 USC00478805 US1WIBR0019 US1WIBR0019 US1WIIW0014 USC00478329 US1WIIW0014 US1WIBR0019 USC00478329 USC00470062 US1WIBR0019 USC00474391 US1WIMM0001 US1WIIW0014 US1WIBR0019 US1WIBR0019 US1WIMM0001 US1WIIW0014 US1WIBR0019 US1WIBR0019 USC00478805 US1WIIW0014 US1WIBR0019 US1WIIW0014 US1WIDG0011 US1WIIW0014 US1WIIW0014 US1WIDG0011 US1WIDG0011 USC00478805 US1WIIW0014 USC00478329 US1WIDG0011 US1WIBR0019 US1WIIW0014 US1WIIW0014 US1WIDG0011 USC00478805 USC00474391 US1WIBR0019 US1WIMM0001 USC00478329 US1WIBR0019 US1WIIW0014 US1WIPC0020 US1WIIW0014 US1WIBR0019 US1WIBR0019 USC00474391 US1WIBR0019 US1WIDG0011 US1WIVL0014 US1WIIW0014 US1WIIW0014 USC00478805 USC00470062 US1WIIW0014 USC00474391 US1WIBR0019 US1WIVL0014 US1WIIW0014 US1WIBR0019 USC00470062 US1WIBR0019 US1WIBR0019 US1WIPC0020 USC00478805 US1WIPC0020 USC00470062 USC00478329 US1WIPC0020 US1WIMM0001 USC00478329 USC00478329 USC00474391 US1WIBR0019 US1WIBR0019 USC00478805 USC00470062 US1WIIW0014 USC00470062 US1WIMM0001 US1WIBR0019 USC00474391 US1WIIW0014 US1WIPC0020 US1WIIW0014 US1WIIW0014 US1WIBR0019 US1WIBR0019 US1WIIW0014 US1WIPC0020'
%% Cell type:code id:3a66f04d-2361-4ca9-8556-d781d9e5b96c tags:
``` python
import time
# Current wall-clock time as a float number of seconds since the Unix epoch.
time.time() # seconds since Jan 1, 1970
```
%% Output
1739200561.2493458
%% Cell type:code id:ed3e0c8a-6831-411c-b26c-2d14555127f0 tags:
``` python
# Measure how long a 2-second sleep takes by sampling the clock before and after it.
start = time.time()
time.sleep(2)
end = time.time()
# NOTE(review): time.time() follows the system wall clock, which can be adjusted
# while the code runs; time.perf_counter() is the monotonic alternative for
# measuring intervals.
# The difference is in seconds, so scale by 1000 to report milliseconds.
(end-start) * 1000 # milliseconds
```
%% Output
2000.2596378326416
%% Cell type:code id:508955f2-0071-4683-aae8-a8ddf8601570 tags:
``` python
# Example 1: FIFO Policy
from collections import deque

cache_size = 3
cache = {}  # key=station name, value=DataFrame with weather data for that station
evict_order = deque()  # evict from the left, try to keep whatever is on the right

# stats
hits = []  # 1 is a hit, 0 is a miss
latency_ms = []  # latency of get_station in milliseconds


def get_station(station):
    """Return the weather DataFrame for ``station``, using a FIFO cache.

    On a hit the cached DataFrame is returned and ``evict_order`` is left
    untouched, so entries leave the cache in the order they were inserted no
    matter how often they are read.  On a miss the station's CSV is
    downloaded and cached; if the cache then holds more than ``cache_size``
    entries, the station that was inserted first is evicted.

    Each successful call appends one entry to ``hits`` (1 = hit, 0 = miss)
    and one entry to ``latency_ms``.

    Args:
        station: Station ID; used as the cache key and as the file name stem
            in the download URL.

    Returns:
        The DataFrame for the station, with columns named station, date,
        element, value, m, q, s and obs.

    Raises:
        Whatever ``pd.read_csv`` raises when the download or parse fails.  In
        that case neither ``hits`` nor ``latency_ms`` is modified, so the two
        lists always have the same length.
    """
    # perf_counter() is monotonic, so the measured interval cannot be
    # distorted by wall-clock adjustments.
    start = time.perf_counter()
    df = cache.get(station)
    if df is not None:
        hits.append(1)
        print("hit", end=" ")
    else:
        print("miss", end=" ")
        df = pd.read_csv(f"https://pages.cs.wisc.edu/~harter/cs544/data/wi-stations/{station}.csv.gz",
                         names=["station", "date", "element", "value", "m", "q", "s", "obs"], low_memory=False)
        # Record the miss only once the download succeeded.
        hits.append(0)
        cache[station] = df
        evict_order.append(station)
        if len(cache) > cache_size:
            # The leftmost entry is the one that was inserted first.
            victim = evict_order.popleft()
            cache.pop(victim)
    latency_ms.append((time.perf_counter() - start) * 1000)
    return df
# Replay every request of the workload through get_station, in order.
for station in workload:
    get_station(station)
```
%% Output
miss miss miss miss hit hit miss miss miss miss miss hit miss miss hit hit hit miss miss miss miss miss miss hit hit hit hit hit miss hit hit hit miss miss hit hit hit hit hit miss hit miss hit hit miss miss miss miss miss miss hit miss miss hit miss hit miss hit miss miss miss hit miss miss hit miss miss miss miss hit miss miss hit miss miss hit miss miss miss miss hit hit miss miss hit miss miss miss hit miss miss miss miss miss hit hit miss hit hit hit
%% Cell type:code id:01d83645-241b-478c-9142-dde57d778b63 tags:
``` python
# Summarise the run recorded in `hits` and `latency_ms`.
print()  # the hit/miss trace was printed with end=" ", so finish that line first
hit_count = sum(hits)
print("Hits:", hit_count)
print("Hit Rate:", hit_count / len(hits))
total_latency_ms = sum(latency_ms)
print("Avg Latency:", total_latency_ms / len(latency_ms))
# Median and 99th-percentile latency, both in milliseconds.
print("Median Latency:", np.quantile(latency_ms, 0.5))
print("p99 Latency:", np.quantile(latency_ms, 0.99))
```
%% Output
Hits: 40
Hit Rate: 0.4
Avg Latency: 16.57418727874756
Median Latency: 15.306830406188965
p99 Latency: 93.56653213500977
%% Cell type:code id:2f4e837c-da54-48f3-bd04-0465064c2df6 tags:
``` python
# Example 2: LRU Policy
from collections import deque

cache_size = 3
cache = {}  # key=station name, value=DataFrame with weather data for that station
evict_order = deque()  # evict from the left, try to keep whatever is on the right

# stats
hits = []  # 1 is a hit, 0 is a miss
latency_ms = []  # latency of get_station in milliseconds


def get_station(station):
    """Return the weather DataFrame for ``station``, using an LRU cache.

    On a hit the cached DataFrame is returned and the station is moved to the
    right end of ``evict_order``, so the least recently used station is always
    the leftmost one.  On a miss the station's CSV is downloaded and cached;
    if the cache then holds more than ``cache_size`` entries, the leftmost
    (least recently used) station is evicted.

    Each successful call appends one entry to ``hits`` (1 = hit, 0 = miss)
    and one entry to ``latency_ms``.

    Args:
        station: Station ID; used as the cache key and as the file name stem
            in the download URL.

    Returns:
        The DataFrame for the station, with columns named station, date,
        element, value, m, q, s and obs.

    Raises:
        Whatever ``pd.read_csv`` raises when the download or parse fails.  In
        that case neither ``hits`` nor ``latency_ms`` is modified, so the two
        lists always have the same length.
    """
    # perf_counter() is monotonic, so the measured interval cannot be
    # distorted by wall-clock adjustments.
    start = time.perf_counter()
    df = cache.get(station)
    if df is not None:
        hits.append(1)
        print("hit", end=" ")
        # Mark the station as most recently used: move it to the right end.
        evict_order.remove(station)
        evict_order.append(station)
    else:
        print("miss", end=" ")
        df = pd.read_csv(f"https://pages.cs.wisc.edu/~harter/cs544/data/wi-stations/{station}.csv.gz",
                         names=["station", "date", "element", "value", "m", "q", "s", "obs"], low_memory=False)
        # Record the miss only once the download succeeded.
        hits.append(0)
        cache[station] = df
        evict_order.append(station)
        if len(cache) > cache_size:
            # The leftmost entry is the least recently used one.
            victim = evict_order.popleft()
            cache.pop(victim)
    latency_ms.append((time.perf_counter() - start) * 1000)
    return df
# Replay every request of the workload through get_station, in order.
for station in workload:
    get_station(station)
```
%% Output
miss miss miss miss hit hit miss hit miss miss miss hit miss miss hit hit hit miss hit miss miss miss miss hit hit hit hit hit miss hit hit hit miss hit hit hit hit miss hit miss miss miss miss hit hit miss miss miss miss miss hit miss miss hit hit hit miss hit miss miss miss hit miss miss hit miss miss miss miss hit miss hit hit miss miss hit miss miss hit miss hit hit miss miss hit miss miss miss hit miss miss miss miss miss hit hit miss hit hit hit
%% Cell type:code id:70dfb612-f83b-4d78-899d-afea29ea24de tags:
``` python
# Summarise the run recorded in `hits` and `latency_ms`.
print()  # the hit/miss trace was printed with end=" ", so finish that line first
hit_count = sum(hits)
print("Hits:", hit_count)
print("Hit Rate:", hit_count / len(hits))
total_latency_ms = sum(latency_ms)
print("Avg Latency:", total_latency_ms / len(latency_ms))
# Median and 99th-percentile latency, both in milliseconds.
print("Median Latency:", np.quantile(latency_ms, 0.5))
print("p99 Latency:", np.quantile(latency_ms, 0.99))
```
%% Output
Hits: 44
Hit Rate: 0.44
Avg Latency: 16.295619010925293
Median Latency: 15.197038650512695
p99 Latency: 95.38349866867067
%% Cell type:code id:1afdfed8-d89b-4bc1-a0a7-688d751bc9c6 tags:
``` python
```
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment