import re
import netaddr
import pandas as pd
from bisect import bisect
# Lookup table read once, at import time, from a path relative to the current
# working directory. lookup_region() bisects its 'low' column and returns the
# 'region' value of the row it lands on.
# NOTE(review): bisect only works if 'low' is sorted ascending -- confirm the
# CSV guarantees that ordering.
ips = pd.read_csv("ip2location.csv")
def lookup_region(ipaddr):
    """Return the ``region`` of the ``ips`` row selected for ``ipaddr``.

    Every ASCII letter in ``ipaddr`` is replaced with ``"0"`` before the
    address is parsed (presumably to cope with masked/anonymised octets --
    confirm against the callers).  The parsed address is converted to an
    integer and located in ``ips['low']`` by bisection.

    Args:
        ipaddr: textual IP address, possibly containing letters.

    Returns:
        The ``region`` value of the row just before the insertion point.

    Raises:
        Whatever ``netaddr.IPAddress`` raises for text it cannot parse.
    """
    digits_only = re.sub(r'[a-zA-Z]', '0', ipaddr)
    address_value = int(netaddr.IPAddress(digits_only))

    # bisect() is bisect_right: the insertion point falls after any equal
    # 'low', so the row just before it has the greatest 'low' <= address.
    insert_at = bisect(ips['low'], address_value)

    # NOTE: when the address is below every 'low', insert_at is 0 and the
    # index -1 selects the last row of the table.
    matched_row = ips.iloc[insert_at - 1]
    return matched_row['region']
class Filing:
    """Fields scraped with regular expressions from one filing's HTML.

    Attributes:
        dates: every ``YYYY-MM-DD`` substring (year 19xx/20xx, month 01-12,
            day 01-31; not calendar-validated) in document order,
            duplicates included.
        sic: the digits after the first ``SIC=`` as an ``int``, or ``None``
            when the HTML contains no such marker.
        addresses: one string per ``<div class="mailer">`` that contains at
            least one ``<span class="mailerAddress">``; the span texts are
            stripped of surrounding whitespace and joined with newlines.
    """

    # Compiled once for the class instead of on every call.
    _DATE_RE = re.compile(
        r"((19|20)\d{2}-(0[1-9]|1[0-2])-(0[1-9]|1[0-9]|2[0-9]|3[0-1]))"
    )
    _SIC_RE = re.compile(r"SIC=(\d+)")
    _MAILER_RE = re.compile(r'<div class="mailer">([\s\S]+?)</div>')
    _ADDRESS_LINE_RE = re.compile(
        r'<span class="mailerAddress">([\s\S]+?)</span>'
    )
    _STATE_RE = re.compile(r"\s([A-Z]{2})\s\d{5}")

    def __init__(self, html: str) -> None:
        """Parse ``html`` and populate ``dates``, ``sic`` and ``addresses``."""
        # The date pattern has inner groups for alternation only; group(0)
        # is the whole date, so no tuple unpacking is needed.
        self.dates = [m.group(0) for m in self._DATE_RE.finditer(html)]

        # One search instead of running the same findall twice.
        sic_match = self._SIC_RE.search(html)
        self.sic = int(sic_match.group(1)) if sic_match else None

        self.addresses = []
        for addr_html in self._MAILER_RE.findall(html):
            lines = [
                line.strip()
                for line in self._ADDRESS_LINE_RE.findall(addr_html)
            ]
            # A mailer block without any address span contributes nothing.
            if lines:
                self.addresses.append("\n".join(lines))

    def state(self):
        """Return the first two-letter state code found in the addresses.

        A state code is two capital letters preceded by whitespace and
        followed by whitespace and five digits.  Addresses are scanned in
        order; ``None`` is returned when none of them contains a match.
        """
        for address in self.addresses:
            match = self._STATE_RE.search(address)
            if match:
                return match.group(1)
        return None

    def __repr__(self) -> str:
        """Return the SIC code wrapped in angle brackets, e.g. ``<2834>``."""
        return f"<{self.sic}>"
