Skip to content
Snippets Groups Projects
Commit 80ca03dd authored by JAE HYEON PARK's avatar JAE HYEON PARK
Browse files

mp6 completed.

parent 24093392
Branches master
No related tags found
No related merge requests found
import re
import netaddr
import pandas as pd
from bisect import bisect
# Load the IP-range table once at import time; lookup_region() reads its
# 'low' and 'region' columns.
# NOTE(review): lookup_region() bisects on 'low', so the rows are presumably
# sorted ascending by 'low' -- confirm against the CSV.
ips = pd.read_csv("ip2location.csv")
def lookup_region(ipaddr):
    """Return the ``region`` value of the ``ips`` row that precedes *ipaddr*.

    Every ASCII letter in *ipaddr* is replaced by ``"0"`` before parsing
    (presumably to cope with partially anonymized addresses -- confirm with
    the data source). The address is then converted to its integer value and
    located in the ``low`` column of the module-level ``ips`` table.

    Args:
        ipaddr: Textual IP address, possibly containing letters.

    Returns:
        The ``region`` entry of the last row whose ``low`` is less than or
        equal to the address value.
    """
    digits_only = re.sub(r'[a-zA-Z]', '0', ipaddr)
    address_value = int(netaddr.IPAddress(digits_only))

    # bisect() yields the insertion point to the right of any equal entry, so
    # the row just before it is the last one with low <= address_value.
    # NOTE(review): if the address is below every 'low', the index becomes -1
    # and the final row is returned -- confirm the table starts at 0.
    insertion_point = bisect(ips['low'], address_value)
    matching_row = ips.iloc[insertion_point - 1]
    return matching_row['region']
class Filing:
    """Facts extracted with regular expressions from one filing's HTML.

    Attributes:
        dates: Every ``YYYY-MM-DD`` string (years 19xx/20xx) found in the
            HTML, in document order.
        sic: Integer following the first ``SIC=`` in the HTML, or ``None``
            when there is none.
        addresses: One string per ``<div class="mailer">`` block that
            contains at least one ``<span class="mailerAddress">``; the
            stripped span texts are joined with newlines.
    """

    # Compiled once for the class instead of being looked up on every call.
    _DATE_RE = re.compile(
        r"((19|20)\d{2}-(0[1-9]|1[0-2])-(0[1-9]|1[0-9]|2[0-9]|3[0-1]))"
    )
    _SIC_RE = re.compile(r"SIC=(\d+)")
    _MAILER_RE = re.compile(r'<div class="mailer">([\s\S]+?)</div>')
    _ADDRESS_LINE_RE = re.compile(
        r'<span class="mailerAddress">([\s\S]+?)</span>'
    )
    _STATE_RE = re.compile(r"\s([A-Z]{2})\s\d{5}")

    def __init__(self, html: str) -> None:
        """Parse *html* and populate ``dates``, ``sic`` and ``addresses``."""
        # group(0) is the whole date; no need to build per-match tuples.
        self.dates = [match.group(0) for match in self._DATE_RE.finditer(html)]

        # A single search replaces running the same findall twice.
        sic_match = self._SIC_RE.search(html)
        self.sic = int(sic_match.group(1)) if sic_match else None

        self.addresses = []
        for mailer_html in self._MAILER_RE.findall(html):
            lines = [
                line.strip()
                for line in self._ADDRESS_LINE_RE.findall(mailer_html)
            ]
            # Mailer blocks without any address span contribute nothing.
            if lines:
                self.addresses.append("\n".join(lines))

    def state(self):
        """Return the first two-letter state code found in the addresses.

        A state code is two capital letters surrounded by whitespace and
        followed by five digits. Addresses are checked in order; ``None`` is
        returned when no address contains one.
        """
        for address in self.addresses:
            state_match = self._STATE_RE.search(address)
            if state_match:
                return state_match.group(1)
        return None

    def __repr__(self) -> str:
        """Return the SIC code wrapped in angle brackets, e.g. ``<1234>``."""
        return f"<{self.sic}>"
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment