JIHONG MIN
main

Repository

# Inside the server container
python3 client.py DbToHdfs
python3 client.py BlockLocations -f <file_path>
python3 client.py PartitionByCounty
python3 client.py CalcAvgLoan -c <county_code>
docker build . -f Dockerfile.hdfs -t p4-hdfs
docker build . -f Dockerfile.namenode -t p4-nn
docker build . -f Dockerfile.datanode -t p4-dn
docker build . -f Dockerfile.mysql -t p4-mysql
docker build . -f Dockerfile.server -t p4-server
export PROJECT=p4
mysql> show tables;
+-----------------+
| Tables_in_CS544 |
+-----------------+
| loan_types      |
| loans           |
+-----------------+
mysql> select count(*) from new_table;
+----------+
| count(*) |
+----------+
|   426716 |
+----------+
14.4 M  43.2 M  hdfs://boss:9000/hdma-wi-2021.parquet
{'dd8706a32f34': 6, '21a88993bb15': 4, '47c17821001f': 5}
r = requests.get("http://boss:9870/webhdfs/v1/<filepath>?op=GETFILESTATUS")
r.raise_for_status()
print(r.json())
{'FileStatus': {...
  'blockSize': 1048576,
  ...
  'length': 16642976,
  ...
  'replication': 1,
  'storagePolicy': 0,
  'type': 'FILE'}}
  ```
  ├── partitioned/
  │   ├── 55001.parquet
  │   ├── 55003.parquet
  │   └── ...
  ```
14.4 M  43.2 M  hdfs://boss:9000/hdma-wi-2021.parquet
19.3 M  19.3 M  hdfs://boss:9000/partitioned
docker build . -f Dockerfile.hdfs -t p4-hdfs
docker build . -f Dockerfile.namenode -t p4-nn
docker build . -f Dockerfile.datanode -t p4-dn
docker build . -f Dockerfile.mysql -t p4-mysql
docker build . -f Dockerfile.server -t p4-server
docker compose up -d