From 78d8c6b067a059a3c3ca318001616732438c3226 Mon Sep 17 00:00:00 2001
From: TYLER CARAZA-HARTER <tharter@cs544-tharter.cs.wisc.edu>
Date: Sat, 1 Mar 2025 20:14:19 -0600
Subject: [PATCH] HDFS starter

---
 lec/18-hdfs/docker-compose.yml  | 22 ++++++++++++++++++++++
 lec/18-hdfs/hdfs.Dockerfile     |  9 +++++++++
 lec/18-hdfs/notebook.Dockerfile |  4 ++++
 3 files changed, 35 insertions(+)
 create mode 100644 lec/18-hdfs/docker-compose.yml
 create mode 100644 lec/18-hdfs/hdfs.Dockerfile
 create mode 100644 lec/18-hdfs/notebook.Dockerfile

diff --git a/lec/18-hdfs/docker-compose.yml b/lec/18-hdfs/docker-compose.yml
new file mode 100644
index 0000000..46f4e66
--- /dev/null
+++ b/lec/18-hdfs/docker-compose.yml
@@ -0,0 +1,22 @@
+# Two services, each limited to 2g of memory. Neither has a "build:" key, so
+# Compose does not build the p4-hdfs and p4-nb images itself.
+services:
+  # Hostname "main"; the command is overridden so the container just sleeps.
+  hdfs:
+    image: p4-hdfs
+    hostname: main
+    command: sleep infinity
+    # Port 9870 is published on the host's loopback address only.
+    ports: ["127.0.0.1:9870:9870"]
+    deploy:
+      resources: {limits: {memory: 2g}}
+
+  # Runs the p4-nb image's default command.
+  nb:
+    image: p4-nb
+    # Port 5000 is published on the host's loopback address only.
+    ports: ["127.0.0.1:5000:5000"]
+    # Bind mount: ./nb on the host appears as /nb inside the container.
+    volumes: ["./nb:/nb"]
+    deploy:
+      resources: {limits: {memory: 2g}}
diff --git a/lec/18-hdfs/hdfs.Dockerfile b/lec/18-hdfs/hdfs.Dockerfile
new file mode 100644
index 0000000..256cf98
--- /dev/null
+++ b/lec/18-hdfs/hdfs.Dockerfile
@@ -0,0 +1,9 @@
+FROM ubuntu:24.04
+# Chain with "&&" (not ";") so any failed step fails the build; apt lists are removed in the same layer.
+RUN apt-get update && apt-get install -y wget curl openjdk-11-jdk python3-pip iproute2 nano && rm -rf /var/lib/apt/lists/*
+# HDFS: download Hadoop 3.3.6, unpack it to /hadoop-3.3.6, and delete the tarball in one layer.
+RUN wget https://dlcdn.apache.org/hadoop/common/hadoop-3.3.6/hadoop-3.3.6.tar.gz && tar -xf hadoop-3.3.6.tar.gz && rm hadoop-3.3.6.tar.gz
+# NOTE(review): JAVA_HOME hard-codes the amd64 JVM directory; confirm the image is only built for amd64.
+ENV JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64
+ENV PATH="${PATH}:/hadoop-3.3.6/bin"
+ENV HADOOP_HOME=/hadoop-3.3.6
diff --git a/lec/18-hdfs/notebook.Dockerfile b/lec/18-hdfs/notebook.Dockerfile
new file mode 100644
index 0000000..f9354e9
--- /dev/null
+++ b/lec/18-hdfs/notebook.Dockerfile
@@ -0,0 +1,4 @@
+FROM p4-hdfs
+RUN pip3 install --no-cache-dir --break-system-packages jupyterlab==4.0.3 jupyter-client==8.4.0 pyarrow==17.0.0 pandas==2.2.3 requests==2.31.0 nbconvert==7.9.2
+# Shell form is needed to compute CLASSPATH at start-up; "exec" makes JupyterLab replace the shell so it receives the container's stop signal.
+CMD export CLASSPATH="$($HADOOP_HOME/bin/hdfs classpath --glob)" && exec python3 -m jupyterlab --no-browser --ip=0.0.0.0 --port=5000 --allow-root --NotebookApp.token=''
-- 
GitLab