Skip to content
Snippets Groups Projects
Commit 78d8c6b0 authored by TYLER CARAZA-HARTER
Browse files

HDFS starter

parent b027ad84
No related branches found
No related tags found
No related merge requests found
# Two-container stack: an HDFS container and a notebook container.
# Both are capped at 2g of memory and publish their port on the host's
# loopback address only.
services:
  hdfs:
    image: p4-hdfs
    # Other containers on the Compose network can reach this one as "main".
    hostname: main
    # The container only idles; this file does not start any daemon in it.
    command: sleep infinity
    ports:
      # presumably the HDFS NameNode web UI port -- confirm against the Hadoop config
      - "127.0.0.1:9870:9870"
    deploy:
      resources:
        limits:
          memory: 2g

  nb:
    image: p4-nb
    ports:
      - "127.0.0.1:5000:5000"
    volumes:
      # Bind-mount the host's ./nb directory at /nb inside the container.
      - "./nb:/nb"
    deploy:
      resources:
        limits:
          memory: 2g
# Base image for the HDFS container: Ubuntu 24.04 with a JDK, pip, a few
# command-line utilities, and an unpacked Hadoop 3.3.6 tree at /hadoop-3.3.6.
FROM ubuntu:24.04

# update and install are chained with && so that a failed `apt-get update`
# stops the build instead of letting the install run against stale lists.
RUN apt-get update && apt-get install -y \
        curl \
        iproute2 \
        nano \
        openjdk-11-jdk \
        python3-pip \
        wget

# HDFS
# Download, unpack and delete the tarball in a single layer. Each step is
# chained with &&: with `;` a truncated download followed by a failing `tar`
# would be hidden by the successful `rm`, and the build would "succeed" with
# an incomplete Hadoop tree.
RUN wget https://dlcdn.apache.org/hadoop/common/hadoop-3.3.6/hadoop-3.3.6.tar.gz \
    && tar -xf hadoop-3.3.6.tar.gz \
    && rm hadoop-3.3.6.tar.gz

# NOTE(review): the JAVA_HOME path is amd64-specific; on another architecture
# the JDK directory name presumably differs -- confirm before building there.
ENV JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64 \
    HADOOP_HOME=/hadoop-3.3.6

# Separate instruction: values set within one ENV are not visible to
# substitutions in that same instruction.
ENV PATH="${PATH}:${HADOOP_HOME}/bin"
# Notebook image: the locally built p4-hdfs image plus JupyterLab and the
# pinned Python packages below.
FROM p4-hdfs

# --break-system-packages lets pip install into the distribution-managed
# Python; --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip3 install --no-cache-dir --break-system-packages \
        jupyter-client==8.4.0 \
        jupyterlab==4.0.3 \
        nbconvert==7.9.2 \
        pandas==2.2.3 \
        pyarrow==17.0.0 \
        requests==2.31.0

# At container start, export CLASSPATH from `hdfs classpath --glob`
# (presumably so pyarrow can load the Hadoop jars -- confirm), then launch
# JupyterLab on port 5000 with token authentication disabled.
# A shell is still required for the command substitution, but `exec` replaces
# that shell with the JupyterLab process so it runs as PID 1 and receives the
# stop signal directly instead of being a child of /bin/sh.
CMD ["/bin/sh", "-c", "export CLASSPATH=\"$(\"$HADOOP_HOME/bin/hdfs\" classpath --glob)\" && exec python3 -m jupyterlab --no-browser --ip=0.0.0.0 --port=5000 --allow-root --NotebookApp.token=''"]
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment