Skip to content
Snippets Groups Projects
Dockerfile 803 B
Newer Older
TYLER CARAZA-HARTER's avatar
TYLER CARAZA-HARTER committed
# Base image: Ubuntu 24.04, referenced by an explicit version tag.
FROM ubuntu:24.04
TYLER CARAZA-HARTER's avatar
TYLER CARAZA-HARTER committed
# OS-level tooling: download utilities, a Java 11 JDK, pip, and a text editor.
# `&&` (rather than `;`) stops the build as soon as `apt-get update` fails
# instead of carrying on with whatever package index happens to be present.
# The package lists are removed in the same layer so they do not add to the
# image size; run `apt-get update` again before installing anything later.
RUN apt-get update && apt-get install -y \
        curl \
        nano \
        openjdk-11-jdk \
        python3-pip \
        wget \
    && rm -rf /var/lib/apt/lists/*
TYLER CARAZA-HARTER's avatar
TYLER CARAZA-HARTER committed

# SPARK
TYLER CARAZA-HARTER's avatar
TYLER CARAZA-HARTER committed
# Fetch and unpack Spark 3.5.5 into /spark-3.5.5-bin-hadoop3.
# dlcdn.apache.org only carries releases that are currently listed for
# download; superseded versions are moved to archive.apache.org. Try the CDN
# first and fall back to the permanent archive so this pinned version stays
# buildable. `-O` fixes the output name so a failed first attempt cannot leave
# a stray file that the second attempt would then save alongside.
RUN tarball=spark-3.5.5-bin-hadoop3.tgz \
    && { wget -O "$tarball" "https://dlcdn.apache.org/spark/spark-3.5.5/$tarball" \
         || wget -O "$tarball" "https://archive.apache.org/dist/spark/spark-3.5.5/$tarball"; } \
    && tar -xf "$tarball" \
    && rm "$tarball"

TYLER CARAZA-HARTER's avatar
TYLER CARAZA-HARTER committed
# HDFS
# Fetch and unpack Hadoop 3.3.6 into /hadoop-3.3.6.
# dlcdn.apache.org only carries releases that are currently listed for
# download; superseded versions are moved to archive.apache.org. Try the CDN
# first and fall back to the permanent archive so this pinned version stays
# buildable. `-O` fixes the output name so a failed first attempt cannot leave
# a stray file that the second attempt would then save alongside.
RUN tarball=hadoop-3.3.6.tar.gz \
    && { wget -O "$tarball" "https://dlcdn.apache.org/hadoop/common/hadoop-3.3.6/$tarball" \
         || wget -O "$tarball" "https://archive.apache.org/dist/hadoop/common/hadoop-3.3.6/$tarball"; } \
    && tar -xf "$tarball" \
    && rm "$tarball"

# Jupyter
TYLER CARAZA-HARTER's avatar
TYLER CARAZA-HARTER committed
# Python packages, each pinned to an exact version.
# --break-system-packages lets pip install into the distribution-managed
# system Python (which is otherwise refused as "externally managed").
# --no-cache-dir keeps pip's download/wheel cache out of the image layer.
RUN pip3 install --no-cache-dir --break-system-packages \
        jupyterlab==4.3.5 \
        matplotlib==3.10.1 \
        pandas==2.2.3 \
        pyspark==3.5.5
TYLER CARAZA-HARTER's avatar
TYLER CARAZA-HARTER committed

# Runtime environment for the Java and Hadoop tooling.
# NOTE(review): the JAVA_HOME path embeds "amd64"; presumably this image is
# only built for amd64 hosts — confirm before building on another architecture.
ENV JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64
# Hadoop install root; its bin/ directory is appended to PATH so the Hadoop
# command-line tools resolve without a full path.
ENV HADOOP_HOME=/hadoop-3.3.6
ENV PATH="${PATH}:${HADOOP_HOME}/bin"