Skip to content
Snippets Groups Projects
Commit 28ae67b1 authored by TYLER CARAZA-HARTER's avatar TYLER CARAZA-HARTER
Browse files

lec 20

parent 05a7ac90
Branches sadBucky
No related tags found
No related merge requests found
# Image shared by every service in the demo: Java 11, Spark 3.5.5,
# Hadoop 3.3.6 and JupyterLab/pyspark, all on top of Ubuntu 24.04.
FROM ubuntu:24.04

# OS packages: download tools, a Java 11 JDK, pip and an editor.
# Chained with "&&" (not ";") so that a failed "apt-get update" aborts the
# build instead of silently installing from a stale or missing package index.
RUN apt-get update && apt-get install -y wget curl openjdk-11-jdk python3-pip nano

# SPARK
# Download, unpack into /spark-3.5.5-bin-hadoop3, and delete the tarball in
# the same layer so the archive does not stay in the image.
RUN wget https://archive.apache.org/dist/spark/spark-3.5.5/spark-3.5.5-bin-hadoop3.tgz && tar -xf spark-3.5.5-bin-hadoop3.tgz && rm spark-3.5.5-bin-hadoop3.tgz

# HDFS
# Fetched from archive.apache.org, like Spark above: the archive keeps every
# release permanently, whereas the dlcdn mirror only carries current releases,
# so a pinned version can disappear from it.
RUN wget https://archive.apache.org/dist/hadoop/common/hadoop-3.3.6/hadoop-3.3.6.tar.gz && tar -xf hadoop-3.3.6.tar.gz && rm hadoop-3.3.6.tar.gz

# Jupyter
# --break-system-packages lets pip install into the distro-managed system
# Python instead of requiring a virtual environment.
RUN pip3 install jupyterlab==4.3.5 pandas==2.2.3 pyspark==3.5.5 --break-system-packages

# NOTE(review): this JAVA_HOME path is amd64-specific; on another CPU
# architecture the JDK directory name differs -- confirm the target platform.
ENV JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64

# Put the Hadoop command-line tools (e.g. "hdfs") on PATH.
ENV HADOOP_HOME=/hadoop-3.3.6
ENV PATH="${PATH}:${HADOOP_HOME}/bin"
# Compose stack for the Spark + HDFS demo. Every service runs the same
# "spark-demo" image and differs only in the command it starts.
services:
  # JupyterLab notebook server, serving on port 5000 (see --port below).
  nb:
    image: spark-demo
    ports:
      # Published on the loopback interface only, so the token-less server
      # below is not reachable from other machines.
      - "127.0.0.1:5000:5000"
      # Presumably the Spark application UI of sessions started from the
      # notebook -- TODO confirm.
      - "127.0.0.1:4040:4040"
    volumes:
      # Host directory ./nb is bind-mounted at /nb inside the container.
      - "./nb:/nb"
    # The empty token disables token authentication.
    # NOTE(review): "NotebookApp.token" is a legacy option name; newer Jupyter
    # Server releases document "IdentityProvider.token" -- confirm before
    # upgrading jupyterlab.
    command: python3 -m jupyterlab --no-browser --ip=0.0.0.0 --port=5000 --allow-root --NotebookApp.token=''

  # HDFS NameNode. Its hostname "nn" is what the hdfs://nn:9000 URLs refer to.
  nn:
    image: spark-demo
    hostname: nn
    # Formats the NameNode (forced) and, if that succeeds, starts it with
    # dfs.replication=1.
    command: sh -c "hdfs namenode -format -force && hdfs namenode -D dfs.replication=1 -fs hdfs://nn:9000"

  # HDFS DataNode, pointed at the NameNode above.
  dn:
    image: spark-demo
    command: hdfs datanode -fs hdfs://nn:9000

  # Spark master. Its hostname "boss" is what the workers' spark://boss:7077
  # URL refers to.
  spark-boss:
    image: spark-demo
    hostname: boss
    # "sleep infinity" keeps the container's main process running after the
    # start script returns.
    command: sh -c "/spark-3.5.5-bin-hadoop3/sbin/start-master.sh && sleep infinity"

  # Spark workers: two replicas, each started with "-c 2 -m 2g".
  spark-worker:
    image: spark-demo
    command: sh -c "/spark-3.5.5-bin-hadoop3/sbin/start-worker.sh spark://boss:7077 -c 2 -m 2g && sleep infinity"
    deploy:
      replicas: 2
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment