Spark
systemctl status spark-cluster.service spark-worker.service
● spark-cluster.service - Spark Init service
   Loaded: loaded (/etc/systemd/system/spark-cluster.service; enabled; vendor preset: disabled)
   Active: active (running) since Wed 2024-03-27 09:07:15 CET; 4min 50s ago
  Process: 2385 ExecStart=/opt/spark/spark-3.5.0-bin-hadoop3/sbin/start-all.sh (code=exited, status=0/SUCCESS)
 Main PID: 2455 (java)
    Tasks: 49 (limit: 125782)
   Memory: 295.6M
   CGroup: /system.slice/spark-cluster.service
           └─2455 /usr/lib/jvm/java-17-openjdk-17.0.10.0.7-2.0.1.el8.x86_64/bin/java -cp /opt/spark/spark-3.5.0-bin-hadoop3/conf/:/opt/spark/spark-3.5.0-bin-hadoop3/jars/* -Xmx1g org.apache.spark.deploy.master.>
Mar 27 09:07:12 localhost.localdomain systemd[1]: Starting Spark Init service...
Mar 27 09:07:12 localhost.localdomain start-all.sh[2418]: starting org.apache.spark.deploy.master.Master, logging to /opt/spark/spark-3.5.0-bin-hadoop3/logs/spark-spark-org.apache.spark.deploy.master.Master-1-l>
Mar 27 09:07:15 localhost.localdomain start-all.sh[6473]: localhost: ssh: connect to host localhost port 22: Connection refused
Mar 27 09:07:15 localhost.localdomain systemd[1]: Started Spark Init service.
● spark-worker.service - Spark Init service
   Loaded: loaded (/etc/systemd/system/spark-worker.service; enabled; vendor preset: disabled)
   Active: active (running) since Wed 2024-03-27 09:07:15 CET; 4min 50s ago
  Process: 2386 ExecStart=/opt/spark/spark-3.5.0-bin-hadoop3/sbin/start-slave.sh spark://localhost.localdomain:7077 (code=exited, status=0/SUCCESS)
 Main PID: 2487 (java)
    Tasks: 49 (limit: 125782)
   Memory: 248.3M
   CGroup: /system.slice/spark-worker.service
           └─2487 /usr/lib/jvm/java-17-openjdk-17.0.10.0.7-2.0.1.el8.x86_64/bin/java -cp /opt/spark/spark-3.5.0-bin-hadoop3/conf/:/opt/spark/spark-3.5.0-bin-hadoop3/jars/* -Xmx1g org.apache.spark.deploy.worker.>
Mar 27 09:07:12 localhost.localdomain systemd[1]: Starting Spark Init service...
Mar 27 09:07:12 localhost.localdomain start-slave.sh[2386]: This script is deprecated, use start-worker.sh
Mar 27 09:07:12 localhost.localdomain start-slave.sh[2426]: starting org.apache.spark.deploy.worker.Worker, logging to /opt/spark/spark-3.5.0-bin-hadoop3/logs/spark-spark-org.apache.spark.deploy.worker.Worker-1>
Mar 27 09:07:15 localhost.localdomain systemd[1]: Started Spark Init service.

cat /etc/systemd/system/spark-cluster.service
[Unit]
Description=Spark Init service
After=network.target
#After=systemd-user-sessions.service
#After=network-online.target

[Service]
Type=forking
User=spark
Group=spark
ExecStart=/opt/spark/spark-3.5.0-bin-hadoop3/sbin/start-all.sh
ExecStop=/opt/spark/spark-3.5.0-bin-hadoop3/sbin/stop-all.sh
#TimeoutSec=30
#Restart=always
#RestartSec=30
#StartLimitInterval=350
#StartLimitBurst=10

[Install]
WantedBy=multi-user.target

cat /etc/systemd/system/spark-worker.service
[Unit]
Description=Spark Init service
After=network.target
#After=systemd-user-sessions.service
#After=network-online.target

[Service]
Type=forking
User=spark
Group=spark
ExecStart=/opt/spark/spark-3.5.0-bin-hadoop3/sbin/start-slave.sh spark://localhost.localdomain:7077
ExecStop=/opt/spark/spark-3.5.0-bin-hadoop3/sbin/stop-slave.sh
#TimeoutSec=30
#Restart=always
#RestartSec=30
#StartLimitInterval=350
#StartLimitBurst=10

[Install]
WantedBy=multi-user.target
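Two things in the output above deserve attention. First, the "ssh: connect to host localhost port 22: Connection refused" line: start-all.sh launches workers over SSH even on a single node, so with sshd down the worker half of start-all.sh fails while the unit still reports success (masked here by the separate spark-worker.service). A master-only unit avoids the SSH dependency entirely; a minimal sketch using the start-master.sh/stop-master.sh scripts that ship in Spark's sbin directory:

[Unit]
Description=Spark master
After=network.target

[Service]
Type=forking
User=spark
Group=spark
ExecStart=/opt/spark/spark-3.5.0-bin-hadoop3/sbin/start-master.sh
ExecStop=/opt/spark/spark-3.5.0-bin-hadoop3/sbin/stop-master.sh

[Install]
WantedBy=multi-user.target

Second, the worker log warns that start-slave.sh is deprecated. Spark 3.x ships start-worker.sh/stop-worker.sh as the replacements, so the [Service] lines of spark-worker.service could read (same arguments, only the script names change):

ExecStart=/opt/spark/spark-3.5.0-bin-hadoop3/sbin/start-worker.sh spark://localhost.localdomain:7077
ExecStop=/opt/spark/spark-3.5.0-bin-hadoop3/sbin/stop-worker.sh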
SPARK_HOME=/opt/spark/spark-3.5.0-bin-hadoop3
MAIL=/var/spool/mail/spark
PYSPARK_DRIVER_PYTHON=jupyter
PYSPARK_DRIVER_PYTHON_OPTS=notebook --ip=0.0.0.0 --port=8888 --no-browser --certfile=/home/spark/.ssh/tccert.pem --keyfile /home/spark/.ssh/tckey.key
LOGNAME=spark
PYSPARK_PYTHON=/usr/bin/python3.11
PATH=/home/spark/.local/bin:/home/spark/bin:/usr/local/bin:/usr/local/sbin:/usr/bin:/usr/sbin:/opt/spark/spark-3.5.0-bin-hadoop3/bin:/opt/spark/spark-3.5.0-bin-hadoop3/sbin:/home/spark/.local/bin/:/opt/spark/spark-3.5.0-bin-hadoop3/bin:/opt/spark/spark-3.5.0-bin-hadoop3/sbin:/home/spark/.local/bin/
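These variables come from the spark user's login shell; the systemd units above never read .bashrc, so anything Spark needs at service start has to be handed to systemd directly. A minimal sketch via a drop-in (values copied from the dump above; the drop-in itself is an assumption, created with systemctl edit spark-worker.service):

[Service]
Environment=JAVA_HOME=/usr/lib/jvm/java-17-openjdk-17.0.10.0.7-2.0.1.el8.x86_64
Environment=SPARK_HOME=/opt/spark/spark-3.5.0-bin-hadoop3
Environment=PYSPARK_PYTHON=/usr/bin/python3.11

The duplicated Spark entries in PATH also suggest .bashrc is being sourced more than once; harmless, but worth guarding the export if it bothers you.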
cat start_pyspark.sh
#pyspark --packages graphframes:graphframes:0.8.2-spark3.2-s_2.12 --conf spark.local.dir=/home/spark/SparkPkg --driver-class-path /home/spark/jars/ojdbc8.jar
export PYSPARK_DRIVER_PYTHON=jupyter
export PYSPARK_DRIVER_PYTHON_OPTS='notebook'
export PYSPARK_PYTHON=/usr/bin/python3.11
pyspark --packages graphframes:graphframes:0.8.2-spark3.2-s_2.12 --conf spark.local.dir=/tmp --driver-class-path /home/spark/jars/ojdbc11.jar --jars /home/spark/jars/ojdbc11.jar
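Before layering Jupyter and GraphFrames on top, the standalone cluster itself can be smoke-tested with the pi example that ships in the Spark distribution (master URL taken from spark-worker.service above):

/opt/spark/spark-3.5.0-bin-hadoop3/bin/spark-submit \
  --master spark://localhost.localdomain:7077 \
  /opt/spark/spark-3.5.0-bin-hadoop3/examples/src/main/python/pi.py 10

Also note the graphframes coordinate is the 0.8.2 build for Spark 3.2 / Scala 2.12 while the cluster runs Spark 3.5.0; it may load, but check Spark Packages for a build matching the 3.5 line before relying on it.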
bashrc
# User specific aliases and functions
export JAVA_HOME=/usr/lib/jvm/java-17-openjdk-17.0.10.0.7-2.0.1.el8.x86_64
export SPARK_HOME=/opt/spark/spark-3.5.0-bin-hadoop3
export PATH=$PATH:${SPARK_HOME}/bin:${SPARK_HOME}/sbin:~/.local/bin/
export PYSPARK_PYTHON=/usr/bin/python3.11
export PYSPARK_DRIVER_PYTHON=jupyter
export PYSPARK_DRIVER_PYTHON_OPTS='notebook --ip=0.0.0.0 --port=8888 --no-browser --certfile=/home/spark/.ssh/tccert.pem --keyfile /home/spark/.ssh/tckey.key'
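The notebook options reference a certificate/key pair under /home/spark/.ssh/. If those files ever need regenerating, a self-signed pair can be produced with openssl (filenames copied from the options above; the subject is a placeholder):

openssl req -x509 -nodes -days 365 -newkey rsa:2048 \
  -subj '/CN=localhost.localdomain' \
  -keyout /home/spark/.ssh/tckey.key \
  -out /home/spark/.ssh/tccert.pem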