= Spark =

{{{
systemctl status spark-cluster.service spark-worker.service
● spark-cluster.service - Spark Init service
   Loaded: loaded (/etc/systemd/system/spark-cluster.service; enabled; vendor preset: disabled)
   Active: active (running) since Wed 2024-03-27 09:07:15 CET; 4min 50s ago
  Process: 2385 ExecStart=/opt/spark/spark-3.5.0-bin-hadoop3/sbin/start-all.sh (code=exited, status=0/SUCCESS)
 Main PID: 2455 (java)
    Tasks: 49 (limit: 125782)
   Memory: 295.6M
   CGroup: /system.slice/spark-cluster.service
           └─2455 /usr/lib/jvm/java-17-openjdk-17.0.10.0.7-2.0.1.el8.x86_64/bin/java -cp /opt/spark/spark-3.5.0-bin-hadoop3/conf/:/opt/spark/spark-3.5.0-bin-hadoop3/jars/* -Xmx1g org.apache.spark.deploy.master.>

Mar 27 09:07:12 localhost.localdomain systemd[1]: Starting Spark Init service...
Mar 27 09:07:12 localhost.localdomain start-all.sh[2418]: starting org.apache.spark.deploy.master.Master, logging to /opt/spark/spark-3.5.0-bin-hadoop3/logs/spark-spark-org.apache.spark.deploy.master.Master-1-l>
Mar 27 09:07:15 localhost.localdomain start-all.sh[6473]: localhost: ssh: connect to host localhost port 22: Connection refused
Mar 27 09:07:15 localhost.localdomain systemd[1]: Started Spark Init service.

● spark-worker.service - Spark Init service
   Loaded: loaded (/etc/systemd/system/spark-worker.service; enabled; vendor preset: disabled)
   Active: active (running) since Wed 2024-03-27 09:07:15 CET; 4min 50s ago
  Process: 2386 ExecStart=/opt/spark/spark-3.5.0-bin-hadoop3/sbin/start-slave.sh spark://localhost.localdomain:7077 (code=exited, status=0/SUCCESS)
 Main PID: 2487 (java)
    Tasks: 49 (limit: 125782)
   Memory: 248.3M
   CGroup: /system.slice/spark-worker.service
           └─2487 /usr/lib/jvm/java-17-openjdk-17.0.10.0.7-2.0.1.el8.x86_64/bin/java -cp /opt/spark/spark-3.5.0-bin-hadoop3/conf/:/opt/spark/spark-3.5.0-bin-hadoop3/jars/* -Xmx1g org.apache.spark.deploy.worker.>

Mar 27 09:07:12 localhost.localdomain systemd[1]: Starting Spark Init service...
Mar 27 09:07:12 localhost.localdomain start-slave.sh[2386]: This script is deprecated, use start-worker.sh
Mar 27 09:07:12 localhost.localdomain start-slave.sh[2426]: starting org.apache.spark.deploy.worker.Worker, logging to /opt/spark/spark-3.5.0-bin-hadoop3/logs/spark-spark-org.apache.spark.deploy.worker.Worker-1>
Mar 27 09:07:15 localhost.localdomain systemd[1]: Started Spark Init service.
}}}
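Note the "ssh: connect to host localhost port 22: Connection refused" line in the spark-cluster journal above: start-all.sh launches the master directly but reaches every host listed in $SPARK_HOME/conf/workers over SSH to launch the workers, and sshd was not reachable at that point during boot. The separate spark-worker.service unit below works around this; if start-all.sh alone is supposed to bring up the worker too, sshd has to be running and the spark user needs passwordless SSH to localhost. A minimal sketch, assuming an ed25519 key and default paths:

{{{
# as root: make sure sshd starts at boot (before spark-cluster.service runs)
systemctl enable --now sshd

# as the spark user: key-based login to localhost for start-all.sh
ssh-keygen -t ed25519 -N '' -f ~/.ssh/id_ed25519
cat ~/.ssh/id_ed25519.pub >> ~/.ssh/authorized_keys
chmod 700 ~/.ssh; chmod 600 ~/.ssh/authorized_keys
ssh localhost true    # must succeed without a password prompt
}}}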
{{{
cat /etc/systemd/system/spark-cluster.service
[Unit]
Description=Spark Init service
After=network.target
#After=systemd-user-sessions.service
#After=network-online.target

[Service]
Type=forking
User=spark
Group=spark
ExecStart=/opt/spark/spark-3.5.0-bin-hadoop3/sbin/start-all.sh
ExecStop=/opt/spark/spark-3.5.0-bin-hadoop3/sbin/stop-all.sh
#TimeoutSec=30
#Restart=always
#RestartSec=30
#StartLimitInterval=350
#StartLimitBurst=10

[Install]
WantedBy=multi-user.target
}}}

{{{
cat /etc/systemd/system/spark-worker.service
[Unit]
Description=Spark Init service
After=network.target
#After=systemd-user-sessions.service
#After=network-online.target

[Service]
Type=forking
User=spark
Group=spark
ExecStart=/opt/spark/spark-3.5.0-bin-hadoop3/sbin/start-slave.sh spark://localhost.localdomain:7077
ExecStop=/opt/spark/spark-3.5.0-bin-hadoop3/sbin/stop-slave.sh
#TimeoutSec=30
#Restart=always
#RestartSec=30
#StartLimitInterval=350
#StartLimitBurst=10

[Install]
WantedBy=multi-user.target
}}}

Environment of the spark user:

{{{
SPARK_HOME=/opt/spark/spark-3.5.0-bin-hadoop3
MAIL=/var/spool/mail/spark
PYSPARK_DRIVER_PYTHON=jupyter
PYSPARK_DRIVER_PYTHON_OPTS=notebook --ip=0.0.0.0 --port=8888 --no-browser --certfile=/home/spark/.ssh/tccert.pem --keyfile /home/spark/.ssh/tckey.key
LOGNAME=spark
PYSPARK_PYTHON=/usr/bin/python3.11
PATH=/home/spark/.local/bin:/home/spark/bin:/usr/local/bin:/usr/local/sbin:/usr/bin:/usr/sbin:/opt/spark/spark-3.5.0-bin-hadoop3/bin:/opt/spark/spark-3.5.0-bin-hadoop3/sbin:/home/spark/.local/bin/:/opt/spark/spark-3.5.0-bin-hadoop3/bin:/opt/spark/spark-3.5.0-bin-hadoop3/sbin:/home/spark/.local/bin/
}}}

{{{
cat start_pyspark.sh
#pyspark --packages graphframes:graphframes:0.8.2-spark3.2-s_2.12 --conf spark.local.dir=/home/spark/SparkPkg --driver-class-path /home/spark/jars/ojdbc8.jar
export PYSPARK_DRIVER_PYTHON=jupyter
export PYSPARK_DRIVER_PYTHON_OPTS='notebook'
export PYSPARK_PYTHON=/usr/bin/python3.11
pyspark --packages graphframes:graphframes:0.8.2-spark3.2-s_2.12 --conf spark.local.dir=/tmp --driver-class-path /home/spark/jars/ojdbc11.jar --jars /home/spark/jars/ojdbc11.jar
}}}

bashrc:

{{{
# User specific aliases and functions
export JAVA_HOME=/usr/lib/jvm/java-17-openjdk-17.0.10.0.7-2.0.1.el8.x86_64
export SPARK_HOME=/opt/spark/spark-3.5.0-bin-hadoop3
export PATH=$PATH:${SPARK_HOME}/bin:${SPARK_HOME}/sbin:~/.local/bin/
export PYSPARK_PYTHON=/usr/bin/python3.11
export PYSPARK_DRIVER_PYTHON=jupyter
export PYSPARK_DRIVER_PYTHON_OPTS='notebook --ip=0.0.0.0 --port=8888 --no-browser --certfile=/home/spark/.ssh/tccert.pem --keyfile /home/spark/.ssh/tckey.key'
}}}
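After changing either unit file above, systemd needs the usual round trip; nothing here is Spark-specific:

{{{
systemctl daemon-reload
systemctl enable --now spark-cluster.service spark-worker.service

# follow both journals while debugging startup ordering
journalctl -u spark-cluster.service -u spark-worker.service -f
}}}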
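The worker journal flags start-slave.sh as deprecated; Spark 3.x ships start-worker.sh and stop-worker.sh as the replacements, so spark-worker.service could be updated accordingly. Only the two Exec lines change:

{{{
ExecStart=/opt/spark/spark-3.5.0-bin-hadoop3/sbin/start-worker.sh spark://localhost.localdomain:7077
ExecStop=/opt/spark/spark-3.5.0-bin-hadoop3/sbin/stop-worker.sh
}}}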
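The environment dump above has no shell prompt, so it was presumably captured from a running process rather than from a login shell; one way to reproduce such a capture (the pgrep pattern is an assumption):

{{{
# print the environment of the running Spark master, one variable per line
tr '\0' '\n' < /proc/"$(pgrep -of org.apache.spark.deploy.master.Master)"/environ
}}}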
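start_pyspark.sh passes no --master option, so unless spark.master is set in conf/spark-defaults.conf the shell runs in local mode instead of against the standalone master started above. A variant that targets the cluster explicitly (master URL taken from the worker unit):

{{{
pyspark --master spark://localhost.localdomain:7077 \
        --packages graphframes:graphframes:0.8.2-spark3.2-s_2.12 \
        --conf spark.local.dir=/tmp \
        --driver-class-path /home/spark/jars/ojdbc11.jar \
        --jars /home/spark/jars/ojdbc11.jar
}}}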
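PYSPARK_DRIVER_PYTHON_OPTS points Jupyter at a certificate/key pair under /home/spark/.ssh. If that pair ever needs to be regenerated, a self-signed one can be produced with openssl; the validity period and subject below are assumptions:

{{{
openssl req -x509 -nodes -newkey rsa:2048 -days 365 \
        -subj "/CN=localhost.localdomain" \
        -keyout /home/spark/.ssh/tckey.key \
        -out /home/spark/.ssh/tccert.pem
chmod 600 /home/spark/.ssh/tckey.key
}}}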