Spark

systemctl status spark-cluster.service spark-worker.service 
● spark-cluster.service - Spark Init service
   Loaded: loaded (/etc/systemd/system/spark-cluster.service; enabled; vendor preset: disabled)
   Active: active (running) since Wed 2024-03-27 09:07:15 CET; 4min 50s ago
  Process: 2385 ExecStart=/opt/spark/spark-3.5.0-bin-hadoop3/sbin/start-all.sh (code=exited, status=0/SUCCESS)
 Main PID: 2455 (java)
    Tasks: 49 (limit: 125782)
   Memory: 295.6M
   CGroup: /system.slice/spark-cluster.service
           └─2455 /usr/lib/jvm/java-17-openjdk-17.0.10.0.7-2.0.1.el8.x86_64/bin/java -cp /opt/spark/spark-3.5.0-bin-hadoop3/conf/:/opt/spark/spark-3.5.0-bin-hadoop3/jars/* -Xmx1g org.apache.spark.deploy.master.>

Mar 27 09:07:12 localhost.localdomain systemd[1]: Starting Spark Init service...
Mar 27 09:07:12 localhost.localdomain start-all.sh[2418]: starting org.apache.spark.deploy.master.Master, logging to /opt/spark/spark-3.5.0-bin-hadoop3/logs/spark-spark-org.apache.spark.deploy.master.Master-1-l>
Mar 27 09:07:15 localhost.localdomain start-all.sh[6473]: localhost: ssh: connect to host localhost port 22: Connection refused
Mar 27 09:07:15 localhost.localdomain systemd[1]: Started Spark Init service.

● spark-worker.service - Spark Init service
   Loaded: loaded (/etc/systemd/system/spark-worker.service; enabled; vendor preset: disabled)
   Active: active (running) since Wed 2024-03-27 09:07:15 CET; 4min 50s ago
  Process: 2386 ExecStart=/opt/spark/spark-3.5.0-bin-hadoop3/sbin/start-slave.sh spark://localhost.localdomain:7077 (code=exited, status=0/SUCCESS)
 Main PID: 2487 (java)
    Tasks: 49 (limit: 125782)
   Memory: 248.3M
   CGroup: /system.slice/spark-worker.service
           └─2487 /usr/lib/jvm/java-17-openjdk-17.0.10.0.7-2.0.1.el8.x86_64/bin/java -cp /opt/spark/spark-3.5.0-bin-hadoop3/conf/:/opt/spark/spark-3.5.0-bin-hadoop3/jars/* -Xmx1g org.apache.spark.deploy.worker.>

Mar 27 09:07:12 localhost.localdomain systemd[1]: Starting Spark Init service...
Mar 27 09:07:12 localhost.localdomain start-slave.sh[2386]: This script is deprecated, use start-worker.sh
Mar 27 09:07:12 localhost.localdomain start-slave.sh[2426]: starting org.apache.spark.deploy.worker.Worker, logging to /opt/spark/spark-3.5.0-bin-hadoop3/logs/spark-spark-org.apache.spark.deploy.worker.Worker-1>
Mar 27 09:07:15 localhost.localdomain systemd[1]: Started Spark Init service.
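
The "ssh: connect to host localhost port 22: Connection refused" line in the master's journal comes from start-all.sh: it starts the master locally and then tries to ssh into every host listed in conf/workers (defaulting to localhost when that file is absent) in order to start the workers. With sshd not running this step fails, which is harmless here because the worker is brought up by its own spark-worker.service unit instead. To see which hosts start-all.sh would contact:

cat /opt/spark/spark-3.5.0-bin-hadoop3/conf/workers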

cat /etc/systemd/system/spark-cluster.service
[Unit]
Description=Spark Init service
After=network.target
#After=systemd-user-sessions.service
#After=network-online.target

[Service]
Type=forking
User=spark
Group=spark
ExecStart=/opt/spark/spark-3.5.0-bin-hadoop3/sbin/start-all.sh
ExecStop=/opt/spark/spark-3.5.0-bin-hadoop3/sbin/stop-all.sh
#TimeoutSec=30
#Restart=always
#RestartSec= 30
#StartLimitInterval=350
#StartLimitBurst=10

[Install]
WantedBy=multi-user.target
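
Type=forking matches how the Spark sbin scripts behave: start-all.sh backgrounds the actual java daemon (via spark-daemon.sh) and exits, so systemd tracks the surviving java process as Main PID. After creating or editing the unit files, the usual systemd steps apply:

systemctl daemon-reload
systemctl enable --now spark-cluster.service spark-worker.service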

cat /etc/systemd/system/spark-worker.service
[Unit]
Description=Spark Init service
After=network.target
#After=systemd-user-sessions.service
#After=network-online.target

[Service]
Type=forking
User=spark
Group=spark
ExecStart=/opt/spark/spark-3.5.0-bin-hadoop3/sbin/start-slave.sh spark://localhost.localdomain:7077
ExecStop=/opt/spark/spark-3.5.0-bin-hadoop3/sbin/stop-slave.sh
#TimeoutSec=30
#Restart=always
#RestartSec= 30
#StartLimitInterval=350
#StartLimitBurst=10

[Install]
WantedBy=multi-user.target
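
As the journal above warns, start-slave.sh is deprecated; in Spark 3.5 it still works, but the current script names are start-worker.sh and stop-worker.sh. The unit can be updated by swapping only the script names:

ExecStart=/opt/spark/spark-3.5.0-bin-hadoop3/sbin/start-worker.sh spark://localhost.localdomain:7077
ExecStop=/opt/spark/spark-3.5.0-bin-hadoop3/sbin/stop-worker.sh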

Environment of the spark user:
SPARK_HOME=/opt/spark/spark-3.5.0-bin-hadoop3
MAIL=/var/spool/mail/spark
PYSPARK_DRIVER_PYTHON=jupyter
PYSPARK_DRIVER_PYTHON_OPTS=notebook --ip=0.0.0.0 --port=8888 --no-browser --certfile=/home/spark/.ssh/tccert.pem --keyfile /home/spark/.ssh/tckey.key
LOGNAME=spark
PYSPARK_PYTHON=/usr/bin/python3.11
PATH=/home/spark/.local/bin:/home/spark/bin:/usr/local/bin:/usr/local/sbin:/usr/bin:/usr/sbin:/opt/spark/spark-3.5.0-bin-hadoop3/bin:/opt/spark/spark-3.5.0-bin-hadoop3/sbin:/home/spark/.local/bin/:/opt/spark/spark-3.5.0-bin-hadoop3/bin:/opt/spark/spark-3.5.0-bin-hadoop3/sbin:/home/spark/.local/bin/
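
With PYSPARK_DRIVER_PYTHON and PYSPARK_DRIVER_PYTHON_OPTS set like this, bin/pyspark does not start a plain Python REPL: it puts Spark's Python libraries (pyspark, py4j) on PYTHONPATH and then executes the configured driver, effectively running

jupyter notebook --ip=0.0.0.0 --port=8888 --no-browser --certfile=/home/spark/.ssh/tccert.pem --keyfile /home/spark/.ssh/tckey.key

so a plain pyspark call serves a TLS-protected notebook on port 8888.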

cat start_pyspark.sh
#pyspark --packages graphframes:graphframes:0.8.2-spark3.2-s_2.12 --conf spark.local.dir=/home/spark/SparkPkg --driver-class-path /home/spark/jars/ojdbc8.jar
export PYSPARK_DRIVER_PYTHON=jupyter
export PYSPARK_DRIVER_PYTHON_OPTS='notebook'
export PYSPARK_PYTHON=/usr/bin/python3.11
pyspark --packages graphframes:graphframes:0.8.2-spark3.2-s_2.12 --conf spark.local.dir=/tmp --driver-class-path /home/spark/jars/ojdbc11.jar  --jars /home/spark/jars/ojdbc11.jar
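
Two notes on this pyspark call: ojdbc11.jar is passed twice on purpose; --driver-class-path puts it on the driver's classpath (where the JDBC connection is opened), while --jars additionally ships it to the executors. Also, the graphframes coordinate still names a Spark 3.2 build while the cluster runs Spark 3.5; a matching build would be preferable, e.g. (assumed version string, check the graphframes releases first):

pyspark --packages graphframes:graphframes:0.8.3-spark3.5-s_2.12 --conf spark.local.dir=/tmp --driver-class-path /home/spark/jars/ojdbc11.jar --jars /home/spark/jars/ojdbc11.jar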

cat ~/.bashrc

# User specific aliases and functions
export JAVA_HOME=/usr/lib/jvm/java-17-openjdk-17.0.10.0.7-2.0.1.el8.x86_64
export SPARK_HOME=/opt/spark/spark-3.5.0-bin-hadoop3
export PATH=$PATH:${SPARK_HOME}/bin:${SPARK_HOME}/sbin:~/.local/bin/
export PYSPARK_PYTHON=/usr/bin/python3.11
export PYSPARK_DRIVER_PYTHON=jupyter
export PYSPARK_DRIVER_PYTHON_OPTS='notebook --ip=0.0.0.0 --port=8888 --no-browser --certfile=/home/spark/.ssh/tccert.pem --keyfile /home/spark/.ssh/tckey.key'
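
These exports are the source of the environment shown above; the duplicated Spark entries in the PATH dump are expected, presumably because ~/.bashrc is sourced more than once in nested shells, and harmless. With this in place, logging in as spark and running pyspark (or start_pyspark.sh) is enough to bring up the notebook.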
