Commit e4100990 authored by Stefano Alberto Russo's avatar Stefano Alberto Russo
Browse files

Added the standalone computing resource service with Podman, Docker and...

Added the standalone computing resource service with Podman, Docker and Singularity support. Improved demo slurm cluster naming.
parent 76b3ad5a
Loading
Loading
Loading
Loading
+15 −6
Original line number Diff line number Diff line
version: '3'
services:

  slurmclustermaster-main:
  slurmclustermaster:
    image: "rosetta/slurmclustermaster"
    container_name: slurmclustermaster-main
    hostname: slurmclustermaster-main
    container_name: slurmclustermaster
    hostname: slurmclustermaster
    environment:
      - SAFEMODE=False
    privileged: true
@@ -12,10 +12,10 @@ services:
      - ./data/shared:/shared
      # - ./data/singularity_cache:/rosetta/.singularity/cache # Not working, check permissions...

  slurmclusterworker-one:
  slurmclusterworker:
    image: "rosetta/slurmclusterworker"
    container_name: slurmclusterworker-one
    hostname: slurmclusterworker-one
    container_name: slurmclusterworker
    hostname: slurmclusterworker
    environment:
      - SAFEMODE=False
    privileged: true
@@ -23,6 +23,15 @@ services:
      - ./data/shared:/shared
      - /var/run/docker.sock:/var/run/docker.sock

  # Standalone computing resource service (added by this commit with Podman,
  # Docker and Singularity support), built from the rosetta/standaloneworker image.
  standaloneworker:
    image: "rosetta/standaloneworker"
    container_name: standaloneworker
    hostname: standaloneworker
    # NOTE(review): presumably required so container engines can run inside
    # this container (the entrypoint also changes /dev/fuse permissions) — confirm.
    privileged: true
    volumes:
      # Same host directory that the Slurm cluster services mount at /shared.
      - ./data/shared:/shared
      # Exposes the host's Docker daemon socket inside the container.
      - /var/run/docker.sock:/var/run/docker.sock

  dregistry:
    container_name: dregistry
    hostname: dregistry
+1 −0
Original line number Diff line number Diff line
@@ -34,6 +34,7 @@ if [[ "x$SERVICE" == "x" ]] ; then
    $BUILD_COMMAND services/slurmcluster -t rosetta/slurmcluster    
    $BUILD_COMMAND services/slurmclustermaster -t rosetta/slurmclustermaster    
    $BUILD_COMMAND services/slurmclusterworker -t rosetta/slurmclusterworker    
    $BUILD_COMMAND services/standaloneworker -t rosetta/standaloneworker    
    $BUILD_COMMAND services/dregistry -t rosetta/dregistry
    $BUILD_COMMAND services/webapp -t rosetta/webapp
    $BUILD_COMMAND services/postgres -t rosetta/postgres
+9 −10
Original line number Diff line number Diff line
@@ -2,7 +2,7 @@
# Put this file on all nodes of your cluster.
# See the slurm.conf man page for more information.
#
ControlMachine=slurmclustermaster-main
ControlMachine=slurmclustermaster
#ControlAddr=
#BackupController=
#BackupAddr=
@@ -155,16 +155,15 @@ SlurmdLogFile=/var/log/slurm-llnl/slurmd.log
#SuspendRate=
#SuspendTime=
#
# Must add controller node explictly but don't place it into any partition
NodeName=slurmclustermaster-main CPUs=1 State=UNKNOWN
#NodeName=partitiona-instrument CPUs=1 State=UNKNOWN
#NodeName=partitionb-instrument CPUs=1 State=UNKNOWN
#NodeName=cris-instrument CPUs=1 State=UNKNOWN
# Must add controller node explicitly but don't place it into any partition
NodeName=slurmclustermaster CPUs=1 State=UNKNOWN
#
# COMPUTE NODES
NodeName=slurmclusterworker-one CPUs=1 State=UNKNOWN
#NodeName=slurmclusterworker-two CPUs=1 State=UNKNOWN
PartitionName=partition1 Nodes=slurmclusterworker-one MaxTime=INFINITE State=UP
#PartitionName=partition2 Nodes=slurmclusterworker-two MaxTime=INFINITE State=UP
NodeName=slurmclusterworker CPUs=1 State=UNKNOWN
#NodeName=slurmclusterworker-multi-one CPUs=1 State=UNKNOWN
#NodeName=slurmclusterworker-multi-two CPUs=1 State=UNKNOWN
PartitionName=partition1 Nodes=slurmclusterworker MaxTime=INFINITE State=UP
#PartitionName=partition2 Nodes=slurmclusterworker-multi-one,slurmclusterworker-multi-two MaxTime=INFINITE State=UP



+36 −0
Original line number Diff line number Diff line
# Standalone worker image: a Podman base image extended with Docker,
# Singularity and an OpenSSH server, reachable over SSH as "testuser".
FROM quay.io/podman/stable:v3.2.3

# This is necessary due to some base image permission errors.
RUN chown -R podman:podman /home/podman

# Rename the base image's "podman" account (user, home path and group) to
# "testuser". The home directory itself is not moved: /home/testuser is a
# symlink to the original /home/podman. Done in a single layer because the
# four commands form one logical step.
RUN usermod -l testuser podman \
    && usermod -d /home/testuser testuser \
    && ln -s /home/podman /home/testuser \
    && groupmod -n testuser podman

# Replace uid/gid mapping from podman to testuser user
COPY subuid /etc/subuid
COPY subgid /etc/subgid

# Install the extra container engines, the SSH server and utilities in one
# layer, and drop the dnf caches in that same layer so they do not end up
# in the image. Host keys are generated at build time.
RUN dnf install -y \
        docker \
        openssh-server \
        python \
        singularity \
        wget \
    && dnf clean all \
    && ssh-keygen -A

# Authorize the public key for SSH logins as testuser. The directory and the
# key file are handed over to testuser with the restrictive modes expected
# for ~/.ssh, instead of being left root-owned with default permissions.
RUN mkdir -p /home/testuser/.ssh
COPY keys/id_rsa.pub /home/testuser/.ssh/authorized_keys
RUN chown -R testuser:testuser /home/testuser/.ssh \
    && chmod 700 /home/testuser/.ssh \
    && chmod 600 /home/testuser/.ssh/authorized_keys


#----------------------
# Entrypoint
#----------------------

# Copy entrypoint
COPY entrypoint.sh /

# Give right permissions
RUN chmod 755 /entrypoint.sh

# Set entrypoint. No USER instruction is added on purpose: the entrypoint
# changes permissions on /dev/fuse and starts sshd, so it has to start as root.
ENTRYPOINT ["/entrypoint.sh"]
 No newline at end of file
+23 −0
Original line number Diff line number Diff line
#!/bin/bash

# Container entrypoint for the standalone worker.
#
# Usage: /entrypoint.sh [COMMAND [ARG...]]
#   - without a command: runs the SSH daemon in the foreground;
#   - with a command: runs that command, with its arguments, in place of
#     this script.

# Exit on any error. More complex handling could be added in the future
# (see https://stackoverflow.com/questions/4381618/exit-a-script-on-error)
set -e

# Fix FUSE permissions
chmod 777 /dev/fuse

#---------------------
#  Entrypoint command
#---------------------

# "$*" is empty when no command was given (or only an empty string was passed).
if [[ -z "$*" ]] ; then
    echo "[INFO] Executing Docker entrypoint command: /usr/sbin/sshd -D"
    # exec so that sshd replaces this shell and receives container signals
    # (e.g. SIGTERM on stop) directly.
    exec /usr/sbin/sshd -D
else
    echo "[INFO] Executing Docker entrypoint command: $*"
    # exec "$@" keeps every argument as a separate word. Collapsing the
    # arguments into one string and exec'ing that string would look for a
    # program literally named e.g. "sleep 10" and fail.
    exec "$@"
fi
# Alternative kept for reference: run the command as testuser in a login shell.
#exec sudo -i -u testuser /bin/bash -c "$*"
Loading