Commits on Source (10)
@@ -14,8 +14,5 @@ __pycache__/
# Data
data*
-# DB conf
-services/webapp/db_conf.sh
# Compose
docker-compose.yml
@@ -6,5 +6,5 @@ if [ ! -d ./services ]; then
    exit 1
fi
-rosetta/shell webapp "cd /opt/code && source /env.sh && source /db_conf.sh && BACKEND_LOG_LEVEL=ERROR python3 manage.py makemigrations"
+rosetta/shell webapp "cd /opt/code && source /env.sh && BACKEND_LOG_LEVEL=ERROR python3 manage.py makemigrations"
@@ -6,4 +6,4 @@ if [ ! -d ./services ]; then
    exit 1
fi
-rosetta/shell webapp "cd /opt/code && source /env.sh && source /db_conf.sh && BACKEND_LOG_LEVEL=ERROR python3 manage.py migrate"
+rosetta/shell webapp "cd /opt/code && source /env.sh && BACKEND_LOG_LEVEL=ERROR python3 manage.py migrate"
@@ -405,13 +405,13 @@ class FileManagerAPI(PrivateGETAPI, PrivatePOSTAPI):
        dest = dest.replace('\ ', '\\\\\\ ')
        # Get credentials
-        computing_user, computing_host, computing_keys = get_ssh_access_mode_credentials(computing, user)
+        computing_user, computing_host, computing_port, computing_keys = get_ssh_access_mode_credentials(computing, user)
        # Command
        if mode=='get':
-            command = 'scp -o LogLevel=ERROR -i {} -4 -o StrictHostKeyChecking=no {}@{}:{} {}'.format(computing_keys.private_key_file, computing_user, computing_host, source, dest)
+            command = 'scp -P {} -o LogLevel=ERROR -i {} -4 -o StrictHostKeyChecking=no {}@{}:{} {}'.format(computing_port, computing_keys.private_key_file, computing_user, computing_host, source, dest)
        elif mode == 'put':
-            command = 'scp -o LogLevel=ERROR -i {} -4 -o StrictHostKeyChecking=no {} {}@{}:{}'.format(computing_keys.private_key_file, source, computing_user, computing_host, dest)
+            command = 'scp -P {} -o LogLevel=ERROR -i {} -4 -o StrictHostKeyChecking=no {} {}@{}:{}'.format(computing_port, computing_keys.private_key_file, source, computing_user, computing_host, dest)
        else:
            raise ValueError('Unknown mode "{}"'.format(mode))
@@ -423,10 +423,10 @@ class FileManagerAPI(PrivateGETAPI, PrivatePOSTAPI):
        if storage.access_mode == 'ssh+cli':
            if storage.access_through_computing:
                # Get credentials
-                computing_user, computing_host, computing_keys = get_ssh_access_mode_credentials(storage.computing, user)
+                computing_user, computing_host, computing_port, computing_keys = get_ssh_access_mode_credentials(storage.computing, user)
                # Command
-                command = 'ssh -o LogLevel=ERROR -i {} -4 -o StrictHostKeyChecking=no {}@{} "{}"'.format(computing_keys.private_key_file, computing_user, computing_host, command)
+                command = 'ssh -p {} -o LogLevel=ERROR -i {} -4 -o StrictHostKeyChecking=no {}@{} "{}"'.format(computing_port, computing_keys.private_key_file, computing_user, computing_host, command)
            else:
                raise NotImplementedError('Not accessing through computing is not implemented for storage type "{}"'.format(storage.type))
        elif storage.access_mode == 'cli':
...
@@ -132,7 +132,7 @@ class InternalStandaloneComputingManager(StandaloneComputingManager):
        #run_command += ' -v {}/user-{}:/data'.format(settings.LOCAL_USER_DATA_DIR, task.user.id)
        # Host name, image entry command
-        run_command += ' -h task-{} -d -t {}/{}:{}'.format(task.short_uuid, task.container.registry, task.container.image_name, task.container.image_tag)
+        run_command += ' -h task-{} --name task-{} -d -t {}/{}:{}'.format(task.short_uuid, task.short_uuid, task.container.registry, task.container.image_name, task.container.image_tag)
        # Debug
        logger.debug('Running new task with command="{}"'.format(run_command))
@@ -140,37 +140,28 @@ class InternalStandaloneComputingManager(StandaloneComputingManager):
        # Run the task
        out = os_shell(run_command, capture=True)
        if out.exit_code != 0:
+            logger.error('Got error in starting task: {}'.format(out))
            raise Exception(out.stderr)
        else:
-            tid = out.stdout
-            logger.debug('Created task with id: "{}"'.format(tid))
            # Get task IP address
-            out = os_shell('sudo docker inspect --format \'{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}\' ' + tid + ' | tail -n1', capture=True)
+            out = os_shell('export CONTAINER_ID=$(sudo docker ps -a --filter name=task-'+task.short_uuid+' --format {{.ID}}) && sudo docker inspect --format \'{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}\' $CONTAINER_ID | tail -n1', capture=True)
            if out.exit_code != 0:
                raise Exception('Error: ' + out.stderr)
            task_ip = out.stdout
            # Set fields
-            task.id = tid
            task.status = TaskStatuses.running
            task.interface_ip = task_ip
            task.interface_port = task_port
            # Save
            task.save()
-            # Wait 10 seconds to see if the task is still up...
    def _stop_task(self, task):
        # Delete the Docker container
-        standby_supported = False
-        if standby_supported:
-            stop_command = 'sudo docker stop {}'.format(task.id)
-        else:
-            stop_command = 'sudo docker stop {} && sudo docker rm {}'.format(task.id,task.id)
+        stop_command = 'export CONTAINER_ID=$(sudo docker ps -a --filter name=task-'+task.short_uuid+' --format {{.ID}}) && sudo docker stop $CONTAINER_ID && sudo docker rm $CONTAINER_ID'
        out = os_shell(stop_command, capture=True)
        if out.exit_code != 0:
@@ -187,7 +178,7 @@ class InternalStandaloneComputingManager(StandaloneComputingManager):
    def _get_task_log(self, task, **kwargs):
        # View the Docker container log (attach)
-        view_log_command = 'sudo docker logs {}'.format(task.id,)
+        view_log_command = 'export CONTAINER_ID=$(sudo docker ps -a --filter name=task-'+task.short_uuid+' --format {{.ID}}) && sudo docker logs $CONTAINER_ID'
        logger.debug(view_log_command)
        out = os_shell(view_log_command, capture=True)
        if out.exit_code != 0:
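With the stored container id gone, the Internal manager above re-resolves containers from their task-<short_uuid> name whenever it needs to inspect, stop or read logs. For illustration only, a minimal standalone sketch of that lookup pattern (not part of these commits; the example uuid and the use of sudo are assumptions):

import subprocess

def find_task_container_id(short_uuid):
    """Hypothetical helper: resolve a container id from its 'task-<short_uuid>' name."""
    result = subprocess.run(['sudo', 'docker', 'ps', '-a',
                             '--filter', 'name=task-{}'.format(short_uuid),
                             '--format', '{{.ID}}'],
                            capture_output=True, text=True, check=True)
    container_id = result.stdout.strip()
    if not container_id:
        raise RuntimeError('No container found for task-{}'.format(short_uuid))
    return container_id

# Example usage: stop and remove a task container by name rather than by a stored id
# container_id = find_task_container_id('abc12345')
# subprocess.run(['sudo', 'docker', 'stop', container_id], check=True)
# subprocess.run(['sudo', 'docker', 'rm', container_id], check=True)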
@@ -203,7 +194,7 @@ class SSHStandaloneComputingManager(StandaloneComputingManager, SSHComputingManager):
        logger.debug('Starting a remote task "{}"'.format(self.computing))
        # Get credentials
-        computing_user, computing_host, computing_keys = get_ssh_access_mode_credentials(self.computing, task.user)
+        computing_user, computing_host, computing_port, computing_keys = get_ssh_access_mode_credentials(self.computing, task.user)
        # Get webapp conn string
        from .utils import get_webapp_conn_string
@@ -270,7 +261,7 @@ class SSHStandaloneComputingManager(StandaloneComputingManager, SSHComputingManager):
        else:
            binds += ',{}:{}'.format(expanded_base_path, expanded_bind_path)
-        run_command = 'ssh -o LogLevel=ERROR -i {} -4 -o StrictHostKeyChecking=no {}@{} '.format(computing_keys.private_key_file, computing_user, computing_host)
+        run_command = 'ssh -p {} -o LogLevel=ERROR -i {} -4 -o StrictHostKeyChecking=no {}@{} '.format(computing_port, computing_keys.private_key_file, computing_user, computing_host)
        run_command += '/bin/bash -c \'"rm -rf /tmp/{}_data && mkdir -p /tmp/{}_data/tmp && mkdir -p /tmp/{}_data/home && chmod 700 /tmp/{}_data && '.format(task.uuid, task.uuid, task.uuid, task.uuid)
        run_command += 'wget {} {}/api/v1/base/agent/?task_uuid={} -O /tmp/{}_data/agent.py &> /dev/null && export BASE_PORT=\$(python /tmp/{}_data/agent.py 2> /tmp/{}_data/task.log) && '.format(CHECK_WGET_CERT_STR, webapp_conn_string, task.uuid, task.uuid, task.uuid, task.uuid)
        run_command += 'export SINGULARITY_NOHTTPS=true && export SINGULARITYENV_BASE_PORT=\$BASE_PORT {} {} &&'.format(authstring, varsstring)
@@ -333,21 +324,22 @@ class SSHStandaloneComputingManager(StandaloneComputingManager, SSHComputingManager):
            # TODO: remove this hardcoding
            prefix = 'sudo' if (computing_host == 'slurmclusterworker' and container_engine=='docker') else ''
-            run_command = 'ssh -o LogLevel=ERROR -i {} -4 -o StrictHostKeyChecking=no {}@{} '.format(computing_keys.private_key_file, computing_user, computing_host)
+            run_command = 'ssh -p {} -o LogLevel=ERROR -i {} -4 -o StrictHostKeyChecking=no {}@{} '.format(computing_port, computing_keys.private_key_file, computing_user, computing_host)
            run_command += '/bin/bash -c \'"rm -rf /tmp/{}_data && mkdir /tmp/{}_data && chmod 700 /tmp/{}_data && '.format(task.uuid, task.uuid, task.uuid)
            run_command += 'wget {} {}/api/v1/base/agent/?task_uuid={} -O /tmp/{}_data/agent.py &> /dev/null && export TASK_PORT=\$(python /tmp/{}_data/agent.py 2> /tmp/{}_data/task.log) && '.format(CHECK_WGET_CERT_STR, webapp_conn_string, task.uuid, task.uuid, task.uuid, task.uuid)
            run_command += 'exec nohup {} {} run -p \$TASK_PORT:{} {} {} {} '.format(prefix, container_engine, task.container.interface_port, authstring, varsstring, binds)
            if container_engine == 'podman':
                run_command += '--network=private --uts=private --userns=keep-id '
            #run_command += '-d -t {}/{}:{}'.format(task.container.registry, task.container.image_name, task.container.image_tag)
-            run_command += '-h task-{} -t {}/{}:{}'.format(task.short_uuid, task.container.registry, task.container.image_name, task.container.image_tag)
+            run_command += '-h task-{} --name task-{} -t {}/{}:{}'.format(task.short_uuid, task.short_uuid, task.container.registry, task.container.image_name, task.container.image_tag)
-            run_command += '&>> /tmp/{}_data/task.log & echo \$!"\''.format(task.uuid)
+            run_command += '&>> /tmp/{}_data/task.log &"\''.format(task.uuid)
        else:
            raise NotImplementedError('Container engine {} not supported'.format(container_engine))
        out = os_shell(run_command, capture=True)
        if out.exit_code != 0:
+            logger.error('Got error in starting task: {}'.format(out))
            raise Exception(out.stderr)
        # Log
@@ -357,17 +349,18 @@ class SSHStandaloneComputingManager(StandaloneComputingManager, SSHComputingManager):
        task_uuid = task.uuid
        task = Task.objects.get(uuid=task_uuid)
-        # Save pid echoed by the command above
-        task.id = out.stdout
+        # Save the task (container) id for Singularity, which is the PID echoed by the command above
+        if container_engine == 'singularity':
+            task.process_id = out.stdout
        # Save
        task.save()

    def _stop_task(self, task, **kwargs):
        # Get credentials
-        computing_user, computing_host, computing_keys = get_ssh_access_mode_credentials(self.computing, task.user)
+        computing_user, computing_host, computing_port, computing_keys = get_ssh_access_mode_credentials(self.computing, task.user)
        # Handle container engine
        container_engine = None
@@ -377,20 +370,23 @@ class SSHStandaloneComputingManager(StandaloneComputingManager, SSHComputingManager):
            container_engine = task.computing.default_container_engine
        if container_engine=='singularity':
-            internal_stop_command = 'kill -9 {}'.format(task.id)
+            internal_stop_command = 'kill -9 {}'.format(task.process_id)
        elif container_engine in ['docker', 'podman']:
            # TODO: remove this hardcoding
            prefix = 'sudo' if (computing_host == 'slurmclusterworker' and container_engine=='docker') else ''
-            internal_stop_command = '{} {} stop {} && {} {} rm {}'.format(prefix,container_engine,task.id,prefix,container_engine,task.id)
+            internal_stop_command = 'export CONTAINER_ID=$('+prefix+' '+container_engine+' ps -a --filter name=task-'+task.short_uuid+' --format {{.ID}}) &&'
+            internal_stop_command += 'if [ "x$CONTAINER_ID" != "x" ]; then {} {} stop $CONTAINER_ID && {} {} rm $CONTAINER_ID; fi'.format(prefix,container_engine,prefix,container_engine)
        else:
            raise NotImplementedError('Container engine {} not supported'.format(container_engine))
-        stop_command = 'ssh -o LogLevel=ERROR -i {} -4 -o StrictHostKeyChecking=no {}@{} \'/bin/bash -c "{}"\''.format(computing_keys.private_key_file, computing_user, computing_host, internal_stop_command)
+        stop_command = 'ssh -p {} -o LogLevel=ERROR -i {} -4 -o StrictHostKeyChecking=no {}@{} \'/bin/bash -c "{}"\''.format(computing_port, computing_keys.private_key_file, computing_user, computing_host, internal_stop_command)
        out = os_shell(stop_command, capture=True)
        if out.exit_code != 0:
            if ('No such process' in out.stderr) or ('No such container' in out.stderr) or ('no container' in out.stderr) or ('missing' in out.stderr):
                pass
            else:
+                logger.critical('Got error in stopping task: {}'.format(out))
                raise Exception(out.stderr)
        # Set task as stopped
@@ -401,7 +397,7 @@ class SSHStandaloneComputingManager(StandaloneComputingManager, SSHComputingManager):
    def _get_task_log(self, task, **kwargs):
        # Get credentials
-        computing_user, computing_host, computing_keys = get_ssh_access_mode_credentials(self.computing, task.user)
+        computing_user, computing_host, computing_port, computing_keys = get_ssh_access_mode_credentials(self.computing, task.user)
        # Handle container engine
        container_engine = None
@@ -413,15 +409,13 @@ class SSHStandaloneComputingManager(StandaloneComputingManager, SSHComputingManager):
        if container_engine=='singularity':
            internal_view_log_command = 'cat /tmp/{}_data/task.log'.format(task.uuid)
        elif container_engine in ['docker','podman']:
-            # TODO: remove this hardcoding
-            #prefix = 'sudo' if (computing_host == 'slurmclusterworker' and container_engine=='docker') else ''
-            #internal_view_log_command = '{} {} logs {}'.format(prefix,container_engine,task.id)
+            # TODO: consider podman/docker logs?
            internal_view_log_command = 'cat /tmp/{}_data/task.log'.format(task.uuid)
        else:
            raise NotImplementedError('Container engine {} not supported'.format(container_engine))
        # Prepare full command
-        view_log_command = 'ssh -o LogLevel=ERROR -i {} -4 -o StrictHostKeyChecking=no {}@{} \'/bin/bash -c "{}"\''.format(computing_keys.private_key_file, computing_user, computing_host, internal_view_log_command)
+        view_log_command = 'ssh -p {} -o LogLevel=ERROR -i {} -4 -o StrictHostKeyChecking=no {}@{} \'/bin/bash -c "{}"\''.format(computing_port, computing_keys.private_key_file, computing_user, computing_host, internal_view_log_command)
        # Execute
        out = os_shell(view_log_command, capture=True)
@@ -437,7 +431,7 @@ class SlurmSSHClusterComputingManager(ClusterComputingManager, SSHComputingManager):
        logger.debug('Starting a remote task "{}"'.format(self.computing))
        # Get credentials
-        computing_user, computing_host, computing_keys = get_ssh_access_mode_credentials(self.computing, task.user)
+        computing_user, computing_host, computing_port, computing_keys = get_ssh_access_mode_credentials(self.computing, task.user)
        # Get webapp conn string
        from .utils import get_webapp_conn_string
@@ -524,7 +518,7 @@ class SlurmSSHClusterComputingManager(ClusterComputingManager, SSHComputingManager):
        else:
            binds += ',{}:{}'.format(expanded_base_path, expanded_bind_path)
-        run_command = 'ssh -o LogLevel=ERROR -i {} -4 -o StrictHostKeyChecking=no {}@{} '.format(computing_keys.private_key_file, computing_user, computing_host)
+        run_command = 'ssh -p {} -o LogLevel=ERROR -i {} -4 -o StrictHostKeyChecking=no {}@{} '.format(computing_port, computing_keys.private_key_file, computing_user, computing_host)
        run_command += '\'bash -c "echo \\"#!/bin/bash\nwget {} {}/api/v1/base/agent/?task_uuid={} -O \$HOME/agent_{}.py &> \$HOME/{}.log && export BASE_PORT=\\\\\\$(python \$HOME/agent_{}.py 2> \$HOME/{}.log) && '.format(CHECK_WGET_CERT_STR, webapp_conn_string, task.uuid, task.uuid, task.uuid, task.uuid, task.uuid)
        run_command += 'export SINGULARITY_NOHTTPS=true && export SINGULARITYENV_BASE_PORT=\\\\\\$BASE_PORT {} {} && '.format(authstring, varsstring)
        run_command += 'rm -rf /tmp/{}_data && mkdir -p /tmp/{}_data/tmp &>> \$HOME/{}.log && mkdir -p /tmp/{}_data/home &>> \$HOME/{}.log && chmod 700 /tmp/{}_data && '.format(task.uuid, task.uuid, task.uuid, task.uuid, task.uuid, task.uuid)
@@ -538,6 +532,7 @@ class SlurmSSHClusterComputingManager(ClusterComputingManager, SSHComputingManager):
        out = os_shell(run_command, capture=True)
        if out.exit_code != 0:
+            logger.error('Got error in starting task: {}'.format(out))
            raise Exception(out.stderr)
        # Log
@@ -554,8 +549,8 @@ class SlurmSSHClusterComputingManager(ClusterComputingManager, SSHComputingManager):
        task_uuid = task.uuid
        task = Task.objects.get(uuid=task_uuid)
-        # Save job id as task id
-        task.id = job_id
+        # Save job id
+        task.job_id = job_id
        # Set status (only if we get here before the agent which sets the status as running via the API)
        if task.status != TaskStatuses.running:
@@ -568,10 +563,10 @@ class SlurmSSHClusterComputingManager(ClusterComputingManager, SSHComputingManager):
    def _stop_task(self, task, **kwargs):
        # Get credentials
-        computing_user, computing_host, computing_keys = get_ssh_access_mode_credentials(self.computing, task.user)
+        computing_user, computing_host, computing_port, computing_keys = get_ssh_access_mode_credentials(self.computing, task.user)
        # Stop the task remotely
-        stop_command = 'ssh -o LogLevel=ERROR -i {} -4 -o StrictHostKeyChecking=no {}@{} \'/bin/bash -c "scancel {}"\''.format(computing_keys.private_key_file, computing_user, computing_host, task.id)
+        stop_command = 'ssh -o LogLevel=ERROR -i {} -4 -o StrictHostKeyChecking=no {}@{} \'/bin/bash -c "scancel {}"\''.format(computing_keys.private_key_file, computing_user, computing_host, task.job_id)
        out = os_shell(stop_command, capture=True)
        if out.exit_code != 0:
            raise Exception(out.stderr)
@@ -584,10 +579,10 @@ class SlurmSSHClusterComputingManager(ClusterComputingManager, SSHComputingManager):
    def _get_task_log(self, task, **kwargs):
        # Get credentials
-        computing_user, computing_host, computing_keys = get_ssh_access_mode_credentials(self.computing, task.user)
+        computing_user, computing_host, computing_port, computing_keys = get_ssh_access_mode_credentials(self.computing, task.user)
        # View log remotely
-        view_log_command = 'ssh -o LogLevel=ERROR -i {} -4 -o StrictHostKeyChecking=no {}@{} \'/bin/bash -c "cat \$HOME/{}.log"\''.format(computing_keys.private_key_file, computing_user, computing_host, task.uuid)
+        view_log_command = 'ssh -p {} -o LogLevel=ERROR -i {} -4 -o StrictHostKeyChecking=no {}@{} \'/bin/bash -c "cat \$HOME/{}.log"\''.format(computing_port, computing_keys.private_key_file, computing_user, computing_host, task.uuid)
        out = os_shell(view_log_command, capture=True)
        if out.exit_code != 0:
...
@@ -288,8 +288,8 @@ to provide help, news and informations on your deployment. Or you can just ignor
    # Demo standalone platform computing plus conf
-    demo_standalone_computing_platform = Computing.objects.create(name = 'Demo Standalone Platform',
+    demo_standalone_computing_platform = Computing.objects.create(name = 'Demo Standalone (as platform user)',
-        description = 'A demo standalone computing resource access as platform.',
+        description = 'A demo standalone computing resource accessed as platform user.',
        type = 'standalone',
        arch = 'amd64',
        supported_archs = ['386'],
@@ -359,25 +359,3 @@ to provide help, news and informations on your deployment. Or you can just ignor
        base_path = '/shared/data/users/$SSH_USER',
        bind_path = '/storages/personal')
-    try:
-        demo_standalone_computing = Computing.objects.get(name='Demo Standalone Platform')
-        demo_computing_resources.append(demo_standalone_computing)
-        # Demo personal storage
-        Storage.objects.create(computing = computing,
-            access_through_computing = True,
-            name = 'Personal',
-            type = 'generic_posix',
-            access_mode = 'ssh+cli',
-            auth_mode = 'user_keys',
-            base_path = '/shared/data/users/$SSH_USER',
-            bind_path = '/storages/personal')
-    except:
-        pass
# Generated by Django 2.2.1 on 2023-10-07 10:52

from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        ('core_app', '0033_auto_20220410_1531'),
    ]

    operations = [
        migrations.AlterField(
            model_name='task',
            name='requires_tcp_tunnel',
            field=models.BooleanField(verbose_name='Requires TCP tunnel'),
        ),
    ]
# Generated by Django 2.2.1 on 2023-10-07 12:49

from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        ('core_app', '0034_auto_20231007_1052'),
    ]

    operations = [
        migrations.RemoveField(
            model_name='task',
            name='id',
        ),
        migrations.AddField(
            model_name='task',
            name='job_id',
            field=models.CharField(blank=True, max_length=64, null=True, verbose_name='Job ID'),
        ),
        migrations.AddField(
            model_name='task',
            name='process_id',
            field=models.CharField(blank=True, max_length=64, null=True, verbose_name='Process ID'),
        ),
    ]
@@ -274,9 +274,10 @@ class Task(models.Model):
    name = models.CharField('Name', max_length=36, blank=False, null=False)
    # Task management
-    id = models.CharField('ID', max_length=64, blank=True, null=True) # i.e. Slurm job id, singularity PID, docker hash
    status = models.CharField('Status', max_length=36, blank=True, null=True)
    created = models.DateTimeField('Created on', default=timezone.now)
+    process_id = models.CharField('Process ID', max_length=64, blank=True, null=True) # i.e. Singularity PID
+    job_id = models.CharField('Job ID', max_length=64, blank=True, null=True) # i.e. Slurm job id
    # How to reach the task interface. The IP has to be intended either as the container IP if this is directly
    # reachable (i.e. using a Docker or Kubernetes network) or as the host IP address, depending on the
@@ -286,7 +287,7 @@ class Task(models.Model):
    interface_port = models.IntegerField('Interface port', blank=True, null=True)
    # Task access
-    requires_tcp_tunnel = models.BooleanField('Requires a TCP tunnel')
+    requires_tcp_tunnel = models.BooleanField('Requires TCP tunnel')
    tcp_tunnel_port = models.IntegerField('TCP tunnel port', blank=True, null=True)
    requires_proxy = models.BooleanField('Requires proxy')
    requires_proxy_auth = models.BooleanField('Requires proxy auth')
...
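As the model change above shows, the generic Task.id is replaced by purpose-specific fields: process_id for the Singularity PID, job_id for the Slurm job, while Docker/Podman containers are re-resolved by name. A condensed, illustrative sketch of the stop-side dispatch this enables (not the actual manager code):

def build_stop_handle(task, container_engine):
    """Illustrative only: which identifier the managers above rely on to stop a task."""
    if container_engine == 'singularity':
        # Singularity: kill the PID captured when the task was started
        return 'kill -9 {}'.format(task.process_id)
    if container_engine in ('docker', 'podman'):
        # Docker/Podman: look the container up again by its task-<short_uuid> name
        return ('export CONTAINER_ID=$({eng} ps -a --filter name=task-{uuid} --format {{{{.ID}}}}) && '
                'if [ "x$CONTAINER_ID" != "x" ]; then {eng} stop $CONTAINER_ID && {eng} rm $CONTAINER_ID; fi'
                ).format(eng=container_engine, uuid=task.short_uuid)
    # Slurm cluster tasks are cancelled by job id instead (scancel on task.job_id)
    raise NotImplementedError('Container engine {} not supported'.format(container_engine))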
@@ -44,10 +44,10 @@
<!-- <a href="/computing/?uuid={{ task.computing.uuid }}" no_style="color:{{task.computing.color}}"><i class="fa fa-external-link" ></i></a><br/> -->
<div style="margin-top:2px">
-    {% if task.verified_status == "running" %}
+    {% if task.status == "running" %}
        <b>Status:</b> <font color="green">running</font>
    {% else %}
-        <b>Status:</b> {{ task.verified_status }}
+        <b>Status:</b> {{ task.status }}
    {% endif %}
</div>
</div>
@@ -63,8 +63,8 @@
<!-- Connect -->
{% if task.interface_port %}
-    {% if task.verified_status == "running" %}
-        <a href="/task_connect/?uuid={{task.uuid}}" class="btn btn-connect" target="_blank">Connect</a>
+    {% if task.status == "running" %}
+        <a href="/task_connect/?uuid={{task.uuid}}" class="btn btn-connect">Connect</a>
    {% else %}
        <a href="" class="btn btn-disabled">Connect</a>
    {% endif %}
@@ -112,8 +112,8 @@
</tr> -->
<!-- <tr>
-    <td><b>ID</b></td>
-    <td>{{ task.id }}</td>
+    <td><b>UUID</b></td>
+    <td>{{ task.uuid }}</td>
</tr> -->
<tr>
...
@@ -3,6 +3,7 @@
{% include "navigation.html"%}
<!-- with body_args="style='background: #202020'" -->
<center>
<div style="width:370px;">
<form class="form-signin" role="form" action='/direct_connect/{{data.task.uuid}}/' method='POST'>
{% csrf_token %}
@@ -14,6 +15,14 @@
<p style="font-size: 16px;">
<br />
+{% if not data.task.interface_status == 'running' %}
+    <br/>
+    <div class="alert alert-warning" role="alert"><i class="fa fa-warning"></i> the task interface is not up, cannot connect.</div>
+    Please check the <a href="/task_log/?uuid={{ data.task.uuid }}&action=viewlog">task logs</a>.
+    <br/><br/>
+    <i>Note: if you just launched the task, this alert might be due to the normal task startup time.</i>
+{% else %}
{% if not data.task.requires_proxy_auth %}
    {% if data.task.container.interface_auth_user %}
        User: <input style="margin-bottom:15px;" type="username" class="form-control" value="{{ data.task.container.interface_auth_user }}" name='username' readonly >
@@ -75,14 +84,14 @@
        <b>Port:</b> <code>{{ data.task.tcp_tunnel_port}}</code>
    </p>
{% endif %}
+{% endif%}
</p>
</form>
</div>
<br /><br />
+{% if data.task.interface_status == 'running' %}
{% if data.task.requires_proxy_auth %}
    <p style="margin-left:10px; font-size:0.9em; color:rgb(200,200,200); max-width:600px">
        <i class="fa fa-info-warning" style="color:#337ab7"></i>
@@ -90,6 +99,7 @@
        to a web browser which supports embedding user credentials in the connection URL (such as Chrome, Edge or Firefox).
    </p>
{% endif %}
+{% endif %}
<br /><br /><br />
</center>
...
@@ -12,7 +12,7 @@
<hr>
-<b>ID:</b> {{ data.task.id }} &nbsp; &nbsp;
+<b>UUID:</b> {{ data.task.uuid }} &nbsp; &nbsp;
<b>Status:</b> {{ data.task.status }} &nbsp; &nbsp;
<b>Auto refresh:{{data.refresh}}</b>&nbsp;
{% if not data.refresh %} OFF {% else %} <a href="?uuid={{data.task.uuid}}">OFF</a> {% endif %} |
...
@@ -32,9 +32,6 @@ color_map = ["#440154", "#440558", "#450a5c", "#450e60", "#451465", "#461969",
             "#97d73e", "#9ed93a", "#a8db34", "#b0dd31", "#b8de30", "#c3df2e",
             "#cbe02d", "#d6e22b", "#e1e329", "#eae428", "#f5e626", "#fde725"]
-#======================
-# Utility functions
-#======================
def booleanize(*args, **kwargs):
    # Handle both single value and kwargs to get arg name
@@ -265,10 +262,6 @@ def get_md5(string):
    return md5
-#=========================
-# Time
-#=========================
def timezonize(timezone):
    '''Convert a string representation of a timezone to its pytz object or do nothing if the argument is already a pytz timezone'''
@@ -283,14 +276,17 @@ def timezonize(timezone):
        timezone = pytz.timezone(timezone)
    return timezone

def now_t():
    '''Return the current time in epoch seconds'''
    return now_s()

def now_s():
    '''Return the current time in epoch seconds'''
    return calendar.timegm(now_dt().utctimetuple())

def now_dt(tzinfo='UTC'):
    '''Return the current time in datetime format'''
    if tzinfo != 'UTC':
@@ -335,10 +331,12 @@ def dt(*args, **kwargs):
    return time_dt

def get_tz_offset_s(time_dt):
    '''Get the time zone offset in seconds'''
    return s_from_dt(time_dt.replace(tzinfo=pytz.UTC)) - s_from_dt(time_dt)

def check_dt_consistency(date_dt):
    '''Check that the timezone is consistent with the datetime (some conditions in Python lead to have summertime set in winter)'''
@@ -355,6 +353,7 @@ def check_dt_consistency(date_dt):
    else:
        return True

def correct_dt_dst(datetime_obj):
    '''Check that the dst is correct and if not change it'''
@@ -374,14 +373,17 @@ def correct_dt_dst(datetime_obj):
                            datetime_obj.microsecond,
                            tzinfo=datetime_obj.tzinfo)

def change_tz(dt, tz):
    return dt.astimezone(timezonize(tz))

def dt_from_t(timestamp_s, tz=None):
    '''Create a datetime object from an epoch timestamp in seconds. If no timezone is given, UTC is assumed'''
    # TODO: check if uniform everything on this one or not.
    return dt_from_s(timestamp_s=timestamp_s, tz=tz)

def dt_from_s(timestamp_s, tz=None):
    '''Create a datetime object from an epoch timestamp in seconds. If no timezone is given, UTC is assumed'''
@@ -397,6 +399,7 @@ def dt_from_s(timestamp_s, tz=None):
    return timestamp_dt

def s_from_dt(dt):
    '''Returns seconds with floating point for milliseconds/microseconds.'''
    if not (isinstance(dt, datetime.datetime)):
@@ -404,6 +407,7 @@ def s_from_dt(dt):
    microseconds_part = (dt.microsecond/1000000.0) if dt.microsecond else 0
    return ( calendar.timegm(dt.utctimetuple()) + microseconds_part)

def dt_from_str(string, timezone=None):
    # Supported formats on UTC
@@ -458,10 +462,12 @@ def dt_from_str(string, timezone=None):
    return dt(year, month, day, hour, minute, second, usecond, offset_s=offset_s)

def dt_to_str(dt):
    '''Return the ISO representation of the datetime as argument'''
    return dt.isoformat()

class dt_range(object):

    def __init__(self, from_dt, to_dt, timeSlotSpan):
@@ -489,20 +495,18 @@ class dt_range(object):
        return self.__next__()
-#================================
-# Others
--#================================
def debug_param(**kwargs):
    for item in kwargs:
        logger.critical('Param "{}": "{}"'.format(item, kwargs[item]))

def get_my_ip():
    import socket
    hostname = socket.gethostname()
    my_ip = socket.gethostbyname(hostname)
    return my_ip
def get_webapp_conn_string():
    webapp_ssl = booleanize(os.environ.get('ROSETTA_WEBAPP_SSL', False))
    webapp_host = os.environ.get('ROSETTA_WEBAPP_HOST', get_my_ip())
@@ -513,32 +517,68 @@ def get_webapp_conn_string():
        webapp_conn_string = 'http://{}:{}'.format(webapp_host, webapp_port)
    return webapp_conn_string

def get_platform_registry():
    platform_registry_host = os.environ.get('PLATFORM_REGISTRY_HOST', 'proxy')
    platform_registry_port = os.environ.get('PLATFORM_REGISTRY_PORT', '5000')
    platform_registry_conn_string = '{}:{}'.format(platform_registry_host, platform_registry_port)
    return platform_registry_conn_string

def get_rosetta_tasks_tunnel_host():
    # Importing here instead of on top avoids circular dependencies problems when loading booleanize in settings
    from django.conf import settings
    tunnel_host = os.environ.get('ROSETTA_TASKS_TUNNEL_HOST', settings.ROSETTA_HOST)
    return tunnel_host

def get_rosetta_tasks_proxy_host():
    # Importing here instead of on top avoids circular dependencies problems when loading booleanize in settings
    from django.conf import settings
    proxy_host = os.environ.get('ROSETTA_TASKS_PROXY_HOST', settings.ROSETTA_HOST)
    return proxy_host

def hash_string_to_int(string):
    return int(hashlib.sha1(string.encode('utf8')).hexdigest(), 16)
+def get_ssh_access_mode_credentials(computing, user):
+
+    from .models import KeyPair
+
+    # Get computing host
+    try:
+        computing_host = computing.conf.get('host')
+    except AttributeError:
+        computing_host = None
+    if not computing_host:
+        raise ValueError('No computing host?!')
+
+    # Get computing (SSH) port
+    try:
+        computing_port = computing.conf.get('port')
+    except AttributeError:
+        computing_port = 22
+    if not computing_port:
+        computing_port = 22
+
+    # Get computing user and keys
+    if computing.auth_mode == 'user_keys':
+        computing_user = user.profile.get_extra_conf('computing_user', computing)
+        if not computing_user:
+            raise ValueError('No \'computing_user\' parameter found for computing resource \'{}\' in user profile'.format(computing.name))
+        # Get user key
+        computing_keys = KeyPair.objects.get(user=user, default=True)
+    elif computing.auth_mode == 'platform_keys':
+        computing_user = computing.conf.get('user')
+        computing_keys = KeyPair.objects.get(user=None, default=True)
+    else:
+        raise NotImplementedError('Auth modes other than user_keys and platform_keys not supported.')
+    if not computing_user:
+        raise ValueError('No \'user\' parameter found for computing resource \'{}\' in its configuration'.format(computing.name))
+
+    return (computing_user, computing_host, computing_port, computing_keys)
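get_ssh_access_mode_credentials now also resolves the SSH port (defaulting to 22), so every caller unpacks a 4-tuple and passes the port with -p for ssh or -P for scp. A minimal usage sketch, assuming a resolvable computing/user pair; the remote log path is just a placeholder:

computing_user, computing_host, computing_port, computing_keys = get_ssh_access_mode_credentials(computing, user)

# Build a port-aware ssh prefix once and reuse it for remote commands
ssh_prefix = 'ssh -p {} -o LogLevel=ERROR -i {} -4 -o StrictHostKeyChecking=no {}@{}'.format(
    computing_port, computing_keys.private_key_file, computing_user, computing_host)

# e.g. read a (placeholder) task log on the remote resource
out = os_shell('{} \'/bin/bash -c "cat /tmp/example_task_data/task.log"\''.format(ssh_prefix), capture=True)
if out.exit_code != 0:
    raise Exception(out.stderr)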
+#================================
+# Tunnel (and proxy) setup
+#================================
def setup_tunnel_and_proxy(task):
@@ -602,7 +642,13 @@ def setup_tunnel_and_proxy(task):
        tunnel_command= 'ssh -4 -i {} -o StrictHostKeyChecking=no -nNT -L 0.0.0.0:{}:{}:{} {}@{} & '.format(user_keys.private_key_file, task.tcp_tunnel_port, task.interface_ip, task.interface_port, first_user, first_host)
    else:
-        tunnel_command= 'ssh -4 -o StrictHostKeyChecking=no -nNT -L 0.0.0.0:{}:{}:{} localhost & '.format(task.tcp_tunnel_port, task.interface_ip, task.interface_port)
+        if task.computing.access_mode.startswith('ssh'):
+            computing_user, computing_host, computing_port, computing_keys = get_ssh_access_mode_credentials(task.computing, task.user)
+            tunnel_command = 'ssh -p {} -o LogLevel=ERROR -i {} -4 -o StrictHostKeyChecking=no -o ConnectTimeout=10 '.format(computing_port, computing_keys.private_key_file)
+            tunnel_command += '-nNT -L 0.0.0.0:{}:{}:{} {}@{}'.format(task.tcp_tunnel_port, task.interface_ip, task.interface_port, computing_user, computing_host)
+        else:
+            tunnel_command= 'ssh -4 -o StrictHostKeyChecking=no -nNT -L 0.0.0.0:{}:{}:{} localhost & '.format(task.tcp_tunnel_port, task.interface_ip, task.interface_port)
    background_tunnel_command = 'nohup {} >/dev/null 2>&1 &'.format(tunnel_command)
@@ -713,36 +759,6 @@ Listen '''+str(task.tcp_tunnel_port)+'''
        raise ErrorMessage('Something went wrong when loading the task proxy conf')
-def get_ssh_access_mode_credentials(computing, user):
-
-    from .models import KeyPair
-
-    # Get computing host
-    try:
-        computing_host = computing.conf.get('host')
-    except AttributeError:
-        computing_host = None
-    if not computing_host:
-        raise ValueError('No computing host?!')
-
-    # Get computing user and keys
-    if computing.auth_mode == 'user_keys':
-        computing_user = user.profile.get_extra_conf('computing_user', computing)
-        if not computing_user:
-            raise ValueError('No \'computing_user\' parameter found for computing resource \'{}\' in user profile'.format(computing.name))
-        # Get user key
-        computing_keys = KeyPair.objects.get(user=user, default=True)
-    elif computing.auth_mode == 'platform_keys':
-        computing_user = computing.conf.get('user')
-        computing_keys = KeyPair.objects.get(user=None, default=True)
-    else:
-        raise NotImplementedError('Auth modes other than user_keys and platform_keys not supported.')
-    if not computing_user:
-        raise ValueError('No \'user\' parameter found for computing resource \'{}\' in its configuration'.format(computing.name))
-
-    return (computing_user, computing_host, computing_keys)
def sanitize_container_env_vars(env_vars):
    for env_var in env_vars:
...
@@ -339,38 +339,10 @@ def account(request):
#=========================
# Tasks view
#=========================
-def set_verified_status(task):
-
-    # Check status with ping
-    if task.status == 'running':
-        logger.debug('Task is running, check if startup completed')
-        logger.debug('Trying to establish connection on: "{}:{}"'.format(task.interface_ip,task.interface_port))
-        s = socket.socket()
-        try:
-            s.settimeout(1)
-            s.connect((task.interface_ip, task.interface_port))
-            # Not necessary, we just check that the container interface is up
-            #if not s.recv(10):
-            #    logger.debug('No data read from socket')
-            #    raise Exception('Could not read any data from socket')
-        except Exception as e:
-            logger.debug('Could not connect to socket')
-            if (pytz.UTC.localize(datetime.datetime.now())-task.created) > datetime.timedelta(hours=1):
-                task.verified_status = 'not working / killed'
-            else:
-                task.verified_status = 'starting up...'
-        else:
-            task.verified_status = 'running'
-        finally:
-            s.close()
-    else:
-        task.verified_status = task.status
@private_view
def tasks(request):
@@ -397,8 +369,7 @@ def tasks(request):
        task = Task.objects.get(user=request.user, uuid=uuid)
    except Task.DoesNotExist:
        raise ErrorMessage('Task does not exists or no access rights')
-    set_verified_status(task)
    data['task'] = task
    # Task actions
@@ -479,7 +450,6 @@ def tasks(request):
    # Update task statuses
    for task in tasks:
        task.update_status()
-        set_verified_status(task)
    # Set task and tasks variables
    data['task'] = None
@@ -1154,7 +1124,6 @@ def task_connect(request):
    if not task_uuid:
        raise ErrorMessage('Empty task uuid')
    # Get the task
    task = Task.objects.get(uuid=task_uuid)
@@ -1163,6 +1132,42 @@ def task_connect(request):
    # Ensure that the tunnel and proxy are set up
    setup_tunnel_and_proxy(task)

+    # Set default interface status as unknown
+    task.interface_status = 'unknown'
+
+    # Check if task interface is up
+    if task.status == 'running':
+        logger.debug('Checking if task interface is running by trying to establish connection via local tunnel on port "{}"'.format(task.tcp_tunnel_port))
+        if task.container.interface_protocol.startswith('http'):
+            try:
+                if task.requires_tcp_tunnel:
+                    requests.get('{}://localhost:{}'.format(task.container.interface_protocol, task.tcp_tunnel_port), timeout=3)
+                else:
+                    requests.get('{}://{}:{}'.format(task.container.interface_protocol, task.interface_ip, task.interface_port), timeout=3)
+                logger.debug('Task interface is answering')
+                task.interface_status = 'running'
+            except Exception as e:
+                logger.debug('Could not connect to task interface ({})'.format(e))
+        else:
+            pass
+            # # TODO: the following raises a TimeoutError even if the connection is active and with requests work. Why?
+            # with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+            #     s.settimeout(3)
+            #     try:
+            #         s.connect(('localhost', task.tcp_tunnel_port))
+            #         if not s.recv(10):
+            #             logger.debug('No data read from socket')
+            #             raise Exception('Could not read any data from socket')
+            #     except Exception as e:
+            #         logger.debug('Could not connect to task interface via socket ({})'.format(e))
+            #         task.interface_status = 'unknown'
+            #     else:
+            #         logger.debug('Task interface is answering via socket')
+            #         task.interface_status = 'running'
    data = {}
    data['task'] = task
...
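For reference, a stripped-down version of the reachability probe that task_connect now performs before rendering the connect page, assuming the requests library and a task carrying the fields used above:

import requests

def probe_interface_status(task, timeout=3):
    """Simplified sketch: report 'running' only if the task interface answers over HTTP(S)."""
    if task.status != 'running':
        return 'unknown'
    if task.requires_tcp_tunnel:
        # Probe through the local TCP tunnel set up for the task
        url = '{}://localhost:{}'.format(task.container.interface_protocol, task.tcp_tunnel_port)
    else:
        # Probe the task interface directly
        url = '{}://{}:{}'.format(task.container.interface_protocol, task.interface_ip, task.interface_port)
    try:
        requests.get(url, timeout=timeout)
        return 'running'
    except requests.RequestException:
        return 'unknown'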