Commit 73c88b8f authored by Stefano Alberto Russo's avatar Stefano Alberto Russo
Browse files

Added hopped, remote computing support. Minor fixes.

parent 9e551c06
Loading
Loading
Loading
Loading
+156 −4
Original line number Diff line number Diff line
@@ -242,7 +242,6 @@ class RemoteComputingManager(ComputingManager):

        # Stop the task remotely
        stop_command = 'ssh -i {} -4 -o StrictHostKeyChecking=no {}@{} \'/bin/bash -c "kill -9 {}"\''.format(user_keys.private_key_file, user, host, task.pid)
        logger.debug(stop_command)
        out = os_shell(stop_command, capture=True)
        if out.exit_code != 0:
            if not 'No such process' in out.stderr:
@@ -265,7 +264,7 @@ class RemoteComputingManager(ComputingManager):
        host = task.computing.get_conf_param('host')
        user = task.computing.get_conf_param('user')

        # Stop the task remotely
        # View log remotely
        view_log_command = 'ssh -i {} -4 -o StrictHostKeyChecking=no {}@{} \'/bin/bash -c "cat \$HOME/{}.log"\''.format(user_keys.private_key_file, user, host, task.uuid)

        out = os_shell(view_log_command, capture=True)
@@ -400,7 +399,6 @@ class SlurmComputingManager(ComputingManager):

        # Stop the task remotely
        stop_command = 'ssh -i {} -4 -o StrictHostKeyChecking=no {}@{} \'/bin/bash -c "scancel {}"\''.format(user_keys.private_key_file, user, host, task.pid)
        logger.debug(stop_command)
        out = os_shell(stop_command, capture=True)
        if out.exit_code != 0:
            raise Exception(out.stderr)
@@ -422,7 +420,7 @@ class SlurmComputingManager(ComputingManager):
        host = task.computing.get_conf_param('master')
        user = task.computing.get_conf_param('user')

        # Stop the task remotely
        # View log remotely
        view_log_command = 'ssh -i {} -4 -o StrictHostKeyChecking=no {}@{} \'/bin/bash -c "cat \$HOME/{}.log"\''.format(user_keys.private_key_file, user, host, task.uuid)

        out = os_shell(view_log_command, capture=True)
@@ -433,6 +431,160 @@ class SlurmComputingManager(ComputingManager):



class RemotehopComputingManager(ComputingManager):
    
    def _start_task(self, task, **kwargs):
        logger.debug('Starting a remote task "{}"'.format(task.computing))

        # Get computing params
        first_host = task.computing.get_conf_param('first_host')
        first_user = task.computing.get_conf_param('first_user')
        second_host = task.computing.get_conf_param('second_host')
        second_user = task.computing.get_conf_param('second_user')
        setup_command = task.computing.get_conf_param('setup_command')

        # De hard-code
        use_agent = False

        # Get user keys
        if task.computing.requires_user_keys:
            user_keys = KeyPair.objects.get(user=task.user, default=True)
        else:
            raise NotImplementedError('Remote tasks not requiring keys are not yet supported')

        # Get webapp conn string
        from.utils import get_webapp_conn_string
        webapp_conn_string = get_webapp_conn_string()
            
        # Run the container on the host (non blocking)
        if task.container.type == 'singularity':

            task.tid    = task.uuid
            task.save()

            # Set pass if any
            if task.auth_pass:
                authstring = ' export SINGULARITYENV_AUTH_PASS={} && '.format(task.auth_pass)
            else:
                authstring = ''
 
            run_command  = 'ssh -i {} -4 -o StrictHostKeyChecking=no {}@{} '.format(user_keys.private_key_file, first_user, first_host)
            run_command += '"ssh -4 -o StrictHostKeyChecking=no {}@{} /bin/bash -c \''.format(second_user, second_host)
            
            if use_agent:
                run_command += '\'wget {}/api/v1/base/agent/?task_uuid={} -O \$HOME/agent_{}.py &> /dev/null && export BASE_PORT=\$(python \$HOME/agent_{}.py 2> \$HOME/{}.log) && '.format(webapp_conn_string, task.uuid, task.uuid, task.uuid, task.uuid)
                if setup_command:
                    run_command += setup_command + ' && '
                run_command += '\'export SINGULARITY_NOHTTPS=true && export SINGULARITYENV_BASE_PORT=\$BASE_PORT && {} '.format(authstring)
                run_command += 'exec nohup singularity run --pid --writable-tmpfs --containall --cleanenv '
            else:
                run_command += ' : && ' # Trick to prevent some issues in exporting variables                
                if setup_command:
                    run_command += setup_command + ' && '
                run_command += 'export SINGULARITY_NOHTTPS=true && export SINGULARITYENV_BASE_PORT={} && {} '.format(task.port, authstring)
                run_command += 'exec nohup singularity run --pid --writable-tmpfs --containall --cleanenv '
             
            # Set registry
            if task.container.registry == 'docker_local':
                raise Exception('This computing resource does not support local Docker registries yet')
                # Get local Docker registry conn string
                from.utils import get_local_docker_registry_conn_string
                local_docker_registry_conn_string = get_local_docker_registry_conn_string()
                registry = 'docker://{}/'.format(local_docker_registry_conn_string)
            elif task.container.registry == 'docker_hub':
                registry = 'docker://'
            else:
                raise NotImplementedError('Registry {} not supported'.format(task.container.registry))
     
            run_command+='{}{} &>> \$HOME/{}.log & echo \$!\'"'.format(registry, task.container.image, task.uuid)

        else:
            raise NotImplementedError('Container {} not supported'.format(task.container.type))

        out = os_shell(run_command, capture=True)
        if out.exit_code != 0:
            raise Exception(out.stderr)
        
        # Log        
        logger.debug('Shell exec output: "{}"'.format(out))


        # Load back the task to avoid  concurrency problems in the agent call
        task_uuid = task.uuid
        task = Task.objects.get(uuid=task_uuid)

        # Save pid echoed by the command above
        task_pid = out.stdout

        # Set fields
        task.status = TaskStatuses.running
        task.pid = task_pid
        task.ip  = second_host
 
        # Save
        task.save()


    def _stop_task(self, task, **kwargs):

        # Get user keys
        if task.computing.requires_user_keys:
            user_keys = KeyPair.objects.get(user=task.user, default=True)
        else:
            raise NotImplementedError('Remote tasks not requiring keys are not yet supported')

        # Get computing params
        first_host = task.computing.get_conf_param('first_host')
        first_user = task.computing.get_conf_param('first_user')
        second_host = task.computing.get_conf_param('second_host')
        second_user = task.computing.get_conf_param('second_user')

        # Stop the task remotely
        stop_command  = 'ssh -i {} -4 -o StrictHostKeyChecking=no {}@{} '.format(user_keys.private_key_file, first_user, first_host)
        stop_command += '"ssh -4 -o StrictHostKeyChecking=no {}@{} '.format(second_user, second_host)
        stop_command += 'kill -9 {}"'.format(task.pid)

        out = os_shell(stop_command, capture=True)
        if out.exit_code != 0:
            if not 'No such process' in out.stderr:
                raise Exception(out.stderr)

        # Set task as stopped
        task.status = TaskStatuses.stopped
        task.save()


    def _get_task_log(self, task, **kwargs):
        
        # Get user keys
        if task.computing.requires_user_keys:
            user_keys = KeyPair.objects.get(user=task.user, default=True)
        else:
            raise NotImplementedError('Remote tasks not requiring keys are not yet supported')

        # Get computing params
        first_host = task.computing.get_conf_param('first_host')
        first_user = task.computing.get_conf_param('first_user')
        second_host = task.computing.get_conf_param('second_host')
        second_user = task.computing.get_conf_param('second_user')

        # View log remotely
        view_log_command  = 'ssh -i {} -4 -o StrictHostKeyChecking=no {}@{} '.format(user_keys.private_key_file, first_user, first_host)
        view_log_command += '"ssh -4 -o StrictHostKeyChecking=no {}@{} '.format(second_user, second_host)
        view_log_command += 'cat \\\\\\$HOME/{}.log"'.format(task.uuid)

        out = os_shell(view_log_command, capture=True)
        if out.exit_code != 0:
            raise Exception(out.stderr)
        else:
            return out.stdout










+12 −0
Original line number Diff line number Diff line
import uuid
import json
from django.conf import settings
from django.db import models
from django.contrib.auth.models import User
@@ -159,6 +160,12 @@ class Computing(models.Model):
        except ComputingSysConf.DoesNotExist:
            return None


    @property    
    def sys_conf_data_json(self):
        return json.dumps(self.sys_conf_data)

    
    @property    
    def user_conf_data(self):
        try:
@@ -167,6 +174,11 @@ class Computing(models.Model):
            raise AttributeError('User conf data is not yet attached, please attach it before accessing.')


    @property    
    def user_conf_data_json(self):
        return json.dumps(self.user_conf_data)

    
    def attach_user_conf_data(self, user):
        if self.user and self.user != user:
            raise Exception('Cannot attach a conf data for another user (my user="{}", another user="{}"'.format(self.user, user)) 
+2 −3
Original line number Diff line number Diff line
@@ -56,9 +56,8 @@
           </tr>

           <tr>
            <td><b>Ports</b></td>
            <td>
             <input type="text" name="container_ports" value="" placeholder="" size="5" />
            <td colspan=2><b>Default port(s)</b>
             &nbsp; &nbsp;<input type="text" name="container_ports" value="" placeholder="" size="5" />
            </td>
           </tr>

+1 −1
Original line number Diff line number Diff line
@@ -77,7 +77,7 @@
        
        <div style="padding:10px;">
        <b>Type:</b> {{ computing.type.title }}<br/>
        <b>Owner:</b> {% if computing.user %}{{ data.computing.user }}{% else %}Platform{% endif %}<br/>
        <b>Owner:</b> {% if computing.user %}{{ computing.user }}{% else %}Platform{% endif %}<br/>
        <b>Supports:</b>
        {% if computing.supports_docker %}Docker <img src="/static/img/docker-logo.svg" style="height:18px; width:18px; margin-bottom:2px" />{% endif %}
        {% if computing.supports_singularity %}Singularity <img src="/static/img/singularity-logo.svg" style="height:18px; width:18px; margin-bottom:2px" />{% endif %}
+2 −2
Original line number Diff line number Diff line
@@ -40,8 +40,8 @@
       </tr>

       <tr>
        <td><b>Ports</b></td>
        <td>{{ container.ports }}</td>
        <td colspan=2><b>Default port(s)</b>
        &nbsp; &nbsp;{{ container.ports }}</td>
       </tr>

       <tr>
Loading