Loading services/webapp/code/rosetta/base_app/tasks.py→services/webapp/code/rosetta/base_app/computing_managers.py +129 −120 Original line number Diff line number Diff line Loading @@ -10,15 +10,71 @@ logger = logging.getLogger(__name__) TASK_DATA_DIR = "/data" def start_task(task): class ComputingManager(object): # Handle proper config if task.computing.type == 'local': def start_task(self, task, **kwargs): # Get our ip address #import netifaces #netifaces.ifaddresses('eth0') #backend_ip = netifaces.ifaddresses('eth0')[netifaces.AF_INET][0]['addr'] # Check for run task logic implementation try: self._start_task except AttributeError: raise NotImplementedError('Not implemented') # Call actual run task logic self._start_task(task, **kwargs) def stop_task(self, task, **kwargs): # Check for stop task logic implementation try: self._stop_task except AttributeError: raise NotImplementedError('Not implemented') # Call actual stop task logic self._stop_task(task, **kwargs) # Ok, save status as deleted task.status = 'stopped' task.save() # Check if the tunnel is active and if so kill it logger.debug('Checking if task "{}" has a running tunnel'.format(task.tid)) check_command = 'ps -ef | grep ":'+str(task.tunnel_port)+':'+str(task.ip)+':'+str(task.port)+'" | grep -v grep | awk \'{print $2}\'' logger.debug(check_command) out = os_shell(check_command, capture=True) logger.debug(out) if out.exit_code == 0: logger.debug('Task "{}" has a running tunnel, killing it'.format(task.tid)) tunnel_pid = out.stdout # Kill Tunnel command kill_tunnel_command= 'kill -9 {}'.format(tunnel_pid) # Log logger.debug('Killing tunnel with command: {}'.format(kill_tunnel_command)) # Execute os_shell(kill_tunnel_command, capture=True) if out.exit_code != 0: raise Exception(out.stderr) def get_task_log(self, task, **kwargs): # Check for get task log logic implementation try: self._get_task_log except AttributeError: raise NotImplementedError('Not implemented') # Call actual get task log logic return self._get_task_log(task, **kwargs) class LocalComputingManager(ComputingManager): def _start_task(self, task): # Init run command #--cap-add=NET_ADMIN --cap-add=NET_RAW run_command = 'sudo docker run --network=rosetta_default --name rosetta-task-{}'.format( task.id) Loading @@ -39,8 +95,10 @@ def start_task(task): # Host name, image entry command run_command += ' -h task-{} -d -t {}{}'.format(task.id, registry_string, task.container.image) # Run the task Debug # Debug logger.debug('Running new task with command="{}"'.format(run_command)) # Run the task out = os_shell(run_command, capture=True) if out.exit_code != 0: raise Exception(out.stderr) Loading @@ -48,7 +106,6 @@ def start_task(task): task_tid = out.stdout logger.debug('Created task with id: "{}"'.format(task_tid)) # Get task IP address out = os_shell('sudo docker inspect --format \'{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}\' ' + task_tid + ' | tail -n1', capture=True) if out.exit_code != 0: Loading @@ -65,9 +122,34 @@ def start_task(task): task.save() def _stop_task(self, task): # Delete the Docker container standby_supported = False if standby_supported: stop_command = 'sudo docker stop {}'.format(task.tid) else: stop_command = 'sudo docker stop {} && sudo docker rm {}'.format(task.tid,task.tid) out = os_shell(stop_command, capture=True) if out.exit_code != 0: raise Exception(out.stderr) def _get_task_log(self, task, **kwargs): # View the Docker container log (attach) view_log_command = 'sudo docker logs {}'.format(task.tid,) logger.debug(view_log_command) out = os_shell(view_log_command, capture=True) if out.exit_code != 0: raise Exception(out.stderr) else: return out.stdout class RemoteComputingManager(ComputingManager): elif task.computing.type == 'remote': def _start_task(self, task, **kwargs): logger.debug('Starting a remote task "{}"'.format(task.computing)) # Get computing host Loading Loading @@ -143,11 +225,8 @@ def start_task(task): # Save task.save() elif task.computing.type == 'remoteOLD': logger.debug('Starting a remote task "{}"'.format(task.computing)) # Get computing host host = task.computing.get_conf_param('host') def _stop_task(self, task, **kwargs): # Get user keys if task.computing.require_user_keys: Loading @@ -155,64 +234,43 @@ def start_task(task): else: raise NotImplementedError('Remote tasks not requiring keys are not yet supported') # 1) Run the container on the host (non blocking) if task.container.type == 'singularity': # Set pass if any if task.auth_pass: authstring = ' export SINGULARITYENV_AUTH_PASS={} && '.format(task.auth_pass) else: authstring = '' # Get computing host host = task.computing.get_conf_param('host') run_command = 'ssh -i {} -4 -o StrictHostKeyChecking=no {} '.format(user_keys.private_key_file, host) run_command += '"export SINGULARITY_NOHTTPS=true && {} '.format(authstring) run_command += 'exec nohup singularity run --pid --writable-tmpfs --containall --cleanenv ' # Stop the task remotely stop_command = 'ssh -i {} -4 -o StrictHostKeyChecking=no {} "kill -9 {}"'.format(user_keys.private_key_file, host, task.pid) logger.debug(stop_command) out = os_shell(stop_command, capture=True) if out.exit_code != 0: if not 'No such process' in out.stderr: raise Exception(out.stderr) # Set registry if task.container.registry == 'docker_local': registry = 'docker://dregistry:5000/' elif task.container.registry == 'docker_hub': registry = 'docker://' else: raise NotImplementedError('Registry {} not supported'.format(task.container.registry)) run_command+='{}{} &> /tmp/{}.log & echo \$!"'.format(registry, task.container.image, task.uuid) def _get_task_log(self, task, **kwargs): # Get computing host host = task.computing.get_conf_param('host') # Get id_rsa if task.computing.require_user_keys: user_keys = Keys.objects.get(user=task.user, default=True) id_rsa_file = user_keys.private_key_file else: raise NotImplementedError('Container {} not supported'.format(task.container.type)) raise NotImplementedError('temote with no keys not yet') out = os_shell(run_command, capture=True) # View the Singularity container log view_log_command = 'ssh -i {} -4 -o StrictHostKeyChecking=no {} "cat /tmp/{}.log"'.format(id_rsa_file, host, task.uuid) logger.debug(view_log_command) out = os_shell(view_log_command, capture=True) if out.exit_code != 0: raise Exception(out.stderr) else: return out.stdout # Save pid echoed by the command above task_pid = out.stdout # 2) Simulate the agent (i.e. report container IP and port port) # Get task IP address out = os_shell('sudo docker inspect --format \'{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}\' '+host+' | tail -n1', capture=True) if out.exit_code != 0: raise Exception('Error: ' + out.stderr) task_ip = out.stdout # Set fields task.tid = task.uuid task.status = TaskStatuses.running task.ip = task_ip task.pid = task_pid task.port = int(task.container.service_ports.split(',')[0]) # Save task.save() #============================== # Slurm #============================== elif task.computing.type == 'slurm': class SlurmComputingManager(ComputingManager): def _start_task(self, task, **kwargs): logger.debug('Starting a remote task "{}"'.format(task.computing)) # Get computing host #Key Error ATM Loading Loading @@ -271,67 +329,18 @@ def start_task(task): raise Exception(out.stderr) def _stop_task(self, task, **kwargs): raise NotImplementedError('Not implemented') else: raise Exception('Consistency exception: invalid computing resource "{}'.format(task.computing)) def _get_task_log(self, task, **kwargs): raise NotImplementedError('Not implemented') def stop_task(task): if task.computing.type == 'local': # Delete the Docker container standby_supported = False if standby_supported: stop_command = 'sudo docker stop {}'.format(task.tid) else: stop_command = 'sudo docker stop {} && sudo docker rm {}'.format(task.tid,task.tid) out = os_shell(stop_command, capture=True) if out.exit_code != 0: raise Exception(out.stderr) elif task.computing.type == 'remote': # Get user keys if task.computing.require_user_keys: user_keys = Keys.objects.get(user=task.user, default=True) else: raise NotImplementedError('Remote tasks not requiring keys are not yet supported') # Get computing host host = task.computing.get_conf_param('host') # Stop the task remotely stop_command = 'ssh -i {} -4 -o StrictHostKeyChecking=no {} "kill -9 {}"'.format(user_keys.private_key_file, host, task.pid) logger.debug(stop_command) out = os_shell(stop_command, capture=True) if out.exit_code != 0: if not 'No such process' in out.stderr: raise Exception(out.stderr) else: raise Exception('Don\'t know how to stop tasks on "{}" computing resource.'.format(task.computing)) # Ok, save status as deleted task.status = 'stopped' task.save() # Check if the tunnel is active and if so kill it logger.debug('Checking if task "{}" has a running tunnel'.format(task.tid)) check_command = 'ps -ef | grep ":'+str(task.tunnel_port)+':'+str(task.ip)+':'+str(task.port)+'" | grep -v grep | awk \'{print $2}\'' logger.debug(check_command) out = os_shell(check_command, capture=True) logger.debug(out) if out.exit_code == 0: logger.debug('Task "{}" has a running tunnel, killing it'.format(task.tid)) tunnel_pid = out.stdout # Kill Tunnel command kill_tunnel_command= 'kill -9 {}'.format(tunnel_pid) # Log logger.debug('Killing tunnel with command: {}'.format(kill_tunnel_command)) # Execute os_shell(kill_tunnel_command, capture=True) if out.exit_code != 0: raise Exception(out.stderr) services/webapp/code/rosetta/base_app/management/commands/base_app_populate.py +2 −15 Original line number Diff line number Diff line Loading @@ -110,21 +110,8 @@ class Command(BaseCommand): #============================== # Demo remote computing #============================== demo_remote_computing = Computing.objects.create(user = None, name = 'Demo remote', type = 'remote', require_sys_conf = True, require_user_conf = False, require_user_keys = False) ComputingSysConf.objects.create(computing = demo_remote_computing, data = {'host': 'slurmclusterworker-one'}) #============================== # Demo remote (auth) computing #============================== demo_remote_auth_computing = Computing.objects.create(user = None, name = 'Demo remote (auth)', name = 'Demo remote', type = 'remote', require_sys_conf = True, require_user_conf = True, Loading services/webapp/code/rosetta/base_app/models.py +6 −0 Original line number Diff line number Diff line Loading @@ -200,6 +200,12 @@ class Computing(models.Model): param_value = self.user_conf_data[param] return param_value @property def manager(self): from . import computing_managers ComputingManager = getattr(computing_managers, '{}ComputingManager'.format(self.type.title())) return ComputingManager() class ComputingSysConf(models.Model): uuid = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) Loading services/webapp/code/rosetta/base_app/templates/account.html +14 −0 Original line number Diff line number Diff line Loading @@ -83,6 +83,20 @@ {% endif %} </td> </tr> </table> <br /> <h3>Keys</h3> <table class="dashboard"> <tr> <td valign="top"> <b>Default public key</b> </td> <td> <pre style="max-width:300px; height:">{{ data.default_public_key }}</pre> </td> </tr> </table> Loading services/webapp/code/rosetta/base_app/templates/components/task.html +0 −4 Original line number Diff line number Diff line Loading @@ -71,11 +71,7 @@ <font color="#c0c0c0">Stop</font> | {% endif %} {% if task.status == "exited" or task.status == "stopped" %} <a href="?uuid={{task.uuid}}&action=delete&details=False">Delete</a> {% else %} <font color="#c0c0c0">Delete</font> {% endif %} {% if task.status == "running" %} | <a href="?uuid={{task.uuid}}&action=connect">Connect</a> | <a href="/task_log/?uuid={{task.uuid}}&action=viewlog">View Log</a> Loading Loading
services/webapp/code/rosetta/base_app/tasks.py→services/webapp/code/rosetta/base_app/computing_managers.py +129 −120 Original line number Diff line number Diff line Loading @@ -10,15 +10,71 @@ logger = logging.getLogger(__name__) TASK_DATA_DIR = "/data" def start_task(task): class ComputingManager(object): # Handle proper config if task.computing.type == 'local': def start_task(self, task, **kwargs): # Get our ip address #import netifaces #netifaces.ifaddresses('eth0') #backend_ip = netifaces.ifaddresses('eth0')[netifaces.AF_INET][0]['addr'] # Check for run task logic implementation try: self._start_task except AttributeError: raise NotImplementedError('Not implemented') # Call actual run task logic self._start_task(task, **kwargs) def stop_task(self, task, **kwargs): # Check for stop task logic implementation try: self._stop_task except AttributeError: raise NotImplementedError('Not implemented') # Call actual stop task logic self._stop_task(task, **kwargs) # Ok, save status as deleted task.status = 'stopped' task.save() # Check if the tunnel is active and if so kill it logger.debug('Checking if task "{}" has a running tunnel'.format(task.tid)) check_command = 'ps -ef | grep ":'+str(task.tunnel_port)+':'+str(task.ip)+':'+str(task.port)+'" | grep -v grep | awk \'{print $2}\'' logger.debug(check_command) out = os_shell(check_command, capture=True) logger.debug(out) if out.exit_code == 0: logger.debug('Task "{}" has a running tunnel, killing it'.format(task.tid)) tunnel_pid = out.stdout # Kill Tunnel command kill_tunnel_command= 'kill -9 {}'.format(tunnel_pid) # Log logger.debug('Killing tunnel with command: {}'.format(kill_tunnel_command)) # Execute os_shell(kill_tunnel_command, capture=True) if out.exit_code != 0: raise Exception(out.stderr) def get_task_log(self, task, **kwargs): # Check for get task log logic implementation try: self._get_task_log except AttributeError: raise NotImplementedError('Not implemented') # Call actual get task log logic return self._get_task_log(task, **kwargs) class LocalComputingManager(ComputingManager): def _start_task(self, task): # Init run command #--cap-add=NET_ADMIN --cap-add=NET_RAW run_command = 'sudo docker run --network=rosetta_default --name rosetta-task-{}'.format( task.id) Loading @@ -39,8 +95,10 @@ def start_task(task): # Host name, image entry command run_command += ' -h task-{} -d -t {}{}'.format(task.id, registry_string, task.container.image) # Run the task Debug # Debug logger.debug('Running new task with command="{}"'.format(run_command)) # Run the task out = os_shell(run_command, capture=True) if out.exit_code != 0: raise Exception(out.stderr) Loading @@ -48,7 +106,6 @@ def start_task(task): task_tid = out.stdout logger.debug('Created task with id: "{}"'.format(task_tid)) # Get task IP address out = os_shell('sudo docker inspect --format \'{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}\' ' + task_tid + ' | tail -n1', capture=True) if out.exit_code != 0: Loading @@ -65,9 +122,34 @@ def start_task(task): task.save() def _stop_task(self, task): # Delete the Docker container standby_supported = False if standby_supported: stop_command = 'sudo docker stop {}'.format(task.tid) else: stop_command = 'sudo docker stop {} && sudo docker rm {}'.format(task.tid,task.tid) out = os_shell(stop_command, capture=True) if out.exit_code != 0: raise Exception(out.stderr) def _get_task_log(self, task, **kwargs): # View the Docker container log (attach) view_log_command = 'sudo docker logs {}'.format(task.tid,) logger.debug(view_log_command) out = os_shell(view_log_command, capture=True) if out.exit_code != 0: raise Exception(out.stderr) else: return out.stdout class RemoteComputingManager(ComputingManager): elif task.computing.type == 'remote': def _start_task(self, task, **kwargs): logger.debug('Starting a remote task "{}"'.format(task.computing)) # Get computing host Loading Loading @@ -143,11 +225,8 @@ def start_task(task): # Save task.save() elif task.computing.type == 'remoteOLD': logger.debug('Starting a remote task "{}"'.format(task.computing)) # Get computing host host = task.computing.get_conf_param('host') def _stop_task(self, task, **kwargs): # Get user keys if task.computing.require_user_keys: Loading @@ -155,64 +234,43 @@ def start_task(task): else: raise NotImplementedError('Remote tasks not requiring keys are not yet supported') # 1) Run the container on the host (non blocking) if task.container.type == 'singularity': # Set pass if any if task.auth_pass: authstring = ' export SINGULARITYENV_AUTH_PASS={} && '.format(task.auth_pass) else: authstring = '' # Get computing host host = task.computing.get_conf_param('host') run_command = 'ssh -i {} -4 -o StrictHostKeyChecking=no {} '.format(user_keys.private_key_file, host) run_command += '"export SINGULARITY_NOHTTPS=true && {} '.format(authstring) run_command += 'exec nohup singularity run --pid --writable-tmpfs --containall --cleanenv ' # Stop the task remotely stop_command = 'ssh -i {} -4 -o StrictHostKeyChecking=no {} "kill -9 {}"'.format(user_keys.private_key_file, host, task.pid) logger.debug(stop_command) out = os_shell(stop_command, capture=True) if out.exit_code != 0: if not 'No such process' in out.stderr: raise Exception(out.stderr) # Set registry if task.container.registry == 'docker_local': registry = 'docker://dregistry:5000/' elif task.container.registry == 'docker_hub': registry = 'docker://' else: raise NotImplementedError('Registry {} not supported'.format(task.container.registry)) run_command+='{}{} &> /tmp/{}.log & echo \$!"'.format(registry, task.container.image, task.uuid) def _get_task_log(self, task, **kwargs): # Get computing host host = task.computing.get_conf_param('host') # Get id_rsa if task.computing.require_user_keys: user_keys = Keys.objects.get(user=task.user, default=True) id_rsa_file = user_keys.private_key_file else: raise NotImplementedError('Container {} not supported'.format(task.container.type)) raise NotImplementedError('temote with no keys not yet') out = os_shell(run_command, capture=True) # View the Singularity container log view_log_command = 'ssh -i {} -4 -o StrictHostKeyChecking=no {} "cat /tmp/{}.log"'.format(id_rsa_file, host, task.uuid) logger.debug(view_log_command) out = os_shell(view_log_command, capture=True) if out.exit_code != 0: raise Exception(out.stderr) else: return out.stdout # Save pid echoed by the command above task_pid = out.stdout # 2) Simulate the agent (i.e. report container IP and port port) # Get task IP address out = os_shell('sudo docker inspect --format \'{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}\' '+host+' | tail -n1', capture=True) if out.exit_code != 0: raise Exception('Error: ' + out.stderr) task_ip = out.stdout # Set fields task.tid = task.uuid task.status = TaskStatuses.running task.ip = task_ip task.pid = task_pid task.port = int(task.container.service_ports.split(',')[0]) # Save task.save() #============================== # Slurm #============================== elif task.computing.type == 'slurm': class SlurmComputingManager(ComputingManager): def _start_task(self, task, **kwargs): logger.debug('Starting a remote task "{}"'.format(task.computing)) # Get computing host #Key Error ATM Loading Loading @@ -271,67 +329,18 @@ def start_task(task): raise Exception(out.stderr) def _stop_task(self, task, **kwargs): raise NotImplementedError('Not implemented') else: raise Exception('Consistency exception: invalid computing resource "{}'.format(task.computing)) def _get_task_log(self, task, **kwargs): raise NotImplementedError('Not implemented') def stop_task(task): if task.computing.type == 'local': # Delete the Docker container standby_supported = False if standby_supported: stop_command = 'sudo docker stop {}'.format(task.tid) else: stop_command = 'sudo docker stop {} && sudo docker rm {}'.format(task.tid,task.tid) out = os_shell(stop_command, capture=True) if out.exit_code != 0: raise Exception(out.stderr) elif task.computing.type == 'remote': # Get user keys if task.computing.require_user_keys: user_keys = Keys.objects.get(user=task.user, default=True) else: raise NotImplementedError('Remote tasks not requiring keys are not yet supported') # Get computing host host = task.computing.get_conf_param('host') # Stop the task remotely stop_command = 'ssh -i {} -4 -o StrictHostKeyChecking=no {} "kill -9 {}"'.format(user_keys.private_key_file, host, task.pid) logger.debug(stop_command) out = os_shell(stop_command, capture=True) if out.exit_code != 0: if not 'No such process' in out.stderr: raise Exception(out.stderr) else: raise Exception('Don\'t know how to stop tasks on "{}" computing resource.'.format(task.computing)) # Ok, save status as deleted task.status = 'stopped' task.save() # Check if the tunnel is active and if so kill it logger.debug('Checking if task "{}" has a running tunnel'.format(task.tid)) check_command = 'ps -ef | grep ":'+str(task.tunnel_port)+':'+str(task.ip)+':'+str(task.port)+'" | grep -v grep | awk \'{print $2}\'' logger.debug(check_command) out = os_shell(check_command, capture=True) logger.debug(out) if out.exit_code == 0: logger.debug('Task "{}" has a running tunnel, killing it'.format(task.tid)) tunnel_pid = out.stdout # Kill Tunnel command kill_tunnel_command= 'kill -9 {}'.format(tunnel_pid) # Log logger.debug('Killing tunnel with command: {}'.format(kill_tunnel_command)) # Execute os_shell(kill_tunnel_command, capture=True) if out.exit_code != 0: raise Exception(out.stderr)
services/webapp/code/rosetta/base_app/management/commands/base_app_populate.py +2 −15 Original line number Diff line number Diff line Loading @@ -110,21 +110,8 @@ class Command(BaseCommand): #============================== # Demo remote computing #============================== demo_remote_computing = Computing.objects.create(user = None, name = 'Demo remote', type = 'remote', require_sys_conf = True, require_user_conf = False, require_user_keys = False) ComputingSysConf.objects.create(computing = demo_remote_computing, data = {'host': 'slurmclusterworker-one'}) #============================== # Demo remote (auth) computing #============================== demo_remote_auth_computing = Computing.objects.create(user = None, name = 'Demo remote (auth)', name = 'Demo remote', type = 'remote', require_sys_conf = True, require_user_conf = True, Loading
services/webapp/code/rosetta/base_app/models.py +6 −0 Original line number Diff line number Diff line Loading @@ -200,6 +200,12 @@ class Computing(models.Model): param_value = self.user_conf_data[param] return param_value @property def manager(self): from . import computing_managers ComputingManager = getattr(computing_managers, '{}ComputingManager'.format(self.type.title())) return ComputingManager() class ComputingSysConf(models.Model): uuid = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) Loading
services/webapp/code/rosetta/base_app/templates/account.html +14 −0 Original line number Diff line number Diff line Loading @@ -83,6 +83,20 @@ {% endif %} </td> </tr> </table> <br /> <h3>Keys</h3> <table class="dashboard"> <tr> <td valign="top"> <b>Default public key</b> </td> <td> <pre style="max-width:300px; height:">{{ data.default_public_key }}</pre> </td> </tr> </table> Loading
services/webapp/code/rosetta/base_app/templates/components/task.html +0 −4 Original line number Diff line number Diff line Loading @@ -71,11 +71,7 @@ <font color="#c0c0c0">Stop</font> | {% endif %} {% if task.status == "exited" or task.status == "stopped" %} <a href="?uuid={{task.uuid}}&action=delete&details=False">Delete</a> {% else %} <font color="#c0c0c0">Delete</font> {% endif %} {% if task.status == "running" %} | <a href="?uuid={{task.uuid}}&action=connect">Connect</a> | <a href="/task_log/?uuid={{task.uuid}}&action=viewlog">View Log</a> Loading