Loading services/webapp/code/rosetta/core_app/api.py +11 −11 Original line number Diff line number Diff line Loading @@ -305,25 +305,25 @@ print(port) elif action=='set_ip_port': task_ip = request.GET.get('ip', None) if not task_ip: return HttpResponse('IP not valid (got "{}")'.format(task_ip)) task_interface_ip = request.GET.get('ip', None) if not task_interface_ip: return HttpResponse('IP not valid (got "{}")'.format(task_interface_ip)) task_port = request.GET.get('port', None) if not task_port: return HttpResponse('Port not valid (got "{}")'.format(task_port)) task_interface_port = request.GET.get('port', None) if not task_interface_port: return HttpResponse('Port not valid (got "{}")'.format(task_interface_port)) try: int(task_port) int(task_interface_port) except (TypeError, ValueError): return HttpResponse('Port not valid (got "{}")'.format(task_port)) return HttpResponse('Port not valid (got "{}")'.format(task_interface_port)) # Set fields logger.info('Setting task "{}" to ip "{}" and port "{}"'.format(task.uuid, task_ip, task_port)) logger.info('Setting task "{}" to ip "{}" and port "{}"'.format(task.uuid, task_interface_ip, task_interface_port)) task.status = TaskStatuses.running task.ip = task_ip task.interface_ip = task_interface_ip if task.container.supports_custom_interface_port: task.port = int(task_port) task.interface_port = int(task_interface_port) task.save() # Notify the user that the task called back home Loading services/webapp/code/rosetta/core_app/computing_managers.py +16 −30 Original line number Diff line number Diff line Loading @@ -102,13 +102,13 @@ class InternalSingleNodeComputingManager(SingleNodeComputingManager): run_command += ' -v {}/user-{}:/data'.format(settings.LOCAL_USER_DATA_DIR, task.user.id) # Set registry string if task.container.registry == 'local': registry_string = 'localhost:5000/' else: registry_string = 'docker.io/' #if task.container.registry == 'local': # registry_string = 'localhost:5000/' #else: # registry_string = 'docker.io/' # Host name, image entry command run_command += ' -h task-{} -d -t {}{}'.format(task.uuid, registry_string, task.container.image) run_command += ' -h task-{} -d -t {}/{}:{}'.format(task.uuid, task.container.registry, task.container.image, task.container.tag) # Debug logger.debug('Running new task with command="{}"'.format(run_command)) Loading Loading @@ -348,16 +348,15 @@ class SlurmSSHClusterComputingManager(ClusterComputingManager, SSHComputingManag sbatch_args += ' --output=\$HOME/{}.log --error=\$HOME/{}.log '.format(task.uuid, task.uuid) # Submit the job if task.container.type == 'singularity': if task.computing.default_container_runtime == 'singularity': #if not task.container.supports_custom_interface_port: # raise Exception('This task does not support dynamic port allocation and is therefore not supported using singularity on Slurm') # Set pass if any if task.auth_pass: authstring = ' export SINGULARITYENV_AUTH_PASS={} && '.format(task.auth_pass) else: authstring = '' if not task.requires_proxy_auth and task.password: authstring = ' export SINGULARITYENV_AUTH_PASS={} && '.format(task.password) # Set binds, only from sys config if the resource is not owned by the user if self.computing.user != task.user: Loading @@ -382,24 +381,11 @@ class SlurmSSHClusterComputingManager(ClusterComputingManager, SSHComputingManag run_command += 'rm -rf /tmp/{}_data && mkdir -p /tmp/{}_data/tmp &>> \$HOME/{}.log && mkdir -p /tmp/{}_data/home &>> \$HOME/{}.log && chmod 700 /tmp/{}_data && '.format(task.uuid, task.uuid, task.uuid, task.uuid, task.uuid, task.uuid) run_command += 'exec nohup singularity run {} --pid --writable-tmpfs --no-home --home=/home/metauser --workdir /tmp/{}_data/tmp -B/tmp/{}_data/home:/home --containall --cleanenv '.format(binds, task.uuid, task.uuid) # Double to escape for Pythom, six for shell (double times three as \\\ escapes a single slash in shell) # Set registry if task.container.registry == 'docker_local': # Get local Docker registry conn string from.utils import get_local_docker_registry_conn_string local_docker_registry_conn_string = get_local_docker_registry_conn_string() registry = 'docker://{}/'.format(local_docker_registry_conn_string) elif task.container.registry == 'docker_hub': registry = 'docker://' else: raise NotImplementedError('Registry {} not supported'.format(task.container.registry)) run_command+='{}{} &> \$HOME/{}.log\\" > \$HOME/{}.sh && sbatch {} \$HOME/{}.sh"\''.format(registry, task.container.image, task.uuid, task.uuid, sbatch_args, task.uuid) # Double to escape for Python, six for shell (double times three as \\\ escapes a single slash in shell) run_command+='docker://{}/{}:{} &> \$HOME/{}.log\\" > \$HOME/{}.sh && sbatch {} \$HOME/{}.sh"\''.format(task.container.registry, task.container.image, task.container.tag, task.uuid, task.uuid, sbatch_args, task.uuid) else: raise NotImplementedError('Container {} not supported'.format(task.container.type)) raise NotImplementedError('Default container runtime "{}" not supported'.format(task.computing.default_container_runtime)) out = os_shell(run_command, capture=True) if out.exit_code != 0: Loading @@ -419,8 +405,8 @@ class SlurmSSHClusterComputingManager(ClusterComputingManager, SSHComputingManag task_uuid = task.uuid task = Task.objects.get(uuid=task_uuid) # Save job id as task pid task.pid = job_id # Save job id as task id task.id = job_id # Set status (only fi we get here before the agent which sets the status as running via the API) if task.status != TaskStatuses.running: Loading @@ -443,7 +429,7 @@ class SlurmSSHClusterComputingManager(ClusterComputingManager, SSHComputingManag user = self.computing.conf.get('user') # Stop the task remotely stop_command = 'ssh -o LogLevel=ERROR -i {} -4 -o StrictHostKeyChecking=no {}@{} \'/bin/bash -c "scancel {}"\''.format(user_keys.private_key_file, user, host, task.pid) stop_command = 'ssh -o LogLevel=ERROR -i {} -4 -o StrictHostKeyChecking=no {}@{} \'/bin/bash -c "scancel {}"\''.format(user_keys.private_key_file, user, host, task.id) out = os_shell(stop_command, capture=True) if out.exit_code != 0: raise Exception(out.stderr) Loading services/webapp/code/rosetta/core_app/migrations/0009_auto_20211103_2256.py 0 → 100644 +169 −0 Original line number Diff line number Diff line # Generated by Django 2.2.1 on 2021-11-03 22:56 import django.contrib.postgres.fields.jsonb from django.db import migrations, models class Migration(migrations.Migration): dependencies = [ ('core_app', '0008_auto_20211103_1232'), ] operations = [ migrations.AlterField( model_name='computing', name='access_mode', field=models.CharField(max_length=36, verbose_name='Access (control) mode'), ), migrations.AlterField( model_name='computing', name='auth_mode', field=models.CharField(max_length=36, verbose_name='Auth mode'), ), migrations.AlterField( model_name='computing', name='container_runtimes', field=models.CharField(max_length=256, verbose_name='Container runtimes'), ), migrations.AlterField( model_name='computing', name='description', field=models.TextField(blank=True, null=True, verbose_name='Description'), ), migrations.AlterField( model_name='computing', name='name', field=models.CharField(max_length=255, verbose_name='Name'), ), migrations.AlterField( model_name='computing', name='type', field=models.CharField(max_length=255, verbose_name='Type'), ), migrations.AlterField( model_name='computing', name='wms', field=models.CharField(blank=True, max_length=36, null=True, verbose_name='Workload management system'), ), migrations.AlterField( model_name='container', name='arch', field=models.CharField(default='x86_64', max_length=36, verbose_name='Architecture'), ), migrations.AlterField( model_name='container', name='description', field=models.TextField(blank=True, null=True, verbose_name='Description'), ), migrations.AlterField( model_name='container', name='image', field=models.CharField(max_length=255, verbose_name='Image'), ), migrations.AlterField( model_name='container', name='interface_port', field=models.IntegerField(blank=True, null=True, verbose_name='Interface port'), ), migrations.AlterField( model_name='container', name='interface_protocol', field=models.CharField(blank=True, max_length=36, null=True, verbose_name='Interface protocol'), ), migrations.AlterField( model_name='container', name='interface_transport', field=models.CharField(blank=True, max_length=36, null=True, verbose_name='Interface transport'), ), migrations.AlterField( model_name='container', name='name', field=models.CharField(max_length=255, verbose_name='Name'), ), migrations.AlterField( model_name='container', name='os', field=models.CharField(default='linux', max_length=36, verbose_name='Operating system'), ), migrations.AlterField( model_name='container', name='registry', field=models.CharField(max_length=255, verbose_name='Registry'), ), migrations.AlterField( model_name='container', name='supports_custom_interface_port', field=models.BooleanField(default=False, verbose_name='Supports custom interface port'), ), migrations.AlterField( model_name='container', name='supports_interface_auth', field=models.BooleanField(default=False, verbose_name='Supports interface auth'), ), migrations.AlterField( model_name='container', name='tag', field=models.CharField(default='latest', max_length=255, verbose_name='Tag'), ), migrations.AlterField( model_name='task', name='auth_token', field=models.CharField(blank=True, max_length=36, null=True, verbose_name='Auth token'), ), migrations.AlterField( model_name='task', name='computing_options', field=django.contrib.postgres.fields.jsonb.JSONField(blank=True, null=True, verbose_name='Computing options'), ), migrations.AlterField( model_name='task', name='extra_binds', field=models.CharField(blank=True, max_length=4096, null=True, verbose_name='Extra binds'), ), migrations.AlterField( model_name='task', name='id', field=models.CharField(blank=True, max_length=64, null=True, verbose_name='ID'), ), migrations.AlterField( model_name='task', name='interface_ip', field=models.CharField(blank=True, max_length=36, null=True, verbose_name='Interface IP address'), ), migrations.AlterField( model_name='task', name='interface_port', field=models.IntegerField(blank=True, null=True, verbose_name='Interface port'), ), migrations.AlterField( model_name='task', name='name', field=models.CharField(max_length=36, verbose_name='Name'), ), migrations.AlterField( model_name='task', name='requires_proxy', field=models.BooleanField(verbose_name='Requires proxy'), ), migrations.AlterField( model_name='task', name='requires_proxy_auth', field=models.BooleanField(verbose_name='Requires proxy auth'), ), migrations.AlterField( model_name='task', name='requires_tcp_tunnel', field=models.BooleanField(verbose_name='Requires a TCP tunnel'), ), migrations.AlterField( model_name='task', name='status', field=models.CharField(blank=True, max_length=36, null=True, verbose_name='Status'), ), migrations.AlterField( model_name='task', name='tcp_tunnel_port', field=models.IntegerField(blank=True, null=True, verbose_name='TCP tunnel port'), ), ] services/webapp/code/rosetta/core_app/models.py +41 −34 Original line number Diff line number Diff line Loading @@ -80,29 +80,29 @@ class Container(models.Model): # If a container has no user, it will be available to anyone. Can be created, edited and deleted only by admins. # Generic attributes name = models.CharField('Container Name', max_length=255, blank=False, null=False) description = models.TextField('Container description', blank=True, null=True) name = models.CharField('Name', max_length=255, blank=False, null=False) description = models.TextField('Description', blank=True, null=True) # Registry-related attributes registry = models.CharField('Container registry', max_length=255, blank=False, null=False) image = models.CharField('Container image', max_length=255, blank=False, null=False) tag = models.CharField('Container image', max_length=255, blank=False, null=False, default='latest') registry = models.CharField('Registry', max_length=255, blank=False, null=False) image = models.CharField('Image', max_length=255, blank=False, null=False) tag = models.CharField('Tag', max_length=255, blank=False, null=False, default='latest') # Platform-related arch = models.CharField('Container architecture', max_length=36, blank=False, null=False, default='x86_64') os = models.CharField('Container operating system', max_length=36, blank=False, null=False, default='linux') arch = models.CharField('Architecture', max_length=36, blank=False, null=False, default='x86_64') os = models.CharField('Operating system', max_length=36, blank=False, null=False, default='linux') # TODO: do we want more control with respect to kernel, CPUs, instruction sets? # requires = i.e. kernel > 3, intel, AVX2 # Port, protocol and transport for the container interface interface_port = models.IntegerField('Container interface port', blank=True, null=True) interface_protocol = models.CharField('Container interface protocol', max_length=36, blank=True, null=True) interface_transport = models.CharField('Container interface protocol', max_length=36, blank=True, null=True) interface_port = models.IntegerField('Interface port', blank=True, null=True) interface_protocol = models.CharField('Interface protocol', max_length=36, blank=True, null=True) interface_transport = models.CharField('Interface transport', max_length=36, blank=True, null=True) # Capabilities supports_custom_interface_port = models.BooleanField('Does the container support setting a custom interface port?', default=False) # BASE_PORT supports_interface_auth = models.BooleanField('Does the container interface support authentication?', default=False) # AUTH_USER / AUTH_PASS supports_custom_interface_port = models.BooleanField('Supports custom interface port', default=False) # BASE_PORT supports_interface_auth = models.BooleanField('Supports interface auth', default=False) # AUTH_USER / AUTH_PASS class Meta: ordering = ['name'] Loading Loading @@ -131,23 +131,23 @@ class Computing(models.Model): user = models.ForeignKey(User, related_name='+', on_delete=models.CASCADE, blank=True, null=True) # If a compute resource has no user, it will be available to anyone. Can be created, edited and deleted only by admins. name = models.CharField('Computing Name', max_length=255, blank=False, null=False) description = models.TextField('Container description', blank=True, null=True) name = models.CharField('Name', max_length=255, blank=False, null=False) description = models.TextField('Description', blank=True, null=True) # Standalone / sluster type = models.CharField('Computing Type', max_length=255, blank=False, null=False) type = models.CharField('Type', max_length=255, blank=False, null=False) requires_sys_conf = models.BooleanField(default=False) requires_user_conf = models.BooleanField(default=False) requires_user_keys = models.BooleanField(default=False) # Interfce and interaction definition access_mode = models.CharField('Computing resource access (control) mode', max_length=36, blank=False, null=False) auth_mode = models.CharField('Computing resource authentication mode', max_length=36, blank=False, null=False) wms = models.CharField('Computing resource WMS', max_length=36, blank=True, null=True) access_mode = models.CharField('Access (control) mode', max_length=36, blank=False, null=False) auth_mode = models.CharField('Auth mode', max_length=36, blank=False, null=False) wms = models.CharField('Workload management system', max_length=36, blank=True, null=True) # Supported container runtimes container_runtimes = models.CharField('Computing resource container runtimes', max_length=256, blank=False, null=False) container_runtimes = models.CharField('Container runtimes', max_length=256, blank=False, null=False) class Meta: ordering = ['name'] Loading @@ -168,6 +168,10 @@ class Computing(models.Model): color_map_index = string_int_hash % len(color_map) return color_map[color_map_index] @property def default_container_runtime(self): return str(self.container_runtimes).split(',')[0] #======================= # Computing manager Loading @@ -181,11 +185,11 @@ class Computing(models.Model): try: return self._manager except AttributeError: if self.type == 'cluster' and self.access_mode == 'ssh+cli' and self.access_mode == 'user_keys' and self.wms == 'slurm': if self.type == 'cluster' and self.access_mode == 'ssh+cli' and self.auth_mode == 'user_keys' and self.wms == 'slurm': self._manager = computing_managers.SlurmSSHClusterComputingManager(self) elif self.type == 'standalone' and self.access_mode == 'ssh+cli' and self.access_mode == 'user_keys' and self.wms is None: elif self.type == 'standalone' and self.access_mode == 'ssh+cli' and self.auth_mode == 'user_keys' and self.wms is None: self._manager = computing_managers.SSHSingleNodeComputingManager(self) elif self.type == 'standalone' and self.access_mode == 'internal' and self.access_mode == 'internal' and self.wms is None: elif self.type == 'standalone' and self.access_mode == 'internal' and self.auth_mode == 'internal' and self.wms is None: self._manager = computing_managers.InternalSingleNodeComputingManager(self) else: raise ConsistencyException('Don\'t know how to instantiate a computing manager for computing resource of type "{}", access mode "{}" and WMS "{}"'.format(self.type, self.access_mode, self.wms)) Loading Loading @@ -286,34 +290,37 @@ class Task(models.Model): uuid = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) user = models.ForeignKey(User, related_name='+', on_delete=models.CASCADE) name = models.CharField('Task name', max_length=36, blank=False, null=False) name = models.CharField('Name', max_length=36, blank=False, null=False) # Task management id = models.CharField('Task ID', max_length=64, blank=True, null=True) # i.e. Slurm job id, singularity PID, docker hash status = models.CharField('Task status', max_length=36, blank=True, null=True) id = models.CharField('ID', max_length=64, blank=True, null=True) # i.e. Slurm job id, singularity PID, docker hash status = models.CharField('Status', max_length=36, blank=True, null=True) created = models.DateTimeField('Created on', default=timezone.now) # How to reach the task interface. The IP has to be intended either as the container IP if this is directly # reachable (i.e. using a Docker or Kubernetes network) or as the host IP address, depending on the # computing resource and its computing manager/WMS/container runtime. The port is to be intended # as the port where the task interface is exposed on its IP address. interface_ip = models.CharField('Task interface ip address', max_length=36, blank=True, null=True) interface_port = models.IntegerField('Task interface port', blank=True, null=True) interface_ip = models.CharField('Interface IP address', max_length=36, blank=True, null=True) interface_port = models.IntegerField('Interface port', blank=True, null=True) # Task access requires_tcp_tunnel = models.BooleanField('Does the task require a tunnel to be opened for accessing its interface?') tcp_tunnel_port = models.IntegerField('Task tunnel port', blank=True, null=True) requires_proxy = models.BooleanField('Does the task require a proxy for accessing its interface?') requires_proxy_auth = models.BooleanField('Does the task require interface authentication to be enforced at proxy-level?') auth_token = models.CharField('A one-time token for proxy or interface authentication', max_length=36, blank=True, null=True) requires_tcp_tunnel = models.BooleanField('Requires a TCP tunnel') tcp_tunnel_port = models.IntegerField('TCP tunnel port', blank=True, null=True) requires_proxy = models.BooleanField('Requires proxy') requires_proxy_auth = models.BooleanField('Requires proxy auth') auth_token = models.CharField('Auth token', max_length=36, blank=True, null=True) # A one-time token for proxy or interface authentication # Links computing = models.ForeignKey(Computing, related_name='+', on_delete=models.CASCADE) container = models.ForeignKey('Container', on_delete=models.CASCADE, related_name='+') # Extra extra_binds = models.CharField('Task container extra binds', max_length=4096, blank=True, null=True) computing_options = JSONField('Task computing options', blank=True, null=True) # i.e. CPUs, RAM, cluster partition etc. TODO: why here? extra_binds = models.CharField('Extra binds', max_length=4096, blank=True, null=True) computing_options = JSONField('Computing options', blank=True, null=True) # i.e. CPUs, RAM, cluster partition etc. TODO: why here? # TODO: add the option for selecting the runtime as advanced option when creating the task #container_runtime class Meta: ordering = ['-created'] Loading services/webapp/code/rosetta/core_app/templates/add_container.html +1 −1 Original line number Diff line number Diff line Loading @@ -78,7 +78,7 @@ </table> <a href="javascript:void(0);" id="show_button" onclick="toggle_visibility('advanced_div')">Show advanced...</a> <a href="javascript:void(0);" id="show_button" onclick="toggle_visibility('advanced_div')">Advanced...</a> <div id="advanced_div" style="display:none; width:360px;"> Loading Loading
services/webapp/code/rosetta/core_app/api.py +11 −11 Original line number Diff line number Diff line Loading @@ -305,25 +305,25 @@ print(port) elif action=='set_ip_port': task_ip = request.GET.get('ip', None) if not task_ip: return HttpResponse('IP not valid (got "{}")'.format(task_ip)) task_interface_ip = request.GET.get('ip', None) if not task_interface_ip: return HttpResponse('IP not valid (got "{}")'.format(task_interface_ip)) task_port = request.GET.get('port', None) if not task_port: return HttpResponse('Port not valid (got "{}")'.format(task_port)) task_interface_port = request.GET.get('port', None) if not task_interface_port: return HttpResponse('Port not valid (got "{}")'.format(task_interface_port)) try: int(task_port) int(task_interface_port) except (TypeError, ValueError): return HttpResponse('Port not valid (got "{}")'.format(task_port)) return HttpResponse('Port not valid (got "{}")'.format(task_interface_port)) # Set fields logger.info('Setting task "{}" to ip "{}" and port "{}"'.format(task.uuid, task_ip, task_port)) logger.info('Setting task "{}" to ip "{}" and port "{}"'.format(task.uuid, task_interface_ip, task_interface_port)) task.status = TaskStatuses.running task.ip = task_ip task.interface_ip = task_interface_ip if task.container.supports_custom_interface_port: task.port = int(task_port) task.interface_port = int(task_interface_port) task.save() # Notify the user that the task called back home Loading
services/webapp/code/rosetta/core_app/computing_managers.py +16 −30 Original line number Diff line number Diff line Loading @@ -102,13 +102,13 @@ class InternalSingleNodeComputingManager(SingleNodeComputingManager): run_command += ' -v {}/user-{}:/data'.format(settings.LOCAL_USER_DATA_DIR, task.user.id) # Set registry string if task.container.registry == 'local': registry_string = 'localhost:5000/' else: registry_string = 'docker.io/' #if task.container.registry == 'local': # registry_string = 'localhost:5000/' #else: # registry_string = 'docker.io/' # Host name, image entry command run_command += ' -h task-{} -d -t {}{}'.format(task.uuid, registry_string, task.container.image) run_command += ' -h task-{} -d -t {}/{}:{}'.format(task.uuid, task.container.registry, task.container.image, task.container.tag) # Debug logger.debug('Running new task with command="{}"'.format(run_command)) Loading Loading @@ -348,16 +348,15 @@ class SlurmSSHClusterComputingManager(ClusterComputingManager, SSHComputingManag sbatch_args += ' --output=\$HOME/{}.log --error=\$HOME/{}.log '.format(task.uuid, task.uuid) # Submit the job if task.container.type == 'singularity': if task.computing.default_container_runtime == 'singularity': #if not task.container.supports_custom_interface_port: # raise Exception('This task does not support dynamic port allocation and is therefore not supported using singularity on Slurm') # Set pass if any if task.auth_pass: authstring = ' export SINGULARITYENV_AUTH_PASS={} && '.format(task.auth_pass) else: authstring = '' if not task.requires_proxy_auth and task.password: authstring = ' export SINGULARITYENV_AUTH_PASS={} && '.format(task.password) # Set binds, only from sys config if the resource is not owned by the user if self.computing.user != task.user: Loading @@ -382,24 +381,11 @@ class SlurmSSHClusterComputingManager(ClusterComputingManager, SSHComputingManag run_command += 'rm -rf /tmp/{}_data && mkdir -p /tmp/{}_data/tmp &>> \$HOME/{}.log && mkdir -p /tmp/{}_data/home &>> \$HOME/{}.log && chmod 700 /tmp/{}_data && '.format(task.uuid, task.uuid, task.uuid, task.uuid, task.uuid, task.uuid) run_command += 'exec nohup singularity run {} --pid --writable-tmpfs --no-home --home=/home/metauser --workdir /tmp/{}_data/tmp -B/tmp/{}_data/home:/home --containall --cleanenv '.format(binds, task.uuid, task.uuid) # Double to escape for Pythom, six for shell (double times three as \\\ escapes a single slash in shell) # Set registry if task.container.registry == 'docker_local': # Get local Docker registry conn string from.utils import get_local_docker_registry_conn_string local_docker_registry_conn_string = get_local_docker_registry_conn_string() registry = 'docker://{}/'.format(local_docker_registry_conn_string) elif task.container.registry == 'docker_hub': registry = 'docker://' else: raise NotImplementedError('Registry {} not supported'.format(task.container.registry)) run_command+='{}{} &> \$HOME/{}.log\\" > \$HOME/{}.sh && sbatch {} \$HOME/{}.sh"\''.format(registry, task.container.image, task.uuid, task.uuid, sbatch_args, task.uuid) # Double to escape for Python, six for shell (double times three as \\\ escapes a single slash in shell) run_command+='docker://{}/{}:{} &> \$HOME/{}.log\\" > \$HOME/{}.sh && sbatch {} \$HOME/{}.sh"\''.format(task.container.registry, task.container.image, task.container.tag, task.uuid, task.uuid, sbatch_args, task.uuid) else: raise NotImplementedError('Container {} not supported'.format(task.container.type)) raise NotImplementedError('Default container runtime "{}" not supported'.format(task.computing.default_container_runtime)) out = os_shell(run_command, capture=True) if out.exit_code != 0: Loading @@ -419,8 +405,8 @@ class SlurmSSHClusterComputingManager(ClusterComputingManager, SSHComputingManag task_uuid = task.uuid task = Task.objects.get(uuid=task_uuid) # Save job id as task pid task.pid = job_id # Save job id as task id task.id = job_id # Set status (only fi we get here before the agent which sets the status as running via the API) if task.status != TaskStatuses.running: Loading @@ -443,7 +429,7 @@ class SlurmSSHClusterComputingManager(ClusterComputingManager, SSHComputingManag user = self.computing.conf.get('user') # Stop the task remotely stop_command = 'ssh -o LogLevel=ERROR -i {} -4 -o StrictHostKeyChecking=no {}@{} \'/bin/bash -c "scancel {}"\''.format(user_keys.private_key_file, user, host, task.pid) stop_command = 'ssh -o LogLevel=ERROR -i {} -4 -o StrictHostKeyChecking=no {}@{} \'/bin/bash -c "scancel {}"\''.format(user_keys.private_key_file, user, host, task.id) out = os_shell(stop_command, capture=True) if out.exit_code != 0: raise Exception(out.stderr) Loading
services/webapp/code/rosetta/core_app/migrations/0009_auto_20211103_2256.py 0 → 100644 +169 −0 Original line number Diff line number Diff line # Generated by Django 2.2.1 on 2021-11-03 22:56 import django.contrib.postgres.fields.jsonb from django.db import migrations, models class Migration(migrations.Migration): dependencies = [ ('core_app', '0008_auto_20211103_1232'), ] operations = [ migrations.AlterField( model_name='computing', name='access_mode', field=models.CharField(max_length=36, verbose_name='Access (control) mode'), ), migrations.AlterField( model_name='computing', name='auth_mode', field=models.CharField(max_length=36, verbose_name='Auth mode'), ), migrations.AlterField( model_name='computing', name='container_runtimes', field=models.CharField(max_length=256, verbose_name='Container runtimes'), ), migrations.AlterField( model_name='computing', name='description', field=models.TextField(blank=True, null=True, verbose_name='Description'), ), migrations.AlterField( model_name='computing', name='name', field=models.CharField(max_length=255, verbose_name='Name'), ), migrations.AlterField( model_name='computing', name='type', field=models.CharField(max_length=255, verbose_name='Type'), ), migrations.AlterField( model_name='computing', name='wms', field=models.CharField(blank=True, max_length=36, null=True, verbose_name='Workload management system'), ), migrations.AlterField( model_name='container', name='arch', field=models.CharField(default='x86_64', max_length=36, verbose_name='Architecture'), ), migrations.AlterField( model_name='container', name='description', field=models.TextField(blank=True, null=True, verbose_name='Description'), ), migrations.AlterField( model_name='container', name='image', field=models.CharField(max_length=255, verbose_name='Image'), ), migrations.AlterField( model_name='container', name='interface_port', field=models.IntegerField(blank=True, null=True, verbose_name='Interface port'), ), migrations.AlterField( model_name='container', name='interface_protocol', field=models.CharField(blank=True, max_length=36, null=True, verbose_name='Interface protocol'), ), migrations.AlterField( model_name='container', name='interface_transport', field=models.CharField(blank=True, max_length=36, null=True, verbose_name='Interface transport'), ), migrations.AlterField( model_name='container', name='name', field=models.CharField(max_length=255, verbose_name='Name'), ), migrations.AlterField( model_name='container', name='os', field=models.CharField(default='linux', max_length=36, verbose_name='Operating system'), ), migrations.AlterField( model_name='container', name='registry', field=models.CharField(max_length=255, verbose_name='Registry'), ), migrations.AlterField( model_name='container', name='supports_custom_interface_port', field=models.BooleanField(default=False, verbose_name='Supports custom interface port'), ), migrations.AlterField( model_name='container', name='supports_interface_auth', field=models.BooleanField(default=False, verbose_name='Supports interface auth'), ), migrations.AlterField( model_name='container', name='tag', field=models.CharField(default='latest', max_length=255, verbose_name='Tag'), ), migrations.AlterField( model_name='task', name='auth_token', field=models.CharField(blank=True, max_length=36, null=True, verbose_name='Auth token'), ), migrations.AlterField( model_name='task', name='computing_options', field=django.contrib.postgres.fields.jsonb.JSONField(blank=True, null=True, verbose_name='Computing options'), ), migrations.AlterField( model_name='task', name='extra_binds', field=models.CharField(blank=True, max_length=4096, null=True, verbose_name='Extra binds'), ), migrations.AlterField( model_name='task', name='id', field=models.CharField(blank=True, max_length=64, null=True, verbose_name='ID'), ), migrations.AlterField( model_name='task', name='interface_ip', field=models.CharField(blank=True, max_length=36, null=True, verbose_name='Interface IP address'), ), migrations.AlterField( model_name='task', name='interface_port', field=models.IntegerField(blank=True, null=True, verbose_name='Interface port'), ), migrations.AlterField( model_name='task', name='name', field=models.CharField(max_length=36, verbose_name='Name'), ), migrations.AlterField( model_name='task', name='requires_proxy', field=models.BooleanField(verbose_name='Requires proxy'), ), migrations.AlterField( model_name='task', name='requires_proxy_auth', field=models.BooleanField(verbose_name='Requires proxy auth'), ), migrations.AlterField( model_name='task', name='requires_tcp_tunnel', field=models.BooleanField(verbose_name='Requires a TCP tunnel'), ), migrations.AlterField( model_name='task', name='status', field=models.CharField(blank=True, max_length=36, null=True, verbose_name='Status'), ), migrations.AlterField( model_name='task', name='tcp_tunnel_port', field=models.IntegerField(blank=True, null=True, verbose_name='TCP tunnel port'), ), ]
services/webapp/code/rosetta/core_app/models.py +41 −34 Original line number Diff line number Diff line Loading @@ -80,29 +80,29 @@ class Container(models.Model): # If a container has no user, it will be available to anyone. Can be created, edited and deleted only by admins. # Generic attributes name = models.CharField('Container Name', max_length=255, blank=False, null=False) description = models.TextField('Container description', blank=True, null=True) name = models.CharField('Name', max_length=255, blank=False, null=False) description = models.TextField('Description', blank=True, null=True) # Registry-related attributes registry = models.CharField('Container registry', max_length=255, blank=False, null=False) image = models.CharField('Container image', max_length=255, blank=False, null=False) tag = models.CharField('Container image', max_length=255, blank=False, null=False, default='latest') registry = models.CharField('Registry', max_length=255, blank=False, null=False) image = models.CharField('Image', max_length=255, blank=False, null=False) tag = models.CharField('Tag', max_length=255, blank=False, null=False, default='latest') # Platform-related arch = models.CharField('Container architecture', max_length=36, blank=False, null=False, default='x86_64') os = models.CharField('Container operating system', max_length=36, blank=False, null=False, default='linux') arch = models.CharField('Architecture', max_length=36, blank=False, null=False, default='x86_64') os = models.CharField('Operating system', max_length=36, blank=False, null=False, default='linux') # TODO: do we want more control with respect to kernel, CPUs, instruction sets? # requires = i.e. kernel > 3, intel, AVX2 # Port, protocol and transport for the container interface interface_port = models.IntegerField('Container interface port', blank=True, null=True) interface_protocol = models.CharField('Container interface protocol', max_length=36, blank=True, null=True) interface_transport = models.CharField('Container interface protocol', max_length=36, blank=True, null=True) interface_port = models.IntegerField('Interface port', blank=True, null=True) interface_protocol = models.CharField('Interface protocol', max_length=36, blank=True, null=True) interface_transport = models.CharField('Interface transport', max_length=36, blank=True, null=True) # Capabilities supports_custom_interface_port = models.BooleanField('Does the container support setting a custom interface port?', default=False) # BASE_PORT supports_interface_auth = models.BooleanField('Does the container interface support authentication?', default=False) # AUTH_USER / AUTH_PASS supports_custom_interface_port = models.BooleanField('Supports custom interface port', default=False) # BASE_PORT supports_interface_auth = models.BooleanField('Supports interface auth', default=False) # AUTH_USER / AUTH_PASS class Meta: ordering = ['name'] Loading Loading @@ -131,23 +131,23 @@ class Computing(models.Model): user = models.ForeignKey(User, related_name='+', on_delete=models.CASCADE, blank=True, null=True) # If a compute resource has no user, it will be available to anyone. Can be created, edited and deleted only by admins. name = models.CharField('Computing Name', max_length=255, blank=False, null=False) description = models.TextField('Container description', blank=True, null=True) name = models.CharField('Name', max_length=255, blank=False, null=False) description = models.TextField('Description', blank=True, null=True) # Standalone / sluster type = models.CharField('Computing Type', max_length=255, blank=False, null=False) type = models.CharField('Type', max_length=255, blank=False, null=False) requires_sys_conf = models.BooleanField(default=False) requires_user_conf = models.BooleanField(default=False) requires_user_keys = models.BooleanField(default=False) # Interfce and interaction definition access_mode = models.CharField('Computing resource access (control) mode', max_length=36, blank=False, null=False) auth_mode = models.CharField('Computing resource authentication mode', max_length=36, blank=False, null=False) wms = models.CharField('Computing resource WMS', max_length=36, blank=True, null=True) access_mode = models.CharField('Access (control) mode', max_length=36, blank=False, null=False) auth_mode = models.CharField('Auth mode', max_length=36, blank=False, null=False) wms = models.CharField('Workload management system', max_length=36, blank=True, null=True) # Supported container runtimes container_runtimes = models.CharField('Computing resource container runtimes', max_length=256, blank=False, null=False) container_runtimes = models.CharField('Container runtimes', max_length=256, blank=False, null=False) class Meta: ordering = ['name'] Loading @@ -168,6 +168,10 @@ class Computing(models.Model): color_map_index = string_int_hash % len(color_map) return color_map[color_map_index] @property def default_container_runtime(self): return str(self.container_runtimes).split(',')[0] #======================= # Computing manager Loading @@ -181,11 +185,11 @@ class Computing(models.Model): try: return self._manager except AttributeError: if self.type == 'cluster' and self.access_mode == 'ssh+cli' and self.access_mode == 'user_keys' and self.wms == 'slurm': if self.type == 'cluster' and self.access_mode == 'ssh+cli' and self.auth_mode == 'user_keys' and self.wms == 'slurm': self._manager = computing_managers.SlurmSSHClusterComputingManager(self) elif self.type == 'standalone' and self.access_mode == 'ssh+cli' and self.access_mode == 'user_keys' and self.wms is None: elif self.type == 'standalone' and self.access_mode == 'ssh+cli' and self.auth_mode == 'user_keys' and self.wms is None: self._manager = computing_managers.SSHSingleNodeComputingManager(self) elif self.type == 'standalone' and self.access_mode == 'internal' and self.access_mode == 'internal' and self.wms is None: elif self.type == 'standalone' and self.access_mode == 'internal' and self.auth_mode == 'internal' and self.wms is None: self._manager = computing_managers.InternalSingleNodeComputingManager(self) else: raise ConsistencyException('Don\'t know how to instantiate a computing manager for computing resource of type "{}", access mode "{}" and WMS "{}"'.format(self.type, self.access_mode, self.wms)) Loading Loading @@ -286,34 +290,37 @@ class Task(models.Model): uuid = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) user = models.ForeignKey(User, related_name='+', on_delete=models.CASCADE) name = models.CharField('Task name', max_length=36, blank=False, null=False) name = models.CharField('Name', max_length=36, blank=False, null=False) # Task management id = models.CharField('Task ID', max_length=64, blank=True, null=True) # i.e. Slurm job id, singularity PID, docker hash status = models.CharField('Task status', max_length=36, blank=True, null=True) id = models.CharField('ID', max_length=64, blank=True, null=True) # i.e. Slurm job id, singularity PID, docker hash status = models.CharField('Status', max_length=36, blank=True, null=True) created = models.DateTimeField('Created on', default=timezone.now) # How to reach the task interface. The IP has to be intended either as the container IP if this is directly # reachable (i.e. using a Docker or Kubernetes network) or as the host IP address, depending on the # computing resource and its computing manager/WMS/container runtime. The port is to be intended # as the port where the task interface is exposed on its IP address. interface_ip = models.CharField('Task interface ip address', max_length=36, blank=True, null=True) interface_port = models.IntegerField('Task interface port', blank=True, null=True) interface_ip = models.CharField('Interface IP address', max_length=36, blank=True, null=True) interface_port = models.IntegerField('Interface port', blank=True, null=True) # Task access requires_tcp_tunnel = models.BooleanField('Does the task require a tunnel to be opened for accessing its interface?') tcp_tunnel_port = models.IntegerField('Task tunnel port', blank=True, null=True) requires_proxy = models.BooleanField('Does the task require a proxy for accessing its interface?') requires_proxy_auth = models.BooleanField('Does the task require interface authentication to be enforced at proxy-level?') auth_token = models.CharField('A one-time token for proxy or interface authentication', max_length=36, blank=True, null=True) requires_tcp_tunnel = models.BooleanField('Requires a TCP tunnel') tcp_tunnel_port = models.IntegerField('TCP tunnel port', blank=True, null=True) requires_proxy = models.BooleanField('Requires proxy') requires_proxy_auth = models.BooleanField('Requires proxy auth') auth_token = models.CharField('Auth token', max_length=36, blank=True, null=True) # A one-time token for proxy or interface authentication # Links computing = models.ForeignKey(Computing, related_name='+', on_delete=models.CASCADE) container = models.ForeignKey('Container', on_delete=models.CASCADE, related_name='+') # Extra extra_binds = models.CharField('Task container extra binds', max_length=4096, blank=True, null=True) computing_options = JSONField('Task computing options', blank=True, null=True) # i.e. CPUs, RAM, cluster partition etc. TODO: why here? extra_binds = models.CharField('Extra binds', max_length=4096, blank=True, null=True) computing_options = JSONField('Computing options', blank=True, null=True) # i.e. CPUs, RAM, cluster partition etc. TODO: why here? # TODO: add the option for selecting the runtime as advanced option when creating the task #container_runtime class Meta: ordering = ['-created'] Loading
services/webapp/code/rosetta/core_app/templates/add_container.html +1 −1 Original line number Diff line number Diff line Loading @@ -78,7 +78,7 @@ </table> <a href="javascript:void(0);" id="show_button" onclick="toggle_visibility('advanced_div')">Show advanced...</a> <a href="javascript:void(0);" id="show_button" onclick="toggle_visibility('advanced_div')">Advanced...</a> <div id="advanced_div" style="display:none; width:360px;"> Loading