autocnet/graph/asynchronous_funcs.py  +15 −4

 import json
 import time

-from sqlalchemy import insert, update
+from sqlalchemy import insert
 from sqlalchemy.sql.expression import bindparam

 from autocnet.io.db.model import Points, Measures
 from autocnet.utils.serializers import object_hook
+from autocnet.transformation.spatial import reproject, og2oc


 def watch_insert_queue(queue, queue_name, counter_name, engine, stop_event, sleep_time=5):
     """

@@ -57,7 +58,8 @@ def watch_insert_queue(queue, queue_name, counter_name, engine, stop_event, sleep_time=5):
     measures = []
     # Pull the SRID dynamically from the model (database)
-    srid = Points.rectangular_srid
+    rect_srid = Points.rectangular_srid
+    lat_srid = Points.latitudinal_srid

     for i in range(0, read_length):
         msg = json.loads(queue.lpop(queue_name), object_hook=object_hook)

@@ -68,7 +70,16 @@ def watch_insert_queue(queue, queue_name, counter_name, engine, stop_event, sleep_time=5):
         # Since this avoids the ORM, need to map the table names manually
         msg['pointType'] = msg['pointtype']
-        msg['adjusted'] = f'SRID={srid};' + msg["adjusted"].wkt # Geometries go in as EWKT
+        adjusted = msg['adjusted']
+        msg['adjusted'] = f'SRID={rect_srid};' + adjusted.wkt # Geometries go in as EWKT
+        msg['apriori'] = f'SRID={rect_srid};' + adjusted.wkt
+        lon_og, lat_og, _ = reproject([adjusted.x, adjusted.y, adjusted.z],
+                                      Points.semimajor_rad, Points.semiminor_rad,
+                                      'geocent', 'latlon')
+        lon, lat = og2oc(lon_og, lat_og, Points.semimajor_rad, Points.semiminor_rad)
+        msg['geom'] = f'SRID={lat_srid};Point({lon} {lat})'

         # Measures are removed and manually added later
         point_measures = msg.pop('measures', [])
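The substantive change above is the geometry handling: the watcher now derives three EWKT columns from one adjusted point, writing the rectangular geometry to both adjusted and apriori and reprojecting the same x/y/z into an ocentric lon/lat for geom. A minimal sketch of the equivalent transform, assuming hypothetical Mars-like radii and SRIDs (in autocnet these come from the Points model) and using pyproj plus the standard latitude conversion as stand-ins for autocnet's reproject/og2oc helpers:

    import math

    import pyproj
    from shapely.geometry import Point

    semimajor, semiminor = 3396190.0, 3376200.0  # hypothetical radii in meters
    rect_srid, lat_srid = 949900, 949901         # hypothetical SRIDs

    adjusted = Point(1555000.0, 2111000.0, 2011000.0)  # body-fixed x, y, z

    # Geocentric x/y/z -> ographic lon/lat, standing in for
    # reproject([...], semimajor, semiminor, 'geocent', 'latlon')
    geocent = pyproj.CRS(proj='geocent', a=semimajor, b=semiminor)
    latlon = pyproj.CRS(proj='longlat', a=semimajor, b=semiminor)
    lon, lat_og, _ = pyproj.Transformer.from_crs(geocent, latlon).transform(
        adjusted.x, adjusted.y, adjusted.z)

    # Ographic -> ocentric latitude, standing in for og2oc:
    # tan(lat_oc) = (b/a)**2 * tan(lat_og)
    lat = math.degrees(math.atan((semiminor / semimajor) ** 2
                                 * math.tan(math.radians(lat_og))))

    # The three EWKT strings the watcher writes
    ewkt_adjusted = f'SRID={rect_srid};' + adjusted.wkt  # reused for apriori
    ewkt_geom = f'SRID={lat_srid};Point({lon} {lat})'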
autocnet/graph/cluster_submit.py  +57 −43

@@ -20,11 +20,13 @@
 from autocnet.utils.utils import import_func
 from autocnet.utils.serializers import JsonEncoder, object_hook
 from autocnet.io.db.model import JobsHistory


 def parse_args(): # pragma: no cover
     parser = argparse.ArgumentParser()
     parser.add_argument('-r', '--host', help='The host URL for the redis queue to pull messages from.')
     parser.add_argument('-p', '--port', help='The port used by redis.')
+    parser.add_argument('-q', '--queue', default=False, action='store_true',
+                        help='If passed, run in queue mode, where this job runs until either \
+                              walltime is hit or the queue that is being processed is empty.')
     parser.add_argument('processing_queue', help='The name of the processing queue to draw messages from.')
     parser.add_argument('working_queue', help='The name of the queue to push messages to while they process.')

@@ -55,7 +57,6 @@ def _instantiate_row(msg, ncg):
     """
     # Get the dict mapping iterable keyword types to the objects
     objdict = ncg.apply_iterable_options
-    rowid = msg['id']
     obj = objdict[msg['along']]
     with ncg.session_scope() as session:
         res = session.query(obj).filter(getattr(obj, 'id')==msg['id']).one()

@@ -167,14 +168,21 @@ def manage_messages(args, queue):
         A py-Redis queue object
     """
+    processing = True
+    while processing:
         # Pop the message from the left queue and push to the right queue; atomic operation
         msg = transfer_message_to_work_queue(queue, args['processing_queue'], args['working_queue'])
         if msg is None:
+            if args['queue'] == False:
                 warnings.warn('Expected to process a cluster job, but the message queue is empty.')
                 return
+            elif args['queue'] == True:
+                print(f'Completed processing from queue: {queue}.')
+                return

         # The key to remove from the working queue is the message. Essentially, find this element
         # in the list where the element is the JSON representation of the message. Maybe swap to a hash?

@@ -196,22 +204,28 @@ def manage_messages(args, queue):
             # print to get everything on the logs in the directory
             print(out)
-            serializedDict = json.loads(msg)
-            results = msgdict['results'] if msgdict['results'] else [{"status" : "success"}]
-            success = True if "success" in results[0]["status"].split(" ")[0].lower() else False
             sys.stdout.flush()
+            #serializedDict = json.loads(msg)
+            #results = msgdict['results'] if msgdict['results'] else [{"status" : "success"}]
+            #success = True if "success" in results[0]["status"].split(" ")[0].lower() else False

-            jh = JobsHistory(jobId=int(os.environ["SLURM_JOB_ID"]), functionName=msgdict["func"], args={"args" : serializedDict["args"], "kwargs": serializedDict["kwargs"]}, results=msgdict["results"], logs=out, success=success)
+            #jh = JobsHistory(jobId=int(os.environ["SLURM_JOB_ID"]), functionName=msgdict["func"], args={"args" : serializedDict["args"], "kwargs": serializedDict["kwargs"]}, results=msgdict["results"], logs=out, success=success)

-            with response['kwargs']['Session']() as session:
-                session.add(jh)
-                session.commit()
+            #with response['kwargs']['Session']() as session:
+                #session.add(jh)
+                #session.commit()

         finalize_message_from_work_queue(queue, args['working_queue'], remove_key)

+        # Process only a single job, else draw the next message off the queue if available.
+        if args['queue'] == False:
+            processing = False


 def main(): # pragma: no cover
     args = vars(parse_args())
     # Get the message
     queue = StrictRedis(host=args['host'], port=args['port'], db=0)
     manage_messages(args, queue)
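For context, transfer_message_to_work_queue and finalize_message_from_work_queue implement the reliable-queue pattern that the new while loop drains: the message is moved atomically between two Redis lists so it survives a worker dying mid-job, and the message itself serves as the removal key. A minimal sketch of that pattern with redis-py; the helper bodies below are illustrative stand-ins, not autocnet's implementations:

    import json

    from redis import StrictRedis

    queue = StrictRedis(host='localhost', port=6379, db=0)

    def transfer(queue, processing_queue, working_queue):
        # Atomically pop from the processing list and push onto the working
        # list so the message is never lost if this worker dies mid-job.
        return queue.rpoplpush(processing_queue, working_queue)

    def finalize(queue, working_queue, remove_key):
        # Scan the working list for the element equal to the message and
        # remove it; this linear scan is why a hash might be preferable.
        queue.lrem(working_queue, 1, remove_key)

    # Queue mode: keep pulling until the processing queue is empty.
    while (msg := transfer(queue, 'processing', 'working')) is not None:
        msgdict = json.loads(msg)
        # ... run the job described by msgdict ...
        finalize(queue, 'working', msg)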
autocnet/graph/network.py  +21 −5

@@ -1658,8 +1658,10 @@ class NetworkCandidateGraph(CandidateGraph):
               on='edge',
               args=(),
               walltime='01:00:00',
+              jobname='AutoCNet',
               chunksize=1000,
               arraychunk=25,
+              ntasks=1,
               filters={},
               query_string='',
               reapply=False,

@@ -1706,6 +1708,14 @@ class NetworkCandidateGraph(CandidateGraph):
             The number of concurrent jobs to run per job array. e.g. chunksize=100
             and arraychunk=25 gives the job array 1-100%25
+        ntasks : int
+            The number of tasks, distributed across the cluster on some set of
+            nodes, to be run. When running apply with ntasks, set ntasks to an
+            integer greater than 1; the arraychunk and chunksize arguments are
+            then ignored. In this mode, a number of non-communicating CPUs equal
+            to ntasks are allocated, and these CPUs run jobs. Switching from
+            arrays to ntasks also likely requires significantly increasing the
+            walltime, since fewer jobs each run for a longer duration.
         filters : dict
             Of simple filters to apply on database rows where the key is the
             attribute and the value used to check equivalency (e.g., attribute == value).

@@ -1810,17 +1820,23 @@ class NetworkCandidateGraph(CandidateGraph):
         isissetup = f'export ISISROOT={isisroot} && export ISISDATA={isisdata}'
         condasetup = f'conda activate {condaenv}'
         job = f'acn_submit -r={rhost} -p={rport} {processing_queue} {self.working_queue}'
-        command = f'{condasetup} && {isissetup} && {job}'
+        if ntasks > 1:
+            job += ' --queue' # Use queue mode where jobs run until the queue is empty
+        command = f'{condasetup} && {isissetup} && srun {job}'

         if queue == None:
             queue = self.config['cluster']['queue']
         submitter = Slurm(command,
-                          job_name='AutoCNet',
+                          job_name=jobname,
                           mem_per_cpu=self.config['cluster']['processing_memory'],
                           time=walltime,
                           partition=queue,
+                          ntasks=ntasks,
                           output=log_dir+f'/autocnet.{function}-%j')
-        job_str = submitter.submit(array='1-{}%{}'.format(job_counter, arraychunk), chunksize=chunksize, exclude=exclude)
+        if ntasks > 1:
+            job_str = submitter.submit(exclude=exclude)
+        else:
+            job_str = submitter.submit(array='1-{}%{}'.format(job_counter, arraychunk), chunksize=chunksize, exclude=exclude)

autocnet/graph/tests/test_cluster_submit.py  +2 −1

@@ -16,7 +16,8 @@ from autocnet.io.db.model import Points, JobsHistory
 @pytest.fixture
 def args():
     arg_dict = {'working_queue':'working',
-                'processing_queue':'processing'}
+                'processing_queue':'processing',
+                'queue':False}
     return arg_dict
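Hypothetical call sites for the two submission modes apply now supports, assuming an existing NetworkCandidateGraph instance ncg and an illustrative function path (both names are assumptions, not taken from this diff):

    # Array mode (default): submits a 1-N%arraychunk Slurm job array,
    # one queued message per array task.
    ncg.apply('autocnet.graph.some_module.some_func',
              on='edge',
              walltime='01:00:00',
              chunksize=1000,
              arraychunk=25)

    # Queue mode: ntasks > 1 allocates long-lived srun workers that each run
    # acn_submit --queue and drain messages until the queue is empty, so
    # walltime should be budgeted far higher than for a single array task.
    ncg.apply('autocnet.graph.some_module.some_func',
              on='edge',
              jobname='drain-queue',
              walltime='24:00:00',
              ntasks=12)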