Commit 8950821d authored by Elisabetta Giani's avatar Elisabetta Giani
Browse files

fix-configure-after-scan:

Some refactoring of the Configure method to resolve the
bug of configuration failure after and EndScan command:
- if subarray is READY when Configure is issued, put a delay to wait
  for CONFIGURING transition (temporary hack)
- added check on scan configuration string: if is equal to the
  last valid configuration programmed, the configuration step is
  skipped (the subarray is already programmed)
- if failure/timeout during configuration, check if the failed
  device is the CBF subarray.
- decorators for Configure method are applied in the mid/low instance
  code.
Updated the tests.
parent d30f3b9b
Loading
Loading
Loading
Loading
+153 −70
Original line number Diff line number Diff line
@@ -186,14 +186,12 @@ class CspSubarray(SKASubarray):
        dev_name = evt.device.dev_name()
        if not evt.err:
            try:
                if "commandprogress" == evt.attr_value.name.lower()[-15:]:
                if "cmdprogress" == evt.attr_value.name.lower()[-11:]:
                    if dev_name in self._sc_subarray_fqdn:
                        cmd_name = evt.attr_value.name[:-15]
                        self._sc_subarray_cmd_progress[dev_name][cmd_name] = evt.attr_value.value
                elif "cmdtimeoutexpired" == evt.attr_value.name.lower()[-17:]:
                elif "timeoutexpiredflag" == evt.attr_value.name.lower():
                    if dev_name in self._sc_subarray_fqdn:
                        cmd_name = evt.attr_value.name[:-17]
                        #self._sc_subarray_timeout_expired[dev_name][cmd_name] = True
                        self._timeout_expired = True
                else:
                    log_msg = ("Unexpected change event for"
@@ -241,12 +239,8 @@ class CspSubarray(SKASubarray):
            # Can happen evt empty??
            if evt:
                if not evt.err:
                    if self._sc_subarray_cmd_exec_state[evt.device.dev_name()][evt.cmd_name.lower()] == CmdExecState.RUNNING:
                        msg = "Device {} is processing command {}".format(evt.device,
                                                                      evt.cmd_name)
                    if self._sc_subarray_cmd_exec_state[evt.device.dev_name()][evt.cmd_name.lower()] == CmdExecState.IDLE:
                        msg = "Device {} ended command {} execution".format(evt.device, evt.cmd_name)
                    self.logger.info(msg)
                    self.logger.info("Device {} is processing the command {}".format(evt.device,
                                                                                     evt.cmd_name))
                else:
                    msg = "Error!!Command {} ended on device {}.\n".format(evt.cmd_name,
                                                                           evt.device.dev_name())
@@ -510,7 +504,7 @@ class CspSubarray(SKASubarray):
            self._failure_message[cmd_name] = ''
            self._sc_subarray_cmd_exec_state[device][cmd_name] = CmdExecState.RUNNING
            self.logger.info("Device {} State {} expected value {}".format(device,
                                                                           self._sc_subarray_state[device], 
                                                                           self._sc_subarray_obs_state[device], 
                                                                           dev_successful_state))
        # flag to signal when configuration ends on a sub-array sub-component
        device_done = defaultdict(lambda:False)
@@ -574,16 +568,23 @@ class CspSubarray(SKASubarray):
            self._cmd_progress[cmd_name] = 100
            elapsed_time = time.time() - starting_time 
            self.logger.info("Scan elapsed time:{}".format(elapsed_time))
        self._cmd_execution_state['scan'] = CmdExecState.IDLE
        self._cmd_execution_state[cmd_name] = CmdExecState.IDLE
        self._last_executed_command = cmd_name
        self.logger.info("Subarray obsState is: {} command {} state is {}".format(self._obs_state, cmd_name, self._cmd_execution_state[cmd_name]))
        
    def __configure_scan(self, device_list, **args_dict):
    def __configure_scan(self, input_arg, **args_dict):
        """
        Thread target function invoked from Configure method.
        It monitors the obsState value of each sub-array sub-component
        configuring the scan, looking for timeout or failure conditions.
        
        :param device_list: the FQDNs of the sub-array sub-components
        :param: input_arg: a list with two elements: 
            - the FQDNs of the sub-array sub-components
            - the configuration string 
        
        If the CSP.LMC Subarray device is properly configured, the received
        string passed as input argument of the command is stored the 
        class atribute _valid_scan_configuration.
        
        :return: None
        """
@@ -593,19 +594,18 @@ class CspSubarray(SKASubarray):
        # In the dictionary keys, is generally used the command name in lower letters
        self.logger.info("cmd_name: {} dev_state: {}".format(cmd_name,
                          dev_successful_state))
        command_start_time = time.time()
        self._num_dev_completed_task[cmd_name] = 0
        self._list_dev_completed_task[cmd_name] = []
        self._cmd_progress[cmd_name] = 0
        self._cmd_duration_measured[cmd_name] = 0
        self._abort_command_event.clear()
        # sub-component command execution measured time
        sc_cmd_duration_measured = defaultdict(lambda:defaultdict(lambda:0)) 
        command_progress = self._cmd_progress[cmd_name]
        # flag to signal when configuration ends on a sub-array sub-component
        device_done = defaultdict(lambda:False)
        # inside the end-less lop check the obsState of each sub-component
        device_list = input_arg[0]
        self.logger.info("device_list:{}".format(device_list))
        while True:
            command_progress = 0
            for device in device_list:
                self.logger.info("Command {} obs_state: {}".format(cmd_name,
                                                                 self._sc_subarray_obs_state[device]))
@@ -614,6 +614,22 @@ class CspSubarray(SKASubarray):
                # if the sub-component execution flag is no more RUNNING, the command has
                # ended with or without success. Go to check next device state.
                if self._sc_subarray_obs_state[device] == dev_successful_state:
                    # !!! bad hack:
                    # if the sub-element subarray state is already READY, sleep for a while to wait
                    # sub-element moves  to CONFIGURING.
                    # TODO:
                    # - wait for all sub-elements subarrays move to CONFIGURING, handling possible
                    #   error/timeout conditions
                    # - when in CONFIGURING, wait for sub-elements subarrays to move to READY
                    #   handling error conditions

                    # check the time elapsed from the command start, if less then 0.1 sleep for a while
                    elapsed_time = time.time() - self._sc_subarray_cmd_starting_time[device]
                    if elapsed_time < 0.1:
                        self.logger.info("Device {} already in READY state after {} sec".format(device,
                                         elapsed_time))
                        time.sleep(0.3)
                        continue
                    self.logger.info("Command {} ended with success on device {}.".format(cmd_name,
                                                                                              device))
                    # update the list and number of device that completed the task
@@ -623,22 +639,20 @@ class CspSubarray(SKASubarray):
                    # the command
                    self._sc_subarray_cmd_exec_state[device][cmd_name] = CmdExecState.IDLE
                    self._sc_subarray_cmd_progress[device][cmd_name] = 100
                    # calculate the real execution time for the command
                    if sc_cmd_duration_measured[device][cmd_name] >= self._cmd_duration_measured[cmd_name]:
                        self._cmd_duration_measured[cmd_name] = sc_cmd_duration_measured[device][cmd_name]
                    # command success: step to next device
                    device_done[device] = True

                # check for timeout event. A timeout event can be detected in two ways:
                # 1- the sub-element implements the 'onTimeoutExpired' attribute configured
                # 1- the sub-element implements the 'timeoutExpiredFlag' attribute configured
                #    for change event
                # 2- the CspMaster periodically checks the time elapsed from the start
                # 2- the CspSubarray periodically checks the time elapsed from the start
                #    of the command: if the value is greater than the sub-element expected time
                #    for command execution, the sub-element command execution state is set
                #    to TIMEOUT
                # Note: the second check, can be useful if the timeout event is not received
                # (for example for a temporary connection timeout)
                elapsed_time = time.time() - self._sc_subarray_cmd_starting_time[device]
                sc_cmd_duration_measured[device][cmd_name] = elapsed_time - time.time()

                if (elapsed_time > self._sc_subarray_cmd_duration_expected[device][cmd_name] or
                    self._sc_subarray_cmd_exec_state[device][cmd_name] == CmdExecState.TIMEOUT):
                    msg = ("Timeout executing {} command  on device {}".format(cmd_name, device))
@@ -653,10 +667,10 @@ class CspSubarray(SKASubarray):
                    device_done[device] = True
                # update the progress counter inside the loop taking into account the number of devices
                # executing the command
                self._cmd_progress[cmd_name] = command_progress+ self._sc_subarray_cmd_progress[device][cmd_name]/len(device_list)
                self.logger.info("Command {} on device {} exec_state {}:".format(cmd_name,device,
                command_progress += self._sc_subarray_cmd_progress[device][cmd_name]/len(device_list)
                self.logger.debug("Command {} on device {} exec_state {}:".format(cmd_name,device,
                                                                   self._sc_subarray_cmd_exec_state[device][cmd_name]))
            self.logger.info("self._sc_subarray_obs_state {}".format(self._sc_subarray_obs_state))
            self._cmd_progress[cmd_name] = command_progress
            if all(self._sc_subarray_obs_state[device] == ObsState.CONFIGURING for device in device_list):
                self._obs_state = ObsState.CONFIGURING
                self.logger.info("Current CspSubarray ObsState:{}".format(self._obs_state))
@@ -667,6 +681,14 @@ class CspSubarray(SKASubarray):
                    # end of the command: the command has been issued on all the sub-element devices
                    # reset the execution flag for the CSP
                break
            # check for global timeout expiration
            # may be this check is not necessary 
            if self._cmd_duration_expected[cmd_name] < (time.time() - command_start_time):
                self._timeout_expired = True
                self.logger.warn("Device went in timeout during configuration")
                break
            # TODO:
            # check for abort request.
            time.sleep(0.2)
        # end of the while loop
        # check for timeout/failure conditions on each sub-component
@@ -677,10 +699,22 @@ class CspSubarray(SKASubarray):
                self._failure_raised = True
            # reset sub-component execution flag
            self._sc_subarray_cmd_exec_state[device][cmd_name] = CmdExecState.IDLE
            # update the progress counter at the end of the loop 
            self._cmd_progress[cmd_name] = command_progress + self._sc_subarray_cmd_progress[device][cmd_name]/len(device_list)

        if self._timeout_expired or self._failure_raised:
            # if failure/timeout found check if the CBF subarray is configured. In
            # this case the CSP.LMC Subarray obsState is set to READY.
            if self._sc_subarray_obs_state[self.CbfSubarray] == ObsState.READY:
                self._obs_state = ObsState.READY
            #TODO:
            # else:...
            # self.set_state(tango.DevState.FAULT)
            # 

        if all(self._sc_subarray_obs_state[fqdn] == dev_successful_state for fqdn in device_list):
            self._obs_state = dev_successful_state
            self._valid_scan_configuration = input_arg[1]
            self._cmd_duration_measured[cmd_name] = time.time() - command_start_time
            self._cmd_progress[cmd_name] = 100
        self._last_executed_command = cmd_name
        # reset the CSP Subarray command execution flag
        self._cmd_execution_state[cmd_name] = CmdExecState.IDLE
@@ -1241,9 +1275,11 @@ class CspSubarray(SKASubarray):
        self._admin_mode = AdminMode.ONLINE
        self._obs_mode = ObsMode.IDLE
        self._obs_state = ObsState.IDLE
        # TODO: change the configureDelayExpected attribute in the
        # SKAObsDevice base class to RW
        self._config_delay_expected = 5
        # _config_delay_expected: inherited from the SKAObsDevice base class
        # Note: the _config_delay_expected could be set equal the max among the sub-elements
        # sub-arrays expected times for the command
        # For tests purpose we set it  = 10
        self._config_delay_expected = 10
        self._scan_id = 0
        # connect to TANGO DB
        
@@ -1883,6 +1919,7 @@ class CspSubarray(SKASubarray):
        :return: None
        """
        # invoke the constructor for the command thread
        self.logger.info("Received Scan command!!")
        for device in self._sc_subarray_assigned_fqdn:
            try:
                proxy = self._sc_subarray_proxies[device]
@@ -1898,18 +1935,16 @@ class CspSubarray(SKASubarray):
                self.logger.info(tango_err.args[0].desc)
            try:
                proxy.command_inout_asynch("Scan", argin[0], self._cmd_ended_cb)
                # invoke the constructor for the command thread
            except tango.DevFailed as tango_err:
                self.logger.info(tango_err.args[0].desc)

                # TODO: add check on the failed device. If CBF
                # throw an exception.
            # invoke the constructor for the command thread
            self._command_thread['scan'] = threading.Thread(target=self.__monitor_scan_execution,
                                                           name="Thread-Scan",
                                                           args=(self._sc_subarray_assigned_fqdn,))
            self._command_thread['scan'].start()
            #self._obs_state = ObsState.SCANNING
            self._cmd_execution_state['scan'] = CmdExecState.RUNNING
            # sleep for a while to let the thread start
            time.sleep(0.2)
            self._command_thread['scan'].start()
        # PROTECTED REGION END #    //  CspSubarray.Scan    
        
    def _validate_scan_configuration(self, argin):
@@ -1931,13 +1966,13 @@ class CspSubarray(SKASubarray):
        doc_in="A Json-encoded string with the scan configuration.",
    )
    @DebugIt()
    @ObsStateCheck('configscan')
    @SubarrayRejectCmd('AddSearchBeams',
                       'RemoveSearchBeams',
                       'AddTimingBeams',
                       'RemoveTimingBeams'
                       'AddNumOfSearchBeams',
                       'RemoveNumOfSearchBeams')
    #@ObsStateCheck('configscan')
    #@SubarrayRejectCmd('AddSearchBeams',
    #                   'RemoveSearchBeams',
    #                   'AddTimingBeams',
    #                   'RemoveTimingBeams'
    #                   'AddNumOfSearchBeams',
    #                   'RemoveNumOfSearchBeams')
    def Configure(self, argin):
        """
        *TANGO Class method*
@@ -1961,27 +1996,55 @@ class CspSubarray(SKASubarray):
       
        # the dictionary with the scan configuration
        try:
            # if the stored configuration attribute is not empty, check
            # for the received configuration content
            if self._valid_scan_configuration:
                try:
                    stored_configuration = json.dumps(self._valid_scan_configuration, sort_keys=True)
                    received_configuration = json.dumps(argin, sort_keys=True)
                    # if the received configuration is equal to the last valid configuration and the
                    # state of the subarray is READY than subarray is not re-configured.
                    if  (stored_configuration == received_configuration) and (self._obs_state == ObsState.READY):
                        self.logger.info("Subarray is going to use the same configuration")
                        return
                except Exception as e:
                    self.logger.warn(str(e))
            # go ahead and parse the received configuration
            self._validate_scan_configuration(argin)
        except tango.DevFailed as tango_err:
            msg = tango_err.args[0].desc
            # validation failure
            msg = "Failure during validation of configuration:{}".format(tango_err.args[0].desc)
            self.logger.error(msg)
            tango.Except.throw_exception("Command failed",
                                         msg,
                                         "Configure execution",
                                         "Configure",
                                         tango.ErrSeverity.ERR)
        # Forward the Configure command to CbfSubarray.
        # Forward the Configure command to the sub-elements
        # components (subarrays, pst beams)
        for device in self._sc_subarray_assigned_fqdn:
            error_device = 0
            # reset the command progress counter for each
            # sub-array component
            self._sc_subarray_cmd_progress[device]['configurescan'] = 0
            self._failure_message['configurescan'] = ''
            # NOTE: what happens if a sub-element subarray/PSt beam TANGO
            # device is not running? The next calls to proxy should fail
            # with a tango.DevFailed exception.
            if not self._is_sc_subarray_running(device):
                error_device += 1
                self._failure_message['configurescan'] += "Device {} not running".format(device)
                msg = "Device {} not running".format(device)
                if device == self.CbfSubarray:
                    # throw the exception, configuration can't proceed
                    tango.Except.throw_exception("Command failed",
                                                 msg,
                                                 "Configure",
                                                 tango.ErrSeverity.ERR)
                self._failure_message['configurescan'] += msg
                self._sc_subarray_cmd_exec_state[device]['configurescan'] = CmdExecState.FAILED
                #skip to next device
                continue
            proxy = self._sc_subarray_proxies[device]
            
            # subscribes attribute to track command execution and
            # subscribe sub-elements attributes to track command execution and
            # timeout on subarray sub-components
            attributes = ['configureCmdProgress', 'configureCmdTimeoutExpired']
            attributes = ['configureScanCmdProgress', 'timeoutExpiredFlag']
            for attr in attributes:
                try:
                    if self._sc_subarray_event_id[attr.lower()] == 0:
@@ -1992,31 +2055,52 @@ class CspSubarray(SKASubarray):
                        self._sc_subarray_event_id[device][attr] = event_id
                except tango.DevFailed as df:
                    self.logger.info(df.args[0].desc)              
            # Note: CBF/PSS sub-array checks for the validity of its
            # NOTE: CBF/PSS sub-array checks for the validity of its
            # configuration. Failure in configuration throws an exception that is
            # caught via the _cmd_ended_cb callback

            try:
                # read the timeout configured for the operation on the device
                self._sc_subarray_duration_expected[device]['configurescan'] = proxy.configureDelayExpected
            except AttributeError as attr_err:
                self.logger.info("No attribute {} on device {}".format(str(attr_err), device))
            try:
                proxy.command_inout_asynch("ConfigureScan",
                                           self._sc_subarray_scan_configuration[device],
                                           self._cmd_ended_cb)
                self._sc_subarray_cmd_exec_state[device]['configurescan'] = CmdExecState.RUNNING
            except tango.DevFailed as tango_err:
                error_device += 1
                if device == self.CbfSubarray:
                    msg = "Failure in configuring CBF: {}".format(tango_err-args[0].desc)
                    # throw the exception, configuration can't proceed
                    tango.Except.throw_exception("Command failed",
                                                 msg,
                                                 "Configure",
                                                 tango.ErrSeverity.ERR)

                self._failure_message['configurescan'] += tango_err.args[0].desc
            self._sc_subarray_cmd_exec_state[device]['configurescan'] = CmdExecState.RUNNING
            if error_device:
                self._sc_subarray_cmd_exec_state[device]['configurescan'] = CmdExecState.FAILED

            # register the starting time for the command
            self._sc_subarray_cmd_starting_time[device] = time.time()
            self.logger.debug("configure starting time: {}".format(self._sc_subarray_cmd_starting_time[device]))
        #self._obs_state = ObsState.CONFIGURING
        self._cmd_execution_state['configurescan'] = CmdExecState.RUNNING
        # end for loop on devices

        # TODO: evaluate the global timeout as the max of the single sub-element
        # timeouts
        # configure the timeout for the operation
        if self._config_delay_expected > 0:
            self._cmd_duration_expected['configurescan'] = self._config_delay_expected
        self.logger.debug("_config_delay_expected :{}".format(self._config_delay_expected))
        # invoke the constructor for the command thread
        thread_args = [self._sc_subarray_assigned_fqdn, argin]
        self._command_thread['configurescan'] = threading.Thread(target=self.__configure_scan,
                                                           name="Thread-Configure",
                                                           args=(self._sc_subarray_assigned_fqdn,))
                                                           args=(thread_args,))
        self._abort_command_event.clear()
        self._cmd_execution_state['configurescan'] = CmdExecState.RUNNING
        self._cmd_duration_measured['configurescan'] = 0
        self._command_thread['configurescan'].start()
        # sleep for a while to let the thread start
        time.sleep(0.2)
        # PROTECTED REGION END #    //  CspSubarray.Configure
    
    @AdminModeCheck('AddNumOfSearchBeams')
@@ -2210,10 +2294,10 @@ class CspSubarray(SKASubarray):
                proxy.command_inout_asynch("EndScan", self._cmd_ended_cb)
            except KeyError as key_err:
                self.logger.warning("No key {} found".format(key_err))
                self._sc_subarray_cmd_exec_state = CmdExecState.FAILED
            except tango.DevFailed as tango_err:
                self._sc_subarray_cmd_exec_state = CmdExecState.FAILED
                self.logger.warning(tango_err.args[0].desc)
                # TODO: address this case!
                # signal the failure raising the failure flag?
        # set the threading endScan event 
        self._end_scan_event.set()
        # PROTECTED REGION END #    //  CspSubarray.EndScan
@@ -2266,10 +2350,10 @@ class CspSubarray(SKASubarray):
        self._command_thread['gotoidle'] = threading.Thread(target=self.__gotoidle,
                                                           name="Thread-GotoIdle",
                                                           args=(self._sc_subarray_assigned_fqdn,))
        self._command_thread['gotoidle'].start()
        self._cmd_execution_state['gotoidle'] = CmdExecState.RUNNING
        self._command_thread['gotoidle'].start()
        # sleep for a while to let the thread start
        time.sleep(0.2)
        #time.sleep(0.2)
        # TODO:
        # add some check on command exeuction: end state has to be IDLE for each 
        # sub-array sub-component                
@@ -2285,7 +2369,6 @@ class CspSubarray(SKASubarray):
    @command(
    )
    @DebugIt()
    @ObsStateCheck('abort')
    def Abort(self):
        # PROTECTED REGION ID(CspSubarray.Abort) ENABLED START #
        """
@@ -2303,9 +2386,9 @@ class CspSubarray(SKASubarray):
                proxy.command_inout_asynch("Abort", self._cmd_ended_cb)
            except KeyError as key_err:
                self.logger.warning("No key {} found".format(key_err))
                self._sc_subarray_cmd_exec_state = CmdExecState.FAILED
                self._sc_subarray_cmd_exec_state[fqdn]['abort'] = CmdExecState.FAILED
            except tango.DevFailed as tango_err:
                self._sc_subarray_cmd_exec_state = CmdExecState.FAILED
                self._sc_subarray_cmd_exec_state[fqdn]['abort'] = CmdExecState.FAILED
                self.logger.warning(tango_err.args[0].desc)
        self._abort_command_event.set()
        # PROTECTED REGION END #    //  CspSubarray.Abort
+10 −6

File changed.

Preview size limit exceeded, changes collapsed.

+8 −8
Original line number Diff line number Diff line
@@ -103,11 +103,11 @@ class TestCspSubarray(object):
        state = tango_context.device.cbfSubarrayState
        assert state == DevState.OFF

    def test_remove_timing_beams_while_add_is_running_state(self, tango_context):
        """ Test the execution of the RemovingTimingBeams" while
            the AddTimingBeams is already running
        """
        tango_context.device.AddTimingBeams([1,2,3])
        with pytest.raises(tango.DevFailed) as df:
            tango_context.device.RemoveTimingBeams([1,2])
        assert "Can't execute command" in df.value.args[0].desc
    #def test_remove_timing_beams_while_add_is_running_state(self, tango_context):
    #    """ Test the execution of the RemovingTimingBeams" while
    #        the AddTimingBeams is already running
    #    """
    #    tango_context.device.AddTimingBeams([1,2,3])
    #    with pytest.raises(tango.DevFailed) as df:
    #        tango_context.device.RemoveTimingBeams([1,2])
    #    assert "Can't execute command" in df.value.args[0].desc
+120 −102

File changed.

Preview size limit exceeded, changes collapsed.

+19 −0

File changed.

Preview size limit exceeded, changes collapsed.