Commit e2a535cf authored by Elisabetta Giani's avatar Elisabetta Giani
Browse files

fix-bugs-mid-csp-lmc-0.5.3-image: after MVP integration tests, found a bug in

CspMaster monitoring thread.
Reduced the sleep time inside the thread function (from 1 to 0.1 sec).
Removed the callback registration with the asynch commands.
The CSP State is updated inside the event callback only when the CSP
is not running power commands.
Updated HISTORY, release and conf.py files.
parent e1d46ff2
Loading
Loading
Loading
Loading
Loading
+13 −0
Original line number Diff line number Diff line
0.5.5

- reduced the sleep time inside monitoring threads in CspMaster.py and CspSubarray.py
- the Csp State attribute is updated in the event callback only when the device is
  not running any power command.
- the cmd_ended_cb callback is no longer registered with the asynchronous commands issued to
  power the CSP Element. When the CbfMaster is already in the requested State, the exception
  thrown by it is caught after the end of the thread, and the class attribute
  _cmd_execution_state (even if reset to IDLE inside the thread) is still equal to RUNNING.
  This causes a device failure if a new power command is issued on it. This issue may be
  related to the PyTango issue with threading and I/O.


0.5.3

- Use lmcbaseclasses = 0.5.0
+179 −133
Original line number Diff line number Diff line
@@ -16,9 +16,9 @@ CSP.LMC Common Class for the CSPMaster TANGO Device.
# Python standard library
import sys
import os
from collections import defaultdict
import threading
import time
import threading
from collections import defaultdict
# PROTECTED REGION END# //CspMaster.standardlibray_import

# tango imports
@@ -258,6 +258,7 @@ class CspMaster(SKAMaster):
                    self.logger.error(msg)
                    self._se_cmd_execution_state[evt.device.dev_name()][evt.cmd_name.lower()] = CmdExecState.FAILED
                    self._failure_message[evt.cmd_name] = msg
                    self.logger.info("_cmd_ended_cb _cmd_execution_state:{}".format(self._cmd_execution_state))
                    # obsState and obsMode values take on the CbfSubarray's values via
                    # the subscribe/publish mechanism
            else:
@@ -287,9 +288,12 @@ class CspMaster(SKAMaster):
        :return: None
        """
        self._update_csp_health_state()
        if all(value == CmdExecState.IDLE for value in self._cmd_execution_state.values()) or (not any(self._cmd_execution_state)):
            self.logger.debug("_update_csp_state: _cmd_execution_state:{}".format(self._cmd_execution_state))
            self.set_state(self._se_state[self.CspCbf])
        if self._admin_mode in [AdminMode.OFFLINE, AdminMode.NOT_FITTED, AdminMode.RESERVED]:
            self.set_state(tango.DevState.DISABLE)
        self.logger.debug("_update_csp_state: CspState: {}".format(self.get_state()))

    def _update_csp_health_state(self):
        """
@@ -438,11 +442,10 @@ class CspMaster(SKAMaster):
            dev_successful_state = args_dict['dev_state']
        except KeyError as key_err:
            self.logger.warning("No key: {}".format(str(key_err)))
            # reset the CSP and sub-element running flags
            self._cmd_execution_state = CmdExecState.IDLE
            for device in device_list:
                for k, _ in self._se_cmd_execution_state[device].values():                    
                   self._se_cmd_execution_state[device][k] = CmdExecState.IDLE
            # reset the CSP and CSP sub-elements command execution
            # state flags
            self._se_cmd_execution_state.clear()
            self._cmd_execution_state.clear()
            return
        # tango_cmd_name: is the TANGO command name with the capital letter
        # In the dictionary keys, is generally used the command name in lower letters
@@ -459,12 +462,9 @@ class CspMaster(SKAMaster):
        se_cmd_duration_measured = defaultdict(lambda:defaultdict(lambda:0))
        # loop on the devices and power-on them sequentially
        for device in device_list:
            # set the sub-element command execution flag
            self._se_cmd_execution_state[device][cmd_name] = CmdExecState.RUNNING
            se_cmd_duration_measured[device][cmd_name] = 0
            self._se_cmd_progress[device][cmd_name] = 0
            
            try:
            device_proxy = self._se_proxies[device] 
            self.logger.debug("Issue asynch command {} on device {}:".format(cmd_name, device))
               
@@ -478,17 +478,39 @@ class CspMaster(SKAMaster):
            # (see "SKA System Control Guidelines").
            # In this case the current method enters the while loop and the execution of the
            # sub-element command is tracked in the right way.
                device_proxy.command_inout_asynch(tango_cmd_name, self._cmd_ended_cb)
            try:
                # 04-11-2020: removed the registration of the cmd_ended_cb callback. A wrong
                # behavior has been observed when the callback is invoked after the end of the
                # thread. This happens when the same command is executed twice on the CbfMaster.
                # In this case the CbfMaster throws an exception to signal that the device is
                # already in that state, the check on the device state passes with success,
                # self._cmd_execution_state is set to IDLE and the thread exits.
                # Afterwards the callback is received with the error message generated by
                # the CbfMaster, but self._cmd_execution_state is found to be RUNNING and
                # the device gets stuck.
                #device_proxy.command_inout_asynch(tango_cmd_name, self._cmd_ended_cb)
                device_proxy.command_inout_asynch(tango_cmd_name)
            except tango.DevFailed as df:
                # It should not happen! Verify
                msg = "Failure reason: {} Desc: {}".format(str(df.args[0].reason), str(df.args[0].desc))
                self.logger.warning(msg)
                self._se_cmd_execution_state[device][cmd_name] = CmdExecState.FAILED
                self._failure_message[cmd_name] += msg
                num_of_failed_device += 1
                # skip to next device
                continue
            # set the sub-element command execution flag
            self._se_cmd_execution_state[device][cmd_name] = CmdExecState.RUNNING
            # register the starting time for the command
            self._se_cmd_starting_time[device] = time.time() 
                # loop on the device until the State changes to ON or a timeout occurred
            # loop on the device until the State changes to ON or a timeout or
            # a failure detection occurred
            self.logger.debug("Device {} State {} expected value {}".format(device, self._se_state[device], dev_successful_state))
            command_progress = self._cmd_progress[cmd_name]
            while True:
                if self._se_state[device] == dev_successful_state:
                    self.logger.info("Command {} ended with success on device {}.".format(cmd_name,
                                                                                          device))
                        self.logger.info("Command {} executed on device {}.".format(cmd_name,device))
                    # update the list and number of device that completed the task
                    self._num_dev_completed_task[cmd_name]  += 1
                    self._list_dev_completed_task[cmd_name].append(device)
@@ -544,10 +566,11 @@ class CspMaster(SKAMaster):
                        self._timeout_expired[cmd_name] = True
                        self._se_cmd_execution_state[device][cmd_name] = CmdExecState.IDLE
                        self._cmd_execution_state[cmd_name] = CmdExecState.IDLE
                        self.logger.info("self._cmd_execution_state:{}".format(self._cmd_execution_state))
                        return
                    # timeout on the sub-element, skip to the next device
                    break
                    time.sleep(1)
                time.sleep(0.1)
                # update the progress counter inside the loop taking into account the number of devices
                # executing the command
                self._cmd_progress[cmd_name] = command_progress + self._se_cmd_progress[device][cmd_name]/len(device_list)
@@ -564,16 +587,7 @@ class CspMaster(SKAMaster):
                # end of the command: the command has been issued on all the sub-element devices
                # reset the execution flag for the CSP
                break   
            except KeyError as key_err:
                msg = "No key {} found".format(str(key_err))
                self.logger.warning(msg)
            except tango.DevFailed as df:
                # It should not happen! Verify
                msg = "Failure reason: {} Desc: {}".format(str(df.args[0].reason), str(df.args[0].desc))
                self.logger.warning(msg)
        # out of the for loop
        # reset the CSP command execution flag
        self._cmd_execution_state[cmd_name] = CmdExecState.IDLE
        self._last_executed_command = cmd_name
        # if one or more sub-elements goes in timeout or failure, set the CSP
        # corresponding attribute
@@ -587,6 +601,11 @@ class CspMaster(SKAMaster):
                self._failure_raised[cmd_name] = True
            # reset the CSP sub-element command execution flag
            self._se_cmd_execution_state[device][cmd_name] = CmdExecState.IDLE
        # reset the CSP command execution flag

        self._cmd_execution_state[cmd_name] = CmdExecState.IDLE
        self.logger.debug("__issue_power command end _cmd_execution_state:{}".format(self._cmd_execution_state))
        self._update_csp_state()
    
    def _se_write_adminMode(self, value, device_fqdn):
        """
@@ -1208,7 +1227,7 @@ class CspMaster(SKAMaster):
        for fqdn in  self._se_fqdn:
            attribute_properties = csp_tango_db.get_device_attribute_property(fqdn,
                                                                              {'adminMode': ['__value']})
            self.logger.debug("fqdn: {} attribute_properties: {}".format(fqdn, attribute_properties))
            self.logger.info("fqdn: {} attribute_properties: {}".format(fqdn, attribute_properties))
            try:
                admin_mode_memorized = attribute_properties['adminMode']['__value']
                self._se_admin_mode[fqdn] = int(admin_mode_memorized[0])
@@ -1763,12 +1782,21 @@ class CspMaster(SKAMaster):
        self._command_thread['on'] = threading.Thread(target=self._issue_power_command, name="Thread-On",
                                               args=(device_list,),
                                               kwargs=args_dict)
        # set the  CSP command execution running flag
        self._cmd_execution_state['on'] = CmdExecState.RUNNING
        try:
            # start the thread
            self._command_thread['on'].start()
            # set the  CSP command execution running flag
            self._cmd_execution_state['on'] = CmdExecState.RUNNING
            # sleep for a while to let the thread start
            time.sleep(0.2)
        except Exception:
            # reset the sub-element command exec state
            self._se_cmd_execution_state.clear()
            self._cmd_execution_state['on'] = CmdExecState.IDLe
            tango.Except.throw_exception("Command failed",
                                         "Thread non started while executing On command",
                                         "On",
                                         tango.ErrSeverity.ERR)
        # PROTECTED REGION END #    //  CspMaster.On

    @AdminModeCheck('Off')
@@ -1838,12 +1866,21 @@ class CspMaster(SKAMaster):
        self._command_thread['off'] = threading.Thread(target=self._issue_power_command, name="Thread-Off",
                                                args=(device_list,),
                                                kwargs=args_dict)
        # set the  CSP command execution running flag
        self._cmd_execution_state['off'] = CmdExecState.RUNNING
        try:
            # start the thread
            self._command_thread['off'].start()
            # set the  CSP command execution running flag
            self._cmd_execution_state['off'] = CmdExecState.RUNNING
            # sleep for a while to let the thread start
            time.sleep(0.2)
        except Exception:
            # reset the sub-element command exec state
            self._se_cmd_execution_state.clear()
            self._cmd_execution_state['off'] = CmdExecState.IDLE
            tango.Except.throw_exception("Command failed",
                                         "Thread non started while executing Off command",
                                         "Off",
                                         tango.ErrSeverity.ERR)
        
        # PROTECTED REGION END #    //  CspMaster.Off

@@ -1910,11 +1947,20 @@ class CspMaster(SKAMaster):
                                                           name="Thread-Standby",
                                                           args=(device_list,),
                                                           kwargs=args_dict)
        try:
            # start the thread
        self._cmd_execution_state['standby'] = CmdExecState.RUNNING
            self._command_thread['standby'].start()
            self._cmd_execution_state['standby'] = CmdExecState.RUNNING
            # sleep for a while to let the thread start
            time.sleep(0.2)
        except Exception:
            # reset the sub-element command exec state
            self._se_cmd_execution_state.clear()
            self._cmd_execution_state['standby'] = CmdExecState.IDLE
            tango.Except.throw_exception("Command failed",
                                         "Thread not started while executing Standby command",
                                         "Standby",
                                         tango.ErrSeverity.ERR)
        # PROTECTED REGION END #    //  CspMaster.Standby

    @command(
+3 −3
Original line number Diff line number Diff line
@@ -558,7 +558,7 @@ class CspSubarray(SKASubarray):
            if any(device_done.values()) and all(value == True for value in device_done.values()):
                self.logger.info("All devices have been handled!")
                break
            time.sleep(1)
            time.sleep(0.2)
                       
        # end of the while loop
        # check for timeout/failure conditions on each sub-component
@@ -663,7 +663,7 @@ class CspSubarray(SKASubarray):
                    # end of the command: the command has been issued on all the sub-element devices
                    # reset the execution flag for the CSP
                break
            time.sleep(1)
            time.sleep(0.2)
        # end of the while loop
        # check for timeout/failure conditions on each sub-component
        for device in device_list:
@@ -763,7 +763,7 @@ class CspSubarray(SKASubarray):
                self.logger.info("All devices have been handled!")
                break
            self.logger.info("Sleeping...")
            time.sleep(1)
            time.sleep(0.2)
        # end of the while loop
        # check for timeout/failure conditions on each sub-component
        for device in device_list:
+1 −1
Original line number Diff line number Diff line
@@ -10,7 +10,7 @@
"""Release information for Python Package"""

name = """csp-lmc-common"""
version = "0.5.4"
version = "0.5.5"
version_info = version.split(".")
description = """SKA CSP.LMC Common Software"""
author = "INAF-OAA"
+2 −1
Original line number Diff line number Diff line
@@ -151,13 +151,14 @@ class CmdInputArgsCheck(object):
            dev_instance.logger.debug(
                "CmdInputArgsCheck: devices {} to check:".format(device_list))
            # If a sub-element device is already executing a power command, an exception is
            # thown only when the requested command is different from the one
            # thrown only when the requested command is different from the one
            # already running (power commands have to be executed sequentially).
            # TODO:
            # What to do if the required device is performing a software upgrade or is changing its
            # adminMode? How can CSP.LMC detect this condition?
            list_of_running_cmd = [cmd_name for cmd_name, cmd_state in dev_instance._cmd_execution_state.items()
                                   if cmd_state == CmdExecState.RUNNING]
            dev_instance.logger.debug("decorator: list of running commands: {}".format(list_of_running_cmd))
            if list_of_running_cmd:
                # if a command is running, check if its the requested one
                if len(list_of_running_cmd) > 1:
Loading