Commit e2a535cf authored by Elisabetta Giani's avatar Elisabetta Giani
Browse files

fix-bugs-mid-csp-lmc-0.5.3-image: after the MVP integration tests, a bug was found in

the CspMaster monitoring thread.
Reduced the sleep time inside the thread function (from 1 to 0.1 sec).
Removed the callback registration from the asynchronous commands.
The CSP State is updated inside the event callback only when the CSP
is not running power commands.
Updated the HISTORY, release and conf.py files.
parent e1d46ff2
Loading
Loading
Loading
Loading
Loading
+13 −0
Original line number Original line Diff line number Diff line
0.5.5

- reduced the sleep time inside the monitoring threads in CspMaster.py (from 1 to 0.1 sec)
  and CspSubarray.py (from 1 to 0.2 sec).
- the CSP State attribute is updated in the event callback only when the device is
  not running any power command.
- the cmd_ended_cb callback is no longer registered with the asynchronous commands issued to
  power the CSP Element. When the CbfMaster is already in the requested State, the exception
  it throws is caught after the end of the thread and the class attribute _cmd_execution_state
  (even if reset to IDLE inside the thread) is still equal to RUNNING. This causes a device
  failure if a new power command is issued on it. This issue may be related to the PyTango
  issue with threading and I/O.


0.5.3
0.5.3


- Use lmcbaseclasses = 0.5.0
- Use lmcbaseclasses = 0.5.0
+179 −133
Original line number Original line Diff line number Diff line
@@ -16,9 +16,9 @@ CSP.LMC Common Class for the CSPMaster TANGO Device.
# Python standard library
# Python standard library
import sys
import sys
import os
import os
from collections import defaultdict
import threading
import time
import time
import threading
from collections import defaultdict
# PROTECTED REGION END# //CspMaster.standardlibray_import
# PROTECTED REGION END# //CspMaster.standardlibray_import


# tango imports
# tango imports
@@ -258,6 +258,7 @@ class CspMaster(SKAMaster):
                    self.logger.error(msg)
                    self.logger.error(msg)
                    self._se_cmd_execution_state[evt.device.dev_name()][evt.cmd_name.lower()] = CmdExecState.FAILED
                    self._se_cmd_execution_state[evt.device.dev_name()][evt.cmd_name.lower()] = CmdExecState.FAILED
                    self._failure_message[evt.cmd_name] = msg
                    self._failure_message[evt.cmd_name] = msg
                    self.logger.info("_cmd_ended_cb _cmd_execution_state:{}".format(self._cmd_execution_state))
                    # obsState and obsMode values take on the CbfSubarray's values via
                    # obsState and obsMode values take on the CbfSubarray's values via
                    # the subscribe/publish mechanism
                    # the subscribe/publish mechanism
            else:
            else:
@@ -287,9 +288,12 @@ class CspMaster(SKAMaster):
        :return: None
        :return: None
        """
        """
        self._update_csp_health_state()
        self._update_csp_health_state()
        if all(value == CmdExecState.IDLE for value in self._cmd_execution_state.values()) or (not any(self._cmd_execution_state)):
            self.logger.debug("_update_csp_state: _cmd_execution_state:{}".format(self._cmd_execution_state))
            self.set_state(self._se_state[self.CspCbf])
            self.set_state(self._se_state[self.CspCbf])
        if self._admin_mode in [AdminMode.OFFLINE, AdminMode.NOT_FITTED, AdminMode.RESERVED]:
        if self._admin_mode in [AdminMode.OFFLINE, AdminMode.NOT_FITTED, AdminMode.RESERVED]:
            self.set_state(tango.DevState.DISABLE)
            self.set_state(tango.DevState.DISABLE)
        self.logger.debug("_update_csp_state: CspState: {}".format(self.get_state()))


    def _update_csp_health_state(self):
    def _update_csp_health_state(self):
        """
        """
@@ -438,11 +442,10 @@ class CspMaster(SKAMaster):
            dev_successful_state = args_dict['dev_state']
            dev_successful_state = args_dict['dev_state']
        except KeyError as key_err:
        except KeyError as key_err:
            self.logger.warning("No key: {}".format(str(key_err)))
            self.logger.warning("No key: {}".format(str(key_err)))
            # reset the CSP and sub-element running flags
            # reset the CSP and CSP sub-elements command execution
            self._cmd_execution_state = CmdExecState.IDLE
            # state flags
            for device in device_list:
            self._se_cmd_execution_state.clear()
                for k, _ in self._se_cmd_execution_state[device].values():                    
            self._cmd_execution_state.clear()
                   self._se_cmd_execution_state[device][k] = CmdExecState.IDLE
            return
            return
        # tango_cmd_name: is the TANGO command name with the capital letter
        # tango_cmd_name: is the TANGO command name with the capital letter
        # In the dictionary keys, is generally used the command name in lower letters
        # In the dictionary keys, is generally used the command name in lower letters
@@ -459,12 +462,9 @@ class CspMaster(SKAMaster):
        se_cmd_duration_measured = defaultdict(lambda:defaultdict(lambda:0))
        se_cmd_duration_measured = defaultdict(lambda:defaultdict(lambda:0))
        # loop on the devices and power-on them sequentially
        # loop on the devices and power-on them sequentially
        for device in device_list:
        for device in device_list:
            # set the sub-element command execution flag
            self._se_cmd_execution_state[device][cmd_name] = CmdExecState.RUNNING
            se_cmd_duration_measured[device][cmd_name] = 0
            se_cmd_duration_measured[device][cmd_name] = 0
            self._se_cmd_progress[device][cmd_name] = 0
            self._se_cmd_progress[device][cmd_name] = 0
            
            
            try:
            device_proxy = self._se_proxies[device] 
            device_proxy = self._se_proxies[device] 
            self.logger.debug("Issue asynch command {} on device {}:".format(cmd_name, device))
            self.logger.debug("Issue asynch command {} on device {}:".format(cmd_name, device))
               
               
@@ -478,17 +478,39 @@ class CspMaster(SKAMaster):
            # (see "SKA System Control Guidelines").
            # (see "SKA System Control Guidelines").
            # In this case the current method enters the while loop and the execution of the
            # In this case the current method enters the while loop and the execution of the
            # sub-element command is tracked in the right way.
            # sub-element command is tracked in the right way.
                device_proxy.command_inout_asynch(tango_cmd_name, self._cmd_ended_cb)
            try:
                # 04-11-2020: removed the registration of the cmd_ended_cb callback. A wrong
                # behavior has been observed when the callback is invoked after the end of the
                # thread. This happens when the same command is executed twice on the CbfMaster.
                # In this case the CbfMaster throws an exception to signal that the device is
                # already in that state, the check on the device state passes with success,
                # the self._cmd_execution_state is set to IDLE and the thread exits.
                # Afterwards the callback is received with the error message generated by the
                # CbfMaster, but self._cmd_execution_state results RUNNING and the device
                # gets stuck.
                #device_proxy.command_inout_asynch(tango_cmd_name, self._cmd_ended_cb)
                device_proxy.command_inout_asynch(tango_cmd_name)
            except tango.DevFailed as df:
                # It should not happen! Verify
                msg = "Failure reason: {} Desc: {}".format(str(df.args[0].reason), str(df.args[0].desc))
                self.logger.warning(msg)
                self._se_cmd_execution_state[device][cmd_name] = CmdExecState.FAILED
                self._failure_message[cmd_name] += msg
                num_of_failed_device += 1
                # skip to next device
                continue
            # set the sub-element command execution flag
            self._se_cmd_execution_state[device][cmd_name] = CmdExecState.RUNNING
            # register the starting time for the command
            # register the starting time for the command
            self._se_cmd_starting_time[device] = time.time() 
            self._se_cmd_starting_time[device] = time.time() 
                # loop on the device until the State changes to ON or a timeout occurred
            # loop on the device until the State changes to ON or a timeout or
            # a failure detection occurred
            self.logger.debug("Device {} State {} expected value {}".format(device, self._se_state[device], dev_successful_state))
            self.logger.debug("Device {} State {} expected value {}".format(device, self._se_state[device], dev_successful_state))
            command_progress = self._cmd_progress[cmd_name]
            command_progress = self._cmd_progress[cmd_name]
            while True:
            while True:
                if self._se_state[device] == dev_successful_state:
                if self._se_state[device] == dev_successful_state:
                    self.logger.info("Command {} ended with success on device {}.".format(cmd_name,
                    self.logger.info("Command {} ended with success on device {}.".format(cmd_name,
                                                                                          device))
                                                                                          device))
                        self.logger.info("Command {} executed on device {}.".format(cmd_name,device))
                    # update the list and number of device that completed the task
                    # update the list and number of device that completed the task
                    self._num_dev_completed_task[cmd_name]  += 1
                    self._num_dev_completed_task[cmd_name]  += 1
                    self._list_dev_completed_task[cmd_name].append(device)
                    self._list_dev_completed_task[cmd_name].append(device)
@@ -544,10 +566,11 @@ class CspMaster(SKAMaster):
                        self._timeout_expired[cmd_name] = True
                        self._timeout_expired[cmd_name] = True
                        self._se_cmd_execution_state[device][cmd_name] = CmdExecState.IDLE
                        self._se_cmd_execution_state[device][cmd_name] = CmdExecState.IDLE
                        self._cmd_execution_state[cmd_name] = CmdExecState.IDLE
                        self._cmd_execution_state[cmd_name] = CmdExecState.IDLE
                        self.logger.info("self._cmd_execution_state:{}".format(self._cmd_execution_state))
                        return
                        return
                    # timeout on the sub-element, skip to the next device
                    # timeout on the sub-element, skip to the next device
                    break
                    break
                    time.sleep(1)
                time.sleep(0.1)
                # update the progress counter inside the loop taking into account the number of devices
                # update the progress counter inside the loop taking into account the number of devices
                # executing the command
                # executing the command
                self._cmd_progress[cmd_name] = command_progress + self._se_cmd_progress[device][cmd_name]/len(device_list)
                self._cmd_progress[cmd_name] = command_progress + self._se_cmd_progress[device][cmd_name]/len(device_list)
@@ -564,16 +587,7 @@ class CspMaster(SKAMaster):
                # end of the command: the command has been issued on all the sub-element devices
                # end of the command: the command has been issued on all the sub-element devices
                # reset the execution flag for the CSP
                # reset the execution flag for the CSP
                break   
                break   
            except KeyError as key_err:
                msg = "No key {} found".format(str(key_err))
                self.logger.warning(msg)
            except tango.DevFailed as df:
                # It should not happen! Verify
                msg = "Failure reason: {} Desc: {}".format(str(df.args[0].reason), str(df.args[0].desc))
                self.logger.warning(msg)
        # out of the for loop
        # out of the for loop
        # reset the CSP command execution flag
        self._cmd_execution_state[cmd_name] = CmdExecState.IDLE
        self._last_executed_command = cmd_name
        self._last_executed_command = cmd_name
        # if one or more sub-elements goes in timeout or failure, set the CSP
        # if one or more sub-elements goes in timeout or failure, set the CSP
        # corresponding attribute
        # corresponding attribute
@@ -587,6 +601,11 @@ class CspMaster(SKAMaster):
                self._failure_raised[cmd_name] = True
                self._failure_raised[cmd_name] = True
            # reset the CSP sub-element command execution flag
            # reset the CSP sub-element command execution flag
            self._se_cmd_execution_state[device][cmd_name] = CmdExecState.IDLE
            self._se_cmd_execution_state[device][cmd_name] = CmdExecState.IDLE
        # reset the CSP command execution flag

        self._cmd_execution_state[cmd_name] = CmdExecState.IDLE
        self.logger.debug("__issue_power command end _cmd_execution_state:{}".format(self._cmd_execution_state))
        self._update_csp_state()
    
    
    def _se_write_adminMode(self, value, device_fqdn):
    def _se_write_adminMode(self, value, device_fqdn):
        """
        """
@@ -1208,7 +1227,7 @@ class CspMaster(SKAMaster):
        for fqdn in  self._se_fqdn:
        for fqdn in  self._se_fqdn:
            attribute_properties = csp_tango_db.get_device_attribute_property(fqdn,
            attribute_properties = csp_tango_db.get_device_attribute_property(fqdn,
                                                                              {'adminMode': ['__value']})
                                                                              {'adminMode': ['__value']})
            self.logger.debug("fqdn: {} attribute_properties: {}".format(fqdn, attribute_properties))
            self.logger.info("fqdn: {} attribute_properties: {}".format(fqdn, attribute_properties))
            try:
            try:
                admin_mode_memorized = attribute_properties['adminMode']['__value']
                admin_mode_memorized = attribute_properties['adminMode']['__value']
                self._se_admin_mode[fqdn] = int(admin_mode_memorized[0])
                self._se_admin_mode[fqdn] = int(admin_mode_memorized[0])
@@ -1763,12 +1782,21 @@ class CspMaster(SKAMaster):
        self._command_thread['on'] = threading.Thread(target=self._issue_power_command, name="Thread-On",
        self._command_thread['on'] = threading.Thread(target=self._issue_power_command, name="Thread-On",
                                               args=(device_list,),
                                               args=(device_list,),
                                               kwargs=args_dict)
                                               kwargs=args_dict)
        # set the  CSP command execution running flag
        try:
        self._cmd_execution_state['on'] = CmdExecState.RUNNING
            # start the thread
            # start the thread
            self._command_thread['on'].start()
            self._command_thread['on'].start()
            # set the  CSP command execution running flag
            self._cmd_execution_state['on'] = CmdExecState.RUNNING
            # sleep for a while to let the thread start
            # sleep for a while to let the thread start
            time.sleep(0.2)
            time.sleep(0.2)
        except Exception:
            # reset the sub-element command exec state
            self._se_cmd_execution_state.clear()
            self._cmd_execution_state['on'] = CmdExecState.IDLe
            tango.Except.throw_exception("Command failed",
                                         "Thread non started while executing On command",
                                         "On",
                                         tango.ErrSeverity.ERR)
        # PROTECTED REGION END #    //  CspMaster.On
        # PROTECTED REGION END #    //  CspMaster.On


    @AdminModeCheck('Off')
    @AdminModeCheck('Off')
@@ -1838,12 +1866,21 @@ class CspMaster(SKAMaster):
        self._command_thread['off'] = threading.Thread(target=self._issue_power_command, name="Thread-Off",
        self._command_thread['off'] = threading.Thread(target=self._issue_power_command, name="Thread-Off",
                                                args=(device_list,),
                                                args=(device_list,),
                                                kwargs=args_dict)
                                                kwargs=args_dict)
        # set the  CSP command execution running flag
        try:
        self._cmd_execution_state['off'] = CmdExecState.RUNNING
            # start the thread
            # start the thread
            self._command_thread['off'].start()
            self._command_thread['off'].start()
            # set the  CSP command execution running flag
            self._cmd_execution_state['off'] = CmdExecState.RUNNING
            # sleep for a while to let the thread start
            # sleep for a while to let the thread start
            time.sleep(0.2)
            time.sleep(0.2)
        except Exception:
            # reset the sub-element command exec state
            self._se_cmd_execution_state.clear()
            self._cmd_execution_state['off'] = CmdExecState.IDLE
            tango.Except.throw_exception("Command failed",
                                         "Thread non started while executing Off command",
                                         "Off",
                                         tango.ErrSeverity.ERR)
        
        
        # PROTECTED REGION END #    //  CspMaster.Off
        # PROTECTED REGION END #    //  CspMaster.Off


@@ -1910,11 +1947,20 @@ class CspMaster(SKAMaster):
                                                           name="Thread-Standby",
                                                           name="Thread-Standby",
                                                           args=(device_list,),
                                                           args=(device_list,),
                                                           kwargs=args_dict)
                                                           kwargs=args_dict)
        try:
            # start the thread
            # start the thread
        self._cmd_execution_state['standby'] = CmdExecState.RUNNING
            self._command_thread['standby'].start()
            self._command_thread['standby'].start()
            self._cmd_execution_state['standby'] = CmdExecState.RUNNING
            # sleep for a while to let the thread start
            # sleep for a while to let the thread start
            time.sleep(0.2)
            time.sleep(0.2)
        except Exception:
            # reset the sub-element command exec state
            self._se_cmd_execution_state.clear()
            self._cmd_execution_state['standby'] = CmdExecState.IDLE
            tango.Except.throw_exception("Command failed",
                                         "Thread not started while executing Standby command",
                                         "Standby",
                                         tango.ErrSeverity.ERR)
        # PROTECTED REGION END #    //  CspMaster.Standby
        # PROTECTED REGION END #    //  CspMaster.Standby


    @command(
    @command(
+3 −3
Original line number Original line Diff line number Diff line
@@ -558,7 +558,7 @@ class CspSubarray(SKASubarray):
            if any(device_done.values()) and all(value == True for value in device_done.values()):
            if any(device_done.values()) and all(value == True for value in device_done.values()):
                self.logger.info("All devices have been handled!")
                self.logger.info("All devices have been handled!")
                break
                break
            time.sleep(1)
            time.sleep(0.2)
                       
                       
        # end of the while loop
        # end of the while loop
        # check for timeout/failure conditions on each sub-component
        # check for timeout/failure conditions on each sub-component
@@ -663,7 +663,7 @@ class CspSubarray(SKASubarray):
                    # end of the command: the command has been issued on all the sub-element devices
                    # end of the command: the command has been issued on all the sub-element devices
                    # reset the execution flag for the CSP
                    # reset the execution flag for the CSP
                break
                break
            time.sleep(1)
            time.sleep(0.2)
        # end of the while loop
        # end of the while loop
        # check for timeout/failure conditions on each sub-component
        # check for timeout/failure conditions on each sub-component
        for device in device_list:
        for device in device_list:
@@ -763,7 +763,7 @@ class CspSubarray(SKASubarray):
                self.logger.info("All devices have been handled!")
                self.logger.info("All devices have been handled!")
                break
                break
            self.logger.info("Sleeping...")
            self.logger.info("Sleeping...")
            time.sleep(1)
            time.sleep(0.2)
        # end of the while loop
        # end of the while loop
        # check for timeout/failure conditions on each sub-component
        # check for timeout/failure conditions on each sub-component
        for device in device_list:
        for device in device_list:
+1 −1
Original line number Original line Diff line number Diff line
@@ -10,7 +10,7 @@
"""Release information for Python Package"""
"""Release information for Python Package"""


name = """csp-lmc-common"""
name = """csp-lmc-common"""
version = "0.5.4"
version = "0.5.5"
version_info = version.split(".")
version_info = version.split(".")
description = """SKA CSP.LMC Common Software"""
description = """SKA CSP.LMC Common Software"""
author = "INAF-OAA"
author = "INAF-OAA"
+2 −1
Original line number Original line Diff line number Diff line
@@ -151,13 +151,14 @@ class CmdInputArgsCheck(object):
            dev_instance.logger.debug(
            dev_instance.logger.debug(
                "CmdInputArgsCheck: devices {} to check:".format(device_list))
                "CmdInputArgsCheck: devices {} to check:".format(device_list))
            # If a sub-element device is already executing a power command, an exception is
            # If a sub-element device is already executing a power command, an exception is
            # thown only when the requested command is different from the one
            # thrown only when the requested command is different from the one
            # already running (power commands have to be executed sequentially).
            # already running (power commands have to be executed sequentially).
            # TODO:
            # TODO:
            # What to do if the required device is performing a software upgrade or is changing its
            # What to do if the required device is performing a software upgrade or is changing its
            # adminMode? How can CSP.LMC detect this condition?
            # adminMode? How can CSP.LMC detect this condition?
            list_of_running_cmd = [cmd_name for cmd_name, cmd_state in dev_instance._cmd_execution_state.items()
            list_of_running_cmd = [cmd_name for cmd_name, cmd_state in dev_instance._cmd_execution_state.items()
                                   if cmd_state == CmdExecState.RUNNING]
                                   if cmd_state == CmdExecState.RUNNING]
            dev_instance.logger.debug("decorator: list of running commands: {}".format(list_of_running_cmd))
            if list_of_running_cmd:
            if list_of_running_cmd:
                # if a command is running, check if its the requested one
                # if a command is running, check if its the requested one
                if len(list_of_running_cmd) > 1:
                if len(list_of_running_cmd) > 1:
Loading