Commit 7d48efb1 authored by Cristiano Urban's avatar Cristiano Urban
Browse files

Built file_cleaner.py as a class + added logging and exception handling + minor changes.

parent 665510fa
Loading
Loading
Loading
Loading
Loading

transfer_service/cleaner.py

deleted100644 → 0
+0 −62
Original line number Diff line number Diff line
#!/usr/bin/env python
#
# This file is part of vospace-transfer-service
# Copyright (C) 2021 Istituto Nazionale di Astrofisica
# SPDX-License-Identifier: GPL-3.0-or-later
#

import datetime
import os

from config import Config
from db_connector import DbConnector


# Cleanup script: physically removes the files of VOSpace nodes flagged as
# deleted once their retention time has expired, records the removal in the
# file catalog and finally prunes the folders left empty.

# Open the file catalog connection using the "file_catalog" settings.
config = Config("/etc/vos_ts/vos_ts.conf")
params = config.loadSection("file_catalog")
dbConn = DbConnector(params["user"],
                     params["password"],
                     params["host"],
                     params.getint("port"),
                     params["db"],
                     1,
                     1)

# Retention time, read from the "cleanup" section as days plus
# hours/minutes/seconds (the latter three folded into a seconds count).
params = config.loadSection("cleanup")
days = params.getint("days")
seconds = params.getint("hours") * 3600 + params.getint("minutes") * 60 + params.getint("seconds")

# Avoid "all zero" condition
if days <= 0 and seconds < 30:
    days = 0
    seconds = 30
elif seconds >= 86400:
    # Carry whole days out of the seconds count.
    days += seconds // 86400
    seconds = seconds % 86400

# The retention time as a single duration. Comparing timedelta.days and
# timedelta.seconds separately is wrong: '.seconds' is only the intra-day
# remainder, so a file deleted many days ago early in the day would never
# pass a "seconds" check and would be kept forever.
retention = datetime.timedelta(days=max(days, 0), seconds=seconds)

fileList = dbConn.getNodesToBeDeleted()
basePaths = []
for row in fileList:
    basePath = row["os_base_path"]
    relPath = row["os_rel_path"]
    filePath = basePath + relPath
    dTime = row["deleted_on"]
    cTime = datetime.datetime.now()
    nodeId = row["node_id"]
    delta = cTime - dTime
    if delta > retention:
        os.remove(filePath)
        print(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + ' ' + filePath)
        dbConn.setPhyDeletedOn(nodeId)
        # Remember each base path once: it is scanned below for empty folders.
        if basePath not in basePaths:
            basePaths.append(basePath)

# Bottom-up walk, so that a folder emptied by the removal of its own
# sub-folders is found empty when its parent is visited. Only sub-folders
# are examined, hence the base path itself is never removed.
for basePath in basePaths:
    for root, dirs, files in os.walk(basePath, topdown = False):
        for dirName in dirs:
            dirPath = os.path.abspath(root) + '/' + dirName
            if not os.listdir(dirPath):
                os.rmdir(dirPath)
                print(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + ' ' + dirPath)

# 1) delete files and update the db
# 2) check for empty dirs and delete them if different from rootPath
+1 −1
Original line number Diff line number Diff line
@@ -4,4 +4,4 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#

0 3 * * * cd /home/transfer_service/ && /usr/local/bin/python3.9 cleaner.py >> /var/log/cleaner.log 2>&1 
* * * * * cd /home/transfer_service/ && /usr/local/bin/python3.9 file_cleaner.py 2> /var/log/vos_ts/error.log
+98 −0
Original line number Diff line number Diff line
#!/usr/bin/env python
#
# This file is part of vospace-transfer-service
# Copyright (C) 2021 Istituto Nazionale di Astrofisica
# SPDX-License-Identifier: GPL-3.0-or-later
#
#
# This class is called periodically by a cron job and 
# performs the following tasks:
# 1) delete files associated with deleted VOSpace nodes and update the db
# 2) check for empty dirs and delete them if different from rootPath
#

import datetime
import logging
import os

from config import Config
from db_connector import DbConnector
from redis_log_handler import RedisLogHandler


class FileCleaner(object):
    """Remove from disk the files of deleted VOSpace nodes.

    A cleanup run:
    1) physically deletes every file whose node was deleted longer ago than
       the configured retention time and records the removal in the db;
    2) removes the sub-folders left empty under the affected base paths
       (the base paths themselves are never removed).
    """

    def __init__(self):
        """Read the configuration, connect to the file catalog, set up logging.

        The settings are taken from the "file_catalog", "cleanup" and
        "logging" sections of /etc/vos_ts/vos_ts.conf.
        """
        config = Config("/etc/vos_ts/vos_ts.conf")
        params = config.loadSection("file_catalog")
        self.dbConn = DbConnector(params["user"],
                                  params["password"],
                                  params["host"],
                                  params.getint("port"),
                                  params["db"],
                                  1,
                                  1)
        # Retention time: days plus hours/minutes/seconds folded into seconds.
        params = config.loadSection("cleanup")
        self.days = params.getint("days")
        self.seconds = params.getint("hours") * 3600 + params.getint("minutes") * 60 + params.getint("seconds")
        params = config.loadSection("logging")
        self.logger = logging.getLogger("file_cleaner")
        logFormatter = logging.Formatter(params["log_format"])
        # Resolve the level constant by name (e.g. "DEBUG" -> logging.DEBUG)
        # with getattr: a configuration value must never be passed to eval().
        self.logger.setLevel(getattr(logging, params["log_level"]))
        redisLogHandler = RedisLogHandler()
        redisLogHandler.setFormatter(logFormatter)
        self.logger.addHandler(redisLogHandler)

    def clean(self):
        """Run one cleanup pass.

        Nothing is raised to the caller: any failure is logged and ends the
        run. The "End of cleanup phase" message is logged only when the
        whole procedure completes.
        """
        try:
            self.logger.info("++++++++++ Start of cleanup phase ++++++++++")
            # Avoid "all zero" condition
            if self.days <= 0 and self.seconds < 30:
                self.days = 0
                self.seconds = 30
            elif self.seconds >= 86400:
                # Carry whole days out of the seconds count.
                self.days += self.seconds // 86400
                self.seconds = self.seconds % 86400
            # The retention time as a single duration. Checking
            # timedelta.days and timedelta.seconds separately is wrong:
            # '.seconds' is only the intra-day remainder, so a file deleted
            # several days ago could fail the "seconds" check indefinitely.
            retention = datetime.timedelta(days=max(self.days, 0), seconds=self.seconds)
            try:
                fileList = self.dbConn.getNodesToBeDeleted()
            except Exception:
                self.logger.exception("FATAL: unable to retrieve the list of the files to be deleted.")
                return
            self.logger.info("Removing deleted files from disk...")
            basePaths = []
            for row in fileList:
                basePath = row["os_base_path"]
                filePath = basePath + row["os_rel_path"]
                nodeId = row["node_id"]
                elapsed = datetime.datetime.now() - row["deleted_on"]
                if elapsed <= retention:
                    # Retention time not expired yet: keep the file.
                    continue
                os.remove(filePath)
                self.logger.debug(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + ' ' + filePath)
                try:
                    self.dbConn.setPhyDeletedOn(nodeId)
                except Exception:
                    self.logger.exception(f"FATAL: unable to set the 'phy_deleted_on' flag for VOSpace node having ID = {nodeId}")
                    return
                # Remember each base path once: it is scanned below.
                if basePath not in basePaths:
                    basePaths.append(basePath)
            self.logger.info("Removing empty folders (if any)...")
            for basePath in basePaths:
                # Bottom-up walk, so that a folder emptied by the removal of
                # its sub-folders is found empty when its parent is visited.
                for root, dirs, files in os.walk(basePath, topdown = False):
                    for dirName in dirs:
                        dirPath = os.path.abspath(root) + '/' + dirName
                        if not os.listdir(dirPath):
                            os.rmdir(dirPath)
                            self.logger.debug(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + ' ' + dirPath)
        except Exception:
            self.logger.exception("FATAL: something went wrong during the file cleanup procedure.")
        else:
            self.logger.info("++++++++++ End of cleanup phase ++++++++++")

# Script entry point: build the cleaner (reads the configuration, connects to
# the file catalog, sets up logging) and perform a single cleanup pass.
fc = FileCleaner()
fc.clean()
+1 −1

File changed.

Contains only whitespace changes.