Search code examples
python · logging · databricks

Logs not being written when a parent directory is included in the filename


Updated with new code

I have a main notebook in Databricks that is run, which creates the root logger in the main notebook, imports the configuration from a logger.py, and calls other modules which contain their own loggers:

# main notebook

import logging

# Import the shared logging helpers once (the original imported them twice,
# both as a plain `from logger import ...` and again in parenthesized form).
from logger import (
    configure_logger,
    get_logfile_name,
)

NAME = 'job1'

# Set up the root logger: module-level loggers created elsewhere with
# logging.getLogger(__name__) have no handlers of their own, so their
# records propagate up to the handlers attached here.
logger = logging.getLogger()
logfile = get_logfile_name(NAME)
logger = configure_logger(logger, logfile=logfile)
import os
import logging
from dates import get_current_date
from source import project_log_dir


def get_logfile_name(name):
    """Return a dated, relative log-file path of the form ``name/name_DATE.log``.

    The returned path is relative; ``configure_logger`` later joins it
    onto the project log directory.
    """
    today = get_current_date()
    return f"{name}/{name}_{today}.log"


def configure_logger(logger, logfile, level=logging.DEBUG):
    """Attach file and stream handlers to *logger* and return it.

    Parameters
    ----------
    logger : logging.Logger
        The logger to configure (typically the root logger).
    logfile : str or None
        Path relative to ``project_log_dir``. If falsy, no file handler
        is added and records only go to the notebook cell output.
    level : int, optional
        Level set on the logger itself (default ``logging.DEBUG``).

    Returns
    -------
    logging.Logger
        The same *logger*, now configured.
    """
    logger.setLevel(level)

    if logfile:
        # Resolve the full path and create the parent directory only when a
        # file is actually requested. The original code did this
        # unconditionally, so logfile=None raised TypeError in os.path.join
        # before the `if logfile:` guards below were ever reached.
        logfile_output = os.path.join(project_log_dir, logfile)
        os.makedirs(os.path.dirname(logfile_output), exist_ok=True)

        # Create a file handler with detailed formatting for log output
        file_handler = logging.FileHandler(logfile_output, mode="a")
        fformatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s: %(message)s')
        file_handler.setFormatter(fformatter)
        logger.addHandler(file_handler)

    # Create a stream handler with simple formatting for cell output
    stream_handler = logging.StreamHandler()
    sformatter = logging.Formatter('%(levelname)s: %(message)s')
    stream_handler.setFormatter(sformatter)
    logger.addHandler(stream_handler)

    # Remove random Pyspark logs (py4j is the JVM bridge used by PySpark)
    logging.getLogger("py4j").setLevel(logging.ERROR)

    # NOTE(review): on Databricks the FileHandler buffers output; the file
    # may appear empty until the handler is flushed/closed at job end.
    if logfile:
        logger.info(f'Logger initiated. Logs will be written to {logfile_output}')
    else:
        logger.info('Logger initiated.')

    return logger
# other modules
import logging


# Module-level logger named after the module. It has no handlers of its
# own, so its records propagate up to the root logger configured in the
# main notebook.
logger = logging.getLogger(__name__)

The output looks fine when running the notebook, but no files are being written. I've experimented with different approaches, and this seems to be the correct way to manage different loggers that write to the same file, logfile.log. The issue is that it isn't writing the logs, and I'm not sure why.

enter image description here


Solution

  • Return the file_handler along with the logger.

    def get_logger(....)
    
    
    return logger,file_handler
    

    Calling Notebook

    logger,file_handler = get_logger(...)
    

    In the last cell of the calling notebook add this line

    file_handler.close()
    logger.removeHandler(file_handler)
    

    Now you will notice the log file in the same folder as the notebook.