I am in a situation where I wrote a script to extract the lines within the specified time frame from a log file. This script works fine until I found that it is only printing the lines which has the timestamp that is logged within the specified time and leaves the lines which are not having a timestamp but logged within the specified time frame. Those lines which are not having a timestamp but present within the specified time frame must also be printed. But I don't know how to achieve this.
The below is the log file
[17:02:12:161][01-03-2024]some log info here:
step1
step2
step3
[17:02:12:163][01-03-2024]some log here
a
b
c
[17:02:12:185][01-03-2024]Time taken : 11
start timestamp: [17:02:12:161][01-03-2024] end timestamp: [17:02:12:163][01-03-2024]
But I also want those a
, b
, c
lines since they are also logged during the end timestamp
Please note that I have no authority to change this timestamp format. I know this is not in a correct format.
Below is the script
#!/bin/bash
# Function to check the timestamp format
timestamp_pattern_checker() {
local input_pattern="^\[([0-9]{2}:[0-9]{2}:[0-9]{2}:[0-9]{3})\]\[([0-9]{2}-[0-9]{2}-[0-9]{4})\]$"
if [[ ! $1 =~ $input_pattern ]]; then
echo "Invalid Timestamp pattern"
echo "Timestamps should be in the format '[HH:MM:SS:SSS][DD-MM-YYYY]' or HH"
exit 1
fi
}
# Function to convert hours to timestamp format
convert_hours_to_timestamp() {
local hour=$1
printf "[%02d:00:00:000]" "$hour"
}
# Check if correct number of arguments are passed
if [ "$#" -ne 3 ]; then
echo "Usage: $0 <log_file_name> <start_timestamp> <end_timestamp>"
echo "Timestamps should be in the format '[HH:MM:SS:SSS][DD-MM-YYYY]' or as integers representing hours (00 to 23)"
exit 1
fi
log_file=$1
start_input=$2
end_input=$3
# Check if log file exists
if [ ! -f "$log_file" ]; then
echo "File not found: $log_file"
exit 1
fi
# Determine if inputs are hours or full timestamps
if [[ $start_input =~ ^[0-9]{2}$ && $end_input =~ ^[0-9]{2}$ ]]; then
if [[ $start_input -ge 0 && start_input -le 23 && $end_input -ge 0 && $end_input -le 23 ]]; then
if [[ $start_input -le $end_input ]]; then
start_timestamp=$(convert_hours_to_timestamp "$start_input")
end_timestamp=$(convert_hours_to_timestamp "$end_input")
# Extract unique dates from log file
unique_dates=$(awk -F'[][]' '{print $4}' "$log_file" | sort | uniq)
start_timestamps=()
end_timestamps=()
for date in $unique_dates; do
start_timestamps+=("${start_timestamp}[$date]")
end_timestamps+=("${end_timestamp}[$date]")
done
else
echo "Error: start hour must be less than or equal to end hour."
exit 1
fi
else
echo "Error: Hours must be between 00 and 23."
exit 1
fi
else
start_timestamp=$start_input
end_timestamp=$end_input
timestamp_pattern_checker "$start_timestamp"
timestamp_pattern_checker "$end_timestamp"
start_timestamps=("$start_timestamp")
end_timestamps=("$end_timestamp")
fi
awk -v starts="${start_timestamps[*]}" -v ends="${end_timestamps[*]}" '
function parsedate(date) {
split(date, a, /[]:[-]+/)
return a[6] "-" a[7] "-" a[8] "T" a[1] ":" a[2] ":" a[3] ":" a[4] "." a[5]
}
BEGIN {
split(starts, start_arr, " ")
split(ends, end_arr, " ")
for (i in start_arr) {
st[i] = parsedate(start_arr[i])
et[i] = parsedate(end_arr[i])
}
log_count = 0
}
{
p = parsedate($0)
for (i in st) {
if (p >= st[i] && p <= et[i]) {
print $0
log_count = 1
}
}
}
END {
if (log_count == 0) {
print "Nothing was logged at this given time frame"
}
}' "$log_file"
Finally, I figured it out with the help of the guys here who tried. I learned a lot. Thank you so much for reaching out to help me. The perfect answer that works for me is here.
#!/bin/bash
# Function to check the timestamp format
timestamp_pattern_checker() {
local input_pattern="^\[([0-9]{2}:[0-9]{2}:[0-9]{2}:[0-9]{3})\]\[([0-9]{2}-[0-9]{2}-[0-9]{4})\]$"
if [[ ! $1 =~ $input_pattern ]]; then
echo "Invalid Timestamp pattern"
echo "Timestamps should be in the format '[HH:MM:SS:SSS][DD-MM-YYYY]' or HH"
exit 1
fi
}
# Function to convert hours to timestamp format
# Input: hour (integer)
convert_hours_to_timestamp() {
local hour=$1
printf "[%02d:00:00:000]" "$hour"
}
# Function to convert date to timestamp format [DD-MM-YYYY]
# Input: date (string)
convert_date_to_timestamp() {
local date=$1
printf "[%s]" "$date"
}
# Main script starts here
if [ "$#" -ne 1 ]; then
echo "Usage: $0 <log_file>"
exit 1
fi
log_file=$1
# Check if log file exists
if [ ! -f "$log_file" ]; then
echo "File not found: $log_file"
exit 1
fi
read -p "Enter start timestamp (HH or [HH:MM:SS:SSS][DD-MM-YYYY]): " start_input
read -p "Enter end timestamp (HH or [HH:MM:SS:SSS][DD-MM-YYYY]): " end_input
# Determine if inputs are hours or full timestamps
if [[ $start_input =~ ^[0-9]{2}$ && $end_input =~ ^[0-9]{2}$ ]]; then
if [[ $start_input -ge 0 && $start_input -le 23 && $end_input -ge 0 && $end_input -le 23 ]]; then
if [[ $start_input -le $end_input ]]; then
start_timestamp=$(convert_hours_to_timestamp "$start_input")
end_timestamp=$(convert_hours_to_timestamp "$end_input")
# Extract unique dates from log file
unique_dates=$(awk -F'[][]' '/\[/{print $4}' "$log_file" | sort | uniq)
start_timestamps=()
end_timestamps=()
for date in $unique_dates; do
start_timestamps+=("${start_timestamp}[$date]")
end_timestamps+=("${end_timestamp}[$date]")
done
else
echo "Error: start hour must be less than or equal to end hour."
exit 1
fi
else
echo "Error: Hours must be between 00 and 23."
exit 1
fi
else
timestamp_pattern_checker "$start_input"
timestamp_pattern_checker "$end_input"
start_timestamps=("$start_input")
end_timestamps=("$end_input")
fi
awk -v starts="${start_timestamps[*]}" -v ends="${end_timestamps[*]}" '
function parsedate(date) {
split(date, a, /[]:[-]+/)
#ISO 8601 format timestamp conversion
return a[8] "-" a[7] "-" a[6] "T" a[2] ":" a[3] ":" a[4] "." a[5]
}
BEGIN {
split(starts, start_arr, " ")
split(ends, end_arr, " ")
for (i in start_arr) {
st[i] = parsedate(start_arr[i])
et[i] = parsedate(end_arr[i])
}
log_count = 0
}
{
if (match($0, /^\[[0-9]{2}:[0-9]{2}:[0-9]{2}:[0-9]{3}\]\[[0-9]{2}-[0-9]{2}-[0-9]{4}\]/)) {
p = parsedate(substr($0, RSTART, RLENGTH))
in_range = 0
for (i in st) {
if (p >= st[i] && p <= et[i]) {
in_range = 1
break
}
}
}
if (in_range) {
print
log_count = 1
}
}
END {
if (log_count == 0) {
print "Nothing was logged at this given time frame"
}
}
' "$log_file"