I have a dataset with DATE_TIME, date, and time columns, and a VALUE1 column that holds the measurement value at each time point. For the same ID, there are multiple measurements over a day. In addition, each ID has 6 different 24-hour measurement periods, which are identified by the INSPECTION column.
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.ticker as ticker
# Build a reproducible demo dataset: one row every 20 minutes, from
# 2022-11-01 00:00 through 2022-11-06 23:00.
random.seed(0)

timestamps = pd.date_range('2022-11-01', '2022-11-06 23:00:00', freq='20min')
# Derive the row count from the date range instead of hard-coding 430, so the
# random columns always have the same length as DATE_TIME.
n_rows = len(timestamps)

# NOTE: the draws happen in the same order as before (ID, VALUE1, VALUE2), so
# seed 0 still yields the same values.
df = pd.DataFrame({
    'DATE_TIME': timestamps,
    'ID': [random.randrange(1, 3) for _ in range(n_rows)],
})
df['VALUE1'] = [random.uniform(110, 160) for _ in range(n_rows)]
df['VALUE2'] = [random.uniform(50, 80) for _ in range(n_rows)]

# The day of the month doubles as the inspection number (1..6 for this range).
df['INSPECTION'] = df['DATE_TIME'].dt.day
# df['INSPECTION'] = df['INSPECTION'].replace(6, 1)
# df['INSPECTION'] = df['INSPECTION'].replace(3, 1)

# Inspection 1 -> 'A', inspections 2 and 3 -> 'B', everything else -> 'C'.
df['MODE'] = np.select(
    [df['INSPECTION'] == 1, df['INSPECTION'].isin([2, 3])],
    ['A', 'B'],
    'C',
)

# Time of day as an 'HH:MM:SS' string.
df['TIME'] = df['DATE_TIME'].dt.time.astype('str')

# Minutes elapsed since the previous row; the first row has no predecessor and
# is set to 0. `.astype('timedelta64[m]')` is rejected by pandas >= 2.0 (only
# s/ms/us/ns resolutions are supported), so convert through total seconds.
df['TIMEINTERVAL'] = (
    df['DATE_TIME'].diff().dt.total_seconds().div(60).fillna(0)
)
def to_day_period(s):
    """Label each time of day in *s* as 'Daytime' or 'Nighttime'.

    *s* holds time-of-day values that ``pd.to_timedelta`` can parse, such as
    'HH:MM:SS' strings. The bins are left-closed (``right=False``):
    06:00:00 up to (but excluding) 18:00:00 is 'Daytime'; 00:00:00 up to
    06:00:00 and 18:00:00 up to 24:00:00 are 'Nighttime'.

    Returns the unordered categorical result of ``pd.cut``.
    """
    # Bin edges as offsets from midnight. The repeated labels below are
    # allowed only because the categorical is unordered.
    edges = ['0', '06:00:00', '13:00:00', '18:00:00', '23:00:00', '24:00:00']
    period_labels = ['Nighttime', 'Daytime', 'Daytime', 'Nighttime', 'Nighttime']

    return pd.cut(
        pd.to_timedelta(s),
        bins=[pd.Timedelta(edge) for edge in edges],
        labels=period_labels,
        right=False,
        ordered=False,
    )
# Tag every row as 'Daytime' or 'Nighttime' based on its time-of-day string.
df['TIME_OF_DAY'] = to_day_period(df['TIME'])
df_monthly = df  # second name bound to the same DataFrame (not a copy)

# ++++++++++++++++++++++++++++++++ sns plot ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Keep only the rows that belong to ID 1.
df_id = df[df['ID'] == 1]

sns.set_style('darkgrid')
sns.set(rc={'figure.figsize': (14, 8)})
# print(df_id.INSPECTION.unique())

# One line per INSPECTION, drawn against the TIME strings on the x-axis.
ax = sns.lineplot(
    data=df_id,
    x='TIME',
    y='VALUE1',
    hue='INSPECTION',
    palette='viridis',
    legend='full',
    lw=3,
)
# Major x ticks at every multiple of 10 in axis units.
ax.xaxis.set_major_locator(ticker.MultipleLocator(10))
plt.legend(bbox_to_anchor=(1, 1))
plt.xlabel('TIME')
plt.ylabel('VALUE1')
plt.show()
How can I show a single 24-hour day cycle on the x-axis without the times repeating? To be specific, the x-axis starts at 00:40:00 and then shows 00:00:00 again. Is there a way to deal with this too? I want the x-axis to show only the times from 00:00:00 to 23:59:00, without any repeats.
Plot against a numeric x-axis instead of time strings, so that each `'INSPECTION'` is properly positioned.
Convert each timestamp to the number of seconds since midnight with the `.total_seconds()` method:
`df.DATE_TIME.apply(lambda row: (row - row.replace(hour=0, minute=0, second=0, microsecond=0)).total_seconds())`
Place a major tick at every hour (3600 seconds) with `ax.xaxis.set_major_locator(tkr.MultipleLocator(3600))`.
Create the hourly tick labels with `hours = [dtime(i).strftime('%H:%M') for i in range(24)] + ['']`.
The trailing `['']` is for the last tick at '00:00' of the next day.
The plots could also be drawn as subplots of one figure, e.g. `fig, (ax1, ax2) = plt.subplots(2, 1)`, but that's a cosmetic change that's not relevant to the question.
Use `sns.move_legend`, not `plt.legend`, as per "Move seaborn plot legend to a different position".
It is better to work consistently with `ax`, the alias for `matplotlib.axes.Axes`, than to alternate between `ax` and `plt`.
Tested in `python 3.11.2`, `pandas 2.0.0`, `matplotlib 3.7.1`, `seaborn 0.12.2`.
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as tkr
from datetime import time as dtime
# Given the existing dataframe, with the DATE_TIME column as a datetime Dtype,
# add a column holding the number of seconds elapsed since midnight of the
# same day. A numeric x-axis lets every inspection share one 0-24 h scale.
df['total_seconds'] = df.DATE_TIME.apply(
    lambda row: (row - row.replace(hour=0, minute=0, second=0, microsecond=0)).total_seconds()
)

# Draw one figure per ID.
for id_ in sorted(df.ID.unique()):

    # select the data for the given id_
    data = df[df.ID.eq(id_)]

    # create a figure
    fig = plt.figure(figsize=(10, 6))

    # plot the data: one line per INSPECTION against seconds since midnight
    ax = sns.lineplot(data=data, x='total_seconds', y='VALUE1', hue='INSPECTION', palette='viridis', legend='full')

    # set the title and labels
    ax.set(title=f'ID: {id_}', xlabel='TIME', ylabel='VALUE1')

    # move the legend outside the axes, to the right
    sns.move_legend(ax, bbox_to_anchor=(1.0, 0.5), loc='center left', frameon=False)

    # constrain the x-axis limits to the number of seconds in a day
    ax.set_xlim(0, 24*3600)

    # create labels for every hour in the day, and add an extra empty label
    # for the last tick position (00:00 of the next day)
    hours = [dtime(i).strftime('%H:%M') for i in range(24)] + ['']

    # create xticks at every hour (3600 seconds)
    ax.xaxis.set_major_locator(tkr.MultipleLocator(3600))

    # set the ticks and corresponding labels; cut off the extra starting and
    # ending ticks so the tick count matches the 25 labels
    ax.set_xticks(ticks=ax.get_xticks()[1:-1], labels=hours, rotation=90)

    # remove spines
    ax.spines[['top', 'right']].set_visible(False)
df.head()
DATE_TIME ID VALUE1 VALUE2 INSPECTION MODE TIME TIMEINTERVAL total_seconds TIME_OF_DAY
0 2022-11-01 00:00:00 2 145.003985 57.488269 1 A 00:00:00 NaT 0.0 Nighttime
1 2022-11-01 00:20:00 2 142.449613 75.888882 1 A 00:20:00 0 days 00:20:00 1200.0 Nighttime
2 2022-11-01 00:40:00 1 119.748681 70.052981 1 A 00:40:00 0 days 00:20:00 2400.0 Nighttime
3 2022-11-01 01:00:00 2 149.170848 69.793085 1 A 01:00:00 0 days 00:20:00 3600.0 Nighttime
4 2022-11-01 01:20:00 2 148.873049 56.777515 1 A 01:20:00 0 days 00:20:00 4800.0 Nighttime