I created a multiline chart which tracks the CPU consumption of a machine week after week:
But I want to include year in the x axis legend, like in this image:
When I try to replace the index values (47, 48, ..., 51) with string values, I get a blank graph. Is it possible to show string labels on the x axis of a multiline chart?
This is my code:
import pandas as pd
from bokeh.plotting import figure, show, output_file
from bokeh.models import ColumnDataSource
# Render the chart into a standalone HTML file.
output_file('temp.html')

# Load the weekly CPU figures, then swap the index for "year-week" strings.
# This string index is the change that leads to the blank graph described
# in the question.
data = pd.read_csv("data.csv")
data.index = ['2021-51', '2021-52', '2022-1', '2022-2', '2022-2']

# One list of CPU values per column (i.e. per day of the week).
cpu_values_daily = data.values.T.tolist()

# multi_line needs one x sequence per line, so repeat the index once per column.
weeks = [data.index for _ in data.columns]

line_data = {
    'semaine': weeks,
    'jour': ['Lundi', 'Mardi', 'Mercredi', 'Jeudi', 'Vendredi', 'Samedi', 'Dimanche'],
    'color': ['red', 'orange', 'yellow', 'green', 'grey', 'pink', 'purple'],
    'HCPU': cpu_values_daily,
}
source = ColumnDataSource(line_data)

p = figure(width=800, height=500)
p.multi_line(
    xs='semaine',
    ys='HCPU',
    legend='jour',
    color='color',
    line_width=5,
    line_alpha=0.6,
    hover_line_alpha=1.0,
    muted_color='color',
    muted_alpha=0.2,
    source=source,
)

# Hide the x axis and print y values without scientific notation.
p.xaxis.visible = False
p.left[0].formatter.use_scientific = False

show(p)
And my file "data.csv":
startdate_dayweek;1;2;3;4;5;6;7
47;150290;345005;343329;351631;368029;322604;615009
48;249414;381473;385862;376488;367117;342397;494052
49;236236;395367;499916;392677;372029;377518;518521
50;223065;347776;434387;372996;378691;385578;645206
51;190055;358690;354985;413861;414002;470053;525458
There are two ways to achieve this:
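Option 1: use `major_label_overrides`. This is the most basic approach: you keep the numeric positions on the x axis and define a dictionary that maps each position to the label you want displayed. In your example this could be: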
from io import StringIO
import pandas as pd
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource
output_notebook()

# Inline copy of "data.csv" so the example is self-contained.
data_csv = """startdate_dayweek;1;2;3;4;5;6;7
47;150290;345005;343329;351631;368029;322604;615009
48;249414;381473;385862;376488;367117;342397;494052
49;236236;395367;499916;392677;372029;377518;518521
50;223065;347776;434387;372996;378691;385578;645206
51;190055;358690;354985;413861;414002;470053;525458
"""
data = pd.read_csv(StringIO(data_csv), sep=';')

# Keep the week numbers as "year-week" label strings, then drop the column so
# that only the seven day-of-week columns remain as plotted values.
startdate_dayweek = '2021-' + data.startdate_dayweek.astype(str)
data.drop('startdate_dayweek', axis=1, inplace=True)

# One entry per line (= per day of the week). The x values are the row
# positions of the frame; the labels are attached to them further below.
df = {'semaine': [data.index]*len(data.columns),
      'jour': ['Lundi', 'Mardi', 'Mercredi', 'Jeudi', 'Vendredi', 'Samedi', 'Dimanche'],
      'color': ['red', 'orange', 'yellow', 'green', 'grey', 'pink', 'purple'],
      'HCPU': data.values.T}
source = ColumnDataSource(df)

p = figure(width=800, height=500)
p.multi_line(xs='semaine', ys='HCPU', legend_group='jour', color='color',
             line_width=5, line_alpha=0.6, hover_line_alpha=1.0,
             muted_color='color', muted_alpha=0.2,
             source=source)
p.left[0].formatter.use_scientific = False

# Label overrides only apply to ticks located exactly at the overridden
# positions. Pin the ticks to the row positions so that the automatic ticker
# cannot pick other locations (e.g. 0.5, 1.5) that would show raw numbers.
tick_positions = data.index.tolist()
p.xaxis.ticker = tick_positions
p.xaxis.major_label_overrides = dict(zip(tick_positions, startdate_dayweek))

show(p)
Option 2: use `p = figure(x_axis_type='datetime')` and a `DatetimeTickFormatter`.
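This is cleaner, because you are working with dates and Bokeh supports dates natively. First convert your index to datetime objects. I appended a day-of-week field and parsed with `%Y-%W-%w` as a workaround, because a year and a week number alone do not identify a single date: the week number `%W` is only taken into account when the day of the week and the year are also given (why I need this is explained here). Then define the formatter you want, in your case `%Y-%W`.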
In your example this could be:
from io import StringIO
import pandas as pd
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, DatetimeTickFormatter
output_notebook()

# Inline copy of "data.csv" so the example is self-contained.
data_csv = """startdate_dayweek;1;2;3;4;5;6;7
47;150290;345005;343329;351631;368029;322604;615009
48;249414;381473;385862;376488;367117;342397;494052
49;236236;395367;499916;392677;372029;377518;518521
50;223065;347776;434387;372996;378691;385578;645206
51;190055;358690;354985;413861;414002;470053;525458
"""
data = pd.read_csv(StringIO(data_csv), sep=';')

# Build "year-week-weekday" strings and parse them into a DatetimeIndex.
# The trailing "-0" supplies the weekday that %W needs in order to resolve
# to a concrete date.
data.startdate_dayweek = '2021-' + data.startdate_dayweek.astype(str) + '-0'
data.index = pd.to_datetime(data.startdate_dayweek, format='%Y-%W-%w')
data.drop('startdate_dayweek', axis=1, inplace=True)

# One entry per line (= per day of the week); every line shares the same
# datetime x values.
df = {'semaine': [data.index]*len(data.columns),
      'jour': ['Lundi', 'Mardi', 'Mercredi', 'Jeudi', 'Vendredi', 'Samedi', 'Dimanche'],
      'color': ['red', 'orange', 'yellow', 'green', 'grey', 'pink', 'purple'],
      'HCPU': data.values.T}
source = ColumnDataSource(df)

p = figure(width=800, height=500, x_axis_type='datetime')
p.multi_line(xs='semaine', ys='HCPU', legend_group='jour', color='color',
             line_width=5, line_alpha=0.6, hover_line_alpha=1.0,
             muted_color='color', muted_alpha=0.2,
             source=source)
p.left[0].formatter.use_scientific = False

# Show day-resolution ticks as "year-week". The format is passed as a plain
# string: Bokeh 3.x only accepts a string here (the list form raises), and
# NOTE(review): Bokeh 2.x is expected to accept a bare string as well - confirm
# against the installed version.
p.xaxis.formatter = DatetimeTickFormatter(days='%Y-%W')

show(p)
In both cases the output looks like this: