""" A box plot of autompg data. This example demonstrates combining multiple
basic glyphs to create a more complicated chart.

- Sampledata: autompg2
- Bokeh APIs: ColumnDataSource, Whisker, figure.vbar, figure.scatter, factor_cmap
- More info:
- Keywords: bars, boxplot, categorical, pandas

"""
import pandas as pd
from bokeh.models import ColumnDataSource, Whisker
from bokeh.plotting import figure, show
from bokeh.sampledata.autompg2 import autompg2
from bokeh.transform import factor_cmap
# Work with just the vehicle class and highway mileage columns. The "class"
# column is renamed to "kind" so it can also be reached with attribute-style
# access ("class" is a reserved word in Python).
df = autompg2[["class", "hwy"]].rename(columns={"class": "kind"})
kinds = df["kind"].unique()

# Per-class quartiles of highway MPG: one row per kind, one column per
# quantile, then flattened back into ordinary columns.
qs = (
    df.groupby("kind")["hwy"]
    .quantile([0.25, 0.5, 0.75])
    .unstack()
    .reset_index()
)
qs.columns = ["kind", "q1", "q2", "q3"]

# Outlier fences sit 1.5 interquartile ranges beyond the first and third
# quartiles.
iqr = qs["q3"] - qs["q1"]
qs["upper"] = qs["q3"] + 1.5 * iqr
qs["lower"] = qs["q1"] - 1.5 * iqr

# Attach each class's quartiles and fences to every observation so that
# individual rows can later be compared against their own class's bounds.
df = df.merge(qs, on="kind", how="left")

# The per-class summary table drives the box and whisker glyphs.
source = ColumnDataSource(qs)
# Categorical x-axis with one factor per vehicle class; no tools and no
# toolbar are shown.
p = figure(x_range=kinds, tools="", toolbar_location=None,
           title="Highway MPG distribution by vehicle class",
           background_fill_color="#eaefef", y_axis_label="MPG")

# outlier range
whisker = Whisker(base="kind", upper="upper", lower="lower", source=source)
whisker.upper_head.size = whisker.lower_head.size = 20
# A Whisker is an annotation rather than a glyph method on the figure:
# constructing it does not draw anything, so it must be attached to the plot
# explicitly or the whiskers never appear.
p.add_layout(whisker)
# Quantile boxes: each class gets two stacked bars, one spanning the median
# to the third quartile and one spanning the first quartile to the median,
# both filled with the class's colour and outlined in black.
cmap = factor_cmap("kind", "TolRainbow7", kinds)
box_style = dict(source=source, color=cmap, line_color="black")
for top, bottom in (("q2", "q3"), ("q1", "q2")):
    p.vbar("kind", 0.7, top, bottom, **box_style)

# Outliers: observations whose highway MPG falls outside their own class's
# [lower, upper] fence (bounds inclusive), drawn as faint black markers.
within_fences = df["hwy"].between(df["lower"], df["upper"])
outliers = df[~within_fences]
p.scatter("kind", "hwy", source=outliers, size=6, color="black", alpha=0.3)

# Hide the vertical grid lines.
p.xgrid.grid_line_color = None