Hierarchical data#
Bokeh does not have any built-in APIs specifically for handling hierarchical data, but it is possible to use Bokeh’s basic components together with other libraries to handle many cases. Some examples are described below.
Treemaps#
A treemap plot provides a view of hierarchical data that helps highlight patterns, e.g. the largest or smallest sellers in sales data. Tree branches are represented by rectangles and sub-branches by smaller, nested rectangles.
The example below shows how a treemap plot can be created using the Bokeh
block()
function together with the third-party
Squarify library.
import pandas as pd
from squarify import normalize_sizes, squarify
from bokeh.plotting import figure, show
from bokeh.sampledata.sample_superstore import data
from bokeh.transform import factor_cmap
# Keep only the columns the treemap needs.
data = data[["City", "Region", "Sales"]]

# Region factors, in the order handed to the color mappers further down.
regions = ("West", "Central", "South", "East")

# Total sales per (Region, City), sorted ascending. The column is selected
# explicitly instead of calling ``.sum("Sales")``: the first positional
# parameter of ``GroupBy.sum`` is ``numeric_only``, so the string was only
# ever interpreted as a truthy flag, not as a column selector.
sales_by_city = data.groupby(["Region", "City"])[["Sales"]].sum()
sales_by_city = sales_by_city.sort_values(by="Sales").reset_index()

# Total sales per region, sorted ascending.
sales_by_region = (
    sales_by_city.groupby("Region")[["Sales"]].sum().sort_values(by="Sales")
)
def treemap(df, col, x, y, dx, dy, *, N=100):
    """Lay out the ``N`` largest rows of ``df`` as treemap rectangles.

    The rows with the largest values in ``col`` are selected, their values
    are normalized with ``normalize_sizes`` for a ``dx`` by ``dy`` area, and
    ``squarify`` is called with the origin ``(x, y)`` and that extent.

    Returns the selected rows left-joined with the squarify output (aligned
    on the selected rows' index), with the index reset into a column.
    """
    top_rows = df.nlargest(N, col)

    areas = normalize_sizes(top_rows[col], dx, dy)
    rects = squarify(areas, x, y, dx, dy)

    # Give the rectangles the same index as the rows they were computed from
    # so the join below lines them up row for row.
    rects_df = pd.DataFrame.from_dict(rects)
    rects_df = rects_df.set_index(top_rows.index)

    combined = top_rows.join(rects_df, how='left')
    return combined.reset_index()
# Origin and overall size of the treemap; w and h are also the figure size.
x, y, w, h = 0, 0, 800, 450

# Top level: one rectangle per region, laid out over the whole area.
blocks_by_region = treemap(sales_by_region, "Sales", x, y, w, h)

# Second level: subdivide each region's rectangle among its ten largest
# cities. The loop targets are deliberately distinct from the module-level
# ``x`` and ``y`` so the treemap origin is not overwritten while iterating.
dfs = []
for _, (region, _sales, rx, ry, rdx, rdy) in blocks_by_region.iterrows():
    city_sales = sales_by_city[sales_by_city.Region == region]
    dfs.append(treemap(city_sales, "Sales", rx, ry, rdx, rdy, N=10))

blocks = pd.concat(dfs)
# Bare canvas: no toolbar, axes or grid; hovering shows the city name.
p = figure(
    width=w,
    height=h,
    tooltips="@City",
    toolbar_location=None,
    x_axis_location=None,
    y_axis_location=None,
)
p.grid.grid_line_color = None
p.x_range.range_padding = 0
p.y_range.range_padding = 0

# City rectangles, filled according to their region.
region_fill = factor_cmap("Region", "MediumContrast4", regions)
p.block(
    'x', 'y', 'dx', 'dy',
    source=blocks,
    line_width=1,
    line_color="white",
    fill_alpha=0.8,
    fill_color=region_fill,
)

# Large region names drawn at each region rectangle's (x, y) corner.
p.text(
    'x', 'y',
    x_offset=2,
    text="Region",
    source=blocks_by_region,
    text_font_size="18pt",
    text_color="white",
)

# Small city names hung from the top edge (y + dy) of each city rectangle.
# The column is added only after the block glyph above has been created,
# matching the original statement order.
blocks["ytop"] = blocks["y"] + blocks["dy"]
city_label_color = factor_cmap(
    "Region", ("black", "white", "black", "white"), regions,
)
p.text(
    'x', 'ytop',
    x_offset=2,
    y_offset=2,
    text="City",
    source=blocks,
    text_font_size="6pt",
    text_baseline="top",
    text_color=city_label_color,
)

show(p)
Cross tabulations#
Cross tabulations (i.e. “crosstabs”) also show relationships between parts of a whole and each other. The example below shows an adjacent bar chart applied to a crosstab of sample superstore data. This example is more involved due to more extensive styling and inline labeling.
import pandas as pd
from bokeh.core.properties import value
from bokeh.plotting import ColumnDataSource, figure, show
from bokeh.sampledata.sample_superstore import data as df
from bokeh.transform import cumsum, factor_cmap
# Sales summed per (Category, Region); normalize="index" scales each
# category row so its regional values are fractions of that row's total.
rows = pd.crosstab(
    df.Category,
    df.Region,
    aggfunc='sum',
    values=df.Sales,
    normalize="index",
)

# Transposed so every category becomes a column with one entry per region.
source = ColumnDataSource(rows.T)
regions = source.data["Region"]

cats = ["Office Supplies", "Furniture", "Technology"]

p = figure(
    y_range=cats,
    x_range=(-0.55, 1.02),
    height=400,
    width=700,
    tools="",
    x_axis_location=None,
    toolbar_location=None,
    outline_line_color=None,
)
p.grid.grid_line_color = None

# Only a thick dark y-axis line pinned at x = 0 is kept; ticks and tick
# labels are hidden.
p.yaxis.fixed_location = 0
p.axis.axis_line_color = "#4a4a4a"
p.axis.axis_line_width = 6
p.axis.major_tick_line_color = None
p.axis.major_label_text_color = None

# Per-region color for the inline labels drawn later.
source.data["color"] = ["#dadada", "#dadada", "#4a4a4a", "#dadada"]
# One row of adjacent bars per category: each region's bar spans from the
# running total before it to the running total including it.
for cat in cats:
    bar_start = cumsum(cat, include_zero=True)
    bar_end = cumsum(cat)
    p.hbar(
        y=value(cat),
        left=bar_start,
        right=bar_end,
        source=source,
        height=0.9,
        color=factor_cmap("Region", "MediumContrast4", regions),
    )

    # Inline label for every segment: region name over its percentage.
    label_column = f"{cat} text"
    shares = source.data[cat]
    source.data[label_column] = [
        f"{region}\n{share*100:0.1f}%" for region, share in zip(regions, shares)
    ]
    p.text(
        y=value(cat),
        x=bar_start,
        text=label_column,
        source=source,
        x_offset=10,
        text_color="color",
        text_baseline="center",
        text_font_size="15px",
    )
# Column-normalized crosstab with margins; the "All" column is kept as the
# per-category value plotted to the left of the axis.
with_margins = pd.crosstab(
    df.Category,
    df.Region,
    margins=True,
    aggfunc='sum',
    values=df.Sales,
    normalize="columns",
)
totals = with_margins.All

# Grey bars extending leftwards from x = 0, one per category.
p.hbar(right=0, left=-totals, y=totals.index, height=0.9, color="#dadada")

# Bold, right-aligned category captions placed just left of the axis.
text = [f"{cat} ({totals.loc[cat]*100:0.1f}%)" for cat in cats]
p.text(
    y=cats,
    x=0,
    text=text,
    text_baseline="center",
    text_align="right",
    x_offset=-12,
    text_color="#4a4a4a",
    text_font_size="20px",
    text_font_style="bold",
)

show(p)
Data cube#
Future chapters will cover interactions and widgets in more detail, but it is
worth mentioning here that Bokeh does have one widget that is specifically
intended for presenting a view of hierarchical data. A simple example of using
the DataCube
is shown below.
from bokeh.io import show
from bokeh.models import (ColumnDataSource, DataCube, GroupingInfo,
StringFormatter, SumAggregator, TableColumn)
# Flat table: d0, d1 and d2 are the three label columns, px the value column.
source = ColumnDataSource(data={
    "d0": ['A', 'E', 'E', 'E', 'J', 'L', 'M'],
    "d1": ['B', 'D', 'D', 'H', 'K', 'L', 'N'],
    "d2": ['C', 'F', 'G', 'H', 'K', 'L', 'O'],
    "px": [10, 20, 30, 40, 50, 60, 70],
})

# Initially empty source handed to the DataCube as its ``target``.
target = ColumnDataSource(data={"row_indices": [], "labels": []})

formatter = StringFormatter(font_style='bold')

name_column = TableColumn(
    field='d2', title='Name', width=80, sortable=False, formatter=formatter,
)
price_column = TableColumn(field='px', title='Price', width=40, sortable=False)
columns = [name_column, price_column]

# Group by d0, then by d1; each grouping level sums the px column.
grouping = [
    GroupingInfo(getter=level, aggregators=[SumAggregator(field_='px')])
    for level in ('d0', 'd1')
]

cube = DataCube(source=source, columns=columns, grouping=grouping, target=target)

show(cube)