Hierarchical data#

Bokeh does not have any built-in APIs specifically for handling hierarchical data, but it is possible to use Bokeh’s basic components together with other libraries to handle many cases. Some examples are described below.

Treemaps#

A treemap plot provides view a hierarchical data that help highlight patterns, e.g. largest or smallest sellers in sales data. Tree branches are represented by rectangles and sub-branches by smaller, nested rectangles.

Tee example below shows how a treemap plot can be created using the Bokeh block() function together with the third-party Squarify library.

import pandas as pd
from squarify import normalize_sizes, squarify

from bokeh.plotting import figure, show
from bokeh.sampledata.sample_superstore import data
from bokeh.transform import factor_cmap

data = data[["City", "Region", "Sales"]]

regions = ("West", "Central", "South", "East")

sales_by_city = data.groupby(["Region", "City"]).sum("Sales")
sales_by_city = sales_by_city.sort_values(by="Sales").reset_index()

sales_by_region = sales_by_city.groupby("Region").sum("Sales").sort_values(by="Sales")

def treemap(df, col, x, y, dx, dy, *, N=100):
    sub_df = df.nlargest(N, col)
    normed = normalize_sizes(sub_df[col], dx, dy)
    blocks = squarify(normed, x, y, dx, dy)
    blocks_df = pd.DataFrame.from_dict(blocks).set_index(sub_df.index)
    return sub_df.join(blocks_df, how='left').reset_index()

x, y, w, h = 0, 0, 800, 450

blocks_by_region = treemap(sales_by_region, "Sales", x, y, w, h)

dfs = []
for index, (Region, Sales, x, y, dx, dy) in blocks_by_region.iterrows():
    df = sales_by_city[sales_by_city.Region==Region]
    dfs.append(treemap(df, "Sales", x, y, dx, dy, N=10))
blocks = pd.concat(dfs)

p = figure(width=w, height=h, tooltips="@City", toolbar_location=None,
           x_axis_location=None, y_axis_location=None)
p.x_range.range_padding = p.y_range.range_padding = 0
p.grid.grid_line_color = None

p.block('x', 'y', 'dx', 'dy', source=blocks, line_width=1, line_color="white",
        fill_alpha=0.8, fill_color=factor_cmap("Region", "MediumContrast4", regions))

p.text('x', 'y', x_offset=2, text="Region", source=blocks_by_region,
       text_font_size="18pt", text_color="white")

blocks["ytop"] = blocks.y + blocks.dy
p.text('x', 'ytop', x_offset=2, y_offset=2, text="City", source=blocks,
       text_font_size="6pt", text_baseline="top",
       text_color=factor_cmap("Region", ("black", "white", "black", "white"), regions))

show(p)

Cross tabulations#

Cross tabulations (i.e. “crosstabs”) also show relationships between parts of a whole and each other. The example below shows an adjacent bar chart applied to a crosstab of sample superstore data. This example is more involved due to more extensive styling and inline labeling.

import pandas as pd

from bokeh.core.properties import value
from bokeh.plotting import ColumnDataSource, figure, show
from bokeh.sampledata.sample_superstore import data as df
from bokeh.transform import cumsum, factor_cmap

rows = pd.crosstab(df.Category, df.Region, aggfunc='sum', values=df.Sales, normalize="index")

source = ColumnDataSource(rows.T)

cats = ["Office Supplies", "Furniture", "Technology"]
regions = source.data["Region"]

p = figure(y_range=cats, x_range=(-0.55, 1.02), height=400, width=700, tools="",
           x_axis_location=None, toolbar_location=None, outline_line_color=None)
p.grid.grid_line_color = None
p.yaxis.fixed_location = 0
p.axis.major_tick_line_color = None
p.axis.major_label_text_color = None
p.axis.axis_line_color = "#4a4a4a"
p.axis.axis_line_width = 6

source.data["color"] = [ "#dadada","#dadada", "#4a4a4a", "#dadada"]
for y in cats:
    left, right = cumsum(y, include_zero=True), cumsum(y)

    p.hbar(y=value(y), left=left, right=right, source=source, height=0.9,
           color=factor_cmap("Region", "MediumContrast4", regions))

    pcts = source.data[y]
    source.data[f"{y} text"] = [f"{r}\n{x*100:0.1f}%" for r, x in zip(regions, pcts)]

    p.text(y=value(y), x=left, text=f"{y} text", source=source, x_offset=10,
           text_color="color", text_baseline="middle", text_font_size="15px")

totals = pd.crosstab(df.Category, df.Region, margins=True, aggfunc='sum',
                     values=df.Sales, normalize="columns").All

p.hbar(right=0, left=-totals, y=totals.index, height=0.9, color="#dadada")

text = [f"{name} ({totals.loc[name]*100:0.1f}%)" for name in cats]
p.text(y=cats, x=0, text=text, text_baseline="middle", text_align="right",
       x_offset=-12, text_color="#4a4a4a", text_font_size="20px",
       text_font_style="bold")

show(p)

Data cube#

Future chapters will cover interactions and widgets in more detail, but it is worth mentioning here that Bokeh does have one widget that is specifically intended for presenting a view of hierarchical data. A simple example of using the DataCube is shown below.

from bokeh.io import show
from bokeh.models import (ColumnDataSource, DataCube, GroupingInfo,
                          StringFormatter, SumAggregator, TableColumn)

source = ColumnDataSource(data=dict(
    d0=['A', 'E', 'E', 'E', 'J', 'L', 'M'],
    d1=['B', 'D', 'D', 'H', 'K', 'L', 'N'],
    d2=['C', 'F', 'G', 'H', 'K', 'L', 'O'],
    px=[10, 20, 30, 40, 50, 60, 70],
))

target = ColumnDataSource(data=dict(row_indices=[], labels=[]))

formatter = StringFormatter(font_style='bold')

columns = [
    TableColumn(field='d2', title='Name', width=80, sortable=False, formatter=formatter),
    TableColumn(field='px', title='Price', width=40, sortable=False),
]

grouping = [
    GroupingInfo(getter='d0', aggregators=[SumAggregator(field_='px')]),
    GroupingInfo(getter='d1', aggregators=[SumAggregator(field_='px')]),
]

cube = DataCube(source=source, columns=columns, grouping=grouping, target=target)

show(cube)