Categorical plots#
Scatters#
Sometimes there are many values associated with each category. For example, a series of measurements on different days of the week. In this case, you can visualize your data using a categorical scatter plot.
Adding jitter#
To avoid overlap between numerous scatter points for a single category, use
the jitter()
function to give each point a random
offset.
The example below shows a scatter plot of every commit time for a GitHub user
between 2012 and 2016. It groups commits by day of the week. By default, this
plot would show thousands of points overlapping in a narrow line for each day.
The jitter
function lets you differentiate the points to produce a useful
plot:
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure, show
from bokeh.sampledata.commits import data
from bokeh.transform import jitter
# Weekday factors for the categorical y-axis, in the order passed to y_range.
DAYS = ['Sun', 'Sat', 'Fri', 'Thu', 'Wed', 'Tue', 'Mon']

source = ColumnDataSource(data)

p = figure(
    width=800,
    height=300,
    y_range=DAYS,
    x_axis_type='datetime',
    title="Commits by Time of Day (US/Central) 2012-2016",
)

# Give every point a random offset around its 'day' category so the commits
# for one day do not all overlap on a single narrow line.
jittered_day = jitter('day', width=0.6, range=p.y_range)
p.scatter(x='time', y=jittered_day, source=source, alpha=0.3)

p.x_range.range_padding = 0
p.ygrid.grid_line_color = None
p.xaxis.formatter.days = '%Hh'

show(p)
Series#
There may also be ordered series of data associated with each category. In such cases, the series can be represented as a line or area plotted for each category. To accomplish this, Bokeh has a concept of categorical offsets that can afford explicit control over positioning “within” a category.
Categorical offsets#
Outside of the dodge
and jitter
functions, you can also supply an
offset to a categorical location explicitly. To do so, add a numeric value
to the end of a category. For example, ["Jan", 0.2]
gives the category
“Jan” an offset of 0.2.
For multi-level categories, add the value at the end of the existing list:
["West", "Sales", -0.2]
. Bokeh interprets any numeric value at the end
of a list of categories as an offset.
Take the fruit example above and modify it as follows:
fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
offsets = [-0.5, -0.2, 0.0, 0.3, 0.1, 0.3]
counts = [5, 3, 4, 2, 4, 6]

# Pair each category with its offset: ('Apples', -0.5), ('Pears', -0.2), ...
x = [(fruit, offset) for fruit, offset in zip(fruits, offsets)]

# `p` is the figure from the fruit example that this snippet modifies.
p.vbar(x=x, top=counts, width=0.8)
This will shift each bar horizontally by the corresponding offset.
Below is a more sophisticated example of a ridge plot. It uses categorical offsets to specify patch coordinates for each category.
import colorcet as cc
from numpy import linspace
from scipy.stats import gaussian_kde
from bokeh.models import ColumnDataSource, FixedTicker, PrintfTickFormatter
from bokeh.plotting import figure, show
from bokeh.sampledata.perceptions import probly
def ridge(category, data, scale=20):
    """Pair every value in ``data`` with ``category`` as a categorical offset.

    Args:
        category: The categorical factor the values belong to.
        data: Iterable of numeric values, for example a NumPy array of
            density estimates.
        scale: Multiplier applied to each value before it is used as the
            offset. Defaults to 20.

    Returns:
        A list of ``(category, scale * value)`` tuples, one per value.
    """
    # Multiply element by element: ``scale * data`` would repeat a plain
    # list instead of scaling it.
    return [(category, scale * value) for value in data]
# Factors for the categorical y-axis: the keys of `probly` in reverse order.
cats = list(reversed(probly.keys()))

# One color per ridge, taken from every 15th entry of the colorcet rainbow map.
palette = [cc.rainbow[i*15] for i in range(17)]

x = linspace(-20, 110, 500)
source = ColumnDataSource(data=dict(x=x))

p = figure(y_range=cats, width=900, x_range=(-5, 105), toolbar_location=None)

# Each category gets its own column of (category, offset) pairs, which serve
# as the y-coordinates of that category's patch.
for i, cat in enumerate(reversed(cats)):
    pdf = gaussian_kde(probly[cat])
    y = ridge(cat, pdf(x))
    source.add(y, cat)
    p.patch('x', cat, color=palette[i], alpha=0.6, line_color="black", source=source)

p.outline_line_color = None
p.background_fill_color = "#efefef"

p.xaxis.ticker = FixedTicker(ticks=list(range(0, 101, 10)))
p.xaxis.formatter = PrintfTickFormatter(format="%d%%")

p.ygrid.grid_line_color = None
p.xgrid.grid_line_color = "#dddddd"
# Reuse the axis ticker so the vertical grid lines match the x-axis ticks.
p.xgrid.ticker = p.xaxis.ticker

p.axis.minor_tick_line_color = None
p.axis.major_tick_line_color = None
p.axis.axis_line_color = None

p.y_range.range_padding = 0.12

show(p)
Heatmaps#
It is possible to have values associated with pairs of categories. In this situation, applying different color shades to rectangles that represent a pair of categories will produce a categorical heatmap. Such a plot has two categorical axes.
The following plot lists years from 1948 to 2016 on its x-axis and months of
the year on the y-axis. Each rectangle of the plot corresponds to a
(year, month)
pair. The color of the rectangle indicates the rate of
unemployment in a given month of a given year.
This example uses the LinearColorMapper
to map the colors of the plot
because the unemployment rate is a continuous variable. This mapper is also
passed to the color bar to provide a visual legend on the right:
from math import pi
import pandas as pd
from bokeh.models import BasicTicker, PrintfTickFormatter
from bokeh.plotting import figure, show
from bokeh.sampledata.unemployment1948 import data
from bokeh.transform import linear_cmap
# Index the table by year (as strings) so the years can be used as
# categorical factors on the x-axis.
data['Year'] = data['Year'].astype(str)
data = data.set_index('Year')
data.drop('Annual', axis=1, inplace=True)
data.columns.name = 'Month'

years = list(data.index)
months = list(reversed(data.columns))

# Reshape to a long table of rates with a month and year for each row.
df = pd.DataFrame(data.stack(), columns=['rate']).reset_index()

# This is the colormap from the original NYTimes plot.
colors = [
    "#75968f", "#a5bab7", "#c9d9d3", "#e2e2e2", "#dfccce",
    "#ddb7b1", "#cc7878", "#933b41", "#550b1d",
]

TOOLS = "hover,save,pan,box_zoom,reset,wheel_zoom"

p = figure(
    title=f"US Unemployment ({years[0]} - {years[-1]})",
    x_range=years,
    y_range=months,
    x_axis_location="above",
    width=900,
    height=400,
    tools=TOOLS,
    toolbar_location='below',
    tooltips=[('date', '@Month @Year'), ('rate', '@rate%')],
)

p.grid.grid_line_color = None
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_text_font_size = "7px"
p.axis.major_label_standoff = 0
p.xaxis.major_label_orientation = pi / 3

# Map each rate onto the palette, spanning the observed minimum and maximum.
rate_colors = linear_cmap("rate", colors, low=df.rate.min(), high=df.rate.max())

# One unit-sized rectangle per (Year, Month) pair.
r = p.rect(
    x="Year",
    y="Month",
    width=1,
    height=1,
    source=df,
    fill_color=rate_colors,
    line_color=None,
)

# Build the color bar from the rect renderer and place it on the right.
color_bar = r.construct_color_bar(
    major_label_text_font_size="7px",
    ticker=BasicTicker(desired_num_ticks=len(colors)),
    formatter=PrintfTickFormatter(format="%d%%"),
    label_standoff=6,
    border_line_color=None,
    padding=5,
)
p.add_layout(color_bar, 'right')

show(p)
The following periodic table is a good example of the techniques in this chapter:
Color mappers
Visual offsets
pandas DataFrames
Tooltips
from bokeh.plotting import figure, show
from bokeh.sampledata.periodic_table import elements
from bokeh.transform import dodge, factor_cmap
periods = ["I", "II", "III", "IV", "V", "VI", "VII"]
groups = list(map(str, range(1, 19)))

# Work on a copy so the sample data itself is left untouched.
df = elements.copy()
df["atomic mass"] = df["atomic mass"].astype(str)
df["group"] = df["group"].astype(str)
# Replace the numeric period with its Roman-numeral factor.
df["period"] = [periods[x-1] for x in df.period]

# Drop rows whose group is "-" and leave out Lr and Lu.
keep = (df.group != "-") & (df.symbol != "Lr") & (df.symbol != "Lu")
df = df[keep]

# Fill color for each value of the "metal" column.
cmap = {
    "alkali metal"         : "#a6cee3",
    "alkaline earth metal" : "#1f78b4",
    "metal"                : "#d93b43",
    "halogen"              : "#999d9a",
    "metalloid"            : "#e08d49",
    "noble gas"            : "#eaeaea",
    "nonmetal"             : "#f1d4Af",
    "transition metal"     : "#599d7A",
}

TOOLTIPS = [
    ("Name", "@name"),
    ("Atomic number", "@{atomic number}"),
    ("Atomic mass", "@{atomic mass}"),
    ("Type", "@metal"),
    ("CPK color", "$color[hex, swatch]:CPK"),
    ("Electronic configuration", "@{electronic configuration}"),
]

p = figure(
    title="Periodic Table (omitting LA and AC Series)",
    width=1000,
    height=450,
    x_range=groups,
    y_range=list(reversed(periods)),
    tools="hover",
    toolbar_location=None,
    tooltips=TOOLTIPS,
)

# One box per element, colored by its "metal" category.
metal_colors = factor_cmap('metal', palette=list(cmap.values()), factors=list(cmap))
r = p.rect(
    "group",
    "period",
    0.95,
    0.95,
    source=df,
    fill_alpha=0.6,
    legend_field="metal",
    color=metal_colors,
)

text_props = dict(source=df, text_align="left", text_baseline="middle")

# Shift all labels left of the group's center so they start near the box edge.
x = dodge("group", -0.4, range=p.x_range)

p.text(x=x, y="period", text="symbol", text_font_style="bold", **text_props)

# Remaining labels: (column, vertical offset within the period, font size).
for column, y_offset, font_size in (
    ("atomic number", 0.3, "11px"),
    ("name", -0.35, "7px"),
    ("atomic mass", -0.2, "7px"),
):
    p.text(x=x, y=dodge("period", y_offset, range=p.y_range), text=column,
           text_font_size=font_size, **text_props)

# Labels in group 3 of periods VI and VII for the omitted LA and AC series.
p.text(x=["3", "3"], y=["VI", "VII"], text=["LA", "AC"], text_align="center", text_baseline="middle")

p.outline_line_color = None
p.grid.grid_line_color = None
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_standoff = 0
p.legend.orientation = "horizontal"
p.legend.location = "top_center"
p.hover.renderers = [r]  # only hover element boxes

show(p)