"""This is the Bokeh charts interface. It gives you a high level API
to build complex plots in a simple way.
This is the Chord class which lets you build your Chord charts
by just passing the arguments to the Chart class and calling the proper
functions.
"""
# -----------------------------------------------------------------------------
# Copyright (c) 2012 - 2016, Anaconda, Inc. All rights reserved.
#
# Powered by the Bokeh Development Team.
#
# The full license is in the file LICENSE.txt, distributed with this software.
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Imports
# -----------------------------------------------------------------------------
from __future__ import absolute_import, division
import numpy as np
import pandas as pd
from math import cos, sin, pi
from bokeh.charts.properties import Dimension
from bokeh.charts.builder import create_and_build, Builder
from bokeh.charts.attributes import MarkerAttr, ColorAttr, CatAttr
from bokeh.charts.utils import color_in_equal_space, help
from bokeh.models import Range1d
from bokeh.models.glyphs import Arc, Bezier, Text
from bokeh.models.renderers import GlyphRenderer
from bokeh.models.sources import ColumnDataSource
from bokeh.core.properties import Instance, Bool, String, Array, Float, Any, Seq, Either, Int
# -----------------------------------------------------------------------------
# Classes and functions
# -----------------------------------------------------------------------------
class Area:
    """An arc of the unit circle plus the pool of points available on it.

    The arc spans the angles ``start_point`` .. ``end_point`` (radians).
    ``n_conn`` equally spaced angles are sampled along it (both ends
    included) and converted to ``[x, y]`` pairs on the unit circle. Those
    pairs are stored in ``free_points`` so that callers can consume them as
    start/end points of bezier connections.

    Args:
        n_conn (int or float): number of connection points to create on the
            arc. It is truncated to an ``int`` because ``np.linspace``
            requires an integer sample count.
        start_point (float): angle, in radians, where the arc starts.
        end_point (float): angle, in radians, where the arc ends.

    Raises:
        ValueError: raised by ``np.linspace`` when ``n_conn`` is negative.
    """

    def __init__(self, n_conn, start_point, end_point):
        # Number of connections in that arc area. Callers may hand in a
        # float-typed count (e.g. a sum over a float matrix); coerce it once
        # so it is usable as a sample count below.
        self.n_conn = int(n_conn)
        # The start and end angles of the arc representing the area
        self.start_point = start_point
        self.end_point = end_point
        # Equally spaced angles between start point and end point
        free_points_angles = np.linspace(start_point, end_point, self.n_conn)
        # A list of available X,Y in the chart to consume by each bezier's
        # start and end point
        self.free_points = [[cos(angle), sin(angle)] for angle in free_points_angles]
class ChordBuilder(Builder):
    """ This is the Chord builder and it is in charge of plotting
    Chord graphs in an easy and intuitive way.

    Essentially, we provide a way to ingest the data, make the proper
    calculations and push the references into a source object.
    We additionally make calculations for the ranges. And finally add
    the needed glyphs (arcs, beziers and text) taking the references from
    the sources.
    """

    default_attributes = {'color': ColorAttr(),
                          'marker': MarkerAttr(),
                          'stack': CatAttr()}

    dimensions = ['values']

    values = Dimension('values')

    # Data sources filled in by ``process_data`` and read by
    # ``yield_renderers``.
    arcs_data = Instance(ColumnDataSource)
    text_data = Instance(ColumnDataSource)
    connection_data = Instance(ColumnDataSource)

    # Names of the columns holding both ends of each connection.
    origin = String()
    destination = String()
    # None (count rows), a number (constant value) or a column name.
    value = Any()
    # When true the input data is used directly as the matrix.
    square_matrix = Bool()
    # One label per area; derived from the data when left empty.
    label = Seq(Any())
    # Square matrix of connection values built by ``setup``.
    matrix = Array(Array(Either(Float(), Int())))

    def set_ranges(self):
        """Set symmetric x/y ranges, wider when labels have to be drawn."""
        rng = 1.1 if not self.label else 1.8
        self.x_range = Range1d(-rng, rng)
        self.y_range = Range1d(-rng, rng)

    def setup(self):
        """Build ``self.matrix`` (and default labels) from the input data.

        For non-square input a zero matrix indexed by every distinct value
        of the origin/destination columns is created and then filled
        according to ``self.value``:

        * ``None``: every row of the data adds 1 to its cell;
        * an ``int``/``float``: that constant is stored in each cell;
        * a ``str``: the value is read from that column of the row.

        Raises:
            ValueError: if ``self.value`` is none of the supported kinds.
            AssertionError: if labels are set and their number differs from
                the matrix size.
        """
        if self.square_matrix:
            # It's already a square matrix
            self.matrix = self._data.df.values
        else:
            records = self.values._data
            source = records[self.origin]
            target = records[self.destination]
            # ``pd.concat`` replaces ``Series.append``, which current pandas
            # releases no longer provide.
            union = pd.concat([source, target]).unique()
            size = union.shape[0]
            m = pd.DataFrame(np.zeros((size, size)), columns=union, index=union)

            if not self.label:
                self.label = list(union)

            # Cells are addressed as (row=destination, column=origin), which
            # is the cell the former chained ``m[origin][destination]``
            # targeted; ``.at`` writes to the frame itself instead of relying
            # on a chained assignment reaching it.
            # NOTE(review): only the column-name branch transposes the
            # result, so matrix rows stand for origins there but for
            # destinations in the counting/constant branches - confirm
            # whether that asymmetry is intended.
            if self.value is None:
                for _, row in records.iterrows():
                    m.at[row[self.destination], row[self.origin]] += 1
                self.matrix = m.values
            elif isinstance(self.value, (int, float)):
                for _, row in records.iterrows():
                    m.at[row[self.destination], row[self.origin]] = self.value
                self.matrix = m.values
            elif isinstance(self.value, str):
                for _, row in records.iterrows():
                    m.at[row[self.destination], row[self.origin]] = row[self.value]
                self.matrix = m.values.T
            else:
                # Previously this case left the matrix unset and failed
                # later with an unrelated error.
                raise ValueError(
                    "value must be None, a number or a column name, got %r"
                    % (self.value,)
                )

        if self.label:
            assert len(self.label) == self.matrix.shape[0]

    def process_data(self):
        """Compute the arc, label and connection data sources.

        Every area gets a share of the circle proportional to its weight
        (row sum + column sum - diagonal of the matrix). For each matrix
        cell ``int(cell)`` connections are created, each one consuming a
        free point of the row's area and one of the column's area.
        """
        weights_of_areas = (self.matrix.sum(axis=0) + self.matrix.sum(axis=1)) - self.matrix.diagonal()
        areas_in_radians = (weights_of_areas / weights_of_areas.sum()) * (2 * pi)
        n_areas = areas_in_radians.shape[0]

        # We add a zero in the beginning for the cumulative sum
        points = np.zeros(n_areas + 1)
        points[1:] = areas_in_radians
        points = points.cumsum()

        colors = [color_in_equal_space(area / n_areas) for area in range(n_areas)]

        arcs_data = pd.DataFrame({
            'start_angle': points[:-1],
            'end_angle': points[1:],
            'line_color': colors
        })

        self.arcs_data = ColumnDataSource(arcs_data)

        # Text
        if self.label:
            text_radius = 1.1
            angles = (points[:-1] + points[1:]) / 2.0

            text_positions = pd.DataFrame({
                'angles': angles,
                'text_x': np.cos(angles) * text_radius,
                'text_y': np.sin(angles) * text_radius,
                'text': list(self.label)
            })

            self.text_data = ColumnDataSource(text_positions)

        # Lines
        # Number of connections drawn per cell: truncated like ``int()`` and
        # never negative, matching what ``range(int(cell))`` iterates.
        counts = np.maximum(self.matrix.astype(int), 0)
        # Each connection takes one point from its row area and one from its
        # column area, so a diagonal cell takes two points from the same
        # area. Sizing the pools from the counts (instead of the weights,
        # which subtract the diagonal) keeps ``pop()`` from running dry.
        endpoints = counts.sum(axis=0) + counts.sum(axis=1)
        all_areas = [
            Area(int(endpoints[i]), points[i], points[i + 1])
            for i in range(n_areas)
        ]

        all_connections = []
        for j, row_counts in enumerate(counts):
            # Get the connections origin region
            source = all_areas[j]
            color = colors[j]
            weight = weights_of_areas[j]

            for k, n_links in enumerate(row_counts):
                # Get the connection destination region
                target = all_areas[k]
                for _ in range(int(n_links)):
                    # Get both regions free points and create a connection
                    # with the data
                    p1 = source.free_points.pop()
                    p2 = target.free_points.pop()
                    all_connections.append(p1 + p2 + [color, weight])

        # Passing the column names to the constructor also yields a usable
        # (empty) frame when there is no connection at all.
        columns = ["start_x", "start_y", "end_x", "end_y", "colors", "weight"]
        connections_df = pd.DataFrame(all_connections, columns=columns, dtype=str)

        # Control points of each bezier sit halfway to the centre.
        connections_df["cx0"] = connections_df.start_x.astype("float64") / 2
        connections_df["cy0"] = connections_df.start_y.astype("float64") / 2
        connections_df["cx1"] = connections_df.end_x.astype("float64") / 2
        connections_df["cy1"] = connections_df.end_y.astype("float64") / 2

        weight_values = connections_df.weight.astype("float64")
        connections_df["weight"] = (weight_values / weight_values.sum()) * 3000

        self.connection_data = ColumnDataSource(connections_df)

    def yield_renderers(self):
        """Use the Bezier, Arc and Text glyphs to display the chart.

        Takes reference points from data loaded at the ColumnDataSource.
        The text renderer is only yielded when labels are set.
        """
        beziers = Bezier(x0='start_x',
                         y0='start_y',
                         x1='end_x',
                         y1='end_y',
                         cx0='cx0',
                         cy0='cy0',
                         cx1='cx1',
                         cy1='cy1',
                         line_alpha='weight',
                         line_color='colors')

        yield GlyphRenderer(data_source=self.connection_data, glyph=beziers)

        arcs = Arc(x=0,
                   y=0,
                   radius=1,
                   line_width=10,
                   start_angle='start_angle',
                   end_angle='end_angle',
                   line_color='line_color')

        yield GlyphRenderer(data_source=self.arcs_data, glyph=arcs)

        if self.label:
            text_props = {
                "text_color": "#000000",
                "text_font_size": "8pt",
                "text_align": "left",
                "text_baseline": "middle"
            }

            labels = Text(x='text_x',
                          y='text_y',
                          text='text',
                          angle='angles',
                          **text_props
                          )

            yield GlyphRenderer(data_source=self.text_data, glyph=labels)
@help(ChordBuilder)
def Chord(data, source=None, target=None, value=None, square_matrix=False, label=None, xgrid=False, ygrid=False, **kw):
    """
    Create a chord chart using :class:`ChordBuilder <bokeh.charts.builders.chord_builder.ChordBuilder>`
    to render a chord graph from a variety of value forms.

    This chart displays the inter-relationships between data in a matrix.

    The data can be generated by the chart interface. Given a :class:`DataFrame <pandas.DataFrame>`,
    select two columns to be used as arcs with the `source` and `target` attributes, passing the names of
    those columns. The :class:`Chord <bokeh.charts.builders.chord_builder.Chord>` chart will then deduce the
    relationship between the arcs.

    The value of the connections can be inferred automatically by counting `source` and `target`. If you prefer,
    you can assign a fixed value to all the connections by passing a number as `value`. A third option is to
    pass the name of a third column in the :class:`DataFrame <pandas.DataFrame>` holding the values for the
    connections.

    If you want to plot the relationships in a squared matrix, simply pass the matrix and set the `square_matrix`
    attribute to `True`.

    Reference: `Chord diagram on Wikipedia <https://en.wikipedia.org/wiki/Chord_diagram>`_

    Args:
        data (:ref:`userguide_charts_data_types`): the data source for the chart.
        source (str, optional): Name of the column used as origin of each connection.
        target (str, optional): Name of the column used as destination of each connection.
        value (num or str, optional): A fixed value for every connection, or the name of the
            column holding the value of each connection.
        square_matrix (bool, optional): If square matrix, avoid any calculations during the setup.
        label (list(str), optional): The labels to be put in the areas.

    Returns:
        :class:`Chart`: includes glyph renderers that generate the chord

    Examples:

    .. bokeh-plot::
        :source-position: above

        import pandas as pd
        from bokeh.charts import Chord
        from bokeh.io import show, output_file
        from bokeh.sampledata.les_mis import data

        nodes = data['nodes']
        links = data['links']

        nodes_df = pd.DataFrame(nodes)
        links_df = pd.DataFrame(links)

        source_data = links_df.merge(nodes_df, how='left', left_on='source', right_index=True)
        source_data = source_data.merge(nodes_df, how='left', left_on='target', right_index=True)
        source_data = source_data[source_data["value"] > 5]  # Keep links with a value greater than 5

        chord_from_df = Chord(source_data, source="name_x", target="name_y", value="value")
        output_file('chord_from_df.html')
        show(chord_from_df)

    """
    # Forward the chart-specific arguments under the names the builder uses.
    kw.update(
        origin=source,
        destination=target,
        value=value,
        square_matrix=square_matrix,
        label=label,
        xgrid=xgrid,
        ygrid=ygrid,
    )

    chart = create_and_build(ChordBuilder, data, **kw)

    # A chord diagram has no meaningful axes or frame: hide the first left
    # and bottom axis and drop the outline.
    for side in ("left", "below"):
        getattr(chart, side)[0].visible = False
    chart.outline_line_color = None

    return chart