# Source code for bokeh.charts.attributes

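'''Attribute specifications for bokeh.charts.

Defines :class:`AttrSpec` and its subclasses, which assign attribute values
(colors, markers, dashes, ids, category labels) to the distinct values found
in columns of data, plus the `color`, `marker` and `cat` helper functions for
building them.
'''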
from __future__ import absolute_import

from copy import copy
from itertools import cycle

import pandas as pd

from bokeh.core.enums import DashPattern
from bokeh.core.has_props import HasProps
from bokeh.core.properties import Any, Bool, Dict, Either, Instance, List, Override, String
from bokeh.models.sources import ColumnDataSource

from . import DEFAULT_PALETTE
from .data_source import ChartDataSource
from .properties import ColumnLabel
from .utils import marker_types
from .stats import Bins

class AttrSpec(HasProps):
    """A container for assigning attributes to values and retrieving them as needed.

    A special function this provides is automatically handling cases where the provided
    iterator is too short compared to the distinct values provided.

    Once created as attr_spec, you can do attr_spec[data_label], where data_label must
    be a one dimensional tuple of values, representing the unique group in the data.

    See the :meth:`AttrSpec.setup` method for the primary way to provide an existing
    AttrSpec with data and column values and update all derived property values.
    """

    # source holding the columns the distinct values are read from
    data = Instance(ColumnDataSource)

    # pool of attribute values that are handed out to the distinct items
    iterable = List(Any, default=None)

    attrname = String(help='Name of the attribute the spec provides.')

    columns = Either(ColumnLabel, List(ColumnLabel), help="""
        The label or list of column labels that correspond to the columns that will be
        used to find all distinct values (single column) or combination of values (
        multiple columns) to then assign a unique attribute to. If not enough unique
        attribute values are found, then the attribute values will be cycled.
        """)

    default = Any(default=None, help="""
        The default value for the attribute, which is used if no column is assigned to
        the attribute for plotting. If the default value is not provided, the first
        value in the `iterable` property is used.
        """)

    attr_map = Dict(Any, Any, help="""
        Created by the attribute specification when `iterable` and `data` are
        available. The `attr_map` will include a mapping between the distinct value(s)
        found in `columns` and the attribute value that has been assigned.
        """)

    items = Any(default=None, help="""
        The attribute specification calculates this list of distinct values that are
        found in `columns` of `data`.
        """)

    sort = Bool(default=True, help="""
        A boolean flag to tell the attribute specification to sort `items`, when it is
        calculated. This affects which value of `iterable` is assigned to each distinct
        value in `items`.
        """)

    ascending = Bool(default=True, help="""
        A boolean flag to tell the attribute specification how to sort `items` if the
        `sort` property is set to `True`. The default setting for `ascending` is `True`.
        """)

    bins = Instance(Bins, help="""
        If an attribute spec is binning data, so that we can map one value in the
        `iterable` to one value in `items`, then this attribute will contain an instance
        of the Bins stat. This is used to create unique labels for each bin, which is
        then used for `items` instead of the actual unique values in `columns`.
        """)

    def __init__(self, columns=None, df=None, iterable=None, default=None,
                 items=None, **properties):
        """Create a lazy evaluated attribute specification.

        Args:
            columns: a list of column labels
            df(:class:`~pandas.DataFrame`): the data source for the attribute spec.
            iterable: an iterable of distinct attribute values
            default: a value to use as the default attribute when no columns are passed
            items: the distinct values in columns. If items is provided as input,
                then the values provided are used instead of being calculated. This can
                be used to force a specific order for assignment.
            **properties: other properties to pass to parent :class:`HasProps`
        """
        properties['columns'] = self._ensure_list(columns)

        if df is not None:
            properties['data'] = ColumnDataSource(df)

        if default is None and iterable is not None:
            # peek at the first value of a copy so the caller's iterable is untouched
            default_iter = copy(iterable)
            properties['default'] = next(iter(default_iter))
        elif default is not None:
            properties['default'] = default

        if iterable is not None:
            properties['iterable'] = iterable

        if items is not None:
            properties['items'] = items

        super(AttrSpec, self).__init__(**properties)

        # covers an `iterable` that did not come from the arguments above, e.g. a
        # subclass that overrides the property default
        if self.default is None and self.iterable is not None:
            self.default = next(iter(copy(self.iterable)))

        if self.data is not None and self.columns is not None:
            if df is None:
                df = self.data.to_df()

            # NOTE(review): `_generate_items` assigns `self.items` unconditionally, so
            # `items` passed by the caller are replaced whenever both data and columns
            # are available -- confirm this is intended given the Args text above.
            self._generate_items(df, columns=self.columns)

        if self.items is not None and self.iterable is not None:
            self.attr_map = self._create_attr_map()

    @staticmethod
    def _ensure_list(attr):
        """Wrap a string in a list and convert a tuple to a list.

        Any other value (including ``None`` or an existing list) is returned unchanged.
        """
        if isinstance(attr, str):
            return [attr]
        elif isinstance(attr, tuple):
            return list(attr)
        else:
            return attr

    @staticmethod
    def _ensure_tuple(attr):
        """Return tuple with the provided value. Returns the value if a tuple."""
        if not isinstance(attr, tuple):
            return (attr,)
        else:
            return attr

    def _setup_default(self):
        """Stores the first value of iterable into `default` property."""
        self.default = next(self._setup_iterable())

    def _setup_iterable(self):
        """Default behavior is to copy and cycle the provided iterable."""
        return cycle(copy(self.iterable))

    def _generate_items(self, df, columns):
        """Produce list of unique tuples that identify each item.

        Args:
            df(:class:`~pandas.DataFrame`): data to read the distinct values from
            columns: column labels whose distinct value combinations become `items`
        """
        if self.sort:
            # TODO (fpliger):   this handles pandas API change so users do not experience
            #                   the related annoying deprecation warning. This is probably worth
            #                   removing when pandas deprecated version (0.16) is "old" enough
            try:
                df = df.sort_values(by=columns, ascending=self.ascending)
            except AttributeError:
                df = df.sort(columns=columns, ascending=self.ascending)

        items = df[columns].drop_duplicates()
        self.items = [tuple(x) for x in items.to_records(index=False)]

    def _create_attr_map(self, df=None, columns=None):
        """Creates map between unique values and available attributes.

        When both `df` and `columns` are given, `items` is regenerated from them first.
        """

        if df is not None and columns is not None:
            self._generate_items(df, columns)

        iterable = self._setup_iterable()

        return {item: next(iterable) for item in self._item_tuples()}

    def _item_tuples(self):
        """Return `items` with every entry coerced to a tuple."""
        return [self._ensure_tuple(item) for item in self.items]

    def set_columns(self, columns):
        """Set columns property and update derived properties as needed."""
        columns = self._ensure_list(columns)

        if all(col in self.data.column_names for col in columns):
            self.columns = columns
        else:
            # we have input values other than columns
            # assume this is now the iterable at this point
            self.iterable = columns
            self._setup_default()

    def setup(self, data=None, columns=None):
        """Set the data and update derived properties as needed."""
        if data is not None:
            self.data = data

        if columns is not None and self.data is not None:
            self.set_columns(columns)

        if self.columns is not None and self.data is not None:
            self.attr_map = self._create_attr_map(self.data.to_df(), self.columns)

    def update_data(self, data):
        """Replace the data and rebuild derived properties for the current columns."""
        self.setup(data=data, columns=self.columns)

    def __getitem__(self, item):
        """Lookup the attribute to use for the given unique group label."""

        if not self.attr_map:
            return self.default

        key = self._ensure_tuple(item)
        if key not in self.attr_map:
            # make sure we have attr map
            self.setup()

        return self.attr_map[key]

    @property
    def series(self):
        """The assigned attributes as a Series indexed by the item tuples.

        Returns an empty Series when no attribute map has been created.
        """
        if not self.attr_map:
            return pd.Series()

        item_tuples = self._item_tuples()
        index = pd.MultiIndex.from_tuples(item_tuples, names=self.columns)
        # look each item up explicitly so values line up with the index regardless of
        # the iteration order of the underlying dict
        values = [self.attr_map[item] for item in item_tuples]
        return pd.Series(values, index=index)


class ColorAttr(AttrSpec):
    """An attribute specification for mapping unique data values to colors.

    .. note::
        Should be expanded to support more complex coloring options.
    """
    attrname = Override(default='color')
    iterable = Override(default=DEFAULT_PALETTE)
    # when True, colors are assigned to bins of a numeric column instead of to
    # each distinct value
    bin = Bool(default=False)

    def __init__(self, **kwargs):
        """Create the spec, accepting `palette` as an alias for `iterable`."""
        palette = kwargs.pop('palette', None)
        if palette is not None:
            kwargs['iterable'] = palette
        super(ColorAttr, self).__init__(**kwargs)

    def _generate_items(self, df, columns):
        """Produce list of unique tuples that identify each item.

        Without binning this defers to :class:`AttrSpec`. With binning, one bin is
        created per value in `iterable` and the bin labels become `items`.

        Raises:
            ValueError: if binning is requested for anything other than a single
                numerical column.
        """
        if not self.bin:
            super(ColorAttr, self)._generate_items(df, columns)
            return

        if len(columns) != 1 or not ChartDataSource.is_number(df[columns[0]]):
            raise ValueError('Binned colors can only be created for one column of '
                             'numerical data.')

        self.bins = Bins(source=ColumnDataSource(df), column=columns[0],
                         bins=len(self.iterable), aggregate=False)

        if self.sort:
            self.bins.sort(ascending=self.ascending)

        self.items = [bin_.label[0] for bin_ in self.bins]

    def add_bin_labels(self, data):
        """Replace the binned column's values in `data._data` with their bin labels.

        The original values are kept in a new ``<column>_values`` column, and the
        labelled column is converted to a categorical ordered by `items`.
        """
        col = self.columns[0]
        values_col = col + '_values'
        df = data._data

        # save original values into new column
        df[values_col] = df[col]

        for bin_ in self.bins:
            # set all rows associated to each bin to the bin label being mapped to colors
            df.loc[df[values_col].isin(bin_.values), col] = bin_.label[0]

        df[col] = pd.Categorical(df[col], categories=list(self.items),
                                 ordered=self.sort)


class MarkerAttr(AttrSpec):
    """An attribute specification for mapping unique data values to markers."""
    attrname = Override(default='marker')
    iterable = Override(default=list(marker_types.keys()))

    def __init__(self, **kwargs):
        """Create the spec, accepting `markers` as an alias for `iterable`."""
        markers = kwargs.pop('markers', None)
        if markers is not None:
            kwargs['iterable'] = markers
        super(MarkerAttr, self).__init__(**kwargs)


# Default pool of dash values that DashAttr cycles through.
# NOTE(review): reads the enum's private `_values` attribute -- presumably the
# list of valid dash pattern names; confirm against bokeh.core.enums.
dashes = DashPattern._values


class DashAttr(AttrSpec):
    """An attribute specification for mapping unique data values to line dashes."""
    attrname = Override(default='dash')
    iterable = Override(default=dashes)

    def __init__(self, **kwargs):
        """Create the spec, accepting `dash` as an alias for `iterable`."""
        dash_values = kwargs.pop('dash', None)
        if dash_values is not None:
            kwargs['iterable'] = dash_values
        super(DashAttr, self).__init__(**kwargs)


class IdAttr(AttrSpec):
    """An attribute specification for mapping unique data values to integer ids."""
    attrname = Override(default='id')

    def _setup_iterable(self):
        """Yield one consecutive integer, starting at 0, per entry in `items`."""
        return iter(range(len(self.items)))


class CatAttr(AttrSpec):
    """An attribute specification for mapping unique data values to labels.

    .. note::
        this is a special attribute specification, which is used for defining which
        labels are used for one aspect of a chart (grouping) vs another (stacking or
        legend)
    """
    attrname = Override(default='nest')

    def __init__(self, **kwargs):
        """Create the spec, accepting `cats` as an alias for `items`."""
        # `cats` is not a declared property, so translate it the same way the
        # sibling specs translate `palette`/`markers`/`dash`
        cats = kwargs.pop('cats', None)
        if cats is not None:
            kwargs['items'] = cats
        super(CatAttr, self).__init__(**kwargs)

    def _setup_iterable(self):
        """Iterate over `items` themselves, so each item is mapped to its own value."""
        return iter(self.items)

    def get_levels(self, columns):
        """Provides a list of levels the attribute represents.

        Args:
            columns (list): the full list of column labels to look this spec's
                columns up in

        Returns:
            the position within `columns` of each of this spec's columns, or an
            empty list when the spec has no columns.
        """
        if self.columns is None:
            return []

        return [columns.index(col) for col in self.columns]


""" Attribute Spec Functions

Convenience functions for producing attribute specifications. These are
the interface end users are expected to use when providing attribute specs
as inputs to the Chart.
"""


def color(columns=None, palette=None, bin=False, **kwargs):
    """Produces a ColorAttr specification for coloring groups of data based on columns.

    Args:
        columns (str or list(str), optional): a column or list of columns for coloring
        palette (list(str), optional): a list of colors to use for assigning to unique
            values in `columns`.
        bin (bool, optional): whether to assign colors to bins of a single numerical
            column rather than to each distinct value (default=False)
        **kwargs: any keyword, arg supported by :class:`AttrSpec`

    Returns:
        a `ColorAttr` object
    """
    if palette is not None:
        kwargs['palette'] = palette

    kwargs['columns'] = columns
    kwargs['bin'] = bin
    return ColorAttr(**kwargs)


def marker(columns=None, markers=None, **kwargs):
    """ Specifies detailed configuration for a marker attribute.

    Args:
        columns (list or str): the column or columns whose distinct values are
            each assigned a marker
        markers (list(str) or str): a custom list of markers. Must exist within
            :data:`marker_types`.
        **kwargs: any keyword, arg supported by :class:`AttrSpec`

    Returns:
        a `MarkerAttr` object
    """
    if markers is not None:
        kwargs['markers'] = markers

    kwargs['columns'] = columns
    return MarkerAttr(**kwargs)


def cat(columns=None, cats=None, sort=True, ascending=True, **kwargs):
    """ Specifies detailed configuration for a chart attribute that uses categoricals.

    Args:
        columns (list or str): the columns used to generate the categorical variable
        cats (list, optional): overrides the values derived from columns
        sort (bool, optional): whether to sort the categorical values (default=True)
        ascending (bool, optional): whether the sort order is ascending (default=True)
        **kwargs: any keyword, arg supported by :class:`AttrSpec`

    Returns:
        a `CatAttr` object
    """
    if cats is not None:
        # the spec stores its distinct values in the `items` property; there is no
        # `cats` property to receive this value
        kwargs['items'] = cats

    kwargs['columns'] = columns
    kwargs['sort'] = sort
    kwargs['ascending'] = ascending

    return CatAttr(**kwargs)