# Source code for holoviews.element.stats

import param
import numpy as np

from ..core.dimension import Dimension, process_dimensions
from ..core.data import Dataset
from ..core.element import Element, Element2D
from ..core.util import get_param_values, unique_iterator, OrderedDict
from .selection import Selection1DExpr, Selection2DExpr


class StatisticsElement(Dataset, Element2D):
    """
    StatisticsElement provides a baseclass for Element types that
    compute statistics based on the input data, usually a density.
    The value dimension of such elements is therefore usually virtual
    and not computed until the element is plotted.
    """

    __abstract = True

    # Ensure Interface does not add an index
    _auto_indexable_1d = False

    def __init__(self, data, kdims=None, vdims=None, **params):
        """Initialize the element from raw data or another Element.

        Args:
            data: The data to wrap. If it is an Element whose interface
                datatype is not "dataframe", its parameter values are
                copied into ``params`` and its values along ``kdims``
                are extracted into a tuple of arrays.
            kdims: Key dimensions. When ``data`` is converted from an
                Element and no kdims are given, the first
                ``len(self.kdims)`` dimensions of ``data`` are used.
            vdims: At most one value dimension. Defaults to a single
                'Density' dimension when empty or None.
            **params: Additional parameters passed to the parent
                constructor.

        Raises:
            ValueError: If more than one value dimension is supplied.
        """
        if (isinstance(data, Element) and
                data.interface.datatype != "dataframe"):
            params.update(get_param_values(data))
            kdims = kdims or data.dimensions()[:len(self.kdims)]
            data = tuple(data.dimension_values(d) for d in kdims)
        # The parent constructor is always called without value
        # dimensions and with vdim validation disabled; the (virtual)
        # vdims are assigned afterwards.
        params.update(dict(kdims=kdims, vdims=[], _validate_vdims=False))
        super(StatisticsElement, self).__init__(data, **params)
        if not vdims:
            self.vdims = [Dimension('Density')]
        elif len(vdims) > 1:
            raise ValueError("%s expects at most one vdim." %
                             type(self).__name__)
        else:
            self.vdims = process_dimensions(None, vdims)['vdims']

    @property
    def dataset(self):
        """
        The Dataset that this object was created from.

        If no dataset has been recorded (``self._dataset`` is None) a
        new Dataset is built from this element without value
        dimensions. If ``self._dataset`` is not a Dataset instance it
        is unpacked as keyword arguments to construct one.
        """
        from . import Dataset
        if self._dataset is None:
            # Combine this element's datatypes with the Dataset
            # defaults, dropping duplicates.
            datatype = list(unique_iterator(self.datatype+Dataset.datatype))
            dataset = Dataset(self, dataset=None, pipeline=None, transforms=None,
                              vdims=[], datatype=datatype)
            return dataset
        elif not isinstance(self._dataset, Dataset):
            return Dataset(self, _validate_vdims=False, **self._dataset)
        return self._dataset

    def range(self, dim, data_range=True, dimension_range=True):
        """Return the lower and upper bounds of values along dimension.

        Args:
            dim: The dimension to compute the range on.
            data_range (bool): Compute range from data values.
                NOTE: this argument is not forwarded; the data range
                is requested from the parent implementation whenever
                ``dim`` is not one of the value dimensions.
            dimension_range (bool): Include Dimension ranges
                Whether to include Dimension range and soft_range
                in range calculation

        Returns:
            Tuple containing the lower and upper bound
        """
        iskdim = self.get_dimension(dim) not in self.vdims
        # iskdim takes the place of data_range in the parent call.
        return super(StatisticsElement, self).range(dim, iskdim, dimension_range)

    def dimension_values(self, dim, expanded=True, flat=True):
        """Return the values along the requested dimension.

        Args:
            dim: The dimension to return values for
            expanded (bool, optional): Whether to expand values
                Whether to return the expanded values, behavior depends
                on the type of data:
                  * Columnar: If false returns unique values
                  * Geometry: If false returns scalar values per geometry
                  * Gridded: If false returns 1D coordinates
            flat (bool, optional): Whether to flatten array

        Returns:
            NumPy array of values along the requested dimension. For a
            value dimension this is an array of NaNs of length
            ``len(self)``, since no data is held for it.
        """
        dim = self.get_dimension(dim, strict=True)
        if dim in self.vdims:
            # np.nan rather than np.NaN: the capitalised alias was
            # removed in NumPy 2.0.
            return np.full(len(self), np.nan)
        return self.interface.values(self, dim, expanded, flat)

    def get_dimension_type(self, dim):
        """Get the type of the requested dimension.

        Type is determined by Dimension.type attribute or common
        type of the dimension values, otherwise None.

        Args:
            dim: Dimension to look up by name or by index

        Returns:
            Declared type of values along the dimension. Value
            dimensions without a declared type report ``np.float64``.
        """
        dim = self.get_dimension(dim)
        if dim is None:
            return None
        elif dim.type is not None:
            return dim.type
        elif dim in self.vdims:
            return np.float64
        return self.interface.dimension_type(self, dim)

    def dframe(self, dimensions=None, multi_index=False):
        """Convert dimension values to DataFrame.

        Returns a pandas dataframe of columns along each requested
        dimension. Defaults to the key dimensions.

        Args:
            dimensions: Dimensions to return as columns
            multi_index: Convert key dimensions to (multi-)index.
                NOTE: this argument is not forwarded; the parent
                implementation is always called with False.

        Returns:
            DataFrame of columns corresponding to each dimension

        Raises:
            ValueError: If any requested dimension is a value
                dimension, since the element holds no data for it.
        """
        if dimensions:
            dimensions = [self.get_dimension(d, strict=True) for d in dimensions]
        else:
            dimensions = self.kdims
        vdims = [d for d in dimensions if d in self.vdims]
        if vdims:
            raise ValueError('%s element does not hold data for value '
                             'dimensions. Could not return data for %s '
                             'dimension(s).' %
                             (type(self).__name__, ', '.join([d.name for d in vdims])))
        return super(StatisticsElement, self).dframe(dimensions, False)

    def columns(self, dimensions=None):
        """Convert dimension values to a dictionary.

        Returns a dictionary of column arrays along each requested
        dimension of the element. Defaults to the key dimensions.

        Args:
            dimensions: Dimensions to return as columns

        Returns:
            Dictionary of arrays for each dimension

        Raises:
            ValueError: If any requested dimension is a value
                dimension, since the element holds no data for it.
        """
        if dimensions is None:
            dimensions = self.kdims
        else:
            dimensions = [self.get_dimension(d, strict=True) for d in dimensions]
        vdims = [d for d in dimensions if d in self.vdims]
        if vdims:
            raise ValueError('%s element does not hold data for value '
                             'dimensions. Could not return data for %s '
                             'dimension(s).' %
                             (type(self).__name__, ', '.join([d.name for d in vdims])))
        return OrderedDict([(d.name, self.dimension_values(d)) for d in dimensions])



class Bivariate(Selection2DExpr, StatisticsElement):
    """
    Bivariate elements are containers for two-dimensional data, which
    is to be visualized as a kernel density estimate. The data should
    be supplied in a tabular format of x- and y-columns.
    """

    group = param.String(default="Bivariate", constant=True)

    # Exactly two key dimensions (list length bounded to 2..2).
    kdims = param.List(default=[Dimension('x'), Dimension('y')],
                       bounds=(2, 2))

    # Zero or one value dimension; defaults to 'Density'.
    vdims = param.List(default=[Dimension('Density')], bounds=(0, 1))



class Distribution(Selection1DExpr, StatisticsElement):
    """
    Distribution elements provide a representation of a
    one-dimensional distribution which can be visualized as a kernel
    density estimate. The data should be supplied in a tabular format
    and will use the first column.
    """

    group = param.String(default='Distribution', constant=True)

    # Exactly one key dimension (list length bounded to 1..1).
    kdims = param.List(default=[Dimension('Value')], bounds=(1, 1))

    # Zero or one value dimension; defaults to 'Density'.
    vdims = param.List(default=[Dimension('Density')], bounds=(0, 1))



class BoxWhisker(Selection1DExpr, Dataset, Element2D):
    """
    BoxWhisker represents data as a distribution highlighting the
    median, mean and various percentiles. It may have a single value
    dimension and any number of key dimensions declaring the grouping
    of each box.
    """

    group = param.String(default='BoxWhisker', constant=True)

    # Any number of key dimensions, including none.
    kdims = param.List(default=[], bounds=(0, None))

    # Exactly one value dimension (list length bounded to 1..1).
    vdims = param.List(default=[Dimension('y')], bounds=(1, 1))

    # NOTE(review): presumably consumed by the Selection1DExpr mixin
    # to flip the axis used for selection expressions -- confirm
    # against .selection.
    _inverted_expr = True


class Violin(BoxWhisker):
    """
    Violin elements represent data as 1D distributions visualized
    as a kernel-density estimate. They may have a single value
    dimension and any number of key dimensions declaring the grouping
    of each violin.
    """

    group = param.String(default='Violin', constant=True)


class HexTiles(Selection2DExpr, Dataset, Element2D):
    """
    HexTiles is a statistical element with a visual representation
    that renders a density map of the data values as a hexagonal grid.

    Before display the data is aggregated either by counting the values
    in each hexagonal bin or by computing aggregates.
    """

    group = param.String(default='HexTiles', constant=True)

    # Exactly two key dimensions (list length bounded to 2..2).
    kdims = param.List(default=[Dimension('x'), Dimension('y')],
                       bounds=(2, 2))