from __future__ import absolute_import
try:
    from itertools import izip as zip  # Python 2 compatibility; falls back to builtin zip on Python 3
except ImportError:
pass
import types
import copy
import numpy as np
import param
from param.parameterized import add_metaclass, ParameterizedMetaclass
from .. import util
from ..accessors import Redim
from ..dimension import (
Dimension, Dimensioned, LabelledData, dimension_name, process_dimensions
)
from ..element import Element
from ..ndmapping import OrderedDict, MultiDimensionalMapping
from ..spaces import HoloMap, DynamicMap
from .interface import Interface, iloc, ndloc
from .array import ArrayInterface
from .dictionary import DictInterface
from .grid import GridInterface
from .multipath import MultiInterface # noqa (API import)
from .image import ImageInterface # noqa (API import)
default_datatype = 'dictionary'
datatypes = ['dictionary', 'grid']
try:
import pandas as pd # noqa (Availability import)
from .pandas import PandasInterface
default_datatype = 'dataframe'
datatypes.insert(0, 'dataframe')
DFColumns = PandasInterface
except ImportError:
pd = None
except Exception as e:
pd = None
    param.main.param.warning('Pandas interface failed to import with '
                             'the following error: %s' % e)
try:
from .spatialpandas import SpatialPandasInterface # noqa (API import)
datatypes.append('spatialpandas')
except ImportError:
pass
try:
from .xarray import XArrayInterface # noqa (Conditional API import)
datatypes.append('xarray')
except ImportError:
pass
try:
from .cudf import cuDFInterface # noqa (Conditional API import)
datatypes.append('cuDF')
except ImportError:
pass
try:
from .dask import DaskInterface # noqa (Conditional API import)
datatypes.append('dask')
except ImportError:
pass
if 'array' not in datatypes:
datatypes.append('array')
if 'multitabular' not in datatypes:
datatypes.append('multitabular')
def concat(datasets, datatype=None):
"""Concatenates collection of datasets along NdMapping dimensions.
Concatenates multiple datasets wrapped in an NdMapping type along
all of its dimensions. Before concatenation all datasets are cast
to the same datatype, which may be explicitly defined or
implicitly derived from the first datatype that is
encountered. For columnar data concatenation adds the columns for
the dimensions being concatenated along and then concatenates all
the old and new columns. For gridded data a new axis is created
for each dimension being concatenated along and then
hierarchically concatenates along each dimension.
Args:
datasets: NdMapping of Datasets to concatenate
datatype: Datatype to cast data to before concatenation
Returns:
Concatenated dataset
"""
return Interface.concatenate(datasets, datatype)
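# Illustrative use of concat, assuming the hypothetical datasets below:
#
#   hmap = HoloMap({0: Dataset({'x': [0, 1], 'y': [1, 2]}, 'x', 'y'),
#                   1: Dataset({'x': [0, 1], 'y': [3, 4]}, 'x', 'y')},
#                  kdims='chunk')
#   combined = concat(hmap)   # adds a 'chunk' column and concatenates the rows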
class DataConversion(object):
"""
DataConversion is a very simple container object which can be
given an existing Dataset Element and provides methods to convert
the Dataset into most other Element types.
"""
def __init__(self, element):
self._element = element
def __call__(self, new_type, kdims=None, vdims=None, groupby=None,
sort=False, **kwargs):
"""
Generic conversion method for Dataset based Element
types. Supply the Dataset Element type to convert to and
optionally the key dimensions (kdims), value dimensions
        (vdims) and the dimensions to group over. Converted Columns
can be automatically sorted via the sort option and kwargs can
be passed through.
"""
element_params = new_type.param.objects()
kdim_param = element_params['kdims']
vdim_param = element_params['vdims']
if isinstance(kdim_param.bounds[1], int):
ndim = min([kdim_param.bounds[1], len(kdim_param.default)])
else:
ndim = None
nvdim = vdim_param.bounds[1] if isinstance(vdim_param.bounds[1], int) else None
if kdims is None:
kd_filter = groupby or []
if not isinstance(kd_filter, list):
kd_filter = [groupby]
kdims = [kd for kd in self._element.kdims if kd not in kd_filter][:ndim]
elif kdims and not isinstance(kdims, list): kdims = [kdims]
if vdims is None:
vdims = [d for d in self._element.vdims if d not in kdims][:nvdim]
if vdims and not isinstance(vdims, list): vdims = [vdims]
# Checks Element type supports dimensionality
type_name = new_type.__name__
for dim_type, dims in (('kdims', kdims), ('vdims', vdims)):
min_d, max_d = element_params[dim_type].bounds
if ((min_d is not None and len(dims) < min_d) or
(max_d is not None and len(dims) > max_d)):
raise ValueError("%s %s must be between length %s and %s." %
(type_name, dim_type, min_d, max_d))
if groupby is None:
groupby = [d for d in self._element.kdims if d not in kdims+vdims]
elif groupby and not isinstance(groupby, list):
groupby = [groupby]
if self._element.interface.gridded:
dropped_kdims = [kd for kd in self._element.kdims if kd not in groupby+kdims]
if dropped_kdims:
selected = self._element.reindex(groupby+kdims, vdims)
else:
selected = self._element
else:
if pd and issubclass(self._element.interface, PandasInterface):
ds_dims = self._element.dimensions()
ds_kdims = [self._element.get_dimension(d) if d in ds_dims else d
for d in groupby+kdims]
ds_vdims = [self._element.get_dimension(d) if d in ds_dims else d
for d in vdims]
selected = self._element.clone(kdims=ds_kdims, vdims=ds_vdims)
else:
selected = self._element.reindex(groupby+kdims, vdims)
params = {'kdims': [selected.get_dimension(kd, strict=True) for kd in kdims],
'vdims': [selected.get_dimension(vd, strict=True) for vd in vdims],
'label': selected.label}
if selected.group != selected.param.objects('existing')['group'].default:
params['group'] = selected.group
params.update(kwargs)
if len(kdims) == selected.ndims or not groupby:
# Propagate dataset
params['dataset'] = self._element.dataset
params['pipeline'] = self._element._pipeline
element = new_type(selected, **params)
return element.sort() if sort else element
group = selected.groupby(groupby, container_type=HoloMap,
group_type=new_type, **params)
if sort:
return group.map(lambda x: x.sort(), [new_type])
else:
return group
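# DataConversion is not normally instantiated directly; it is exposed via the
# Dataset.to property. An illustrative sketch (the data below is hypothetical):
#
#   ds = Dataset({'x': [0, 1, 2], 'y': [0, 1, 4], 'cat': ['a', 'b', 'a']},
#                kdims=['x', 'cat'], vdims='y')
#   curves = ds.to(Curve, 'x', 'y', groupby='cat')  # HoloMap of Curves keyed by 'cat'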
@add_metaclass(PipelineMeta)
class Dataset(Element):
"""
Dataset provides a general baseclass for Element types that
contain structured data and supports a range of data formats.
The Dataset class supports various methods offering a consistent
way of working with the stored data regardless of the storage
format used. These operations include indexing, selection and
various ways of aggregating or collapsing the data with a supplied
function.
"""
datatype = param.List(datatypes, doc="""
A priority list of the data types to be used for storage
on the .data attribute. If the input supplied to the element
constructor cannot be put into the requested format, the next
format listed will be used until a suitable format is found (or
the data fails to be understood).""")
group = param.String(default='Dataset', constant=True)
# In the 1D case the interfaces should not automatically add x-values
# to supplied data
_auto_indexable_1d = False
# Define a class used to transform Datasets into other Element types
_conversion_interface = DataConversion
# Whether the key dimensions are specified as bins
_binned = False
_vdim_reductions = {}
_kdim_reductions = {}
def __new__(cls, data=None, kdims=None, vdims=None, **kwargs):
"""
Allows casting a DynamicMap to an Element class like hv.Curve, by applying the
class to each underlying element.
"""
if isinstance(data, DynamicMap):
class_name = cls.__name__
repr_kdims = 'kdims=%r' % kdims if kdims else None
repr_vdims = 'vdims=%r' % vdims if vdims else None
repr_kwargs = (', '.join('%s=%r' % (k,v) for k,v in kwargs.items())
if kwargs else None)
extras = ', '.join([el for el in [repr_kdims, repr_vdims, repr_kwargs]
if el is not None])
extras = ', ' + extras if extras else ''
            apply_args = 'hv.{class_name}{extras}'.format(class_name=class_name,
                                                          extras=extras)
            msg = ("Cannot construct a {class_name} from the supplied object of "
                   "type DynamicMap. Implicitly creating a DynamicMap of "
                   "{class_name} objects, but instead please explicitly call "
                   ".apply({apply_args}) on the supplied DynamicMap.")
            cls.param.warning(cls, msg.format(class_name=class_name,
                                              apply_args=apply_args))
return data.apply(cls, per_element=True, kdims=kdims, vdims=vdims, **kwargs)
else:
return super(Dataset, cls).__new__(cls)
def __init__(self, data, kdims=None, vdims=None, **kwargs):
from ...operation.element import (
chain as chain_op, factory
)
self._in_method = False
input_data = data
dataset_provided = 'dataset' in kwargs
input_dataset = kwargs.pop('dataset', None)
input_pipeline = kwargs.pop('pipeline', None)
input_transforms = kwargs.pop('transforms', None)
if isinstance(data, Element):
if 'kdims' in kwargs:
kwargs['kdims'] = [
data.get_dimension(kd) if isinstance(kd, util.basestring) else kd
for kd in kwargs['kdims']
]
            if 'vdims' in kwargs:
kwargs['vdims'] = [
data.get_dimension(vd) if isinstance(vd, util.basestring) else vd
for vd in kwargs['vdims']
]
pvals = util.get_param_values(data)
kwargs.update([(l, pvals[l]) for l in ['group', 'label']
if l in pvals and l not in kwargs])
if isinstance(data, Dataset):
if not dataset_provided and data._dataset is not None:
input_dataset = data._dataset
if input_pipeline is None:
input_pipeline = data.pipeline
if input_transforms is None:
input_transforms = data._transforms
kwargs.update(process_dimensions(kdims, vdims))
kdims, vdims = kwargs.get('kdims'), kwargs.get('vdims')
validate_vdims = kwargs.pop('_validate_vdims', True)
initialized = Interface.initialize(type(self), data, kdims, vdims,
datatype=kwargs.get('datatype'))
(data, self.interface, dims, extra_kws) = initialized
super(Dataset, self).__init__(data, **dict(kwargs, **dict(dims, **extra_kws)))
self.interface.validate(self, validate_vdims)
# Handle _pipeline property
if input_pipeline is None:
input_pipeline = chain_op.instance()
kwargs['kdims'] = self.kdims
kwargs['vdims'] = self.vdims
init_op = factory.instance(
output_type=type(self),
args=[],
kwargs=dict(kwargs, kdims=self.kdims, vdims=self.vdims),
)
self._pipeline = input_pipeline.instance(
operations=input_pipeline.operations + [init_op],
output_type=type(self),
)
self._transforms = input_transforms or []
# Handle initializing the dataset property.
self._dataset = input_dataset
if self._dataset is None and isinstance(input_data, Dataset) and not dataset_provided:
if input_data.data is self.data:
self._dataset = {'kdims': input_data.kdims, 'vdims': input_data.vdims}
else:
self._dataset = Dataset(input_data, dataset=None, pipeline=None,
transforms=None, _validate_vdims=False)
if hasattr(self, '_binned'):
self._dataset._binned = self._binned
@property
def redim(self):
return Redim(self, mode='dataset')
@property
def dataset(self):
"""
The Dataset that this object was created from
"""
if self._dataset is None:
if type(self) is Dataset:
return self
datatype = list(util.unique_iterator(self.datatype+Dataset.datatype))
dataset = Dataset(self, dataset=None, pipeline=None, transforms=None,
_validate_vdims=False, datatype=datatype)
if hasattr(self, '_binned'):
dataset._binned = self._binned
return dataset
elif not isinstance(self._dataset, Dataset):
return Dataset(self, _validate_vdims=False, **self._dataset)
return self._dataset
@property
def pipeline(self):
"""
Chain operation that evaluates the sequence of operations that was
used to create this object, starting with the Dataset stored in
dataset property
"""
return self._pipeline
    def closest(self, coords=[], **kwargs):
"""Snaps coordinate(s) to closest coordinate in Dataset
Args:
coords: List of coordinates expressed as tuples
**kwargs: Coordinates defined as keyword pairs
Returns:
List of tuples of the snapped coordinates
Raises:
NotImplementedError: Raised if snapping is not supported
"""
if self.ndims > 1:
raise NotImplementedError("Closest method currently only "
"implemented for 1D Elements")
if kwargs:
if len(kwargs) > 1:
raise NotImplementedError("Closest method currently only "
"supports 1D indexes")
samples = list(kwargs.values())[0]
coords = samples if isinstance(samples, list) else [samples]
xs = self.dimension_values(0)
if xs.dtype.kind in 'SO':
raise NotImplementedError("Closest only supported for numeric types")
idxs = [np.argmin(np.abs(xs-coord)) for coord in coords]
return [xs[idx] for idx in idxs]
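    # Illustrative sketch of closest() on a 1D Dataset (hypothetical values):
    #
    #   ds = Dataset({'x': [0.0, 0.5, 1.0], 'y': [1, 2, 3]}, 'x', 'y')
    #   ds.closest([0.1, 0.6])   # -> [0.0, 0.5]
    #   ds.closest(x=0.9)        # -> [1.0]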
    def sort(self, by=None, reverse=False):
"""
Sorts the data by the values along the supplied dimensions.
Args:
by: Dimension(s) to sort by
reverse (bool, optional): Reverse sort order
Returns:
Sorted Dataset
"""
if by is None:
by = self.kdims
elif not isinstance(by, list):
by = [by]
sorted_columns = self.interface.sort(self, by, reverse)
return self.clone(sorted_columns)
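    # Illustrative sketch of sort() (hypothetical data):
    #
    #   ds = Dataset({'x': [3, 1, 2], 'y': [30, 10, 20]}, 'x', 'y')
    #   ds.sort()                   # sorted by the key dimension 'x'
    #   ds.sort('y', reverse=True)  # sorted by 'y' in descending order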
    def range(self, dim, data_range=True, dimension_range=True):
"""Return the lower and upper bounds of values along dimension.
Args:
            dim: The dimension to compute the range on.
data_range (bool): Compute range from data values
dimension_range (bool): Include Dimension ranges
Whether to include Dimension range and soft_range
in range calculation
Returns:
Tuple containing the lower and upper bound
"""
dim = self.get_dimension(dim)
if dim is None or (not data_range and not dimension_range):
return (None, None)
elif all(util.isfinite(v) for v in dim.range) and dimension_range:
return dim.range
elif dim in self.dimensions() and data_range and bool(self):
lower, upper = self.interface.range(self, dim)
else:
lower, upper = (np.NaN, np.NaN)
if not dimension_range:
return lower, upper
return util.dimension_range(lower, upper, dim.range, dim.soft_range)
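    # Illustrative sketch of range() (hypothetical data):
    #
    #   ds = Dataset({'x': [0, 1, 2], 'y': [-1.0, 0.5, 2.5]}, 'x', 'y')
    #   ds.range('y')                         # -> (-1.0, 2.5), computed from the data
    #   ds.redim.range(y=(0, 10)).range('y')  # -> (0, 10), the declared Dimension range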
    def add_dimension(self, dimension, dim_pos, dim_val, vdim=False, **kwargs):
"""Adds a dimension and its values to the Dataset
Requires the dimension name or object, the desired position in
the key dimensions and a key value scalar or array of values,
matching the length or shape of the Dataset.
Args:
dimension: Dimension or dimension spec to add
dim_pos (int): Integer index to insert dimension at
dim_val (scalar or ndarray): Dimension value(s) to add
            vdim: Whether to add the dimension as a value dimension
**kwargs: Keyword arguments passed to the cloned element
Returns:
Cloned object containing the new dimension
"""
if isinstance(dimension, (util.basestring, tuple)):
dimension = Dimension(dimension)
if dimension.name in self.kdims:
raise Exception('{dim} dimension already defined'.format(dim=dimension.name))
if vdim:
dims = self.vdims[:]
dims.insert(dim_pos, dimension)
dimensions = dict(vdims=dims)
dim_pos += self.ndims
else:
dims = self.kdims[:]
dims.insert(dim_pos, dimension)
dimensions = dict(kdims=dims)
if issubclass(self.interface, ArrayInterface) and np.asarray(dim_val).dtype != self.data.dtype:
element = self.clone(datatype=[default_datatype])
data = element.interface.add_dimension(element, dimension, dim_pos, dim_val, vdim)
else:
data = self.interface.add_dimension(self, dimension, dim_pos, dim_val, vdim)
return self.clone(data, **dimensions)
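    # Illustrative sketch of add_dimension() (hypothetical data):
    #
    #   ds = Dataset({'x': [0, 1, 2], 'y': [1, 2, 3]}, 'x', 'y')
    #   ds.add_dimension('z', 0, 0.0)                              # constant new key dimension
    #   ds.add_dimension('w', 1, np.array([4, 5, 6]), vdim=True)   # new value dimension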
    def select(self, selection_expr=None, selection_specs=None, **selection):
"""Applies selection by dimension name
Applies a selection along the dimensions of the object using
keyword arguments. The selection may be narrowed to certain
objects using selection_specs. For container objects the
selection will be applied to all children as well.
Selections may select a specific value, slice or set of values:
        * value: Scalar values will select rows with an exact
          match, e.g.:
ds.select(x=3)
* slice: Slices may be declared as tuples of the upper and
lower bound, e.g.:
ds.select(x=(0, 3))
* values: A list of values may be selected using a list or
set, e.g.:
ds.select(x=[0, 1, 2])
* predicate expression: A holoviews.dim expression, e.g.:
from holoviews import dim
ds.select(selection_expr=dim('x') % 2 == 0)
Args:
selection_expr: holoviews.dim predicate expression
specifying selection.
selection_specs: List of specs to match on
A list of types, functions, or type[.group][.label]
strings specifying which objects to apply the
selection on.
**selection: Dictionary declaring selections by dimension
Selections can be scalar values, tuple ranges, lists
of discrete values and boolean arrays
Returns:
            Returns a Dimensioned object containing the selected data
or a scalar if a single value was selected
"""
from ...util.transform import dim
if selection_expr is not None and not isinstance(selection_expr, dim):
raise ValueError("""\
The first positional argument to the Dataset.select method is expected to be a
holoviews.util.transform.dim expression. Use the selection_specs keyword
argument to specify a selection specification""")
if selection_specs is not None and not isinstance(selection_specs, (list, tuple)):
selection_specs = [selection_specs]
selection = {dim_name: sel for dim_name, sel in selection.items()
if dim_name in self.dimensions()+['selection_mask']}
if (selection_specs and not any(self.matches(sp) for sp in selection_specs)
or (not selection and not selection_expr)):
return self
# Handle selection dim expression
if selection_expr is not None:
mask = selection_expr.apply(self, compute=False, keep_index=True)
dataset = self[mask]
else:
dataset = self
# Handle selection kwargs
if selection:
data = dataset.interface.select(dataset, **selection)
else:
data = dataset.data
if np.isscalar(data):
return data
else:
return self.clone(data)
    def reindex(self, kdims=None, vdims=None):
        """Reindexes Dataset dropping static or supplied kdims

        Creates a new object with a reordered or reduced set of key
        dimensions. By default drops all non-varying key dimensions.

        Args:
            kdims (optional): New list of key dimensions
            vdims (optional): New list of value dimensions
Returns:
Reindexed object
"""
gridded = self.interface.gridded
scalars = []
if gridded:
coords = [(d, self.interface.coords(self, d.name)) for d in self.kdims]
scalars = [d for d, vs in coords if len(vs) == 1]
if kdims is None:
# If no key dimensions are defined and interface is gridded
# drop all scalar key dimensions
key_dims = [d for d in self.kdims if (not vdims or d not in vdims)
and not d in scalars]
elif not isinstance(kdims, list):
key_dims = [self.get_dimension(kdims, strict=True)]
else:
key_dims = [self.get_dimension(k, strict=True) for k in kdims]
dropped = [d for d in self.kdims if not d in key_dims and not d in scalars]
new_type = None
if vdims is None:
val_dims = [d for d in self.vdims if not kdims or d not in kdims]
else:
val_dims = [self.get_dimension(v, strict=True) for v in vdims]
new_type = self._vdim_reductions.get(len(val_dims), type(self))
data = self.interface.reindex(self, key_dims, val_dims)
datatype = self.datatype
if gridded and dropped:
interfaces = self.interface.interfaces
datatype = [dt for dt in datatype if not
getattr(interfaces.get(dt, None), 'gridded', True)]
return self.clone(data, kdims=key_dims, vdims=val_dims,
new_type=new_type, datatype=datatype)
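    # Illustrative sketch of reindex() (hypothetical data):
    #
    #   ds = Dataset({'x': [0, 1, 2], 'z': [7, 8, 9], 'y': [1, 2, 3]},
    #                kdims=['x', 'z'], vdims='y')
    #   ds.reindex(kdims=['z', 'x'])  # reorders the key dimensions
    #   ds.reindex(kdims=['x'])       # keeps only 'x' as a key dimension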
def __getitem__(self, slices):
"""
Allows slicing and selecting values in the Dataset object.
Supports multiple indexing modes:
(1) Slicing and indexing along the values of each dimension
in the columns object using either scalars, slices or
sets of values.
(2) Supplying the name of a dimension as the first argument
will return the values along that dimension as a numpy
array.
(3) Slicing of all key dimensions and selecting a single
value dimension by name.
(4) A boolean array index matching the length of the Dataset
object.
"""
slices = util.process_ellipses(self, slices, vdim_selection=True)
if getattr(getattr(slices, 'dtype', None), 'kind', None) == 'b':
if not len(slices) == len(self):
raise IndexError("Boolean index must match length of sliced object")
return self.clone(self.select(selection_mask=slices))
elif slices in [(), Ellipsis]:
return self
if not isinstance(slices, tuple): slices = (slices,)
value_select = None
if len(slices) == 1 and slices[0] in self.dimensions():
return self.dimension_values(slices[0])
elif len(slices) == self.ndims+1 and slices[self.ndims] in self.dimensions():
selection = dict(zip(self.dimensions('key', label=True), slices))
value_select = slices[self.ndims]
elif len(slices) == self.ndims+1 and isinstance(slices[self.ndims],
(Dimension,str)):
raise IndexError("%r is not an available value dimension" % slices[self.ndims])
else:
selection = dict(zip(self.dimensions(label=True), slices))
data = self.select(**selection)
if value_select:
if data.shape[0] == 1:
return data[value_select][0]
else:
return data.reindex(vdims=[value_select])
return data
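    # Illustrative sketches of the indexing modes above (hypothetical data):
    #
    #   ds = Dataset({'x': [0, 1, 2], 'y': [1, 2, 3]}, 'x', 'y')
    #   ds['y']          # (2) values along the 'y' dimension as an array
    #   ds[0:2]          # (1) slice along the key dimension 'x'
    #   ds[0:2, 'y']     # (3) slice 'x' and select the 'y' value dimension
    #   ds[ds['y'] > 1]  # (4) boolean array indexing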
    def sample(self, samples=[], bounds=None, closest=True, **kwargs):
"""Samples values at supplied coordinates.
Allows sampling of element with a list of coordinates matching
the key dimensions, returning a new object containing just the
selected samples. Supports multiple signatures:
Sampling with a list of coordinates, e.g.:
ds.sample([(0, 0), (0.1, 0.2), ...])
Sampling a range or grid of coordinates, e.g.:
1D: ds.sample(3)
2D: ds.sample((3, 3))
Sampling by keyword, e.g.:
ds.sample(x=0)
Args:
samples: List of nd-coordinates to sample
bounds: Bounds of the region to sample
Defined as two-tuple for 1D sampling and four-tuple
for 2D sampling.
closest: Whether to snap to closest coordinates
**kwargs: Coordinates specified as keyword pairs
Keywords of dimensions and scalar coordinates
Returns:
Element containing the sampled coordinates
"""
if kwargs and samples != []:
raise Exception('Supply explicit list of samples or kwargs, not both.')
elif kwargs:
sample = [slice(None) for _ in range(self.ndims)]
for dim, val in kwargs.items():
sample[self.get_dimension_index(dim)] = val
samples = [tuple(sample)]
elif isinstance(samples, tuple) or util.isscalar(samples):
if self.ndims == 1:
xlim = self.range(0)
lower, upper = (xlim[0], xlim[1]) if bounds is None else bounds
edges = np.linspace(lower, upper, samples+1)
linsamples = [(l+u)/2.0 for l,u in zip(edges[:-1], edges[1:])]
elif self.ndims == 2:
(rows, cols) = samples
if bounds:
(l,b,r,t) = bounds
else:
l, r = self.range(0)
b, t = self.range(1)
xedges = np.linspace(l, r, cols+1)
yedges = np.linspace(b, t, rows+1)
xsamples = [(lx+ux)/2.0 for lx,ux in zip(xedges[:-1], xedges[1:])]
ysamples = [(ly+uy)/2.0 for ly,uy in zip(yedges[:-1], yedges[1:])]
Y,X = np.meshgrid(ysamples, xsamples)
linsamples = list(zip(X.flat, Y.flat))
else:
raise NotImplementedError("Regular sampling not implemented "
"for elements with more than two dimensions.")
samples = list(util.unique_iterator(self.closest(linsamples)))
        # Note: Special handling of sampling gridded 2D data as Curve;
        # may be replaced with more general handling
        # see https://github.com/ioam/holoviews/issues/1173
from ...element import Table, Curve
datatype = ['dataframe', 'dictionary', 'dask']
if len(samples) == 1:
sel = {kd.name: s for kd, s in zip(self.kdims, samples[0])}
dims = [kd for kd, v in sel.items() if not np.isscalar(v)]
selection = self.select(**sel)
# If a 1D cross-section of 2D space return Curve
if self.interface.gridded and self.ndims == 2 and len(dims) == 1:
new_type = Curve
kdims = [self.get_dimension(kd) for kd in dims]
else:
new_type = Table
kdims = self.kdims
if np.isscalar(selection):
selection = [samples[0]+(selection,)]
else:
reindexed = selection.clone(new_type=Dataset, datatype=datatype).reindex(kdims)
selection = tuple(reindexed.columns(kdims+self.vdims).values())
            datatype = list(util.unique_iterator(self.datatype+['dataframe', 'dictionary']))
return self.clone(selection, kdims=kdims, new_type=new_type,
datatype=datatype)
lens = set(len(util.wrap_tuple(s)) for s in samples)
if len(lens) > 1:
raise IndexError('Sample coordinates must all be of the same length.')
if closest:
try:
samples = self.closest(samples)
except NotImplementedError:
pass
samples = [util.wrap_tuple(s) for s in samples]
sampled = self.interface.sample(self, samples)
return self.clone(sampled, new_type=Table, datatype=datatype)
    def reduce(self, dimensions=[], function=None, spreadfn=None, **reductions):
"""Applies reduction along the specified dimension(s).
        Allows reducing the values along one or more key dimensions
with the supplied function. Supports two signatures:
Reducing with a list of dimensions, e.g.:
ds.reduce(['x'], np.mean)
Defining a reduction using keywords, e.g.:
ds.reduce(x=np.mean)
Args:
dimensions: Dimension(s) to apply reduction on
Defaults to all key dimensions
function: Reduction operation to apply, e.g. numpy.mean
spreadfn: Secondary reduction to compute value spread
Useful for computing a confidence interval, spread, or
standard deviation.
**reductions: Keyword argument defining reduction
Allows reduction to be defined as keyword pair of
dimension and function
Returns:
The Dataset after reductions have been applied.
"""
if any(dim in self.vdims for dim in dimensions):
raise Exception("Reduce cannot be applied to value dimensions")
function, dims = self._reduce_map(dimensions, function, reductions)
dims = [d for d in self.kdims if d not in dims]
return self.aggregate(dims, function, spreadfn)
    def aggregate(self, dimensions=None, function=None, spreadfn=None, **kwargs):
"""Aggregates data on the supplied dimensions.
Aggregates over the supplied key dimensions with the defined
function or dim_transform specified as a tuple of the transformed
dimension name and dim transform.
Args:
dimensions: Dimension(s) to aggregate on
                Defaults to all key dimensions
function: Aggregation function or transform to apply
Supports both simple functions and dimension transforms
spreadfn: Secondary reduction to compute value spread
Useful for computing a confidence interval, spread, or
standard deviation.
**kwargs: Keyword arguments either passed to the aggregation function
or to create new names for the transformed variables
Returns:
Returns the aggregated Dataset
"""
from ...util.transform import dim
if dimensions is None: dimensions = self.kdims
elif not isinstance(dimensions, list): dimensions = [dimensions]
if isinstance(function, tuple) or any(isinstance(v, dim) for v in kwargs.values()):
dataset = self.clone(new_type=Dataset)
if dimensions:
dataset = dataset.groupby(dimensions)
args = () if function is None else (function,)
transformed = dataset.apply.transform(*args, drop=True, **kwargs)
if not isinstance(transformed, Dataset):
transformed = transformed.collapse()
return transformed.clone(new_type=type(self))
# Handle functions
kdims = [self.get_dimension(d, strict=True) for d in dimensions]
if not len(self):
if spreadfn:
spread_name = spreadfn.__name__
vdims = [d for vd in self.vdims for d in [vd, vd.clone('_'.join([vd.name, spread_name]))]]
else:
vdims = self.vdims
return self.clone([], kdims=kdims, vdims=vdims)
vdims = self.vdims
aggregated, dropped = self.interface.aggregate(self, kdims, function, **kwargs)
aggregated = self.interface.unpack_scalar(self, aggregated)
vdims = [vd for vd in vdims if vd not in dropped]
ndims = len(dimensions)
min_d, max_d = self.param.objects('existing')['kdims'].bounds
generic_type = (min_d is not None and ndims < min_d) or (max_d is not None and ndims > max_d)
if spreadfn:
error, _ = self.interface.aggregate(self, dimensions, spreadfn)
spread_name = spreadfn.__name__
ndims = len(vdims)
error = self.clone(error, kdims=kdims, new_type=Dataset)
combined = self.clone(aggregated, kdims=kdims, new_type=Dataset)
for i, d in enumerate(vdims):
dim = d.clone('_'.join([d.name, spread_name]))
dvals = error.dimension_values(d, flat=False)
combined = combined.add_dimension(dim, ndims+i, dvals, True)
return combined.clone(new_type=Dataset if generic_type else type(self))
if np.isscalar(aggregated):
return aggregated
else:
try:
# Should be checking the dimensions declared on the element are compatible
return self.clone(aggregated, kdims=kdims, vdims=vdims)
            except Exception:
datatype = self.param.objects('existing')['datatype'].default
return self.clone(aggregated, kdims=kdims, vdims=vdims,
new_type=Dataset if generic_type else None,
datatype=datatype)
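    # Illustrative sketch of aggregate() (hypothetical data):
    #
    #   ds = Dataset({'x': [0, 0, 1, 1], 'y': [1, 2, 3, 5]}, 'x', 'y')
    #   ds.aggregate('x', np.mean)          # mean of 'y' for each 'x'
    #   ds.aggregate('x', np.mean, np.std)  # additionally adds a 'y_std' spread column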
    def groupby(self, dimensions=[], container_type=HoloMap, group_type=None,
dynamic=False, **kwargs):
"""Groups object by one or more dimensions
Applies groupby operation over the specified dimensions
returning an object of type container_type (expected to be
dictionary-like) containing the groups.
Args:
dimensions: Dimension(s) to group by
container_type: Type to cast group container to
group_type: Type to cast each group to
dynamic: Whether to return a DynamicMap
**kwargs: Keyword arguments to pass to each group
Returns:
Returns object of supplied container_type containing the
groups. If dynamic=True returns a DynamicMap instead.
"""
if not isinstance(dimensions, list): dimensions = [dimensions]
if not len(dimensions): dimensions = self.dimensions('key', True)
if group_type is None: group_type = type(self)
dimensions = [self.get_dimension(d, strict=True) for d in dimensions]
dim_names = [d.name for d in dimensions]
if dynamic:
group_dims = [kd for kd in self.kdims if kd not in dimensions]
kdims = [self.get_dimension(d) for d in kwargs.pop('kdims', group_dims)]
drop_dim = len(group_dims) != len(kdims)
group_kwargs = dict(util.get_param_values(self), kdims=kdims)
group_kwargs.update(kwargs)
def load_subset(*args):
constraint = dict(zip(dim_names, args))
group = self.select(**constraint)
if np.isscalar(group):
return group_type(([group],), group=self.group,
label=self.label, vdims=self.vdims)
data = group.reindex(kdims)
if drop_dim and self.interface.gridded:
data = data.columns()
return group_type(data, **group_kwargs)
dynamic_dims = [d.clone(values=list(self.interface.values(self, d.name, False)))
for d in dimensions]
return DynamicMap(load_subset, kdims=dynamic_dims)
return self.interface.groupby(self, dim_names, container_type,
group_type, **kwargs)
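    # Illustrative sketch of groupby() (hypothetical data):
    #
    #   ds = Dataset({'x': [0, 1, 0, 1], 'cat': ['a', 'a', 'b', 'b'],
    #                 'y': [1, 2, 3, 4]}, kdims=['x', 'cat'], vdims='y')
    #   groups = ds.groupby('cat')               # HoloMap of Datasets keyed by 'cat'
    #   lazy = ds.groupby('cat', dynamic=True)   # DynamicMap evaluated on access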
def __len__(self):
"Number of values in the Dataset."
return self.interface.length(self)
def __nonzero__(self):
"Whether the Dataset contains any values"
return self.interface.nonzero(self)
__bool__ = __nonzero__
@property
def shape(self):
"Returns the shape of the data."
return self.interface.shape(self)
    def dimension_values(self, dimension, expanded=True, flat=True):
"""Return the values along the requested dimension.
Args:
dimension: The dimension to return values for
expanded (bool, optional): Whether to expand values
Whether to return the expanded values, behavior depends
on the type of data:
* Columnar: If false returns unique values
* Geometry: If false returns scalar values per geometry
* Gridded: If false returns 1D coordinates
flat (bool, optional): Whether to flatten array
Returns:
NumPy array of values along the requested dimension
"""
dim = self.get_dimension(dimension, strict=True)
return self.interface.values(self, dim, expanded, flat)
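    # Illustrative sketch of dimension_values() (hypothetical data):
    #
    #   ds = Dataset({'x': [0, 0, 1], 'y': [1, 2, 3]}, 'x', 'y')
    #   ds.dimension_values('x')                  # -> array([0, 0, 1])
    #   ds.dimension_values('x', expanded=False)  # -> array([0, 1]), unique values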
    def get_dimension_type(self, dim):
"""Get the type of the requested dimension.
Type is determined by Dimension.type attribute or common
type of the dimension values, otherwise None.
Args:
            dim: Dimension to look up by name or by index
Returns:
Declared type of values along the dimension
"""
dim_obj = self.get_dimension(dim)
if dim_obj and dim_obj.type is not None:
return dim_obj.type
return self.interface.dimension_type(self, dim_obj)
    def dframe(self, dimensions=None, multi_index=False):
"""Convert dimension values to DataFrame.
Returns a pandas dataframe of columns along each dimension,
either completely flat or indexed by key dimensions.
Args:
dimensions: Dimensions to return as columns
multi_index: Convert key dimensions to (multi-)index
Returns:
DataFrame of columns corresponding to each dimension
"""
if dimensions is None:
dimensions = [d.name for d in self.dimensions()]
else:
dimensions = [self.get_dimension(d, strict=True).name for d in dimensions]
df = self.interface.dframe(self, dimensions)
if multi_index:
df = df.set_index([d for d in dimensions if d in self.kdims])
return df
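    # Illustrative sketch of dframe() (hypothetical data):
    #
    #   ds = Dataset({'x': [0, 1], 'y': [1.0, 2.0]}, 'x', 'y')
    #   ds.dframe()                  # DataFrame with 'x' and 'y' columns
    #   ds.dframe(multi_index=True)  # 'x' becomes the DataFrame index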
    def columns(self, dimensions=None):
"""Convert dimension values to a dictionary.
Returns a dictionary of column arrays along each dimension
of the element.
Args:
dimensions: Dimensions to return as columns
Returns:
Dictionary of arrays for each dimension
"""
if dimensions is None:
dimensions = self.dimensions()
else:
dimensions = [self.get_dimension(d, strict=True) for d in dimensions]
return OrderedDict([(d.name, self.dimension_values(d)) for d in dimensions])
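    # Illustrative sketch of columns() (hypothetical data):
    #
    #   ds = Dataset({'x': [0, 1], 'y': [1.0, 2.0]}, 'x', 'y')
    #   ds.columns()       # OrderedDict of arrays for 'x' and 'y'
    #   ds.columns(['y'])  # OrderedDict containing only the 'y' column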
@property
def to(self):
"Returns the conversion interface with methods to convert Dataset"
return self._conversion_interface(self)
    def clone(self, data=None, shared_data=True, new_type=None, link=True,
*args, **overrides):
"""Clones the object, overriding data and parameters.
Args:
data: New data replacing the existing data
shared_data (bool, optional): Whether to use existing data
new_type (optional): Type to cast object to
link (bool, optional): Whether clone should be linked
Determines whether Streams and Links attached to
original object will be inherited.
*args: Additional arguments to pass to constructor
**overrides: New keyword arguments to pass to constructor
Returns:
Cloned object
"""
if 'datatype' not in overrides:
datatypes = [self.interface.datatype] + self.datatype
overrides['datatype'] = list(util.unique_iterator(datatypes))
if data is None:
overrides['_validate_vdims'] = False
# Allows datatype conversions
if shared_data:
data = self
if link:
overrides['plot_id'] = self._plot_id
elif self._in_method and 'dataset' not in overrides:
overrides['dataset'] = self.dataset
return super(Dataset, self).clone(
data, shared_data, new_type, *args, **overrides
)
# Overrides of superclass methods that are needed so that PipelineMeta
# will find them to wrap with pipeline support
    def options(self, *args, **kwargs):
return super(Dataset, self).options(*args, **kwargs)
options.__doc__ = Dimensioned.options.__doc__
    def map(self, *args, **kwargs):
return super(Dataset, self).map(*args, **kwargs)
map.__doc__ = LabelledData.map.__doc__
    def relabel(self, *args, **kwargs):
return super(Dataset, self).relabel(*args, **kwargs)
relabel.__doc__ = LabelledData.relabel.__doc__
@property
def iloc(self):
"""Returns iloc indexer with support for columnar indexing.
Returns an iloc object providing a convenient interface to
slice and index into the Dataset using row and column indices.
        Allows selection by integer index, slice, list of integer
        indices and boolean arrays.
Examples:
* Index the first row and column:
dataset.iloc[0, 0]
* Select rows 1 and 2 with a slice:
dataset.iloc[1:3, :]
* Select with a list of integer coordinates:
dataset.iloc[[0, 2, 3]]
"""
return iloc(self)
@property
def ndloc(self):
"""Returns ndloc indexer with support for gridded indexing.
Returns an ndloc object providing nd-array like indexing for
gridded datasets. Follows NumPy array indexing conventions,
allowing for indexing, slicing and selecting a list of indices
on multi-dimensional arrays using integer indices. The order
of array indices is inverted relative to the Dataset key
dimensions, e.g. an Image with key dimensions 'x' and 'y' can
be indexed with ``image.ndloc[iy, ix]``, where ``iy`` and
``ix`` are integer indices along the y and x dimensions.
Examples:
* Index value in 2D array:
dataset.ndloc[3, 1]
* Slice along y-axis of 2D array:
dataset.ndloc[2:5, :]
* Vectorized (non-orthogonal) indexing along x- and y-axes:
dataset.ndloc[[1, 2, 3], [0, 2, 3]]
"""
return ndloc(self)
# Aliases for pickle backward compatibility
Columns = Dataset
ArrayColumns = ArrayInterface
DictColumns = DictInterface
GridColumns = GridInterface