Source code for holoviews.core.data.array

try:
    # Python 2 compatibility: prefer the lazy izip over the builtin zip
    from itertools import izip as zip
except ImportError:
    pass

import numpy as np

from .interface import Interface, DataError
from ..dimension import dimension_name
from ..element import Element
from ..ndmapping import NdMapping, item_check, sorted_context
from .. import util


class ArrayInterface(Interface):

    types = (np.ndarray,)

    datatype = 'array'

    named = False

    @classmethod
    def dimension_type(cls, dataset, dim):
        return dataset.data.dtype.type

    @classmethod
    def init(cls, eltype, data, kdims, vdims):
        if kdims is None:
            kdims = eltype.kdims
        if vdims is None:
            vdims = eltype.vdims

        # Normalize the supported input formats to a single 2D array
        dimensions = [dimension_name(d) for d in kdims + vdims]
        if ((isinstance(data, dict) or util.is_dataframe(data)) and
                all(d in data for d in dimensions)):
            dataset = [d if isinstance(d, np.ndarray) else np.asarray(data[d])
                       for d in dimensions]
            if len(set(d.dtype.kind for d in dataset)) > 1:
                raise ValueError('ArrayInterface expects all columns to be of the same dtype')
            data = np.column_stack(dataset)
        elif isinstance(data, dict) and not all(d in data for d in dimensions):
            dict_data = sorted(data.items())
            dataset = zip(*((util.wrap_tuple(k)+util.wrap_tuple(v))
                            for k, v in dict_data))
            data = np.column_stack(list(dataset))
        elif isinstance(data, tuple):
            data = [d if isinstance(d, np.ndarray) else np.asarray(d) for d in data]
            if len(set(d.dtype.kind for d in data)) > 1:
                raise ValueError('ArrayInterface expects all columns to be of the same dtype')
            elif cls.expanded(data):
                data = np.column_stack(data)
            else:
                raise ValueError('ArrayInterface expects data to be of uniform shape.')
        elif isinstance(data, list) and data == []:
            data = np.empty((0, len(dimensions)))
        elif not isinstance(data, np.ndarray):
            data = np.array([], ndmin=2).T if data is None else list(data)

        try:
            data = np.array(data)
        except Exception:
            data = None

        if kdims is None:
            kdims = eltype.kdims
        if vdims is None:
            vdims = eltype.vdims

        if data is None or data.ndim > 2 or data.dtype.kind in ['S', 'U', 'O']:
            raise ValueError("ArrayInterface could not handle input type.")
        elif data.ndim == 1:
            if eltype._auto_indexable_1d and len(kdims)+len(vdims) > 1:
                data = np.column_stack([np.arange(len(data)), data])
            else:
                data = np.atleast_2d(data).T
        return data, {'kdims': kdims, 'vdims': vdims}, {}

    @classmethod
    def validate(cls, dataset, vdims=True):
        ndims = len(dataset.dimensions()) if vdims else dataset.ndims
        ncols = dataset.data.shape[1] if dataset.data.ndim > 1 else 1
        if ncols < ndims:
            raise DataError("Supplied data does not match specified "
                            "dimensions, expected at least %s columns." % ndims, cls)

    @classmethod
    def isscalar(cls, dataset, dim):
        idx = dataset.get_dimension_index(dim)
        return len(np.unique(dataset.data[:, idx])) == 1

    @classmethod
    def array(cls, dataset, dimensions):
        if dimensions:
            indices = [dataset.get_dimension_index(d) for d in dimensions]
            return dataset.data[:, indices]
        else:
            return dataset.data

    @classmethod
    def dtype(cls, dataset, dimension):
        return dataset.data.dtype

    @classmethod
    def add_dimension(cls, dataset, dimension, dim_pos, values, vdim):
        data = dataset.data.copy()
        return np.insert(data, dim_pos, values, axis=1)

    @classmethod
    def sort(cls, dataset, by=[], reverse=False):
        data = dataset.data
        if len(by) == 1:
            sorting = cls.values(dataset, by[0]).argsort()
        else:
            # View the array as a recarray so argsort can order on multiple fields
            dtypes = [(d.name, dataset.data.dtype) for d in dataset.dimensions()]
            sort_fields = tuple(dataset.get_dimension(d).name for d in by)
            sorting = dataset.data.view(dtypes, np.recarray).T
            sorting = sorting.argsort(order=sort_fields)[0]
        sorted_data = data[sorting]
        return sorted_data[::-1] if reverse else sorted_data

    @classmethod
    def values(cls, dataset, dim, expanded=True, flat=True, compute=True,
               keep_index=False):
        data = dataset.data
        dim_idx = dataset.get_dimension_index(dim)
        if data.ndim == 1:
            data = np.atleast_2d(data).T
        values = data[:, dim_idx]
        if not expanded:
            return util.unique_array(values)
        return values

    @classmethod
    def mask(cls, dataset, mask, mask_value=np.nan):
        masked = np.copy(dataset.data)
        masked[mask] = mask_value
        return masked

    @classmethod
    def reindex(cls, dataset, kdims=None, vdims=None):
        # Rebuild the array with columns in the requested dimension order
        dims = kdims + vdims
        data = [dataset.dimension_values(d) for d in dims]
        return np.column_stack(data)

    @classmethod
    def groupby(cls, dataset, dimensions, container_type, group_type, **kwargs):
        data = dataset.data

        # Get dimension objects, labels, indexes and data
        dimensions = [dataset.get_dimension(d, strict=True) for d in dimensions]
        dim_idxs = [dataset.get_dimension_index(d) for d in dimensions]
        kdims = [kdim for kdim in dataset.kdims
                 if kdim not in dimensions]
        vdims = dataset.vdims

        # Find unique entries along supplied dimensions
        # by creating a view that treats the selected
        # groupby keys as a single object.
        indices = data[:, dim_idxs].copy()
        group_shape = indices.dtype.itemsize * indices.shape[1]
        view = indices.view(np.dtype((np.void, group_shape)))
        _, idx = np.unique(view, return_index=True)
        idx.sort()
        unique_indices = indices[idx]

        # Get group kwargs
        group_kwargs = {}
        if group_type != 'raw' and issubclass(group_type, Element):
            group_kwargs.update(util.get_param_values(dataset))
            group_kwargs['kdims'] = kdims
        group_kwargs.update(kwargs)

        # Iterate over the unique entries building masks
        # to apply the group selection
        grouped_data = []
        col_idxs = [dataset.get_dimension_index(d) for d in dataset.dimensions()
                    if d not in dimensions]
        for group in unique_indices:
            mask = np.logical_and.reduce([data[:, d_idx] == group[i]
                                          for i, d_idx in enumerate(dim_idxs)])
            group_data = data[mask][:, col_idxs]
            if group_type != 'raw':
                if issubclass(group_type, dict):
                    group_data = {d.name: group_data[:, i]
                                  for i, d in enumerate(kdims+vdims)}
                else:
                    group_data = group_type(group_data, **group_kwargs)
            grouped_data.append((tuple(group), group_data))

        if issubclass(container_type, NdMapping):
            with item_check(False), sorted_context(False):
                return container_type(grouped_data, kdims=dimensions)
        else:
            return container_type(grouped_data)

    @classmethod
    def select(cls, dataset, selection_mask=None, **selection):
        if selection_mask is None:
            selection_mask = cls.select_mask(dataset, selection)
        indexed = cls.indexed(dataset, selection)
        data = np.atleast_2d(dataset.data[selection_mask, :])
        if len(data) == 1 and indexed and len(dataset.vdims) == 1:
            data = data[0, dataset.ndims]
        return data

    @classmethod
    def sample(cls, dataset, samples=[]):
        data = dataset.data
        mask = False
        # Build a boolean mask matching rows equal to any supplied sample
        for sample in samples:
            sample_mask = True
            if np.isscalar(sample):
                sample = [sample]
            for i, v in enumerate(sample):
                sample_mask &= data[:, i] == v
            mask |= sample_mask
        return data[mask]
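
    # The unique-row detection in ``groupby`` above views each selected key
    # row as a single opaque ``np.void`` scalar so that ``np.unique`` can
    # compare whole rows at once. A minimal standalone sketch of the same
    # trick (illustrative, using a hypothetical array ``arr``):
    #
    #   >>> arr = np.array([[0, 1], [0, 1], [2, 3]])
    #   >>> view = arr.view(np.dtype((np.void, arr.dtype.itemsize * arr.shape[1])))
    #   >>> _, idx = np.unique(view, return_index=True)
    #   >>> arr[np.sort(idx)]
    #   array([[0, 1],
    #          [2, 3]])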

    @classmethod
    def unpack_scalar(cls, dataset, data):
        """
        Given a dataset object and data in the appropriate format for
        the interface, return a simple scalar.
        """
        if data.shape == (1, 1):
            return data[0, 0]
        return data
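
    # Example of the unpacking above (illustrative): a 1x1 result, such as a
    # single-value aggregation, is reduced to a plain scalar, while anything
    # larger passes through unchanged. ``unpack_scalar`` does not use the
    # dataset argument, so ``None`` suffices here:
    #
    #   >>> ArrayInterface.unpack_scalar(None, np.array([[3.0]]))
    #   3.0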

    @classmethod
    def assign(cls, dataset, new_data):
        data = dataset.data.copy()
        for d, arr in new_data.items():
            if dataset.get_dimension(d) is None:
                continue
            idx = dataset.get_dimension_index(d)
            data[:, idx] = arr
        new_cols = [arr for d, arr in new_data.items()
                    if dataset.get_dimension(d) is None]
        return np.column_stack([data]+new_cols)

    @classmethod
    def aggregate(cls, dataset, dimensions, function, **kwargs):
        reindexed = dataset.reindex(dimensions)
        grouped = (cls.groupby(reindexed, dimensions, list, 'raw')
                   if len(dimensions) else [((), reindexed.data)])

        rows = []
        for k, group in grouped:
            if isinstance(function, np.ufunc):
                reduced = function.reduce(group, axis=0, **kwargs)
            else:
                reduced = function(group, axis=0, **kwargs)
            rows.append(np.concatenate([k, (reduced,) if np.isscalar(reduced)
                                        else reduced]))
        return np.atleast_2d(rows), []

    @classmethod
    def iloc(cls, dataset, index):
        rows, cols = index
        if np.isscalar(cols):
            if isinstance(cols, util.basestring):
                cols = dataset.get_dimension_index(cols)
            if np.isscalar(rows):
                return dataset.data[rows, cols]
            cols = [dataset.get_dimension_index(cols)]
        elif not isinstance(cols, slice):
            cols = [dataset.get_dimension_index(d) for d in cols]

        if np.isscalar(rows):
            rows = [rows]
        data = dataset.data[rows, :][:, cols]
        if data.ndim == 1:
            return np.atleast_2d(data).T
        return data
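

# A usage sketch for the positional indexing implemented by ``iloc`` above
# (illustrative; assumes holoviews is importable as ``hv`` and that the
# Dataset is backed by this interface via ``datatype=['array']``):
#
#   >>> import holoviews as hv
#   >>> table = np.column_stack([np.arange(5), np.arange(5) ** 2])
#   >>> ds = hv.Dataset(table, kdims=['x'], vdims=['y'], datatype=['array'])
#   >>> ds.iloc[0, 1]       # scalar: first row of the 'y' column
#   >>> ds.iloc[1:3, 'x']   # rows 1-2 of the 'x' column, as a new Dataset
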
Interface.register(ArrayInterface)
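
# Registration inserts the interface into the ``Interface.interfaces``
# registry under its ``datatype`` name, making it selectable on any Dataset.
# A quick check (illustrative):
#
#   >>> Interface.interfaces['array'] is ArrayInterface
#   True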