# Python 2 compatibility: use itertools.izip if available, otherwise fall back
# to the builtin zip on Python 3.
try:
    from itertools import izip as zip
except ImportError:
    pass
import numpy as np
from .interface import Interface, DataError
from ..dimension import dimension_name
from ..element import Element
from ..ndmapping import NdMapping, item_check, sorted_context
from .. import util
class ArrayInterface(Interface):
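    """
    Interface for homogeneous NumPy array data, mapping each key and
    value dimension of a Dataset to a column of a 2D array so that all
    dimensions share a single dtype.
    """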
types = (np.ndarray,)
datatype = 'array'
named = False
@classmethod
def dimension_type(cls, dataset, dim):
return dataset.data.dtype.type
@classmethod
def init(cls, eltype, data, kdims, vdims):
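        """
        Coerce the supplied data (dict, DataFrame, tuple, list or array)
        into a single 2D NumPy array with one column per dimension.
        """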
if kdims is None:
kdims = eltype.kdims
if vdims is None:
vdims = eltype.vdims
dimensions = [dimension_name(d) for d in kdims + vdims]
if ((isinstance(data, dict) or util.is_dataframe(data)) and
all(d in data for d in dimensions)):
            dataset = [data[d] if isinstance(data[d], np.ndarray) else np.asarray(data[d])
                       for d in dimensions]
if len(set(d.dtype.kind for d in dataset)) > 1:
raise ValueError('ArrayInterface expects all columns to be of the same dtype')
data = np.column_stack(dataset)
elif isinstance(data, dict) and not all(d in data for d in dimensions):
dict_data = sorted(data.items())
dataset = zip(*((util.wrap_tuple(k)+util.wrap_tuple(v))
for k, v in dict_data))
data = np.column_stack(list(dataset))
elif isinstance(data, tuple):
data = [d if isinstance(d, np.ndarray) else np.asarray(d) for d in data]
if len(set(d.dtype.kind for d in data)) > 1:
raise ValueError('ArrayInterface expects all columns to be of the same dtype')
elif cls.expanded(data):
data = np.column_stack(data)
else:
raise ValueError('ArrayInterface expects data to be of uniform shape.')
elif isinstance(data, list) and data == []:
data = np.empty((0,len(dimensions)))
elif not isinstance(data, np.ndarray):
data = np.array([], ndmin=2).T if data is None else list(data)
try:
data = np.array(data)
        except Exception:
data = None
if kdims is None:
kdims = eltype.kdims
if vdims is None:
vdims = eltype.vdims
if data is None or data.ndim > 2 or data.dtype.kind in ['S', 'U', 'O']:
raise ValueError("ArrayInterface interface could not handle input type.")
elif data.ndim == 1:
if eltype._auto_indexable_1d and len(kdims)+len(vdims)>1:
data = np.column_stack([np.arange(len(data)), data])
else:
data = np.atleast_2d(data).T
return data, {'kdims':kdims, 'vdims':vdims}, {}
@classmethod
def validate(cls, dataset, vdims=True):
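        """
        Check that the array supplies at least one column per declared
        dimension, raising a DataError otherwise.
        """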
ndims = len(dataset.dimensions()) if vdims else dataset.ndims
ncols = dataset.data.shape[1] if dataset.data.ndim > 1 else 1
if ncols < ndims:
raise DataError("Supplied data does not match specified "
"dimensions, expected at least %s columns." % ndims, cls)
@classmethod
def isscalar(cls, dataset, dim):
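        """
        Return whether the column of the given dimension contains a
        single unique value.
        """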
idx = dataset.get_dimension_index(dim)
return len(np.unique(dataset.data[:, idx])) == 1
@classmethod
def array(cls, dataset, dimensions):
if dimensions:
indices = [dataset.get_dimension_index(d) for d in dimensions]
return dataset.data[:, indices]
else:
return dataset.data
@classmethod
def dtype(cls, dataset, dimension):
return dataset.data.dtype
@classmethod
def add_dimension(cls, dataset, dimension, dim_pos, values, vdim):
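        """
        Return a copy of the array with the supplied values inserted as
        a new column at position dim_pos.
        """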
data = dataset.data.copy()
return np.insert(data, dim_pos, values, axis=1)
@classmethod
def sort(cls, dataset, by=[], reverse=False):
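        """
        Return the data sorted along the supplied dimensions; sorts on
        multiple columns use a record-array view of the data.
        """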
data = dataset.data
if len(by) == 1:
sorting = cls.values(dataset, by[0]).argsort()
else:
dtypes = [(d.name, dataset.data.dtype) for d in dataset.dimensions()]
sort_fields = tuple(dataset.get_dimension(d).name for d in by)
sorting = dataset.data.view(dtypes, np.recarray).T
sorting = sorting.argsort(order=sort_fields)[0]
sorted_data = data[sorting]
return sorted_data[::-1] if reverse else sorted_data
@classmethod
def values(cls, dataset, dim, expanded=True, flat=True, compute=True, keep_index=False):
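        """
        Return the column of values for the requested dimension, or only
        its unique values when expanded is False.
        """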
data = dataset.data
dim_idx = dataset.get_dimension_index(dim)
if data.ndim == 1:
data = np.atleast_2d(data).T
values = data[:, dim_idx]
if not expanded:
return util.unique_array(values)
return values
@classmethod
def mask(cls, dataset, mask, mask_value=np.nan):
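        """
        Return a copy of the array with the entries selected by the
        boolean mask replaced by mask_value.
        """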
masked = np.copy(dataset.data)
masked[mask] = mask_value
return masked
@classmethod
def reindex(cls, dataset, kdims=None, vdims=None):
        # Rebuild the array with columns ordered by the requested dimensions
dims = kdims + vdims
data = [dataset.dimension_values(d) for d in dims]
return np.column_stack(data)
@classmethod
def groupby(cls, dataset, dimensions, container_type, group_type, **kwargs):
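        """
        Group the rows by unique values along the supplied dimensions,
        wrapping each group in group_type and returning the groups in a
        container_type keyed by the unique values.
        """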
data = dataset.data
# Get dimension objects, labels, indexes and data
dimensions = [dataset.get_dimension(d, strict=True) for d in dimensions]
dim_idxs = [dataset.get_dimension_index(d) for d in dimensions]
kdims = [kdim for kdim in dataset.kdims
if kdim not in dimensions]
vdims = dataset.vdims
# Find unique entries along supplied dimensions
# by creating a view that treats the selected
# groupby keys as a single object.
indices = data[:, dim_idxs].copy()
group_shape = indices.dtype.itemsize * indices.shape[1]
view = indices.view(np.dtype((np.void, group_shape)))
_, idx = np.unique(view, return_index=True)
idx.sort()
unique_indices = indices[idx]
# Get group
group_kwargs = {}
if group_type != 'raw' and issubclass(group_type, Element):
group_kwargs.update(util.get_param_values(dataset))
group_kwargs['kdims'] = kdims
group_kwargs.update(kwargs)
# Iterate over the unique entries building masks
# to apply the group selection
grouped_data = []
col_idxs = [dataset.get_dimension_index(d) for d in dataset.dimensions()
if d not in dimensions]
for group in unique_indices:
mask = np.logical_and.reduce([data[:, d_idx] == group[i]
for i, d_idx in enumerate(dim_idxs)])
group_data = data[mask][:, col_idxs]
            if group_type != 'raw':
if issubclass(group_type, dict):
group_data = {d.name: group_data[:, i] for i, d in
enumerate(kdims+vdims)}
else:
group_data = group_type(group_data, **group_kwargs)
grouped_data.append((tuple(group), group_data))
if issubclass(container_type, NdMapping):
with item_check(False), sorted_context(False):
return container_type(grouped_data, kdims=dimensions)
else:
return container_type(grouped_data)
@classmethod
def select(cls, dataset, selection_mask=None, **selection):
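        """
        Apply the selection mask (or build one from the supplied
        selection) and return the matching rows, returning a single
        value when an indexed selection yields exactly one row and
        there is one value dimension.
        """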
if selection_mask is None:
selection_mask = cls.select_mask(dataset, selection)
indexed = cls.indexed(dataset, selection)
data = np.atleast_2d(dataset.data[selection_mask, :])
if len(data) == 1 and indexed and len(dataset.vdims) == 1:
data = data[0, dataset.ndims]
return data
@classmethod
def sample(cls, dataset, samples=[]):
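        """
        Return the rows whose leading columns match any of the supplied
        sample values.
        """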
data = dataset.data
mask = False
for sample in samples:
sample_mask = True
if np.isscalar(sample): sample = [sample]
for i, v in enumerate(sample):
sample_mask &= data[:, i]==v
mask |= sample_mask
return data[mask]
    @classmethod
def unpack_scalar(cls, dataset, data):
"""
Given a dataset object and data in the appropriate format for
the interface, return a simple scalar.
"""
if data.shape == (1, 1):
return data[0, 0]
return data
@classmethod
def assign(cls, dataset, new_data):
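        """
        Return a copy of the array with the columns of existing
        dimensions overwritten by new_data and any unknown dimensions
        appended as new columns.
        """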
data = dataset.data.copy()
for d, arr in new_data.items():
if dataset.get_dimension(d) is None:
continue
idx = dataset.get_dimension_index(d)
data[:, idx] = arr
new_cols = [arr for d, arr in new_data.items() if dataset.get_dimension(d) is None]
return np.column_stack([data]+new_cols)
@classmethod
def aggregate(cls, dataset, dimensions, function, **kwargs):
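        """
        Group the data by the supplied dimensions and reduce each group
        along its rows with the given function, returning the
        aggregated rows.
        """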
reindexed = dataset.reindex(dimensions)
grouped = (cls.groupby(reindexed, dimensions, list, 'raw')
if len(dimensions) else [((), reindexed.data)])
rows = []
for k, group in grouped:
if isinstance(function, np.ufunc):
reduced = function.reduce(group, axis=0, **kwargs)
else:
reduced = function(group, axis=0, **kwargs)
rows.append(np.concatenate([k, (reduced,) if np.isscalar(reduced) else reduced]))
return np.atleast_2d(rows), []
@classmethod
def iloc(cls, dataset, index):
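        """
        Index the array by integer row and column position, where
        columns may also be referenced by dimension name.
        """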
rows, cols = index
if np.isscalar(cols):
if isinstance(cols, util.basestring):
cols = dataset.get_dimension_index(cols)
if np.isscalar(rows):
return dataset.data[rows, cols]
cols = [dataset.get_dimension_index(cols)]
elif not isinstance(cols, slice):
cols = [dataset.get_dimension_index(d) for d in cols]
if np.isscalar(rows):
rows = [rows]
data = dataset.data[rows, :][:, cols]
if data.ndim == 1:
return np.atleast_2d(data).T
return data
Interface.register(ArrayInterface)