import numpy as np
from .. import util
from ..element import Element
from ..ndmapping import NdMapping, item_check, sorted_context
from .dictionary import DictInterface
from .interface import Interface, DataError
class MultiInterface(Interface):
    """
    MultiInterface allows wrapping around a list of tabular datasets
    including dataframes, the columnar dictionary format or 2D tabular
    NumPy arrays. Using the split method the list of tabular data can
    be split into individual datasets.

    The interface makes the list of tabular datasets appear as a
    single dataset. The interface may be used to represent geometries
    so the behavior depends on the type of geometry being represented.
    """

    # No concrete container types are declared on this interface.
    types = ()

    # Name identifying this interface.
    datatype = 'multitabular'

    # Datatypes that each individual sub-dataset may be initialized with.
    subtypes = ['dictionary', 'dataframe', 'array', 'dask']

    # Geometry types accepted in a dictionary's 'geom_type' entry (see init).
    geom_types = ['Polygon', 'Ring', 'Line', 'Point']

    # Flag marking this interface as holding multiple sub-datasets.
    multi = True

    @classmethod
    def init(cls, eltype, data, kdims, vdims):
        """
        Initializes every sub-dataset in ``data`` with one of the
        ``subtypes`` interfaces.

        Args:
            eltype: Element type supplying the default kdims and vdims
            data: List of tabular datasets; a list of scalar tuples or
                (for Path types) a single tabular dataset is wrapped
                in a list
            kdims: Key dimensions or None to use the eltype defaults
            vdims: Value dimensions or None to use the eltype defaults

        Returns:
            Tuple of (list of initialized data, dimensions dict, {})

        Raises:
            ValueError: If data is not a list and cannot be wrapped
            DataError: If a geometry type is not recognized or the
                sub-datasets do not share datatype, kdims and vdims
        """
        from ...element import Polygons, Path

        new_data = []
        dims = {'kdims': eltype.kdims, 'vdims': eltype.vdims}
        if kdims is not None:
            dims['kdims'] = kdims
        if vdims is not None:
            dims['vdims'] = vdims

        if (isinstance(data, list) and len(data) and
            all(isinstance(d, tuple) and all(util.isscalar(v) for v in d)
                for d in data)):
            # A list of scalar tuples describes a single geometry
            data = [data]
        elif not isinstance(data, list):
            # NOTE(review): this list is truthy whenever any subtype is
            # registered, regardless of whether it applies to the data;
            # any(...) may have been intended. Left as-is to preserve
            # behavior.
            interface = [Interface.interfaces.get(st).applies(data)
                         for st in cls.subtypes if st in Interface.interfaces]
            if (interface or isinstance(data, tuple)) and issubclass(eltype, Path):
                data = [data]
            else:
                raise ValueError('MultiInterface data must be a list of tabular data types.')

        prev_interface, prev_dims = None, None
        for d in data:
            datatype = cls.subtypes
            if isinstance(d, dict):
                if Polygons._hole_key in d:
                    # Only interfaces that understand holes may be used
                    datatype = [dt for dt in datatype
                                if hasattr(Interface.interfaces.get(dt), 'has_holes')]
                geom_type = d.get('geom_type')
                if geom_type is not None and geom_type not in cls.geom_types:
                    raise DataError("Geometry type '%s' not recognized, "
                                    "must be one of %s." % (geom_type, cls.geom_types))
                else:
                    datatype = [dt for dt in datatype
                                if hasattr(Interface.interfaces.get(dt), 'geom_type')]
            d, interface, dims, _ = Interface.initialize(eltype, d, kdims, vdims,
                                                         datatype=datatype)
            if prev_interface:
                if prev_interface != interface:
                    raise DataError('MultiInterface subpaths must all have matching datatype.', cls)
                if dims['kdims'] != prev_dims['kdims']:
                    raise DataError('MultiInterface subpaths must all have matching kdims.', cls)
                if dims['vdims'] != prev_dims['vdims']:
                    raise DataError('MultiInterface subpaths must all have matching vdims.', cls)
            new_data.append(d)
            prev_interface, prev_dims = interface, dims
        return new_data, dims, {}

    @classmethod
    def validate(cls, dataset, vdims=True):
        """
        Validates every sub-dataset with its own interface and, for
        dictionary based Polygons, checks that one list of holes is
        declared per NaN-separated geometry.

        Raises:
            DataError: If the hole declaration is malformed
        """
        if not dataset.data:
            return

        from holoviews.element import Polygons
        ds = cls._inner_dataset_template(dataset, validate_vdims=vdims)
        for d in dataset.data:
            ds.data = d
            ds.interface.validate(ds, vdims)
            if isinstance(dataset, Polygons) and ds.interface is DictInterface:
                holes = ds.interface.holes(ds)
                if not isinstance(holes, list):
                    raise DataError('Polygons holes must be declared as a list-of-lists.', cls)
                subholes = holes[0]
                coords = ds.data[ds.kdims[0].name]
                # Each NaN in the first kdim separates two geometries
                splits = np.isnan(coords.astype('float')).sum()
                if len(subholes) != (splits+1):
                    raise DataError('Polygons with holes containing multi-geometries '
                                    'must declare a list of holes for each geometry.', cls)

    @classmethod
    def geom_type(cls, dataset):
        """
        Returns the geometry type of the dataset (or element type).

        The geometry type reported by the interface of the first
        sub-dataset takes precedence; otherwise it is derived from the
        element type ('Polygon', 'Line' or 'Point'). None is returned
        if neither applies.
        """
        from holoviews.element import Polygons, Path, Points
        if isinstance(dataset, type):
            eltype = dataset
        else:
            eltype = type(dataset)
            # Guard against empty lists, the template wraps data[0]
            if isinstance(dataset.data, list) and dataset.data:
                ds = cls._inner_dataset_template(dataset)
                if hasattr(ds.interface, 'geom_type'):
                    geom_type = ds.interface.geom_type(ds)
                    if geom_type is not None:
                        return geom_type
        if issubclass(eltype, Polygons):
            return 'Polygon'
        elif issubclass(eltype, Path):
            return 'Line'
        elif issubclass(eltype, Points):
            return 'Point'

    @classmethod
    def _inner_dataset_template(cls, dataset, validate_vdims=True):
        """
        Returns a Dataset template used as a wrapper around the data
        contained within the multi-interface dataset. The template
        wraps the first sub-dataset, callers swap in the others by
        assigning to its data attribute.
        """
        from . import Dataset
        # Elements declaring a level carry no vdims on the sub-datasets
        vdims = dataset.vdims if getattr(dataset, 'level', None) is None else []
        return Dataset(dataset.data[0], datatype=cls.subtypes,
                       kdims=dataset.kdims, vdims=vdims,
                       _validate_vdims=validate_vdims)

    @classmethod
    def dimension_type(cls, dataset, dim):
        """
        Returns the type of the dimension on the first sub-dataset,
        float if the dataset is empty.
        """
        if not dataset.data:
            # Note: Required to make empty datasets work at all (should fix)
            # Other interfaces declare equivalent of empty array
            # which defaults to float type
            return float
        ds = cls._inner_dataset_template(dataset)
        return ds.interface.dimension_type(ds, dim)

    @classmethod
    def range(cls, dataset, dim):
        """
        Returns the combined range of the dimension across all
        sub-datasets, (None, None) if the dataset is empty.
        """
        if not dataset.data:
            return (None, None)
        ranges = []
        ds = cls._inner_dataset_template(dataset)

        # Backward compatibility for Contours/Polygons level
        level = getattr(dataset, 'level', None)
        dim = dataset.get_dimension(dim)
        if level is not None and dim is dataset.vdims[0]:
            return (level, level)

        for d in dataset.data:
            ds.data = d
            ranges.append(ds.interface.range(ds, dim))
        return util.max_range(ranges)

    @classmethod
    def has_holes(cls, dataset):
        """
        Returns whether any of the sub-datasets declares holes.
        """
        if not dataset.data:
            return False
        ds = cls._inner_dataset_template(dataset)
        for d in dataset.data:
            ds.data = d
            if ds.interface.has_holes(ds):
                return True
        return False

    @classmethod
    def holes(cls, dataset):
        """
        Returns the concatenated list of holes of all sub-datasets.
        """
        holes = []
        if not dataset.data:
            return holes
        ds = cls._inner_dataset_template(dataset)
        for d in dataset.data:
            ds.data = d
            holes += ds.interface.holes(ds)
        return holes

    @classmethod
    def isscalar(cls, dataset, dim, per_geom=False):
        """
        Tests if dimension is scalar in each subpath.

        If per_geom is True and the geometries are not points each
        subpath only has to be scalar individually, otherwise all
        subpaths must share the same single value.
        """
        if not dataset.data:
            return True
        geom_type = cls.geom_type(dataset)
        ds = cls._inner_dataset_template(dataset)
        combined = []
        for d in dataset.data:
            ds.data = d
            values = ds.interface.values(ds, dim, expanded=False)
            unique = list(util.unique_iterator(values))
            if len(unique) > 1:
                return False
            elif per_geom and geom_type != 'Point':
                continue
            unique = unique[0]
            if unique not in combined:
                # A second distinct value means it is not scalar overall
                if combined:
                    return False
                combined.append(unique)
        return True

    @classmethod
    def select(cls, dataset, selection_mask=None, **selection):
        """
        Applies selection on all the subpaths.

        If a selection_mask is supplied it is applied per subpath,
        otherwise the selection is applied within each subpath and
        subpaths left empty by the selection are dropped.
        """
        from ...element import Polygons
        if not dataset.data:
            return dataset.data
        elif selection_mask is not None:
            return [d for b, d in zip(selection_mask, dataset.data) if b]
        ds = cls._inner_dataset_template(dataset)
        # Columns ignored when checking whether a selection is empty
        skipped = (Polygons._hole_key,)
        if hasattr(ds.interface, 'geo_column'):
            skipped += (ds.interface.geo_column(ds),)
        data = []
        for d in dataset.data:
            ds.data = d
            selection_mask = ds.interface.select_mask(ds, selection)
            sel = ds.interface.select(ds, selection_mask)
            is_dict = isinstance(sel, dict)
            if ((not len(sel) and not is_dict) or
                (is_dict and any(False if util.isscalar(v) else len(v) == 0
                                 for k, v in sel.items() if k not in skipped))):
                continue
            data.append(sel)
        return data

    @classmethod
    def select_paths(cls, dataset, index):
        """
        Allows selecting paths with usual NumPy slicing index.
        """
        # Wrap each path in a dict so NumPy builds a 1D object array
        # instead of attempting to stack the paths
        selection = np.array([{0: p} for p in dataset.data])[index]
        if isinstance(selection, dict):
            return [selection[0]]
        return [s[0] for s in selection]

    @classmethod
    def aggregate(cls, dataset, dimensions, function, **kwargs):
        """
        Aggregation is not implemented for multi-tabular data.
        """
        raise NotImplementedError('Aggregation currently not implemented')

    @classmethod
    def groupby(cls, dataset, dimensions, container_type, group_type, **kwargs):
        """
        Groups the subpaths by the unique values along the supplied
        dimensions, which must be scalar on each subpath.

        Raises:
            ValueError: If one of the dimensions is not scalar
        """
        # Get dimensions information
        dimensions = [dataset.get_dimension(d) for d in dimensions]
        kdims = [kdim for kdim in dataset.kdims if kdim not in dimensions]

        # Update the kwargs appropriately for Element group types
        group_kwargs = {}
        group_type = list if group_type == 'raw' else group_type
        if issubclass(group_type, Element):
            group_kwargs.update(util.get_param_values(dataset))
            group_kwargs['kdims'] = kdims
        group_kwargs.update(kwargs)

        # Find all the keys along supplied dimensions
        values = []
        for d in dimensions:
            if not cls.isscalar(dataset, d, True):
                raise ValueError('MultiInterface can only apply groupby '
                                 'on scalar dimensions, %s dimension '
                                 'is not scalar' % d)
            vals = cls.values(dataset, d, False, True)
            values.append(vals)
        values = tuple(values)

        # Iterate over the unique entries applying selection masks
        from . import Dataset
        ds = Dataset(values, dimensions)
        # One key per subpath, pairing up the values of each dimension
        keys = zip(*values)
        grouped_data = []
        for unique_key in util.unique_iterator(keys):
            mask = ds.interface.select_mask(ds, dict(zip(dimensions, unique_key)))
            selection = [data for data, m in zip(dataset.data, mask) if m]
            group_data = group_type(selection, **group_kwargs)
            grouped_data.append((unique_key, group_data))

        if issubclass(container_type, NdMapping):
            with item_check(False), sorted_context(False):
                return container_type(grouped_data, kdims=dimensions)
        else:
            return container_type(grouped_data)

    @classmethod
    def sample(cls, dataset, samples=[]):
        """
        Sampling is not supported for multi-tabular data.
        """
        raise NotImplementedError('Sampling operation on subpaths not supported')

    @classmethod
    def shape(cls, dataset):
        """
        Returns the shape of the multi-tabular dataset. For point
        geometries the rows of all subpaths are summed, for all other
        geometries the number of rows is the number of subpaths.
        """
        if not dataset.data:
            return (0, len(dataset.dimensions()))
        elif cls.geom_type(dataset) != 'Point':
            return (len(dataset.data), len(dataset.dimensions()))

        rows, cols = 0, 0
        ds = cls._inner_dataset_template(dataset)
        for d in dataset.data:
            ds.data = d
            r, cols = ds.interface.shape(ds)
            rows += r
        return rows, cols

    @classmethod
    def length(cls, dataset):
        """
        Returns the length of the multi-tabular dataset. For point
        geometries the lengths of all subpaths are summed, for all
        other geometries the length is the number of subpaths.
        """
        if not dataset.data:
            return 0
        elif cls.geom_type(dataset) != 'Point':
            return len(dataset.data)
        length = 0
        ds = cls._inner_dataset_template(dataset)
        for d in dataset.data:
            ds.data = d
            length += ds.interface.length(ds)
        return length

    @classmethod
    def dtype(cls, dataset, dimension):
        """
        Returns the dtype of the dimension on the first sub-dataset,
        float if the dataset is empty.
        """
        if not dataset.data:
            return np.dtype('float')
        ds = cls._inner_dataset_template(dataset)
        return ds.interface.dtype(ds, dimension)

    @classmethod
    def sort(cls, dataset, by=[], reverse=False):
        """
        Returns the subpaths ordered by the values of the supplied
        dimensions, in descending order if reverse is True.
        """
        by = [dataset.get_dimension(d).name for d in by]
        if len(by) == 1:
            sorting = cls.values(dataset, by[0], False).argsort()
        else:
            arrays = [dataset.dimension_values(d, False) for d in by]
            sorting = util.arglexsort(arrays)
        if reverse:
            # Previously the reverse argument was silently ignored
            sorting = sorting[::-1]
        return [dataset.data[s] for s in sorting]

    @classmethod
    def nonzero(cls, dataset):
        """
        Returns whether the dataset contains any sub-datasets.
        """
        return bool(dataset.data)

    @classmethod
    def reindex(cls, dataset, kdims=None, vdims=None):
        """
        Reindexes every sub-dataset to the supplied dimensions.
        """
        if not dataset.data:
            # The template cannot be built without a first sub-dataset
            return dataset.data
        new_data = []
        ds = cls._inner_dataset_template(dataset)
        for d in dataset.data:
            ds.data = d
            # NOTE(review): appends the reindexed Dataset rather than
            # its .data, unlike the other methods; presumably unwrapped
            # again on initialization - confirm against callers.
            new_data.append(ds.reindex(kdims, vdims))
        return new_data

    @classmethod
    def redim(cls, dataset, dimensions):
        """
        Applies the dimension renaming to every sub-dataset.
        """
        if not dataset.data:
            return dataset.data

        new_data = []
        ds = cls._inner_dataset_template(dataset)
        for d in dataset.data:
            ds.data = d
            new_data.append(ds.interface.redim(ds, dimensions))
        return new_data

    @classmethod
    def values(cls, dataset, dimension, expanded=True, flat=True,
               compute=True, keep_index=False):
        """
        Returns a single concatenated array of all subpaths separated
        by NaN values (no separators are inserted for point
        geometries). If expanded keyword is False a flat array with one
        value per subpath is returned when the dimension is scalar on
        every subpath, otherwise an object array holding one entry per
        subpath is returned.
        """
        if not dataset.data:
            return np.array([])

        values, scalars = [], []
        all_scalar = True
        ds = cls._inner_dataset_template(dataset)
        geom_type = cls.geom_type(dataset)
        is_points = geom_type == 'Point'
        is_geom = dimension in dataset.kdims[:2]
        for d in dataset.data:
            ds.data = d
            dvals = ds.interface.values(
                ds, dimension, True, flat, compute, keep_index
            )
            scalar = len(util.unique_array(dvals)) == 1 and not is_geom
            gt = ds.interface.geom_type(ds) if hasattr(ds.interface, 'geom_type') else None

            if gt is None:
                gt = geom_type

            # NOTE(review): geom_type is compared against 'Points' while
            # the geometry types used in this class are spelled 'Point',
            # so the last condition is effectively always true. Left
            # as-is to preserve behavior.
            if (gt in ('Polygon', 'Ring') and (not scalar or expanded) and
                not geom_type == 'Points'):
                gvals = ds.array([0, 1])
                dvals = ensure_ring(gvals, dvals)
            if scalar and not expanded:
                dvals = dvals[:1]
            all_scalar &= scalar

            if not len(dvals):
                continue
            # Recorded only for subpaths that are kept so that scalars
            # and values stay aligned when they are zipped below
            scalars.append(scalar)
            values.append(dvals)
            if not is_points and expanded:
                # np.nan rather than np.NaN, which NumPy 2.0 removed
                values.append([np.nan])

        if not values:
            return np.array([])
        elif expanded or (all_scalar and not is_geom):
            if not is_points and expanded:
                # Drop the trailing NaN separator
                values = values[:-1]
            return np.concatenate(values) if values else np.array([])
        else:
            array = np.empty(len(values), dtype=object)
            array[:] = [a[0] if s else a for s, a in zip(scalars, values)]
            return array

    @classmethod
    def split(cls, dataset, start, end, datatype, **kwargs):
        """
        Splits a multi-interface Dataset into regular Datasets using
        regular tabular interfaces.

        Args:
            dataset: The multi-interface dataset to split
            start: Index of the first subpath to include
            end: Index of the subpath to stop at
            datatype: None to return clones of the dataset, otherwise
                one of 'array', 'dataframe', 'columns' or 'dictionary'
            **kwargs: Passed on to the array or dframe conversion

        Raises:
            ValueError: If the datatype is not supported
        """
        objs = []
        if datatype is None:
            for d in dataset.data[start: end]:
                objs.append(dataset.clone([d]))
            return objs
        elif not dataset.data:
            return objs

        geom_type = cls.geom_type(dataset)
        ds = dataset.clone([])
        for d in dataset.data[start:end]:
            ds.data = [d]
            if datatype == 'array':
                obj = ds.array(**kwargs)
            elif datatype == 'dataframe':
                obj = ds.dframe(**kwargs)
            elif datatype in ('columns', 'dictionary'):
                # Initialized so the lookup below cannot hit an unbound
                # name if the interface does not declare geom_type
                gt = None
                if hasattr(ds.interface, 'geom_type'):
                    gt = ds.interface.geom_type(ds)
                if gt is None:
                    gt = geom_type
                if isinstance(ds.data[0], dict):
                    obj = dict(ds.data[0])
                    xd, yd = ds.kdims
                    if (geom_type in ('Polygon', 'Ring') or
                        xd not in obj or yd not in obj):
                        obj[xd.name] = ds.interface.values(ds, xd)
                        obj[yd.name] = ds.interface.values(ds, yd)
                else:
                    obj = ds.columns()
                if gt is not None:
                    obj['geom_type'] = gt
            else:
                raise ValueError("%s datatype not support" % datatype)
            objs.append(obj)
        return objs

    @classmethod
    def add_dimension(cls, dataset, dimension, dim_pos, values, vdim):
        """
        Adds a dimension to every sub-dataset. The values must either
        be scalar (or None) or supply one entry per sub-dataset.

        Raises:
            ValueError: If the number of values does not match the
                number of sub-datasets
        """
        if not len(dataset.data):
            return dataset.data
        elif values is None or util.isscalar(values):
            values = [values]*len(dataset.data)
        elif not len(values) == len(dataset.data):
            raise ValueError('Added dimension values must be scalar or '
                             'match the length of the data.')

        new_data = []
        template = cls._inner_dataset_template(dataset)
        array_type = template.interface.datatype == 'array'
        for d, v in zip(dataset.data, values):
            template.data = d
            if array_type:
                # Array sub-datasets are converted to columns first
                ds = template.clone(template.columns())
            else:
                ds = template
            new_data.append(ds.interface.add_dimension(ds, dimension, dim_pos, v, vdim))
        return new_data

    @classmethod
    def iloc(cls, dataset, index):
        """
        Integer based indexing of the multi-tabular dataset.

        For non-point geometries the row index selects whole subpaths.
        For point geometries the rows index into the concatenation of
        all subpaths; a scalar row and column index returns a scalar.
        Step slicing is not supported for point geometries.
        """
        rows, cols = index
        scalar = np.isscalar(cols) and np.isscalar(rows)

        template = cls._inner_dataset_template(dataset)
        if cls.geom_type(dataset) != 'Point':
            geoms = cls.select_paths(dataset, rows)
            new_data = []
            for d in geoms:
                template.data = d
                new_data.append(template.iloc[:, cols].data)
            return new_data

        if isinstance(rows, slice) and rows.step is not None:
            dataset.param.warning(".iloc step slicing currently not supported for "
                                  "the multi-tabular data format.")

        # count tracks the global row offset of the current subpath
        count = 0
        new_data = []
        for d in dataset.data:
            template.data = d
            length = len(template)
            if np.isscalar(rows):
                if (count+length) > rows >= count:
                    data = template.iloc[rows-count, cols]
                    return data if scalar else [data.data]
            elif isinstance(rows, slice):
                if rows.start is not None and rows.start >= (count+length):
                    # Slice starts after this subpath; the offset still
                    # has to advance before moving on
                    count += length
                    continue
                elif rows.stop is not None and rows.stop <= count:
                    break
                start = None if rows.start is None else max(rows.start - count, 0)
                stop = None if rows.stop is None else min(rows.stop - count, length)
                slc = slice(start, stop)
                new_data.append(template.iloc[slc, cols].data)
            else:
                # Keep only the rows that fall inside this subpath
                sub_rows = [r-count for r in rows if 0 <= (r-count) < length]
                new = template.iloc[sub_rows, cols]
                if len(new):
                    new_data.append(new.data)
            count += length
        return new_data
def ensure_ring(geom, values=None):
    """Ensure the (multi-)geometry forms a ring.

    Checks the start- and end-point of each geometry to ensure they
    form a ring, if not the start point is inserted at the end point.
    If a values array is provided (which must match the geometry in
    length) then the insertion will occur on the values instead,
    ensuring that they will match the ring geometry.

    Args:
        geom: 2-D array of geometry coordinates, individual geometries
            are separated by rows containing NaN
        values: Optional array of values

    Returns:
        Array where the ring closing values have been inserted, the
        input is returned unchanged if every geometry is already
        closed or the geometry is empty
    """
    if values is None:
        values = geom

    # An empty geometry has no start- or end-point to compare and the
    # indexing below would raise an IndexError
    if len(geom) == 0:
        return values

    # Rows containing NaN separate the individual geometries
    breaks = np.where(np.isnan(geom.astype('float')).sum(axis=1))[0]
    starts = [0] + list(breaks+1)
    ends = list(breaks-1) + [len(geom)-1]
    zipped = zip(geom[starts], geom[ends], ends, values[starts])
    # Collect the start value and insertion index (just past the end
    # point) of every geometry whose start and end points differ
    unpacked = tuple(zip(*[(v, i+1) for s, e, i, v in zipped
                           if (s!=e).any()]))
    if not unpacked:
        return values
    inserts, inds = unpacked
    return np.insert(values, list(inds), list(inserts), axis=0)
# Register MultiInterface with the base Interface at import time.
# NOTE(review): presumably this makes it available through the
# Interface.interfaces lookup used in MultiInterface.init - confirm
# against Interface.register.
Interface.register(MultiInterface)