hub/venv/lib/python3.7/site-packages/trimesh/voxel/runlength.py

698 lines
20 KiB
Python

"""
Numpy encode/decode/utility implementations for run length encodings.
# Run Length Encoded Features
Encoding/decoding functions for run length encoded data.
We include code for two variations:
* run length encoding (RLE)
* binary run length encoding (BRLE)
RLE stores sequences of repeated values as the value followed by its count, e.g.
```python
dense_to_rle([5, 5, 3, 2, 2, 2, 2, 6]) == [5, 2, 3, 1, 2, 4, 6, 1]
```
i.e. the value `5` is repeated `2` times, then `3` is repeated `1` time, `2` is
repeated `4` times and `6` is repeated `1` time.
BRLE is an optimized form for when the stored values can only be `0` or `1`.
This means we only need to save the counts, and assume the values alternate
(starting at `0`).
```python
dense_to_brle([1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]) == \
[0, 2, 4, 7, 2]
```
i.e. the value zero occurs `0` times, followed by `2` ones, `4` zeros, `7` ones
and `2` zeros.
Sequences with counts exceeding the data type's maximum value have to be
handled carefully. For example, the `uint8` encoding of 300 zeros
(`uint8` has a max value of 255) is:
* RLE: `[0, 255, 0, 45]` (`0` repeated `255` times + `0` repeated `45` times)
* BRLE: `[255, 0, 45, 0]` (`255` zeros + `0` ones + `45` zeros + `0` ones)
This module contains implementations of various RLE/BRLE operations.
"""
import functools
import numpy as np
def brle_length(brle):
    """
    Dense length of binary run length encoded data.

    Gives the same number as `len(brle_to_dense(brle))` without decoding:
    the dense length is simply the sum of all run counts.

    Parameters
    ----------
    brle : (n,) int
      Binary run length encoded data

    Returns
    ----------
    length : number
      Sum of the counts, as computed by `np.sum`
    """
    total = np.sum(brle)
    return total
def rle_length(rle):
    """
    Dense length of run length encoded data.

    Gives the same number as `len(rle_to_dense(rle))` without decoding:
    the counts sit at the odd positions of the (value, count) pairs and
    the dense length is their sum.

    Parameters
    ----------
    rle : (2 * n,) int
      Run length encoded data

    Returns
    ----------
    length : number
      Sum of the counts, as computed by `np.sum`
    """
    counts = rle[1::2]
    return np.sum(counts)
def rle_to_brle(rle, dtype=None):
    """
    Convert run length encoded (RLE) value/counts to BRLE.

    RLE data is stored in a rank 1 array with each pair giving:
    (value, count)
    e.g. the RLE encoding of [4, 4, 4, 1, 1, 6] is [4, 3, 1, 2, 6, 1].

    Parameters
    ----------
    rle : (2 * n,) int
      Run length encoded data, every value must be 0 or 1
    dtype : numpy int type or None
      If given the result is passed through `brle_to_brle` with this dtype

    Returns
    ----------
    equivalent binary run length encoding. A list of python ints if dtype
    is None, otherwise brle_to_brle is called on that list before returning.

    Raises
    ----------
    ValueError
      If any of the values of `rle` are not zero or 1.
    """
    # BRLE always starts with the count of zeros
    out = [0]
    curr_val = 0
    for value, count in np.reshape(rle, (-1, 2)):
        if value not in (0, 1):
            raise ValueError(
                "Invalid run length encoding for conversion to BRLE")
        # cast so the output holds plain python ints whichever branch ran
        count = int(count)
        if value == curr_val:
            # same value as the previous run: extend it
            out[-1] += count
        else:
            out.append(count)
            curr_val = value
    # keep an even number of counts (False/True pairs)
    if len(out) % 2:
        out.append(0)
    if dtype is not None:
        out = brle_to_brle(out, dtype=dtype)
    return out
def brle_logical_not(brle):
    """
    Get the BRLE encoding of the element-wise negation of `brle`.

    Decodes to the same dense data as
    `dense_to_brle(np.logical_not(brle_to_dense(brle)))` but never builds
    the dense array: the runs keep their lengths and only swap meaning,
    which is achieved by shifting the encoding by one position.

    Parameters
    ----------
    brle : rank 1 int array of binary run length encoded data

    Returns
    ----------
    rank 1 int array of binary run length encoded data corresponding to
    element-wise not of the input.
    """
    first_empty = not brle[0]
    last_empty = not brle[-1]
    if first_empty and last_empty:
        # both end counts are zero: dropping them flips every run
        return brle[1:-1]
    # otherwise a zero count on each end shifts all runs by one position
    return np.pad(brle, (1, 1), mode='constant')
def merge_brle_lengths(lengths):
    """
    Inverse of split_long_brle_lengths.

    A zero count between two counts means the runs on either side hold the
    same value, so the two are merged into a single longer run.

    Parameters
    ----------
    lengths : (n,) int
      BRLE counts, possibly containing zero-length separator runs

    Returns
    ----------
    merged : list of int
      Merged counts as plain python ints
    """
    if len(lengths) == 0:
        return []
    out = [int(lengths[0])]
    accumulating = False
    for length in lengths[1:]:
        # work on python ints: adding a small numpy scalar (e.g. uint8)
        # would otherwise wrap around at the dtype maximum
        length = int(length)
        if accumulating:
            out[-1] += length
            accumulating = False
        elif length == 0:
            # zero-length run: the next count continues the previous run
            accumulating = True
        else:
            out.append(length)
    return out
def split_long_brle_lengths(lengths, dtype=np.int64):
    """
    Split lengths that exceed max dtype value.

    When at least one length exceeds the maximum, every length `l` is
    rewritten as `[max_val, 0] * (l // max_val) + [l % max_val]`, which
    keeps the False/True alternation of the encoding intact.
    e.g. for dtype=np.uint8 (max_value == 255)
    ```
    split_long_brle_lengths([600, 300, 2, 6], np.uint8) == \
             [255, 0, 255, 0, 90, 255, 0, 45, 2, 6]
    ```

    Parameters
    ----------
    lengths : (n,) int
      BRLE counts
    dtype : numpy int type
      Type whose maximum value limits a single count

    Returns
    ----------
    lengths : (m,) dtype
      Counts with no entry larger than the dtype maximum
    """
    lengths = np.asarray(lengths)
    limit = np.iinfo(dtype).max

    if not np.any(lengths > limit):
        # nothing to split, only make sure the dtype is the requested one
        if lengths.dtype != dtype:
            return lengths.astype(dtype)
        return lengths

    original_count = len(lengths)
    full_chunks = lengths // limit
    leftovers = (lengths % limit).astype(dtype)

    pieces = []
    for n_full, leftover in zip(full_chunks, leftovers):
        # `n_full` saturated runs separated by empty runs, then the rest
        pieces.append(
            np.array([limit, 0] * n_full + [leftover], dtype=dtype))

    expected_size = np.sum(full_chunks) * 2 + original_count
    flat = np.concatenate(pieces)
    return flat.reshape((expected_size,)).astype(dtype)
def dense_to_brle(dense_data, dtype=np.int64):
    """
    Get the binary run length encoding of `dense_data`.

    Parameters
    ----------
    dense_data : rank 1 bool array of data to encode.
    dtype : numpy int type.

    Returns
    ----------
    Binary run length encoded rank 1 array of dtype `dtype`.
    An empty input gives an empty encoding.

    Raises
    ----------
    ValueError if dense_data is not a rank 1 bool array.
    """
    # compare against the builtin `bool`: the `np.bool` alias is not
    # available in every numpy release
    if dense_data.dtype != bool:
        raise ValueError("`dense_data` must be bool")
    if len(dense_data.shape) != 1:
        raise ValueError("`dense_data` must be rank 1.")
    n = len(dense_data)
    if n == 0:
        # no runs at all, and `dense_data[0]` below would fail
        return np.zeros(0, dtype=dtype)
    # index where each run of identical values begins
    starts = np.r_[0, np.flatnonzero(dense_data[1:] != dense_data[:-1]) + 1]
    lengths = np.diff(np.r_[starts, n])
    lengths = split_long_brle_lengths(lengths, dtype=dtype)
    if dense_data[0]:
        # BRLE starts with the count of `False`: zero of them here
        lengths = np.pad(lengths, [1, 0], mode='constant')
    return lengths
# the two values a binary run length encoding alternates between
_ft = np.array([False, True], dtype=bool)


def brle_to_dense(brle_data, vals=None):
    """
    Decode binary run length encoded data to dense.

    Parameters
    ----------
    brle_data : BRLE counts of False/True values
    vals : if not `None`, a length 2 array/list/tuple with False/True
      substitute values,
      e.g. brle_to_dense([2, 3, 1, 0], [7, 9]) == [7, 7, 9, 9, 9, 7]

    Returns
    ----------
    rank 1 dense data of dtype `bool if vals is None else vals.dtype`

    Raises
    ----------
    ValueError if vals is not None and shape is not (2,)
    """
    if vals is None:
        vals = _ft
    else:
        vals = np.asarray(vals)
        if vals.shape != (2,):
            # wrap the shape in a tuple so any rank can be formatted
            raise ValueError(
                "vals.shape must be (2,), got %s" % (vals.shape,))
    n_runs = len(brle_data)
    if n_runs == 0:
        return np.empty(0, dtype=vals.dtype)
    # one value per run, alternating vals[0], vals[1], vals[0], ...
    run_values = np.tile(vals, (n_runs + 1) // 2)[:n_runs]
    return np.repeat(run_values, brle_data)
def rle_to_dense(rle_data, dtype=np.int64):
    """
    Get the dense decoding of the associated run length encoded data.

    Parameters
    ----------
    rle_data : (2 * n,) int
      Run length encoded data as flat (value, count) pairs
    dtype : numpy dtype or None
      If not None the values are converted to this type before repeating

    Returns
    ----------
    dense : (m,) array
      Each value repeated `count` times
    """
    pairs = np.reshape(rle_data, (-1, 2))
    values = pairs[:, 0]
    counts = pairs[:, 1]
    if dtype is not None:
        values = np.asanyarray(values, dtype=dtype)
    try:
        return np.repeat(values, counts)
    except TypeError:
        # on windows it sometimes fails to cast data type
        return np.repeat(values.astype(np.int64),
                         counts.astype(np.int64))
def dense_to_rle(dense_data, dtype=np.int64):
    """
    Get run length encoding of the provided dense data.

    Parameters
    ----------
    dense_data : (n,) array-like
      Rank 1 data to encode
    dtype : numpy int type
      Type limiting the size of a single count, longer runs are split

    Returns
    ----------
    rle : (2 * m,) array
      Flat (value, count) pairs. An empty input gives an empty encoding.
    """
    # array-likes such as lists do not support the element-wise
    # comparison used below
    dense_data = np.asanyarray(dense_data)
    n = len(dense_data)
    if n == 0:
        # no runs at all, and indexing at `starts` below would fail
        return np.zeros(0, dtype=np.result_type(dense_data.dtype, dtype))
    # index where each run of identical values begins
    starts = np.r_[0, np.flatnonzero(dense_data[1:] != dense_data[:-1]) + 1]
    lengths = np.diff(np.r_[starts, n])
    values = dense_data[starts]
    values, lengths = split_long_rle_lengths(values, lengths, dtype=dtype)
    # interleave into value, count, value, count, ...
    out = np.stack((values, lengths), axis=1)
    return out.flatten()
def split_long_rle_lengths(values, lengths, dtype=np.int64):
    """
    Split long lengths in the associated run length encoding.

    e.g.
    ```python
    split_long_rle_lengths([5, 2], [300, 12], np.uint8) == \
        ([5, 5, 2], [255, 45, 12])
    ```

    Parameters
    ----------
    values : values column of run length encoding, or `rle[::2]`
    lengths : counts in run length encoding, or `rle[1::2]`
    dtype : numpy data type indicating the maximum value.

    Returns
    ----------
    values, lengths associated with the appropriate splits. `lengths` will be
    of type `dtype`. `values` is returned unchanged when no split is needed
    and as a repeated array otherwise.
    """
    limit = np.iinfo(dtype).max
    lengths = np.asarray(lengths)
    full_chunks = lengths // limit

    if not np.any(full_chunks):
        # nothing reaches the limit: only the dtype may need adjusting
        if lengths.dtype != dtype:
            lengths = lengths.astype(dtype)
        return values, lengths

    # every run becomes its saturated chunks plus one entry for the rest
    pieces = full_chunks + 1
    leftovers = lengths % limit
    values = np.repeat(values, pieces)
    saturated = np.full(len(pieces), limit, dtype=dtype)
    lengths = np.repeat(saturated, pieces)
    # the last entry of each run holds whatever is left over
    lengths[np.cumsum(pieces) - 1] = leftovers
    return values, lengths
def merge_rle_lengths(values, lengths):
    """
    Inverse of split_long_rle_lengths except returns normal python lists.

    Zero-length runs are dropped and consecutive runs holding the same
    value are merged into one.

    Parameters
    ----------
    values : (n,) values column of a run length encoding
    lengths : (n,) int counts column of a run length encoding

    Returns
    ----------
    values : list
      Value of every merged run
    lengths : list of int
      Count of every merged run as plain python ints
    """
    ret_values = []
    ret_lengths = []
    for value, length in zip(values, lengths):
        # work on python ints: adding a small numpy scalar (e.g. uint8)
        # would otherwise wrap around at the dtype maximum
        length = int(length)
        if length == 0:
            continue
        if ret_values and value == ret_values[-1]:
            ret_lengths[-1] += length
        else:
            ret_values.append(value)
            ret_lengths.append(length)
    return ret_values, ret_lengths
def brle_to_rle(brle, dtype=np.int64):
    """
    Convert binary run length encoded data to run length encoding.

    Every count is paired with the value its position implies
    (False, True, False, ...) and the result is normalized by `rle_to_rle`.

    Parameters
    ----------
    brle : (n,) int
      Binary run length encoded data
    dtype : numpy int type
      Passed on to `rle_to_rle`

    Returns
    ----------
    rle : result of `rle_to_rle` on the interleaved (value, count) pairs
    """
    if len(brle) % 2:
        # complete the final False/True pair with an empty True run
        brle = np.concatenate([brle, [0]])
    n_pairs = len(brle) // 2
    values = np.tile(_ft, n_pairs)
    interleaved = np.stack((values, brle), axis=1).flatten()
    return rle_to_rle(interleaved, dtype=dtype)
def brle_to_brle(brle, dtype=np.int64):
    """
    Almost the identity function.

    Runs separated by zero counts are merged first, then any count too
    large for `dtype` is split again.

    Parameters
    ----------
    brle : (n,) int
      Binary run length encoded data
    dtype : numpy int type
      Type of the returned counts

    Returns
    ----------
    brle : result of `split_long_brle_lengths` on the merged counts
    """
    merged = merge_brle_lengths(brle)
    return split_long_brle_lengths(merged, dtype=dtype)
def rle_to_rle(rle, dtype=np.int64):
    """
    Almost the identity function.

    Mergeable runs are merged first, then any count too large for `dtype`
    is split again.

    Parameters
    ----------
    rle : (2 * n,) int
      Run length encoded data as flat (value, count) pairs
    dtype : numpy int type
      Type limiting the size of a single count

    Returns
    ----------
    rle : (2 * m,) array
      Flat (value, count) pairs
    """
    pairs = np.reshape(rle, (-1, 2))
    merged_values, merged_lengths = merge_rle_lengths(
        pairs[:, 0], pairs[:, 1])
    split_values, split_lengths = split_long_rle_lengths(
        merged_values, merged_lengths, dtype=dtype)
    # interleave into value, count, value, count, ...
    interleaved = np.stack((split_values, split_lengths), axis=1)
    return interleaved.flatten()
def _unsorted_gatherer(indices, sorted_gather_fn):
if not isinstance(indices, np.ndarray):
indices = np.array(indices, copy=False)
order = np.argsort(indices)
ordered_indices = indices[order]
def f(data, dtype=None):
result = np.empty(len(order), dtype=dtype or getattr(data, 'dtype', None))
result[order] = tuple(sorted_gather_fn(data, ordered_indices))
return result
return f
def sorted_rle_gather_1d(rle_data, ordered_indices):
    """
    Gather rle_data at ordered_indices.

    This is equivalent to `rle_to_dense(rle_data)[ordered_indices]` but avoids
    the decoding.

    Parameters
    ----------
    rle_data : iterable of run-length-encoded data.
    ordered_indices : iterable of ints in ascending order.

    Returns
    ----------
    iterable of the values of `rle_data` at the dense indices, same length
    as ordered indices. Empty indices give an empty iterable.

    Raises
    ----------
    IndexError
      Raised while iterating if an index is beyond the encoded data.
    """
    data_iter = iter(rle_data)
    value = None
    # dense index one past the end of the run held in `value`
    start = 0
    for index in ordered_indices:
        # advance through the runs until one covers `index`
        while start <= index:
            try:
                value = next(data_iter)
                # python int so small count dtypes cannot wrap around
                start += int(next(data_iter))
            except StopIteration:
                raise IndexError(
                    'Index %d out of range of raw_values length %d'
                    % (index, start))
        yield value
def rle_mask(rle_data, mask):
    """
    Perform masking of the input run-length data.

    Walks the runs and the mask together without building the dense
    array. The mask is consumed one entry per dense element, so it needs
    at least as many entries as the dense data.

    Parameters
    ----------
    rle_data : iterable of run length encoded data
    mask : iterable of bools corresponding to the dense mask.

    Returns
    ----------
    iterable of dense values of rle_data wherever mask is True.
    """
    stream = iter(rle_data)
    keep = iter(mask)
    # zipping the iterator with itself walks it as (value, count) pairs
    for value, count in zip(stream, stream):
        for _ in range(count):
            if next(keep):
                yield value
def brle_mask(rle_data, mask):
    """
    Perform masking of the input binary run-length data.

    Walks the runs and the mask together without building the dense
    array. The mask is consumed one entry per dense element, so it needs
    at least as many entries as the dense data.

    Parameters
    ----------
    rle_data : iterable of binary run length encoded data
    mask : iterable of bools corresponding to the dense mask.

    Returns
    ----------
    iterable dense values of rle_data wherever mask is True.
    """
    runs = enumerate(rle_data)
    keep = iter(mask)
    for position, count in runs:
        # even positions count `False` values, odd positions `True`
        value = bool(position % 2)
        for _ in range(count):
            if next(keep):
                yield value
def rle_gatherer_1d(indices):
    """
    Get a gather function at the given indices.

    Gathering on RLE data requires sorted indices. When the same indices
    are used on several RLE arrays, building the gather function once
    means the sorting is only done once.

    If only gathering on a single RLE iterable, use `rle_gather_1d`.

    Parameters
    ----------
    indices : iterable of integers

    Returns
    ----------
    gather function, mapping `(rle_data, dtype=None) -> values`.
    `values` will have the same length as `indices` and dtype provided,
    or rle_data.dtype if no dtype is provided.
    """
    gather_fn = _unsorted_gatherer(indices, sorted_rle_gather_1d)
    return gather_fn
def rle_gather_1d(rle_data, indices, dtype=None):
    """
    Gather RLE data values at the provided dense indices.

    This is equivalent to `rle_to_dense(rle_data)[indices]` but the
    implementation does not require the construction of the dense array.

    If indices is known to be in order, use `sorted_rle_gather_1d`.

    Parameters
    ----------
    rle_data : run length encoded data
    indices : dense indices
    dtype : numpy dtype. If not provided, uses rle_data.dtype

    Returns
    ----------
    numpy array, dense data at indices, same length as indices and dtype as
    requested, or as rle_data when no dtype is given
    """
    gather_fn = rle_gatherer_1d(indices)
    return gather_fn(rle_data, dtype=dtype)
def sorted_brle_gather_1d(brle_data, ordered_indices):
    """
    Gather brle_data at ordered_indices.

    This is equivalent to `brle_to_dense(brle_data)[ordered_indices]` but
    avoids the decoding.

    Parameters
    ----------
    brle_data : iterable of binary run-length-encoded data.
    ordered_indices : iterable of ints in ascending order.

    Returns
    ----------
    iterable of bools, the values of `brle_data` at the dense indices, same
    length as ordered indices. Empty indices give an empty iterable.

    Raises
    ----------
    IndexError
      Raised while iterating if an index is beyond the encoded data.
    """
    data_iter = iter(brle_data)
    # flipped before every run, so the first run holds `False`
    value = True
    # dense index one past the end of the current run
    start = 0
    for index in ordered_indices:
        # advance through the runs until one covers `index`
        while start <= index:
            try:
                count = next(data_iter)
            except StopIteration:
                raise IndexError(
                    'Index %d out of range of raw_values length %d'
                    % (index, start))
            value = not value
            # python int so small count dtypes cannot wrap around
            start += int(count)
        yield value
def brle_gatherer_1d(indices):
    """
    Get a gather function at the given indices.

    Gathering on BRLE data requires sorted indices. When the same indices
    are used on several BRLE arrays, building the gather function once
    means the sorting is only done once.

    If only gathering on a single BRLE iterable, use `brle_gather_1d`.

    Parameters
    ----------
    indices : iterable of integers

    Returns
    ----------
    gather function, mapping `(brle_data) -> values`.
    `values` will have the same length as `indices` and bool dtype.
    """
    # builtin `bool` rather than the `np.bool` alias, which is not
    # available in every numpy release
    return functools.partial(
        _unsorted_gatherer(indices, sorted_brle_gather_1d), dtype=bool)
def brle_gather_1d(brle_data, indices):
    """
    Gather BRLE data values at the provided dense indices.

    This is equivalent to `brle_to_dense(brle_data)[indices]` but the
    implementation does not require the construction of the dense array.

    If indices is known to be in order, use `sorted_brle_gather_1d`.

    Parameters
    ----------
    brle_data : binary run length encoded data
    indices : dense indices

    Returns
    ----------
    numpy array, dense data at indices, same length as indices
    """
    gather_fn = brle_gatherer_1d(indices)
    return gather_fn(brle_data)
def brle_reverse(brle_data):
    """
    Get a BRLE encoding of the reversed dense array.

    The result decodes to `brle_to_dense(brle_data)[-1::-1]`. It may
    contain zero-length runs that `dense_to_brle` would not emit.

    Parameters
    ----------
    brle_data : (n,) int
      Binary run length encoded data

    Returns
    ----------
    reversed : binary run length encoding of the reversed dense data
    """
    if len(brle_data) % 2 == 0:
        # pad so the encoding ends on a `False` run: once reversed the
        # first count is then again a count of `False` values
        brle_data = np.concatenate([brle_data, [0]], axis=0)
    # with an odd number of runs the plain reversal is a valid encoding
    return brle_data[::-1]
def rle_reverse(rle_data):
    """
    Get the rle encoding of the reversed dense array.

    Parameters
    ----------
    rle_data : (2 * n,) array-like
      Run length encoded data as flat (value, count) pairs

    Returns
    ----------
    reversed : (2 * n,) array
      The same pairs in reverse order
    """
    # `asanyarray` only copies when required; `np.array(..., copy=False)`
    # raises for inputs needing a copy (e.g. lists) in recent numpy
    pairs = np.reshape(np.asanyarray(rle_data), (-1, 2))
    # reverse the order of the pairs, not of the flat entries
    return np.reshape(pairs[::-1], (-1,))
def rle_to_sparse(rle_data):
    """
    Get dense indices associated with non-zeros.

    Parameters
    ----------
    rle_data : iterable of run length encoded data

    Returns
    ----------
    indices : (m,) int64 array
      Dense indices of the non-zero values
    values : (m,) array
      Values at those indices
    Both are empty lists when there are no non-zero values.
    """
    indices = []
    values = []
    stream = iter(rle_data)
    index = 0
    # zipping the iterator with itself walks it as (value, count) pairs
    for value, count in zip(stream, stream):
        # python int so small count dtypes cannot wrap the running index
        count = int(count)
        end = index + count
        if value:
            indices.append(np.arange(index, end, dtype=np.int64))
            values.append(np.repeat(value, count))
        index = end

    if len(indices) == 0:
        # `np.concatenate` does not accept an empty sequence
        return indices, values

    return np.concatenate(indices), np.concatenate(values)
def brle_to_sparse(brle_data, dtype=np.int64):
    """
    Get dense indices of the `True` values of binary run length encoded
    data.

    Parameters
    ----------
    brle_data : (n,) int
      Binary run length encoded data
    dtype : numpy int type
      Type of the returned indices

    Returns
    ----------
    indices : (m,) dtype
      Dense indices of the `True` values, empty if there are none
    """
    # ends[i] is the dense index one past the end of run i
    ends = np.cumsum(brle_data)
    # a `True` run spans from the end of the preceding `False` run to
    # its own end
    indices = [np.arange(s, e, dtype=dtype) for s, e in
               zip(ends[::2], ends[1::2])]
    if len(indices) == 0:
        # `np.concatenate` does not accept an empty sequence
        return np.empty(0, dtype=dtype)
    return np.concatenate(indices)
def rle_strip(rle_data):
    """
    Remove leading and trailing zeros.

    Parameters
    ----------
    rle_data : run length encoded data

    Returns
    ----------
    (stripped_rle_data, padding)
    stripped_rle_data : rle data without any leading or trailing zeros
    padding : 2-element dense padding, the number of dense elements
      removed from the start and from the end as python ints.
      If the data holds no non-zero value at all the stripped data is
      empty and the whole dense length is reported as leading padding.
    """
    pairs = np.reshape(rle_data, (-1, 2))
    values, counts = pairs[:, 0], pairs[:, 1]
    # runs that contribute a non-zero value to the dense data
    occupied = np.flatnonzero((values != 0) & (counts > 0))
    # sum in int64 so small count dtypes cannot wrap around
    counts = counts.astype(np.int64)

    if len(occupied) == 0:
        return pairs[:0].reshape((-1,)), (int(counts.sum()), 0)

    first, last = occupied[0], occupied[-1]
    start = int(counts[:first].sum())
    end = int(counts[last + 1:].sum())
    return pairs[first:last + 1].reshape((-1,)), (start, end)
def brle_strip(brle_data):
    """
    Remove leading and trailing zeros.

    Parameters
    ----------
    brle_data : binary run length encoded data.

    Returns
    ----------
    (stripped_brle_data, padding)
    stripped_brle_data : brle data without any leading or trailing zeros,
      it always starts with a zero count of `False` values
    padding : 2-element dense padding, the number of dense elements
      removed from the start and from the end as python ints.
      If the data holds no `True` value at all the stripped data is `[0]`
      and the whole dense length is reported as leading padding.
    """
    brle_data = np.asarray(brle_data)
    # odd positions hold the counts of the `True` runs
    occupied = 2 * np.flatnonzero(brle_data[1::2] > 0) + 1

    if len(occupied) == 0:
        total = int(brle_data.astype(np.int64).sum())
        return np.zeros(1, dtype=np.int64), (total, 0)

    first, last = occupied[0], occupied[-1]
    # sum in int64 so small count dtypes cannot wrap around
    start = int(brle_data[:first].astype(np.int64).sum())
    end = int(brle_data[last + 1:].astype(np.int64).sum())
    # the slice starts on a `True` run: prepend an empty `False` run
    stripped = np.concatenate([[0], brle_data[first:last + 1]])
    return stripped, (start, end)