hub/venv/lib/python3.7/site-packages/trimesh/grouping.py

"""
grouping.py
-------------
Functions for grouping values and rows.
"""
import numpy as np
from . import util
from .constants import log, tol
try:
from scipy.spatial import cKDTree
except BaseException as E:
# wrapping just ImportError fails in some cases
# will raise the error when someone tries to use KDtree
from . import exceptions
cKDTree = exceptions.closure(E)
def merge_vertices(mesh,
merge_tex=False,
merge_norm=False,
digits_vertex=None,
digits_norm=2,
digits_uv=4,
**kwargs):
"""
Removes duplicate vertices. By default, based on integer hashes of
each row.
Parameters
-------------
mesh : Trimesh object
Mesh to merge vertices on
merge_tex : bool
If True textured meshes with UV coordinates will
have vertices merged regardless of UV coordinates
merge_norm : bool
If True, meshes with vertex normals will have
vertices merged ignoring different normals
digits_vertex : None or int
Number of digits to consider for vertex position
digits_norm : int
Number of digits to consider for unit normals
digits_uv : int
Number of digits to consider for UV coordinates
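Examples
---------
Illustrative usage on an existing textured Trimesh object m,
merging vertices that differ only in their UV coordinates:
In [1]: trimesh.grouping.merge_vertices(m, merge_tex=True)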
"""
# use tol.merge if digit precision not passed
if not isinstance(digits_vertex, int):
digits_vertex = util.decimal_to_digits(tol.merge)
# if we have a ton of unreferenced vertices it will
# make the unique_rows call super slow so cull first
if hasattr(mesh, 'faces') and len(mesh.faces) > 0:
referenced = np.zeros(len(mesh.vertices), dtype=bool)
referenced[mesh.faces] = True
else:
# this is used for geometry without faces
referenced = np.ones(len(mesh.vertices), dtype=bool)
# collect vertex attributes into sequence we can stack
stacked = [mesh.vertices * (10 ** digits_vertex)]
# UV texture visuals require us to update the
# vertices and normals differently
if (not merge_tex and
mesh.visual.defined and
mesh.visual.kind == 'texture' and
mesh.visual.uv is not None and
len(mesh.visual.uv) == len(mesh.vertices)):
# get an array with vertices and UV coordinates
# converted to integers at requested precision
stacked.append(mesh.visual.uv * (10 ** digits_uv))
# check to see if we have vertex normals
normals = mesh._cache['vertex_normals']
if not merge_norm and np.shape(normals) == mesh.vertices.shape:
stacked.append(normals * (10 ** digits_norm))
# stack collected vertex properties and round to integer
stacked = np.column_stack(stacked).round().astype(np.int64)
# check unique rows of referenced vertices
u, i = unique_rows(stacked[referenced])
# construct an inverse using the subset
inverse = np.zeros(len(mesh.vertices), dtype=np.int64)
inverse[referenced] = i
# get the vertex mask
mask = np.nonzero(referenced)[0][u]
# run the update including normals and UV coordinates
mesh.update_vertices(mask=mask, inverse=inverse)
def group(values, min_len=0, max_len=np.inf):
"""
Return the indices of values that are identical
Parameters
----------
values : (n,) int
Values to group
min_len : int
The shortest group allowed
All groups will have len >= min_len
max_len : int
The longest group allowed
All groups will have len <= max_len
Returns
----------
groups : sequence
Contains indices to form groups
IE [0,1,0,1] returns [[0,2], [1,3]]
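Examples
---------
Groups shorter than min_len are dropped; here the single
occurrence of the value 2 is excluded:
In [1]: trimesh.grouping.group([0, 1, 0, 1, 2], min_len=2)
Out[1]:
array([[0, 2],
       [1, 3]])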
"""
original = np.asanyarray(values)
# save the sorted order and then apply it
order = original.argsort()
values = original[order]
# find the indexes which are duplicates
if values.dtype.kind == 'f':
# for floats in a sorted array, neighbors are not duplicates
# if the difference between them is greater than approximate zero
nondupe = np.greater(np.abs(np.diff(values)), tol.zero)
else:
# for ints and strings we can check exact non-equality
# for all other types this will only work if they define
# an __eq__
nondupe = values[1:] != values[:-1]
dupe_idx = np.append(0, np.nonzero(nondupe)[0] + 1)
dupe_len = np.diff(np.concatenate((dupe_idx, [len(values)])))
dupe_ok = np.logical_and(np.greater_equal(dupe_len, min_len),
np.less_equal(dupe_len, max_len))
groups = [order[i:(i + j)]
for i, j in zip(dupe_idx[dupe_ok],
dupe_len[dupe_ok])]
groups = np.array(groups)
return groups
def hashable_rows(data, digits=None):
"""
We turn our array into integers based on the precision
given by digits and then put them in a hashable format.
Parameters
---------
data : (n, m) array
Input data
digits : int or None
How many digits to add to hash if data is floating point
If None, tol.merge will be used
Returns
---------
hashable : (n,) array
Custom data type which can be sorted
or used as hash keys
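Examples
---------
Rows which are identical at the requested precision hash to
the same value:
In [1]: h = trimesh.grouping.hashable_rows(np.array([[1, 2], [3, 4], [1, 2]]))
In [2]: h[0] == h[2]
Out[2]: True
In [3]: h[0] == h[1]
Out[3]: False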
"""
# if there is no data return immediately
if len(data) == 0:
return np.array([])
# get array as integer to precision we care about
as_int = float_to_int(data, digits=digits)
# if it is flat integers already, return
if len(as_int.shape) == 1:
return as_int
# if array is 2D and smallish, we can try bitbanging
# this is significantly faster than the custom dtype
if len(as_int.shape) == 2 and as_int.shape[1] <= 4:
# time for some righteous bitbanging
# can we pack the whole row into a single 64 bit integer
precision = int(np.floor(64 / as_int.shape[1]))
# if the max value fits in the per-column bit width we can do this
if np.abs(as_int).max() < 2**(precision - 1):
# the resulting package
hashable = np.zeros(len(as_int), dtype=np.int64)
# loop through each column and bitwise xor to combine
# make sure as_int is int64 otherwise bit offset won't work
for offset, column in enumerate(as_int.astype(np.int64).T):
# will modify hashable in place
np.bitwise_xor(hashable,
column << (offset * precision),
out=hashable)
return hashable
# reshape array into magical data type that is weird but hashable
dtype = np.dtype((np.void, as_int.dtype.itemsize * as_int.shape[1]))
# make sure result is contiguous and flat
hashable = np.ascontiguousarray(as_int).view(dtype).reshape(-1)
return hashable
def float_to_int(data, digits=None, dtype=np.int32):
"""
Given a numpy array of float/bool/int, return as integers.
Parameters
-------------
data : (n, d) float, int, or bool
Input data
digits : float or int
Precision for float conversion
dtype : numpy.dtype
What datatype should result be returned as
Returns
-------------
as_int : (n, d) int
Data as integers
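Examples
---------
At two digits of precision 0.11 and 0.115 collapse to the
same integer while 0.12 does not:
In [1]: trimesh.grouping.float_to_int([0.11, 0.115, 0.12], digits=2)
Out[1]: array([11, 11, 12], dtype=int32)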
"""
# convert to any numpy array
data = np.asanyarray(data)
# if data is already an integer or boolean we're done
# if the data is empty we are also done
if data.dtype.kind in 'ib' or data.size == 0:
return data.astype(dtype)
# populate digits from kwargs
if digits is None:
digits = util.decimal_to_digits(tol.merge)
elif isinstance(digits, (float, np.floating)):
digits = util.decimal_to_digits(digits)
elif not (isinstance(digits, int) or isinstance(digits, np.integer)):
log.warning('Digits were passed as %s!', digits.__class__.__name__)
raise ValueError('Digits must be None, int, or float!')
# data is float so convert to large integers
data_max = np.abs(data).max() * 10**digits
# ignore passed dtype if we have something large
dtype = [np.int32, np.int64][int(data_max > 2**31)]
# multiply by requested power of ten
# then subtract small epsilon to avoid "go either way" rounding
# then do the rounding and convert to integer
as_int = np.round((data * 10 ** digits) - 1e-6).astype(dtype)
return as_int
def unique_ordered(data):
"""
Returns the same as np.unique, but ordered as per the
first occurrence of the unique value in data.
Examples
---------
In [1]: a = [0, 3, 3, 4, 1, 3, 0, 3, 2, 1]
In [2]: np.unique(a)
Out[2]: array([0, 1, 2, 3, 4])
In [3]: trimesh.grouping.unique_ordered(a)
Out[3]: array([0, 3, 4, 1, 2])
"""
data = np.asanyarray(data)
order = np.sort(np.unique(data, return_index=True)[1])
result = data[order]
return result
def unique_bincount(values,
minlength=0,
return_inverse=False,
return_counts=False):
"""
For arrays of integers find unique values using bin counting.
Roughly 10x faster for correct input than np.unique
Parameters
--------------
values : (n,) int
Values to find unique members of
minlength : int
Maximum value that will occur in values (values.max())
return_inverse : bool
If True, return an inverse such that unique[inverse] == values
return_counts : bool
If True, also return the number of times each
unique item appears in values
Returns
------------
unique : (m,) int
Unique values in original array
inverse : (n,) int, optional
An array such that unique[inverse] == values
Only returned if return_inverse is True
counts : (m,) int, optional
An array holding the counts of each unique item in values
Only returned if return_counts is True
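Examples
---------
In [1]: v = np.array([0, 2, 2, 3])
In [2]: unique, inverse = trimesh.grouping.unique_bincount(v, minlength=4, return_inverse=True)
In [3]: unique
Out[3]: array([0, 2, 3])
In [4]: (unique[inverse] == v).all()
Out[4]: True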
"""
values = np.asanyarray(values)
if len(values.shape) != 1 or values.dtype.kind != 'i':
raise ValueError('input must be 1D integers!')
try:
# count the number of occurrences of each value
counts = np.bincount(values, minlength=minlength)
except TypeError:
# casting failed on 32 bit windows
log.warning('casting failed, falling back!')
# fall back to numpy unique
return np.unique(values,
return_inverse=return_inverse,
return_counts=return_counts)
# which bins are occupied at all
# counts are integers so this works
unique_bin = counts.astype(bool)
# which values are unique
# indexes correspond to original values
unique = np.where(unique_bin)[0]
ret = (unique,)
if return_inverse:
# find the inverse to reconstruct original
inverse = (np.cumsum(unique_bin) - 1)[values]
ret += (inverse,)
if return_counts:
unique_counts = counts[unique]
ret += (unique_counts,)
if len(ret) == 1:
return ret[0]
return ret
def merge_runs(data, digits=None):
"""
Merge duplicate sequential values. This differs from unique_ordered
in that values can occur in multiple places in the sequence, but
only consecutive repeats are removed
Parameters
-----------
data: (n,) float or int
Returns
--------
merged: (m,) float or int
Examples
---------
In [1]: a
Out[1]:
array([-1, -1, -1, 0, 0, 1, 1, 2, 0,
3, 3, 4, 4, 5, 5, 6, 6, 7,
7, 8, 8, 9, 9, 9])
In [2]: trimesh.grouping.merge_runs(a)
Out[2]: array([-1, 0, 1, 2, 0, 3, 4, 5, 6, 7, 8, 9])
"""
data = np.asanyarray(data)
mask = np.abs(np.diff(data)) > tol.merge
mask = np.concatenate((np.array([True]), mask))
return data[mask]
def unique_float(data,
return_index=False,
return_inverse=False,
digits=None):
"""
Identical to the numpy.unique command, except evaluates floating point
numbers, using a specified number of digits.
If digits isn't specified, the library default tol.merge will be used.
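Examples
---------
A sketch assuming the default merge tolerance (tol.merge) of 1e-8,
so values closer than that are treated as duplicates:
In [1]: trimesh.grouping.unique_float([0.1, 0.1 + 1e-9, 0.2])
Out[1]: array([0.1, 0.2])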
"""
data = np.asanyarray(data)
as_int = float_to_int(data, digits)
_junk, unique, inverse = np.unique(as_int,
return_index=True,
return_inverse=True)
if (not return_index) and (not return_inverse):
return data[unique]
result = [data[unique]]
if return_index:
result.append(unique)
if return_inverse:
result.append(inverse)
return tuple(result)
def unique_rows(data, digits=None):
"""
Returns indices of unique rows. It will return the
first occurrence of a row that is duplicated:
[[1,2], [3,4], [1,2]] will return [0,1]
Parameters
---------
data : (n, m) array
Floating point data
digits : int or None
How many digits to consider
Returns
--------
unique : (j,) int
Index in data which is a unique row
inverse : (n,) int
Array to reconstruct original,
such that data[unique][inverse] == data
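Examples
---------
In [1]: data = np.array([[1, 2], [3, 4], [1, 2]])
In [2]: unique, inverse = trimesh.grouping.unique_rows(data)
In [3]: unique
Out[3]: array([0, 1])
In [4]: (data[unique][inverse] == data).all()
Out[4]: True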
"""
hashes = hashable_rows(data, digits=digits)
garbage, unique, inverse = np.unique(
hashes,
return_index=True,
return_inverse=True)
return unique, inverse
def unique_value_in_row(data, unique=None):
"""
For a 2D array of integers find the position of a
value in each row which only occurs once.
If more than one value in a row occurs exactly
once, the last such value is returned.
Parameters
----------
data : (n, d) int
Data to check values
unique : (m,) int
List of unique values contained in data.
Generated from np.unique if not passed
Returns
---------
result : (n, d) bool
With one or zero True values per row.
Examples
---------
In [0]: r = np.array([[-1, 1, 1],
[-1, 1, -1],
[-1, 1, 1],
[-1, 1, -1],
[-1, 1, -1]], dtype=np.int8)
In [1]: unique_value_in_row(r)
Out[1]:
array([[ True, False, False],
[False, True, False],
[ True, False, False],
[False, True, False],
[False, True, False]], dtype=bool)
In [2]: unique_value_in_row(r).sum(axis=1)
Out[2]: array([1, 1, 1, 1, 1])
In [3]: r[unique_value_in_row(r)]
Out[3]: array([-1, 1, -1, 1, 1], dtype=int8)
"""
if unique is None:
unique = np.unique(data)
data = np.asanyarray(data)
result = np.zeros_like(data, dtype=bool, subok=False)
for value in unique:
test = np.equal(data, value)
test_ok = test.sum(axis=1) == 1
result[test_ok] = test[test_ok]
return result
def group_rows(data, require_count=None, digits=None):
"""
Returns index groups of duplicate rows, for example:
[[1,2], [3,4], [1,2]] will return [[0,2], [1]]
Note that using require_count allows numpy advanced
indexing to be used in place of looping and
checking hashes and is ~10x faster.
Parameters
----------
data : (n, m) array
Data to group
require_count : None or int
Only return groups of a specified length, eg:
require_count = 2
[[1,2], [3,4], [1,2]] will return [[0,2]]
digits : None or int
If data is floating point how many decimals
to consider, or calculated from tol.merge
Returns
----------
groups : sequence (*,) int
Indices into data indicating identical rows.
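Examples
---------
With require_count set, only groups of exactly that length
are returned as a regular (g, require_count) array:
In [1]: trimesh.grouping.group_rows(np.array([[1, 2], [3, 4], [1, 2]]), require_count=2)
Out[1]: array([[0, 2]])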
"""
def group_dict():
"""
Simple hash table based grouping.
The loop and appends make this rather slow on
large arrays but it works on irregular groups.
"""
observed = dict()
hashable = hashable_rows(data, digits=digits)
for index, key in enumerate(hashable):
key_string = key.tobytes()
if key_string in observed:
observed[key_string].append(index)
else:
observed[key_string] = [index]
return np.array(list(observed.values()))
def group_slice():
# create a representation of the rows that can be sorted
hashable = hashable_rows(data, digits=digits)
# record the order of the rows so we can get the original indices back
# later
order = np.argsort(hashable)
# but for now, we want our hashes sorted
hashable = hashable[order]
# this is checking each neighbour for equality, example:
# hashable = [1, 1, 1]; dupe = [0, 0]
dupe = hashable[1:] != hashable[:-1]
# we want the first index of a group, so we can slice from that location
# example: hashable = [0 1 1]; dupe = [1,0]; dupe_idx = [0,1]
dupe_idx = np.append(0, np.nonzero(dupe)[0] + 1)
# if you wanted to use this one function to deal with non-regular groups
# you could use: np.array_split(dupe_idx)
# this is roughly 3x slower than using the group_dict method above.
start_ok = np.diff(
np.concatenate((dupe_idx, [len(hashable)]))) == require_count
groups = np.tile(dupe_idx[start_ok].reshape((-1, 1)),
require_count) + np.arange(require_count)
groups_idx = order[groups]
if require_count == 1:
return groups_idx.reshape(-1)
return groups_idx
if require_count is None:
return group_dict()
else:
return group_slice()
def boolean_rows(a, b, operation=np.intersect1d):
"""
Find the rows which occur in both of two arrays.
Parameters
---------
a: (n, d) int
Array with row vectors
b: (m, d) int
Array with row vectors
operation : function
Numpy boolean set operation function:
-np.intersect1d
-np.setdiff1d
Returns
--------
shared: (p, d) array containing rows in both a and b
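Examples
---------
Rows present in both input arrays:
In [1]: trimesh.grouping.boolean_rows([[1, 2], [3, 4]], [[3, 4], [5, 6]])
Out[1]: array([[3, 4]])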
"""
a = np.asanyarray(a, dtype=np.int64)
b = np.asanyarray(b, dtype=np.int64)
av = a.view([('', a.dtype)] * a.shape[1]).ravel()
bv = b.view([('', b.dtype)] * b.shape[1]).ravel()
shared = operation(av, bv).view(a.dtype).reshape(-1, a.shape[1])
return shared
def group_vectors(vectors,
angle=1e-4,
include_negative=False):
"""
Group vectors based on an angle tolerance, with the option to
include negative vectors.
Parameters
-----------
vectors : (n,3) float
Direction vectors to group
angle : float
Group vectors closer than this angle in radians
include_negative : bool
If True consider the same:
[0,0,1] and [0,0,-1]
Returns
------------
new_vectors : (m, 3) float
One merged direction per group
groups : (m,) sequence of int
Indices of source vectors
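Examples
---------
A sketch with two distinct directions, each repeated twice
(results checked approximately due to floating point error):
In [1]: vec = np.array([[0.0, 0, 1], [0, 0, 1], [1, 0, 0], [1, 0, 0]])
In [2]: new_vectors, groups = trimesh.grouping.group_vectors(vec)
In [3]: np.allclose(new_vectors, [[0, 0, 1], [1, 0, 0]])
Out[3]: True
In [4]: len(groups)
Out[4]: 2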
"""
vectors = np.asanyarray(vectors, dtype=np.float64)
angle = float(angle)
if include_negative:
vectors = util.vector_hemisphere(vectors)
spherical = util.vector_to_spherical(vectors)
angles, groups = group_distance(spherical, angle)
new_vectors = util.spherical_to_vector(angles)
return new_vectors, groups
def group_distance(values, distance):
"""
For each value not already assigned to a group, collect every
value within distance of it into a new group.
Parameters
---------
values : (n, d) float
Values of dimension d
distance : float
Maximum distance from a group's seed value to its members
Returns
----------
unique : (m, d) float
Median value of each group
groups : (m,) sequence of (*,) int
Indices of values that make up each group
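Examples
---------
A sketch: two clusters of values closer than the distance:
In [1]: values = np.array([[0.0], [0.001], [1.0], [1.001]])
In [2]: unique, groups = trimesh.grouping.group_distance(values, 0.01)
In [3]: np.allclose(unique, [[0.0005], [1.0005]])
Out[3]: True
In [4]: len(groups)
Out[4]: 2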
"""
values = np.asanyarray(values,
dtype=np.float64)
consumed = np.zeros(len(values), dtype=bool)
tree = cKDTree(values)
# (n, d) set of values that are unique
unique = []
# (n) sequence of indices in values
groups = []
for index, value in enumerate(values):
if consumed[index]:
continue
group = np.array(tree.query_ball_point(value, distance),
dtype=np.int64)
consumed[group] = True
unique.append(np.median(values[group], axis=0))
groups.append(group)
return np.array(unique), np.array(groups)
def clusters(points, radius):
"""
Find clusters of points which have neighbours closer than radius
Parameters
---------
points : (n, d) float
Points of dimension d
radius : float
Max distance between points in a cluster
Returns
----------
groups : (m,) sequence of int
Indices of points in a cluster
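Examples
---------
A sketch; the point with no neighbour inside radius does not
appear in any group:
In [1]: pts = np.array([[0.0, 0.0], [0.0, 0.1], [10.0, 10.0]])
In [2]: groups = trimesh.grouping.clusters(pts, radius=1.0)
In [3]: len(groups)
Out[3]: 1
In [4]: set(groups[0])
Out[4]: {0, 1}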
"""
from . import graph
tree = cKDTree(points)
# some versions return pairs as a set of tuples
pairs = tree.query_pairs(r=radius, output_type='ndarray')
# group connected components
groups = graph.connected_components(pairs)
return groups
def blocks(data,
min_len=2,
max_len=np.inf,
wrap=False,
digits=None,
only_nonzero=False):
"""
Find the indices in an array of contiguous blocks
of equal values.
Parameters
------------
data : (n,) array
Data to find blocks on
min_len : int
The minimum length group to be returned
max_len : int
The maximum length group to be returned
wrap : bool
Combine blocks on both ends of 1D array
digits : None or int
If dealing with floats how many digits to consider
only_nonzero : bool
Only return blocks of non-zero values
Returns
---------
blocks : (m) sequence of (*,) int
Indices referencing data
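Examples
---------
Find runs of at least two identical nonzero values:
In [1]: trimesh.grouping.blocks(np.array([0, 0, 1, 1, 1, 0]), min_len=2, only_nonzero=True)
Out[1]: [array([2, 3, 4])]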
"""
data = float_to_int(data, digits=digits)
# find the inflection points
# AKA locations where the value changes between neighbours
infl = np.concatenate(([0],
np.nonzero(np.diff(data))[0] + 1,
[len(data)]))
infl_len = np.diff(infl)
# check the length of each group
infl_ok = np.logical_and(infl_len >= min_len,
infl_len <= max_len)
if only_nonzero:
# check to make sure the values of each contiguous block
# are True by checking the first value of each block
infl_ok = np.logical_and(
infl_ok, data[infl[:-1]])
# inflate start/end indexes into full ranges of values
blocks = [np.arange(infl[i], infl[i + 1])
for i, ok in enumerate(infl_ok) if ok]
if wrap:
# wrap only matters if first and last points are the same
if data[0] != data[-1]:
return blocks
# if we are only grouping nonzero things and
# the first and last point are zero we can exit
if only_nonzero and not bool(data[0]):
return blocks
# so now first point equals last point, so the cases are:
# - first and last point are in a block: combine two blocks
# - first OR last point are in block: add other point to block
# - neither are in a block: check if combined is eligible block
# first point is in a block
first = len(blocks) > 0 and blocks[0][0] == 0
# last point is in a block
last = len(blocks) > 0 and blocks[-1][-1] == (len(data) - 1)
# CASE: first and last point are BOTH in block: combine blocks
if first and last:
blocks[0] = np.append(blocks[-1], blocks[0])
blocks.pop()
else:
# combined length
combined = infl_len[0] + infl_len[-1]
# exit if lengths aren't OK
if combined < min_len or combined > max_len:
return blocks
# new block combines both ends
new_block = np.append(np.arange(infl[-2], infl[-1]),
np.arange(infl[0], infl[1]))
# we are in a first OR last situation now
if first:
# first was already in a block so replace it with combined
blocks[0] = new_block
elif last:
# last was already in a block so replace with superset
blocks[-1] = new_block
else:
# both are false
# combined length generated new block
blocks.append(new_block)
return blocks
def group_min(groups, data):
"""
Given an array of group labels find the minimum element
of data within each group.
Parameters
-----------
groups : (n,) int
Group label for each element in data
data : (n,)
The data the group labels reference
Returns
-----------
minimums : (m,)
Minimum value of data for each unique group label
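Examples
---------
In [1]: groups = np.array([0, 0, 1, 1])
In [2]: data = np.array([4, 2, 7, 5])
In [3]: trimesh.grouping.group_min(groups, data)
Out[3]: array([2, 5])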
"""
# sort with major key groups, minor key data
order = np.lexsort((data, groups))
groups = groups[order] # this is only needed if groups is unsorted
data = data[order]
# construct an index which marks borders between groups
index = np.empty(len(groups), 'bool')
index[0] = True
index[1:] = groups[1:] != groups[:-1]
return data[index]