""" grouping.py ------------- Functions for grouping values and rows. """ import numpy as np from . import util from .constants import log, tol try: from scipy.spatial import cKDTree except BaseException as E: # wrapping just ImportError fails in some cases # will raise the error when someone tries to use KDtree from . import exceptions cKDTree = exceptions.closure(E) def merge_vertices(mesh, merge_tex=False, merge_norm=False, digits_vertex=None, digits_norm=2, digits_uv=4, **kwargs): """ Removes duplicate vertices. By default, based on integer hashes of each row. Parameters ------------- mesh : Trimesh object Mesh to merge vertices on merge_tex : bool If True textured meshes with UV coordinates will have vertices merged regardless of UV coordinates merge_norm : bool If True, meshes with vertex normals will have vertices merged ignoring different normals digits_vertex : None or int Number of digits to consider for vertex position digits_norm : int Number of digits to consider for unit normals digits_uv : int Number of digits to consider for UV coordinates """ # use tol.merge if digit precision not passed if not isinstance(digits_vertex, int): digits_vertex = util.decimal_to_digits(tol.merge) # if we have a ton of unreferenced vertices it will # make the unique_rows call super slow so cull first if hasattr(mesh, 'faces') and len(mesh.faces) > 0: referenced = np.zeros(len(mesh.vertices), dtype=np.bool) referenced[mesh.faces] = True else: # this is used for geometry without faces referenced = np.ones(len(mesh.vertices), dtype=np.bool) # collect vertex attributes into sequence we can stack stacked = [mesh.vertices * (10 ** digits_vertex)] # UV texture visuals require us to update the # vertices and normals differently if (not merge_tex and mesh.visual.defined and mesh.visual.kind == 'texture' and mesh.visual.uv is not None and len(mesh.visual.uv) == len(mesh.vertices)): # get an array with vertices and UV coordinates # converted to integers at requested precision stacked.append(mesh.visual.uv * (10 ** digits_uv)) # check to see if we have vertex normals normals = mesh._cache['vertex_normals'] if not merge_norm and np.shape(normals) == mesh.vertices.shape: stacked.append(normals * (10 ** digits_norm)) # stack collected vertex properties and round to integer stacked = np.column_stack(stacked).round().astype(np.int64) # check unique rows of referenced vertices u, i = unique_rows(stacked[referenced]) # construct an inverse using the subset inverse = np.zeros(len(mesh.vertices), dtype=np.int64) inverse[referenced] = i # get the vertex mask mask = np.nonzero(referenced)[0][u] # run the update including normals and UV coordinates mesh.update_vertices(mask=mask, inverse=inverse) def group(values, min_len=0, max_len=np.inf): """ Return the indices of values that are identical Parameters ---------- values : (n,) int Values to group min_len : int The shortest group allowed All groups will have len >= min_length max_len : int The longest group allowed All groups will have len <= max_length Returns ---------- groups : sequence Contains indices to form groups IE [0,1,0,1] returns [[0,2], [1,3]] """ original = np.asanyarray(values) # save the sorted order and then apply it order = original.argsort() values = original[order] # find the indexes which are duplicates if values.dtype.kind == 'f': # for floats in a sorted array, neighbors are not duplicates # if the difference between them is greater than approximate zero nondupe = np.greater(np.abs(np.diff(values)), tol.zero) else: # for ints and strings we can 

def group(values, min_len=0, max_len=np.inf):
    """
    Return the indices of values that are identical.

    Parameters
    ----------
    values : (n,) int or float
      Values to group
    min_len : int
      The shortest group allowed
      All groups will have len >= min_len
    max_len : int
      The longest group allowed
      All groups will have len <= max_len

    Returns
    ----------
    groups : sequence
      Contains indices to form groups
      e.g. [0,1,0,1] returns [[0,2], [1,3]]
    """
    original = np.asanyarray(values)

    # save the sorted order and then apply it
    order = original.argsort()
    values = original[order]

    # find the indexes which are duplicates
    if values.dtype.kind == 'f':
        # for floats in a sorted array, neighbors are not duplicates
        # if the difference between them is greater than approximate zero
        nondupe = np.greater(np.abs(np.diff(values)), tol.zero)
    else:
        # for ints and strings we can check exact non-equality
        # for all other types this will only work if they defined
        # an __eq__
        nondupe = values[1:] != values[:-1]

    dupe_idx = np.append(0, np.nonzero(nondupe)[0] + 1)
    dupe_len = np.diff(np.concatenate((dupe_idx, [len(values)])))
    dupe_ok = np.logical_and(np.greater_equal(dupe_len, min_len),
                             np.less_equal(dupe_len, max_len))
    groups = [order[i:(i + j)]
              for i, j in zip(dupe_idx[dupe_ok], dupe_len[dupe_ok])]
    groups = np.array(groups)

    return groups


def hashable_rows(data, digits=None):
    """
    We turn our array into integers based on the precision
    given by digits and then put them in a hashable format.

    Parameters
    ---------
    data : (n, m) array
      Input data
    digits : int or None
      How many digits to add to hash if data is floating point
      If None, tol.merge will be used

    Returns
    ---------
    hashable : (n,) array
      Custom data type which can be sorted
      or used as hash keys
    """
    # if there is no data return immediately
    if len(data) == 0:
        return np.array([])

    # get array as integer to precision we care about
    as_int = float_to_int(data, digits=digits)

    # if it is flat integers already, return
    if len(as_int.shape) == 1:
        return as_int

    # if array is 2D and smallish, we can try bitbanging
    # this is significantly faster than the custom dtype
    if len(as_int.shape) == 2 and as_int.shape[1] <= 4:
        # time for some righteous bitbanging
        # can we pack the whole row into a single 64 bit integer
        precision = int(np.floor(64 / as_int.shape[1]))
        # if the max value is less than precision we can do this
        if np.abs(as_int).max() < 2**(precision - 1):
            # the resulting package
            hashable = np.zeros(len(as_int), dtype=np.int64)
            # loop through each column and bitwise xor to combine
            # make sure as_int is int64 otherwise bit offset won't work
            for offset, column in enumerate(as_int.astype(np.int64).T):
                # will modify hashable in place
                np.bitwise_xor(hashable,
                               column << (offset * precision),
                               out=hashable)
            return hashable

    # reshape array into magical data type that is weird but hashable
    dtype = np.dtype((np.void, as_int.dtype.itemsize * as_int.shape[1]))
    # make sure result is contiguous and flat
    hashable = np.ascontiguousarray(as_int).view(dtype).reshape(-1)
    return hashable
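
# A rough illustration of the bit-packing path above (comment sketch only):
# with 2 columns each column gets floor(64 / 2) = 32 bits of the result,
# so a row of small non-negative integers like [1, 2] is combined into a
# single int64 roughly as 1 ^ (2 << 32). Equal rows always produce equal
# packed values, which is what the hash-based grouping below relies on.
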

def float_to_int(data, digits=None, dtype=np.int32):
    """
    Given a numpy array of float/bool/int, return as integers.

    Parameters
    -------------
    data : (n, d) float, int, or bool
      Input data
    digits : float or int
      Precision for float conversion
    dtype : numpy.dtype
      What datatype the result should be returned as

    Returns
    -------------
    as_int : (n, d) int
      Data as integers
    """
    # convert to any numpy array
    data = np.asanyarray(data)

    # if data is already an integer or boolean we're done
    # if the data is empty we are also done
    if data.dtype.kind in 'ib' or data.size == 0:
        return data.astype(dtype)

    # populate digits from kwargs
    if digits is None:
        digits = util.decimal_to_digits(tol.merge)
    elif isinstance(digits, (float, np.floating)):
        digits = util.decimal_to_digits(digits)
    elif not isinstance(digits, (int, np.integer)):
        log.warning('Digits were passed as %s!',
                    digits.__class__.__name__)
        raise ValueError('Digits must be None, int, or float!')

    # data is float so convert to large integers
    data_max = np.abs(data).max() * 10**digits
    # ignore passed dtype if we have something large
    dtype = [np.int32, np.int64][int(data_max > 2**31)]

    # multiply by requested power of ten
    # then subtract small epsilon to avoid "go either way" rounding
    # then do the rounding and convert to integer
    as_int = np.round((data * 10 ** digits) - 1e-6).astype(dtype)

    return as_int


def unique_ordered(data):
    """
    Returns the same as np.unique, but ordered as per the
    first occurrence of the unique value in data.

    Examples
    ---------
    In [1]: a = [0, 3, 3, 4, 1, 3, 0, 3, 2, 1]

    In [2]: np.unique(a)
    Out[2]: array([0, 1, 2, 3, 4])

    In [3]: trimesh.grouping.unique_ordered(a)
    Out[3]: array([0, 3, 4, 1, 2])
    """
    data = np.asanyarray(data)
    order = np.sort(np.unique(data, return_index=True)[1])
    result = data[order]

    return result


def unique_bincount(values,
                    minlength=0,
                    return_inverse=False,
                    return_counts=False):
    """
    For arrays of integers find unique values using bin counting.
    Roughly 10x faster for correct input than np.unique

    Parameters
    --------------
    values : (n,) int
      Values to find unique members of
    minlength : int
      Maximum value that will occur in values (values.max())
    return_inverse : bool
      If True, return an inverse such that unique[inverse] == values
    return_counts : bool
      If True, also return the number of times each
      unique item appears in values

    Returns
    ------------
    unique : (m,) int
      Unique values in original array
    inverse : (n,) int, optional
      An array such that unique[inverse] == values
      Only returned if return_inverse is True
    counts : (m,) int, optional
      An array holding the counts of each unique item in values
      Only returned if return_counts is True
    """
    values = np.asanyarray(values)
    if len(values.shape) != 1 or values.dtype.kind != 'i':
        raise ValueError('input must be 1D integers!')

    try:
        # count the number of occurrences of each value
        counts = np.bincount(values, minlength=minlength)
    except TypeError:
        # casting failed on 32 bit windows
        log.warning('casting failed, falling back!')
        # fall back to numpy unique
        return np.unique(values,
                         return_inverse=return_inverse,
                         return_counts=return_counts)

    # which bins are occupied at all
    # counts are integers so this works
    unique_bin = counts.astype(bool)

    # which values are unique
    # indexes correspond to original values
    unique = np.where(unique_bin)[0]
    ret = (unique,)

    if return_inverse:
        # find the inverse to reconstruct original
        inverse = (np.cumsum(unique_bin) - 1)[values]
        ret += (inverse,)

    if return_counts:
        unique_counts = counts[unique]
        ret += (unique_counts,)

    if len(ret) == 1:
        return ret[0]
    return ret
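
# A small illustrative sketch of the bincount-based inverse above (values
# must be non-negative 1D integers):
#
#   values = np.array([4, 1, 1, 3])
#   unique, inverse = unique_bincount(values, return_inverse=True)
#   # unique  -> array([1, 3, 4])
#   # inverse -> array([2, 0, 0, 1]) so unique[inverse] == values
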

def merge_runs(data, digits=None):
    """
    Merge duplicate sequential values.

    This differs from unique_ordered in that values can occur in
    multiple places in the sequence, but only consecutive repeats
    are removed.

    Parameters
    -----------
    data : (n,) float or int

    Returns
    --------
    merged : (m,) float or int

    Examples
    ---------
    In [1]: a
    Out[1]:
    array([-1, -1, -1,  0,  0,  1,  1,  2,  0,  3,  3,  4,
            4,  5,  5,  6,  6,  7,  7,  8,  8,  9,  9,  9])

    In [2]: trimesh.grouping.merge_runs(a)
    Out[2]: array([-1,  0,  1,  2,  0,  3,  4,  5,  6,  7,  8,  9])
    """
    data = np.asanyarray(data)
    mask = np.abs(np.diff(data)) > tol.merge
    mask = np.concatenate((np.array([True]), mask))

    return data[mask]


def unique_float(data,
                 return_index=False,
                 return_inverse=False,
                 digits=None):
    """
    Identical to the numpy.unique command, except evaluates floating point
    numbers using a specified number of digits.

    If digits isn't specified, the library default tol.merge will be used.
    """
    data = np.asanyarray(data)
    as_int = float_to_int(data, digits)
    _junk, unique, inverse = np.unique(as_int,
                                       return_index=True,
                                       return_inverse=True)

    if (not return_index) and (not return_inverse):
        return data[unique]

    result = [data[unique]]

    if return_index:
        result.append(unique)
    if return_inverse:
        result.append(inverse)
    return tuple(result)


def unique_rows(data, digits=None):
    """
    Returns indices of unique rows. It will return the
    first occurrence of a row that is duplicated:
    [[1,2], [3,4], [1,2]] will return [0,1]

    Parameters
    ---------
    data : (n, m) array
      Floating point data
    digits : int or None
      How many digits to consider

    Returns
    --------
    unique : (j,) int
      Index in data which is a unique row
    inverse : (n,) int
      Array to reconstruct original
      Example: unique[inverse] == data
    """
    hashes = hashable_rows(data, digits=digits)
    garbage, unique, inverse = np.unique(hashes,
                                         return_index=True,
                                         return_inverse=True)
    return unique, inverse


def unique_value_in_row(data, unique=None):
    """
    For a 2D array of integers find the position of a value in each
    row which only occurs once. If there are more than one value per
    row which occur once, the last one is returned.

    Parameters
    ----------
    data : (n, d) int
      Data to check values
    unique : (m,) int
      List of unique values contained in data.
      Generated from np.unique if not passed

    Returns
    ---------
    result : (n, d) bool
      With one or zero True values per row.

    Examples
    -------------------------------------
    In [0]: r = np.array([[-1,  1,  1],
                          [-1,  1, -1],
                          [-1,  1,  1],
                          [-1,  1, -1],
                          [-1,  1, -1]], dtype=np.int8)

    In [1]: unique_value_in_row(r)
    Out[1]:
    array([[ True, False, False],
           [False,  True, False],
           [ True, False, False],
           [False,  True, False],
           [False,  True, False]], dtype=bool)

    In [2]: unique_value_in_row(r).sum(axis=1)
    Out[2]: array([1, 1, 1, 1, 1])

    In [3]: r[unique_value_in_row(r)]
    Out[3]: array([-1,  1, -1,  1,  1], dtype=int8)
    """
    if unique is None:
        unique = np.unique(data)
    data = np.asanyarray(data)

    result = np.zeros_like(data, dtype=bool, subok=False)
    for value in unique:
        test = np.equal(data, value)
        test_ok = test.sum(axis=1) == 1
        result[test_ok] = test[test_ok]
    return result
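
# A small illustrative sketch of unique_rows (defined above); the ordering
# of `unique` follows the sorted row hashes:
#
#   data = np.array([[1.0, 2.0], [3.0, 4.0], [1.0, 2.0]])
#   unique, inverse = unique_rows(data)
#   # `unique` holds the index of the first occurrence of each distinct
#   # row (here the rows at 0 and 1) and data[unique][inverse] == data
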

def group_rows(data, require_count=None, digits=None):
    """
    Returns index groups of duplicate rows, for example:
    [[1,2], [3,4], [1,2]] will return [[0,2], [1]]

    Note that using require_count allows numpy advanced
    indexing to be used in place of looping and checking
    hashes and is ~10x faster.

    Parameters
    ----------
    data : (n, m) array
      Data to group
    require_count : None or int
      Only return groups of a specified length, eg:
      require_count = 2
      [[1,2], [3,4], [1,2]] will return [[0,2]]
    digits : None or int
      If data is floating point how many decimals
      to consider, or calculated from tol.merge

    Returns
    ----------
    groups : sequence (*,) int
      Indices of identical rows in data.
    """

    def group_dict():
        """
        Simple hash table based grouping.
        The loop and appends make this rather slow on
        large arrays, but it works on irregular groups.
        """
        observed = dict()
        hashable = hashable_rows(data, digits=digits)
        for index, key in enumerate(hashable):
            key_string = key.tobytes()
            if key_string in observed:
                observed[key_string].append(index)
            else:
                observed[key_string] = [index]
        return np.array(list(observed.values()))

    def group_slice():
        # create a representation of the rows that can be sorted
        hashable = hashable_rows(data, digits=digits)
        # record the order of the rows so we can get the
        # original indices back later
        order = np.argsort(hashable)
        # but for now, we want our hashes sorted
        hashable = hashable[order]
        # this is checking each neighbour for equality
        # example: hashable = [1, 1, 1]; dupe = [0, 0]
        dupe = hashable[1:] != hashable[:-1]
        # we want the first index of a group so we can slice
        # from that location
        # example: hashable = [0 1 1]; dupe = [1,0]; dupe_idx = [0,1]
        dupe_idx = np.append(0, np.nonzero(dupe)[0] + 1)
        # if you wanted to use this one function to deal with
        # non-regular groups you could use: np.array_split(dupe_idx)
        # this is roughly 3x slower than using the group_dict method above
        start_ok = np.diff(
            np.concatenate((dupe_idx, [len(hashable)]))) == require_count
        groups = np.tile(dupe_idx[start_ok].reshape((-1, 1)),
                         require_count) + np.arange(require_count)
        groups_idx = order[groups]
        if require_count == 1:
            return groups_idx.reshape(-1)
        return groups_idx

    if require_count is None:
        return group_dict()
    else:
        return group_slice()


def boolean_rows(a, b, operation=np.intersect1d):
    """
    Find the rows which occur in both arrays.

    Parameters
    ---------
    a : (n, d) int
      Array with row vectors
    b : (m, d) int
      Array with row vectors
    operation : function
      Numpy boolean set operation function:
        - np.intersect1d
        - np.setdiff1d

    Returns
    --------
    shared : (p, d) int
      Array containing rows in both a and b
    """
    a = np.asanyarray(a, dtype=np.int64)
    b = np.asanyarray(b, dtype=np.int64)

    av = a.view([('', a.dtype)] * a.shape[1]).ravel()
    bv = b.view([('', b.dtype)] * b.shape[1]).ravel()
    shared = operation(av, bv).view(a.dtype).reshape(-1, a.shape[1])
    return shared


def group_vectors(vectors,
                  angle=1e-4,
                  include_negative=False):
    """
    Group vectors based on an angle tolerance, with the option
    to include negative vectors.

    Parameters
    -----------
    vectors : (n, 3) float
      Direction vectors
    angle : float
      Group vectors closer than this angle in radians
    include_negative : bool
      If True consider the same:
      [0, 0, 1] and [0, 0, -1]

    Returns
    ------------
    new_vectors : (m, 3) float
      Direction vectors
    groups : (m,) sequence of int
      Indices of source vectors
    """
    vectors = np.asanyarray(vectors, dtype=np.float64)
    angle = float(angle)

    if include_negative:
        vectors = util.vector_hemisphere(vectors)

    spherical = util.vector_to_spherical(vectors)
    angles, groups = group_distance(spherical, angle)
    new_vectors = util.spherical_to_vector(angles)
    return new_vectors, groups
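
# A small illustrative sketch of group_rows (defined above):
#
#   data = np.array([[1, 2], [3, 4], [1, 2]])
#   group_rows(data)                   # the groups are [0, 2] and [1]
#   group_rows(data, require_count=2)  # -> array([[0, 2]])
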

def group_distance(values, distance):
    """
    Find groups of values where every member of a group is
    within `distance` of the value used to seed the group.

    Parameters
    ---------
    values : (n, d) float
      Points of dimension d
    distance : float
      Max distance between a group's seed value and its members

    Returns
    ----------
    unique : (m, d) float
      Median value of each group
    groups : (m,) sequence of int
      Indexes of values that make up a group
    """
    values = np.asanyarray(values, dtype=np.float64)

    consumed = np.zeros(len(values), dtype=bool)
    tree = cKDTree(values)

    # (m, d) set of representative values, one per group
    unique = []
    # (m,) sequence of index arrays into values
    groups = []

    for index, value in enumerate(values):
        if consumed[index]:
            continue
        group = np.array(tree.query_ball_point(value, distance),
                         dtype=np.int64)
        consumed[group] = True
        unique.append(np.median(values[group], axis=0))
        groups.append(group)
    return np.array(unique), np.array(groups)


def clusters(points, radius):
    """
    Find clusters of points which have neighbours closer than radius.

    Parameters
    ---------
    points : (n, d) float
      Points of dimension d
    radius : float
      Max distance between points in a cluster

    Returns
    ----------
    groups : (m,) sequence of int
      Indices of points in a cluster
    """
    from . import graph
    tree = cKDTree(points)

    # some versions return pairs as a set of tuples
    pairs = tree.query_pairs(r=radius, output_type='ndarray')
    # group connected components
    groups = graph.connected_components(pairs)

    return groups
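
# A small illustrative sketch of clusters (defined above), assuming scipy
# is available; two tight pairs of 2D points far from each other:
#
#   points = np.array([[0.0, 0.0], [0.1, 0.0],
#                      [10.0, 10.0], [10.1, 10.0]])
#   clusters(points, radius=0.5)
#   # -> two groups, one with indices {0, 1} and one with {2, 3}
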

def blocks(data,
           min_len=2,
           max_len=np.inf,
           wrap=False,
           digits=None,
           only_nonzero=False):
    """
    Find the indices in an array of contiguous blocks
    of equal values.

    Parameters
    ------------
    data : (n,) array
      Data to find blocks on
    min_len : int
      The minimum length group to be returned
    max_len : int
      The maximum length group to be returned
    wrap : bool
      Combine blocks on both ends of 1D array
    digits : None or int
      If dealing with floats how many digits to consider
    only_nonzero : bool
      Only return blocks of non-zero values

    Returns
    ---------
    blocks : (m,) sequence of (*,) int
      Indices referencing data
    """
    data = float_to_int(data, digits=digits)

    # find the inflection points
    # AKA locations where the value changes
    infl = np.concatenate(([0],
                           np.nonzero(np.diff(data))[0] + 1,
                           [len(data)]))
    infl_len = np.diff(infl)

    # check the length of each group
    infl_ok = np.logical_and(infl_len >= min_len,
                             infl_len <= max_len)

    if only_nonzero:
        # check to make sure the values of each contiguous block
        # are True by checking the first value of each block
        infl_ok = np.logical_and(
            infl_ok, data[infl[:-1]])

    # inflate start/end indexes into full ranges of values
    blocks = [np.arange(infl[i], infl[i + 1])
              for i, ok in enumerate(infl_ok) if ok]

    if wrap:
        # wrap only matters if first and last points are the same
        if data[0] != data[-1]:
            return blocks
        # if we are only grouping nonzero things and
        # the first and last point are zero we can exit
        if only_nonzero and not bool(data[0]):
            return blocks

        # so now first point equals last point, so the cases are:
        # - first and last point are in a block: combine two blocks
        # - first OR last point are in block: add other point to block
        # - neither are in a block: check if combined is eligible block

        # first point is in a block
        first = len(blocks) > 0 and blocks[0][0] == 0
        # last point is in a block
        last = len(blocks) > 0 and blocks[-1][-1] == (len(data) - 1)

        # CASE: first and last point are BOTH in block: combine blocks
        if first and last:
            blocks[0] = np.append(blocks[-1], blocks[0])
            blocks.pop()
        else:
            # combined length
            combined = infl_len[0] + infl_len[-1]
            # exit if lengths aren't OK
            if combined < min_len or combined > max_len:
                return blocks
            # new block combines both ends
            new_block = np.append(np.arange(infl[-2], infl[-1]),
                                  np.arange(infl[0], infl[1]))
            # we are in a first OR last situation now
            if first:
                # first was already in a block
                # so replace it with the combined block
                blocks[0] = new_block
            elif last:
                # last was already in a block
                # so replace it with the combined superset
                blocks[-1] = new_block
            else:
                # neither end was in a block but the
                # combined length generated a new eligible block
                blocks.append(new_block)

    return blocks


def group_min(groups, data):
    """
    Given group labels, find the minimum element of data
    within each group.

    Parameters
    -----------
    groups : (n,) int
      Group label for each element in data
    data : (n,)
      The data that the group labels reference

    Returns
    -----------
    minimums : (m,)
      Minimum value of data for each of the m unique groups
    """
    # sort with major key groups, minor key data
    order = np.lexsort((data, groups))
    groups = groups[order]  # this is only needed if groups is unsorted
    data = data[order]
    # construct an index which marks borders between groups
    index = np.empty(len(groups), dtype=bool)
    index[0] = True
    index[1:] = groups[1:] != groups[:-1]
    return data[index]
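
# A small illustrative sketch of group_min (defined above); group labels
# are per-element and parallel to data:
#
#   groups = np.array([0, 0, 1, 1, 1])
#   data = np.array([5.0, 2.0, 9.0, 1.0, 4.0])
#   group_min(groups, data)
#   # -> array([2., 1.]), the minimum of data within group 0 and group 1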