hub/venv/lib/python3.7/site-packages/trimesh/grouping.py

"""
grouping.py
-------------

Functions for grouping values and rows.
"""

import numpy as np

from . import util

from .constants import log, tol

try:
    from scipy.spatial import cKDTree
except BaseException as E:
    # wrapping just ImportError fails in some cases
    # will raise the error when someone tries to use KDtree
    from . import exceptions
    cKDTree = exceptions.closure(E)


def merge_vertices(mesh,
                   merge_tex=False,
                   merge_norm=False,
                   digits_vertex=None,
                   digits_norm=2,
                   digits_uv=4,
                   **kwargs):
    """
    Removes duplicate vertices. By default,  based on integer hashes of
    each row.

    Parameters
    -------------
    mesh : Trimesh object
      Mesh to merge vertices on
    merge_tex : bool
      If True textured meshes with UV coordinates will
      have vertices merged regardless of UV coordinates
    merge_norm : bool
      If True, meshes with vertex normals will have
      vertices merged ignoring different normals
    digits_vertex : None or int
      Number of digits to consider for vertex position
    digits_norm : int
      Number of digits to consider for unit normals
    digits_uv : int
      Number of digits to consider for UV coordinates
    """
    # use tol.merge if digit precision not passed
    if not isinstance(digits_vertex, int):
        digits_vertex = util.decimal_to_digits(tol.merge)

    # if we have a ton of unreferenced vertices it will
    # make the unique_rows call super slow so cull first
    if hasattr(mesh, 'faces') and len(mesh.faces) > 0:
        referenced = np.zeros(len(mesh.vertices), dtype=np.bool)
        referenced[mesh.faces] = True
    else:
        # this is used for geometry without faces
        referenced = np.ones(len(mesh.vertices), dtype=np.bool)

    # collect vertex attributes into sequence we can stack
    stacked = [mesh.vertices * (10 ** digits_vertex)]

    # UV texture visuals require us to update the
    # vertices and normals differently
    if (not merge_tex and
        mesh.visual.defined and
        mesh.visual.kind == 'texture' and
        mesh.visual.uv is not None and
            len(mesh.visual.uv) == len(mesh.vertices)):
        # get an array with vertices and UV coordinates
        # converted to integers at requested precision
        stacked.append(mesh.visual.uv * (10 ** digits_uv))

    # check to see if we have vertex normals
    normals = mesh._cache['vertex_normals']
    if not merge_norm and np.shape(normals) == mesh.vertices.shape:
        stacked.append(normals * (10 ** digits_norm))

    # stack collected vertex properties and round to integer
    stacked = np.column_stack(stacked).round().astype(np.int64)

    # check unique rows of referenced vertices
    u, i = unique_rows(stacked[referenced])

    # construct an inverse using the subset
    inverse = np.zeros(len(mesh.vertices), dtype=np.int64)
    inverse[referenced] = i
    # get the vertex mask
    mask = np.nonzero(referenced)[0][u]
    # run the update including normals and UV coordinates
    mesh.update_vertices(mask=mask, inverse=inverse)


def group(values, min_len=0, max_len=np.inf):
    """
    Return the indices of values that are identical

    Parameters
    ----------
    values : (n,) int
      Values to group
    min_len : int
      The shortest group allowed
      All groups will have len >= min_length
    max_len : int
      The longest group allowed
      All groups will have len <= max_length

    Returns
    ----------
    groups : sequence
      Contains indices to form groups
      IE [0,1,0,1] returns [[0,2], [1,3]]
    """
    original = np.asanyarray(values)

    # save the sorted order and then apply it
    order = original.argsort()
    values = original[order]

    # find the indexes which are duplicates
    if values.dtype.kind == 'f':
        # for floats in a sorted array, neighbors are not duplicates
        # if the difference between them is greater than approximate zero
        nondupe = np.greater(np.abs(np.diff(values)), tol.zero)
    else:
        # for ints and strings we can check exact non- equality
        # for all other types this will only work if they defined
        # an __eq__
        nondupe = values[1:] != values[:-1]

    dupe_idx = np.append(0, np.nonzero(nondupe)[0] + 1)
    dupe_len = np.diff(np.concatenate((dupe_idx, [len(values)])))
    dupe_ok = np.logical_and(np.greater_equal(dupe_len, min_len),
                             np.less_equal(dupe_len, max_len))
    groups = [order[i:(i + j)]
              for i, j in zip(dupe_idx[dupe_ok],
                              dupe_len[dupe_ok])]
    groups = np.array(groups)

    return groups


def hashable_rows(data, digits=None):
    """
    We turn our array into integers based on the precision
    given by digits and then put them in a hashable format.

    Parameters
    ---------
    data : (n, m) array
      Input data
    digits : int or None
      How many digits to add to hash if data is floating point
      If None, tol.merge will be used

    Returns
    ---------
    hashable : (n,) array
      Custom data type which can be sorted
      or used as hash keys
    """
    # if there is no data return immediately
    if len(data) == 0:
        return np.array([])

    # get array as integer to precision we care about
    as_int = float_to_int(data, digits=digits)

    # if it is flat integers already, return
    if len(as_int.shape) == 1:
        return as_int

    # if array is 2D and smallish, we can try bitbanging
    # this is significantly faster than the custom dtype
    if len(as_int.shape) == 2 and as_int.shape[1] <= 4:
        # time for some righteous bitbanging
        # can we pack the whole row into a single 64 bit integer
        precision = int(np.floor(64 / as_int.shape[1]))
        # if the max value is less than precision we can do this
        if np.abs(as_int).max() < 2**(precision - 1):
            # the resulting package
            hashable = np.zeros(len(as_int), dtype=np.int64)
            # loop through each column and bitwise xor to combine
            # make sure as_int is int64 otherwise bit offset won't work
            for offset, column in enumerate(as_int.astype(np.int64).T):
                # will modify hashable in place
                np.bitwise_xor(hashable,
                               column << (offset * precision),
                               out=hashable)
            return hashable

    # reshape array into magical data type that is weird but hashable
    dtype = np.dtype((np.void, as_int.dtype.itemsize * as_int.shape[1]))
    # make sure result is contiguous and flat
    hashable = np.ascontiguousarray(as_int).view(dtype).reshape(-1)
    return hashable


def float_to_int(data, digits=None, dtype=np.int32):
    """
    Given a numpy array of float/bool/int, return as integers.

    Parameters
    -------------
    data :  (n, d) float, int, or bool
      Input data
    digits : float or int
      Precision for float conversion
    dtype : numpy.dtype
      What datatype should result be returned as

    Returns
    -------------
    as_int : (n, d) int
      Data as integers
    """
    # convert to any numpy array
    data = np.asanyarray(data)

    # if data is already an integer or boolean we're done
    # if the data is empty we are also done
    if data.dtype.kind in 'ib' or data.size == 0:
        return data.astype(dtype)

    # populate digits from kwargs
    if digits is None:
        digits = util.decimal_to_digits(tol.merge)
    elif isinstance(digits, float) or isinstance(digits, np.float):
        digits = util.decimal_to_digits(digits)
    elif not (isinstance(digits, int) or isinstance(digits, np.integer)):
        log.warning('Digits were passed as %s!', digits.__class__.__name__)
        raise ValueError('Digits must be None, int, or float!')

    # data is float so convert to large integers
    data_max = np.abs(data).max() * 10**digits
    # ignore passed dtype if we have something large
    dtype = [np.int32, np.int64][int(data_max > 2**31)]
    # multiply by requested power of ten
    # then subtract small epsilon to avoid "go either way" rounding
    # then do the rounding and convert to integer
    as_int = np.round((data * 10 ** digits) - 1e-6).astype(dtype)

    return as_int


def unique_ordered(data):
    """
    Returns the same as np.unique, but ordered as per the
    first occurrence of the unique value in data.

    Examples
    ---------
    In [1]: a = [0, 3, 3, 4, 1, 3, 0, 3, 2, 1]

    In [2]: np.unique(a)
    Out[2]: array([0, 1, 2, 3, 4])

    In [3]: trimesh.grouping.unique_ordered(a)
    Out[3]: array([0, 3, 4, 1, 2])
    """
    data = np.asanyarray(data)
    order = np.sort(np.unique(data, return_index=True)[1])
    result = data[order]
    return result


def unique_bincount(values,
                    minlength=0,
                    return_inverse=False,
                    return_counts=False):
    """
    For arrays of integers find unique values using bin counting.
    Roughly 10x faster for correct input than np.unique

    Parameters
    --------------
    values : (n,) int
      Values to find unique members of
    minlength : int
      Maximum value that will occur in values (values.max())
    return_inverse : bool
      If True, return an inverse such that unique[inverse] == values
    return_counts : bool
      If True, also return the number of times each
      unique item appears in values

    Returns
    ------------
    unique : (m,) int
      Unique values in original array
    inverse : (n,) int, optional
      An array such that unique[inverse] == values
      Only returned if return_inverse is True
    counts : (m,) int, optional
      An array holding the counts of each unique item in values
      Only returned if return_counts is True
    """
    values = np.asanyarray(values)
    if len(values.shape) != 1 or values.dtype.kind != 'i':
        raise ValueError('input must be 1D integers!')

    try:
        # count the number of occurrences of each value
        counts = np.bincount(values, minlength=minlength)
    except TypeError:
        # casting failed on 32 bit windows
        log.warning('casting failed, falling back!')
        # fall back to numpy unique
        return np.unique(values,
                         return_inverse=return_inverse,
                         return_counts=return_counts)

    # which bins are occupied at all
    # counts are integers so this works
    unique_bin = counts.astype(np.bool)

    # which values are unique
    # indexes correspond to original values
    unique = np.where(unique_bin)[0]
    ret = (unique,)

    if return_inverse:
        # find the inverse to reconstruct original
        inverse = (np.cumsum(unique_bin) - 1)[values]
        ret += (inverse,)

    if return_counts:
        unique_counts = counts[unique]
        ret += (unique_counts,)

    if len(ret) == 1:
        return ret[0]
    return ret


def merge_runs(data, digits=None):
    """
    Merge duplicate sequential values. This differs from unique_ordered
    in that values can occur in multiple places in the sequence, but
    only consecutive repeats are removed

    Parameters
    -----------
    data: (n,) float or int

    Returns
    --------
    merged: (m,) float or int

    Examples
    ---------
    In [1]: a
    Out[1]:
    array([-1, -1, -1,  0,  0,  1,  1,  2,  0,
            3,  3,  4,  4,  5,  5,  6,  6,  7,
            7,  8,  8,  9,  9,  9])

    In [2]: trimesh.grouping.merge_runs(a)
    Out[2]: array([-1,  0,  1,  2,  0,  3,  4,  5,  6,  7,  8,  9])
    """
    data = np.asanyarray(data)
    mask = np.abs(np.diff(data)) > tol.merge
    mask = np.concatenate((np.array([True]), mask))

    return data[mask]


def unique_float(data,
                 return_index=False,
                 return_inverse=False,
                 digits=None):
    """
    Identical to the numpy.unique command, except evaluates floating point
    numbers, using a specified number of digits.

    If digits isn't specified, the library default TOL_MERGE will be used.
    """
    data = np.asanyarray(data)
    as_int = float_to_int(data, digits)
    _junk, unique, inverse = np.unique(as_int,
                                       return_index=True,
                                       return_inverse=True)

    if (not return_index) and (not return_inverse):
        return data[unique]

    result = [data[unique]]

    if return_index:
        result.append(unique)
    if return_inverse:
        result.append(inverse)
    return tuple(result)


def unique_rows(data, digits=None):
    """
    Returns indices of unique rows. It will return the
    first occurrence of a row that is duplicated:
    [[1,2], [3,4], [1,2]] will return [0,1]

    Parameters
    ---------
    data : (n, m) array
      Floating point data
    digits : int or None
      How many digits to consider

    Returns
    --------
    unique :  (j,) int
      Index in data which is a unique row
    inverse : (n,) int
      Array to reconstruct original
      Example: unique[inverse] == data
    """
    hashes = hashable_rows(data, digits=digits)
    garbage, unique, inverse = np.unique(
        hashes,
        return_index=True,
        return_inverse=True)

    return unique, inverse


def unique_value_in_row(data, unique=None):
    """
    For a 2D array of integers find the position of a
    value in each row which only occurs once.

    If there are more than one value per row which
    occur once, the last one is returned.

    Parameters
    ----------
    data :   (n, d) int
      Data to check values
    unique : (m,) int
      List of unique values contained in data.
      Generated from np.unique if not passed

    Returns
    ---------
    result : (n, d) bool
      With one or zero True values per row.


    Examples
    -------------------------------------
    In [0]: r = np.array([[-1,  1,  1],
                          [-1,  1, -1],
                          [-1,  1,  1],
                          [-1,  1, -1],
                          [-1,  1, -1]], dtype=np.int8)

    In [1]: unique_value_in_row(r)
    Out[1]:
           array([[ True, False, False],
                  [False,  True, False],
                  [ True, False, False],
                  [False,  True, False],
                  [False,  True, False]], dtype=bool)

    In [2]: unique_value_in_row(r).sum(axis=1)
    Out[2]: array([1, 1, 1, 1, 1])

    In [3]: r[unique_value_in_row(r)]
    Out[3]: array([-1,  1, -1,  1,  1], dtype=int8)
    """
    if unique is None:
        unique = np.unique(data)
    data = np.asanyarray(data)
    result = np.zeros_like(data, dtype=np.bool, subok=False)
    for value in unique:
        test = np.equal(data, value)
        test_ok = test.sum(axis=1) == 1
        result[test_ok] = test[test_ok]
    return result


def group_rows(data, require_count=None, digits=None):
    """
    Returns index groups of duplicate rows, for example:
    [[1,2], [3,4], [1,2]] will return [[0,2], [1]]


    Note that using require_count allows numpy advanced
    indexing to be used in place of looping and
    checking hashes and is ~10x faster.


    Parameters
    ----------
    data : (n, m) array
      Data to group
    require_count : None or int
      Only return groups of a specified length, eg:
      require_count =  2
      [[1,2], [3,4], [1,2]] will return [[0,2]]
    digits : None or int
    If data is floating point how many decimals
    to consider, or calculated from tol.merge

    Returns
    ----------
    groups : sequence (*,) int
      Indices from in indicating identical rows.
    """

    def group_dict():
        """
        Simple hash table based grouping.
        The loop and appends make this rather slow on
        large arrays but it works on irregular groups.
        """
        observed = dict()
        hashable = hashable_rows(data, digits=digits)
        for index, key in enumerate(hashable):
            key_string = key.tostring()
            if key_string in observed:
                observed[key_string].append(index)
            else:
                observed[key_string] = [index]
        return np.array(list(observed.values()))

    def group_slice():
        # create a representation of the rows that can be sorted
        hashable = hashable_rows(data, digits=digits)
        # record the order of the rows so we can get the original indices back
        # later
        order = np.argsort(hashable)
        # but for now, we want our hashes sorted
        hashable = hashable[order]
        # this is checking each neighbour for equality, example:
        # example: hashable = [1, 1, 1]; dupe = [0, 0]
        dupe = hashable[1:] != hashable[:-1]
        # we want the first index of a group, so we can slice from that location
        # example: hashable = [0 1 1]; dupe = [1,0]; dupe_idx = [0,1]
        dupe_idx = np.append(0, np.nonzero(dupe)[0] + 1)
        # if you wanted to use this one function to deal with non- regular groups
        # you could use: np.array_split(dupe_idx)
        # this is roughly 3x slower than using the group_dict method above.
        start_ok = np.diff(
            np.concatenate((dupe_idx, [len(hashable)]))) == require_count
        groups = np.tile(dupe_idx[start_ok].reshape((-1, 1)),
                         require_count) + np.arange(require_count)
        groups_idx = order[groups]
        if require_count == 1:
            return groups_idx.reshape(-1)
        return groups_idx

    if require_count is None:
        return group_dict()
    else:
        return group_slice()


def boolean_rows(a, b, operation=np.intersect1d):
    """
    Find the rows in two arrays which occur in both rows.

    Parameters
    ---------
    a: (n, d) int
        Array with row vectors
    b: (m, d) int
        Array with row vectors
    operation : function
        Numpy boolean set operation function:
          -np.intersect1d
          -np.setdiff1d

    Returns
    --------
    shared: (p, d) array containing rows in both a and b
    """
    a = np.asanyarray(a, dtype=np.int64)
    b = np.asanyarray(b, dtype=np.int64)

    av = a.view([('', a.dtype)] * a.shape[1]).ravel()
    bv = b.view([('', b.dtype)] * b.shape[1]).ravel()
    shared = operation(av, bv).view(a.dtype).reshape(-1, a.shape[1])

    return shared


def group_vectors(vectors,
                  angle=1e-4,
                  include_negative=False):
    """
    Group vectors based on an angle tolerance, with the option to
    include negative vectors.

    Parameters
    -----------
    vectors : (n,3) float
        Direction vector
    angle : float
        Group vectors closer than this angle in radians
    include_negative : bool
        If True consider the same:
        [0,0,1] and [0,0,-1]

    Returns
    ------------
    new_vectors : (m,3) float
        Direction vector
    groups : (m,) sequence of int
        Indices of source vectors
    """

    vectors = np.asanyarray(vectors, dtype=np.float64)
    angle = float(angle)

    if include_negative:
        vectors = util.vector_hemisphere(vectors)

    spherical = util.vector_to_spherical(vectors)
    angles, groups = group_distance(spherical, angle)
    new_vectors = util.spherical_to_vector(angles)
    return new_vectors, groups


def group_distance(values, distance):
    """
    Find groups of points which have neighbours closer than radius,
    where no two points in a group are farther than distance apart.

    Parameters
    ---------
    points :   (n, d) float
        Points of dimension d
    distance : float
        Max distance between points in a cluster

    Returns
    ----------
    unique : (m, d) float
        Median value of each group
    groups : (m) sequence of int
        Indexes of points that make up a group

    """
    values = np.asanyarray(values,
                           dtype=np.float64)

    consumed = np.zeros(len(values),
                        dtype=np.bool)
    tree = cKDTree(values)

    # (n, d) set of values that are unique
    unique = []
    # (n) sequence of indices in values
    groups = []

    for index, value in enumerate(values):
        if consumed[index]:
            continue
        group = np.array(tree.query_ball_point(value, distance),
                         dtype=np.int)
        consumed[group] = True
        unique.append(np.median(values[group], axis=0))
        groups.append(group)
    return np.array(unique), np.array(groups)


def clusters(points, radius):
    """
    Find clusters of points which have neighbours closer than radius

    Parameters
    ---------
    points : (n, d) float
        Points of dimension d
    radius : float
        Max distance between points in a cluster

    Returns
    ----------
    groups : (m,) sequence of int
        Indices of points in a cluster

    """
    from . import graph
    tree = cKDTree(points)

    # some versions return pairs as a set of tuples
    pairs = tree.query_pairs(r=radius, output_type='ndarray')
    # group connected components
    groups = graph.connected_components(pairs)

    return groups


def blocks(data,
           min_len=2,
           max_len=np.inf,
           wrap=False,
           digits=None,
           only_nonzero=False):
    """
    Find the indices in an array of contiguous blocks
    of equal values.

    Parameters
    ------------
    data : (n,) array
      Data to find blocks on
    min_len : int
      The minimum length group to be returned
    max_len : int
      The maximum length group to be retuurned
    wrap : bool
      Combine blocks on both ends of 1D array
    digits : None or int
      If dealing with floats how many digits to consider
    only_nonzero : bool
      Only return blocks of non- zero values

    Returns
    ---------
    blocks : (m) sequence of (*,) int
      Indices referencing data
    """
    data = float_to_int(data, digits=digits)

    # find the inflection points
    # AKA locations where the array goes from True to False.
    infl = np.concatenate(([0],
                           np.nonzero(np.diff(data))[0] + 1,
                           [len(data)]))
    infl_len = np.diff(infl)
    # check the length of each group
    infl_ok = np.logical_and(infl_len >= min_len,
                             infl_len <= max_len)

    if only_nonzero:
        # check to make sure the values of each contiguous block
        # are True by checking the first value of each block
        infl_ok = np.logical_and(
            infl_ok, data[infl[:-1]])

    # inflate start/end indexes into full ranges of values
    blocks = [np.arange(infl[i], infl[i + 1])
              for i, ok in enumerate(infl_ok) if ok]

    if wrap:
        # wrap only matters if first and last points are the same
        if data[0] != data[-1]:
            return blocks
        # if we are only grouping nonzero things and
        # the first and last point are zero we can exit
        if only_nonzero and not bool(data[0]):
            return blocks

        # so now first point equals last point, so the cases are:
        # - first and last point are in a block: combine two blocks
        # - first OR last point are in block: add other point to block
        # - neither are in a block: check if combined is eligible block

        # first point is in a block
        first = len(blocks) > 0 and blocks[0][0] == 0
        # last point is in a block
        last = len(blocks) > 0 and blocks[-1][-1] == (len(data) - 1)

        # CASE: first and last point are BOTH in block: combine blocks
        if first and last:
            blocks[0] = np.append(blocks[-1], blocks[0])
            blocks.pop()
        else:
            # combined length
            combined = infl_len[0] + infl_len[-1]
            # exit if lengths aren't OK
            if combined < min_len or combined > max_len:
                return blocks
            # new block combines both ends
            new_block = np.append(np.arange(infl[-2], infl[-1]),
                                  np.arange(infl[0], infl[1]))
            # we are in a first OR last situation now
            if first:
                # first was already in a block so replace it with combined
                blocks[0] = new_block
            elif last:
                # last was already in a block so replace with superset
                blocks[-1] = new_block
            else:
                # both are false
                # combined length generated new block
                blocks.append(new_block)

    return blocks


def group_min(groups, data):
    """
    Given a list of groups find the minimum element of data
    within each group

    Parameters
    -----------
    groups : (n,) sequence of (q,) int
        Indexes of each group corresponding to each element in data
    data : (m,)
        The data that groups indexes reference

    Returns
    -----------
    minimums : (n,)
        Minimum value of data per group

    """
    # sort with major key groups, minor key data
    order = np.lexsort((data, groups))
    groups = groups[order]  # this is only needed if groups is unsorted
    data = data[order]
    # construct an index which marks borders between groups
    index = np.empty(len(groups), 'bool')
    index[0] = True
    index[1:] = groups[1:] != groups[:-1]
    return data[index]