Source code for environmentaltools.spatial.analysis

"""
Spatial Analysis Utilities
===========================

Utilities for spatial data analysis including coordinate transformations, 
interpolation, profile extraction, Voronoi diagrams, and geometric operations.

"""

import copy
import math
import sys

import geopandas as gpd
import matplotlib.tri as tri
import numpy as np
import pandas as pd
import scipy.spatial as sp
import shapely.speedups
from scipy.interpolate import griddata
from shapely.geometry import LineString, Polygon



[docs]
def select_data(data, corners):
    """Select spatial data within specified rectangular boundaries.
    
    Filters a DataFrame to include only points within the rectangular region
    defined by corner coordinates. Supports both dictionary and DataFrame
    corner specifications.

    Parameters
    ----------
    data : pd.DataFrame
        Spatial data containing at least 'x' and 'y' coordinate columns
    corners : dict or pd.DataFrame
        Rectangular boundary definition. If dict, format should be:
        {'x': [xmin, xmax], 'y': [ymin, ymax]}.
        If DataFrame, should have 'x' and 'y' columns with [min, max] values.

    Returns
    -------
    pd.DataFrame
        Filtered data containing only points within the specified boundaries.
        Returns a copy of the input data with rows outside the boundary removed.

    Raises
    ------
    ValueError
        If corners is not a dictionary or DataFrame

    Examples
    --------
    >>> import pandas as pd
    >>> data = pd.DataFrame({'x': [0, 5, 10], 'y': [0, 5, 10], 'z': [1, 2, 3]})
    >>> corners = {'x': [2, 8], 'y': [2, 8]}
    >>> filtered = select_data(data, corners)
    >>> print(filtered)
       x  y  z
    1  5  5  2
    """
    if isinstance(corners, dict):
        # Filter data using dictionary boundaries
        data = data.loc[(data["x"] > corners["x"][0]) & (data["x"] < corners["x"][1])]
        data = data.loc[(data["y"] > corners["y"][0]) & (data["y"] < corners["y"][1])]
    elif isinstance(corners, pd.DataFrame):
        # Filter data using DataFrame boundaries
        data = data.loc[(data["x"] > corners.x[0]) & (data["x"] < corners.x[1])]
        data = data.loc[(data["y"] > corners.y[0]) & (data["y"] < corners.y[1])]
    else:
        raise ValueError("Corners should be given in dictionary or DataFrame.")

    return data




[docs]
def interp(base, data, dist=100, method="linear", fill_values=np.nan):
    """Interpolate spatial data onto a regular grid or specified points.
    
    Performs 2D spatial interpolation from scattered base data points to either
    a regular grid or specified target points. Uses scipy.interpolate.griddata
    for the interpolation.

    Parameters
    ----------
    base : pd.DataFrame
        Base spatial data for interpolation. Must contain columns 'x', 'y', and 'z'
        where z is the variable to interpolate.
    data : pd.DataFrame or dict
        Target interpolation domain. If DataFrame with two rows, defines rectangular
        bounds [xmin, xmax], [ymin, ymax] for regular grid creation. If dict with
        'x' and 'y' keys, contains target point coordinates for interpolation.
    dist : float, optional
        Grid spacing in same units as coordinates when creating regular grid.
        Only used if data is DataFrame with bounds. Default is 100.
    method : str, optional
        Interpolation method: 'linear', 'nearest', or 'cubic'. Default is 'linear'.
        See scipy.interpolate.griddata for details.
    fill_values : float, optional
        Value used to fill points outside the convex hull of base data.
        Default is np.nan.

    Returns
    -------
    x : np.ndarray
        X coordinates of interpolation points (2D meshgrid or flattened array)
    y : np.ndarray
        Y coordinates of interpolation points (2D meshgrid or flattened array)
    z : np.ndarray
        Interpolated values at (x, y) locations
    df : pd.DataFrame
        Interpolation results as DataFrame with columns ['x', 'y', 'z']

    Examples
    --------
    >>> import pandas as pd
    >>> import numpy as np
    >>> # Create base data
    >>> base = pd.DataFrame({'x': [0, 1, 2], 'y': [0, 1, 2], 'z': [10, 20, 30]})
    >>> # Define interpolation bounds
    >>> bounds = pd.DataFrame({'x': [0, 2], 'y': [0, 2]})
    >>> x, y, z, df = interp(base, bounds, dist=0.5)
    """
    if isinstance(data, pd.DataFrame):
        # Create regular grid from bounds
        nx, ny = (
            int((data.x[1] - data.x[0]) / dist),
            int((data.y[1] - data.y[0]) / dist),
        )
        x, y = np.meshgrid(
            np.linspace(data.x[0] + dist / 2, data.x[1] - dist / 2, nx),
            np.linspace(data.y[0] + dist / 2, data.y[1] - dist / 2, ny),
        )
    else:
        # Use provided coordinates
        x, y = data["x"], data["y"]
        if len(np.shape(x)) == 1:
            nx, ny = len(x), 1
        else:
            nx, ny = np.shape(x)
    
    # Perform spatial interpolation
    z = griddata(
        base.loc[:, ["x", "y"]].values,
        base.loc[:, "z"].values,
        (x, y),
        method=method,
        fill_value=fill_values,
    )
    
    # Format results as DataFrame
    df = pd.DataFrame(
        np.asarray([np.ravel(x), np.ravel(y), np.ravel(z)]).T,
        index=np.arange(int(nx * ny)),
        columns=["x", "y", "z"],
    )

    return x, y, z, df




[docs]
def fillna(data, var_="z", method="nearest"):
    """Fill missing values in spatial data using interpolation from valid data.
    
    Replaces NaN values in specified variable using spatial interpolation based
    on non-missing neighboring values. Useful for gap-filling in spatial datasets.

    Parameters
    ----------
    data : pd.DataFrame
        Spatial data containing columns 'x', 'y', and the variable to fill.
        Missing values should be NaN.
    var_ : str, optional
        Name of the variable column to fill. Default is 'z'.
    method : str, optional
        Interpolation method: 'nearest', 'linear', or 'cubic'. Default is 'nearest'.
        'nearest' is robust and works well for filling gaps.
        See scipy.interpolate.griddata for details.

    Returns
    -------
    pd.DataFrame
        Input data with NaN values filled in the 'z' column using spatial
        interpolation from valid data points.

    Notes
    -----
    The function uses scipy.interpolate.griddata to perform the spatial interpolation.
    Only locations with NaN values are updated; existing valid data is preserved.
    
    Examples
    --------
    >>> import pandas as pd
    >>> import numpy as np
    >>> data = pd.DataFrame({
    ...     'x': [0, 1, 2, 0, 1, 2],
    ...     'y': [0, 0, 0, 1, 1, 1],
    ...     'z': [10, np.nan, 30, 40, 50, np.nan]
    ... })
    >>> filled = fillna(data, var_='z', method='nearest')
    """
    # Create mask for valid (non-NaN) data points
    mask = pd.notna(data.loc[:, var_])
    
    # Interpolate values at NaN locations using valid data
    z = griddata(
        data.loc[mask, ["x", "y"]].values,
        data.loc[mask, "z"].values,
        (data.x, data.y),
        method=method,
    )
    
    # Update the z column with filled values
    data.z = z
    return data




[docs]
def normal_profiles(topobat, info, n_points=1000):
    """Extract normal profiles from spatial topography/bathymetry data.
    
    Interpolates elevation/depth values along multiple profile lines defined
    by start and end coordinates. Each profile is sampled at equally-spaced
    points along the line connecting its endpoints.

    Parameters
    ----------
    topobat : pd.DataFrame
        Topography/bathymetry data containing columns 'x', 'y', and 'z'.
        'z' represents elevation (positive above datum) or depth (negative below).
    info : dict
        Dictionary defining profile locations with keys:
        - 'x': list of [x_start, x_end] coordinate pairs for each profile
        - 'y': list of [y_start, y_end] coordinate pairs for each profile
        Each pair defines a profile line endpoint.
    n_points : int, optional
        Number of equally-spaced points to sample along each profile.
        Default is 1000. Higher values provide finer resolution but increase
        computation time.

    Returns
    -------
    x : list of np.ndarray
        X coordinates along each profile (n_points per profile)
    y : list of np.ndarray  
        Y coordinates along each profile (n_points per profile)
    z : list of np.ndarray
        Interpolated elevation/depth values along each profile.
        NaN values indicate locations outside the convex hull of input data.

    Examples
    --------
    >>> import pandas as pd
    >>> import numpy as np
    >>> # Create sample bathymetry data
    >>> topobat = pd.DataFrame({
    ...     'x': [0, 10, 20, 0, 10, 20],
    ...     'y': [0, 0, 0, 10, 10, 10],
    ...     'z': [-5, -10, -15, 0, -2, -5]
    ... })
    >>> # Define two profile lines
    >>> info = {'x': [[0, 20], [5, 15]], 'y': [[0, 10], [0, 10]]}
    >>> x, y, z = normal_profiles(topobat, info, n_points=500)
    >>> print(f"Number of profiles: {len(x)}, points per profile: {len(x[0])}")
    Number of profiles: 2, points per profile: 500
    """
    x, y, z = [], [], []
    
    # Extract profile along each defined line
    for i, j in enumerate(info["x"]):
        # Create equally-spaced points along the profile line
        x.append(np.linspace(info["x"][i][0], info["x"][i][1], n_points))
        y.append(np.linspace(info["y"][i][0], info["y"][i][1], n_points))
        
        # Interpolate elevation/depth values at profile points
        z.append(
            griddata(
                topobat.loc[:, ["x", "y"]].values,
                topobat.loc[:, "z"].values,
                np.vstack([x[i], y[i]]).T,
                method="linear",
                fill_value=np.nan,
            )
        )
    return x, y, z




[docs]
def rotate_coords(x, y, angle):
    """Rotate coordinates by specified angle around the origin.
    
    Performs 2D coordinate rotation transformation using angle specified in degrees.
    Rotation is counterclockwise for positive angles.

    Parameters
    ----------
    x : float or np.ndarray
        X coordinate(s) to rotate
    y : float or np.ndarray
        Y coordinate(s) to rotate
    angle : float
        Rotation angle in degrees. Positive values rotate counterclockwise.

    Returns
    -------
    x_rot : float or np.ndarray
        Rotated X coordinate(s)
    y_rot : float or np.ndarray
        Rotated Y coordinate(s)

    Examples
    --------
    >>> import numpy as np
    >>> x, y = 1.0, 0.0
    >>> x_rot, y_rot = rotate_coords(x, y, 90)  # Rotate 90 degrees
    >>> print(f"({x_rot:.2f}, {y_rot:.2f})")
    (0.00, 1.00)
    """
    # Calculate distance from origin and angle for each point
    d = np.sqrt(x**2 + y**2)
    angles = np.arctan2(y, x)
    
    # Decompose into x and y components
    dx, dy = d * np.cos(angles), d * np.sin(angles)
    
    # Apply rotation transformation
    x = dx * np.cos(np.deg2rad(angle)) - dy * np.sin(np.deg2rad(angle))
    y = dx * np.sin(np.deg2rad(angle)) + dy * np.cos(np.deg2rad(angle))
    
    return x, y




[docs]
def global_to_local_coords(x_glob, y_glob, alpha, lon_0_glob, lat_0_glob):
    """Transform global coordinates to local rotated coordinate system.
    
    Converts coordinates from a global reference system to a local coordinate
    system defined by an origin point and rotation angle. Useful for analyzing
    data in a local shore-normal/shore-parallel reference frame.

    Parameters
    ----------
    x_glob : float or np.ndarray
        Global X coordinates (e.g., UTM easting or longitude)
    y_glob : float or np.ndarray
        Global Y coordinates (e.g., UTM northing or latitude)
    alpha : float
        Rotation angle in degrees defining local coordinate system orientation
    lon_0_glob : float
        Global X coordinate of local system origin
    lat_0_glob : float
        Global Y coordinate of local system origin

    Returns
    -------
    x_loc : float or np.ndarray
        Local X coordinates (along rotated axis)
    y_loc : float or np.ndarray
        Local Y coordinates (perpendicular to rotated axis)

    Notes
    -----
    The transformation applies:
    1. Translation to move origin to (lon_0_glob, lat_0_glob)
    2. Rotation by angle alpha

    Examples
    --------
    >>> x_glob, y_glob = 100.0, 200.0
    >>> x_loc, y_loc = global_to_local_coords(x_glob, y_glob, 45, 0, 0)
    """
    # Apply rotation transformation with origin translation
    x_loc = (x_glob - lon_0_glob) * np.cos(math.radians(alpha)) + (
        y_glob - lat_0_glob
    ) * np.sin(math.radians(alpha))
    y_loc = (y_glob - lat_0_glob) * np.cos(math.radians(alpha)) - (
        x_glob - lon_0_glob
    ) * np.sin(math.radians(alpha))

    return x_loc, y_loc




[docs]
def local_to_global_coords(x_loc, y_loc, alpha, lon_0_glob, lat_0_glob):
    """Transform local rotated coordinates back to global coordinate system.
    
    Inverse transformation of global_to_local_coords. Converts coordinates from
    local rotated reference system back to the global coordinate system.

    Parameters
    ----------
    x_loc : float or np.ndarray
        Local X coordinates (along rotated axis)
    y_loc : float or np.ndarray
        Local Y coordinates (perpendicular to rotated axis)
    alpha : float
        Rotation angle in degrees that defines the local coordinate system
    lon_0_glob : float
        Global X coordinate of local system origin
    lat_0_glob : float
        Global Y coordinate of local system origin

    Returns
    -------
    x_glob : float or np.ndarray
        Global X coordinates (e.g., UTM easting or longitude)
    y_glob : float or np.ndarray
        Global Y coordinates (e.g., UTM northing or latitude)

    Notes
    -----
    The transformation applies:
    1. Inverse rotation by angle -alpha
    2. Translation to restore original origin

    This function is the mathematical inverse of global_to_local_coords.

    Examples
    --------
    >>> x_loc, y_loc = 70.71, 70.71
    >>> x_glob, y_glob = local_to_global_coords(x_loc, y_loc, 45, 0, 0)
    >>> print(f"Global: ({x_glob:.2f}, {y_glob:.2f})")
    """
    # Apply inverse rotation transformation with origin translation
    x_glob = (
        ((x_loc) / (np.cos(math.radians(alpha))))
        / (1 + (np.tan(math.radians(alpha))) * (np.tan(math.radians(alpha))))
        - (
            ((y_loc * np.sin(math.radians(alpha))))
            / (np.cos(math.radians(alpha)) * np.cos(math.radians(alpha)))
        )
        / (1 + (np.tan(math.radians(alpha))) * (np.tan(math.radians(alpha))))
        + lon_0_glob
    )
    y_glob = (
        (y_loc) / (np.cos(math.radians(alpha)))
        + (
            (np.sin(math.radians(alpha)))
            * (x_loc - y_loc * np.tan(math.radians(alpha)))
        )
        / (
            (np.cos(math.radians(alpha)))
            * (np.cos(math.radians(alpha)))
            * (1 + (np.tan(math.radians(alpha))) * (np.tan(math.radians(alpha))))
        )
        + lat_0_glob
    )

    return x_glob, y_glob




[docs]
def continuous_line(data: pd.DataFrame, limiting_distance: float = 1e9):
    """Create continuous line from scattered points by connecting nearest neighbors.
    
    Constructs an ordered sequence of points forming a continuous line by
    iteratively connecting each point to its nearest unconnected neighbor.
    Algorithm starts from the leftmost point (minimum x coordinate) and
    proceeds by selecting the closest remaining point at each step.

    Parameters
    ----------
    data : pd.DataFrame
        Scattered point data containing at least 'x' and 'y' coordinate columns.
        Additional columns are preserved in the output.
    limiting_distance : float, optional
        Maximum allowed distance between consecutive points. If the nearest
        remaining point exceeds this distance, the algorithm terminates.
        Default is 1e9 (effectively unlimited). Units match input coordinates.

    Returns
    -------
    pd.DataFrame
        Ordered points forming a continuous line. Points are sorted by
        connection order starting from the leftmost point. Contains same
        columns as input data. Points that couldn't be connected within
        the limiting distance are excluded.

    Notes
    -----
    This greedy nearest-neighbor algorithm works well for creating ordered
    lines from scattered data along approximately linear features (coastlines,
    transects, etc.). For complex or branching geometries, results may not
    be optimal.

    The algorithm modifies the input DataFrame in place (removes points as
    they are processed).

    Examples
    --------
    >>> import pandas as pd
    >>> import numpy as np
    >>> # Create scattered points along a curve
    >>> data = pd.DataFrame({
    ...     'x': [0, 2, 1, 3, 4],
    ...     'y': [0, 1, 0.5, 2, 3]
    ... })
    >>> line = continuous_line(data.copy(), limiting_distance=2.0)
    >>> print(line[['x', 'y']])
    """
    cols = data.columns
    
    # Initialize output DataFrame with same structure as input
    cont_line = pd.DataFrame(-1, index=np.arange(len(data.index)), columns=cols)
    
    # Start from leftmost point
    index0 = data.index[data.x.argmin()]
    cont_line.loc[0] = data.loc[index0]
    data.drop(index0, inplace=True)
    cont_line["dist"] = 0

    # Iteratively connect nearest neighbors
    k = 0
    while not data.empty:
        # Calculate distances from current point to all remaining points
        distance = np.sqrt(
            (cont_line.loc[k, "x"] - data["x"]) ** 2
            + (cont_line.loc[k, "y"] - data["y"]) ** 2
        )

        # Find nearest remaining point
        idx = data.index[np.argmin(distance)]
        
        # Check if distance exceeds limit
        if np.min(distance) > limiting_distance:
            break
        
        # Add nearest point to line
        cont_line.loc[k + 1, cols] = data.loc[idx]
        cont_line.loc[k + 1, "dist"] = distance[idx]
        data.drop(idx, inplace=True)
        k += 1

    # Clean up: remove distance column and unconnected points
    cont_line.drop(columns="dist", inplace=True)
    cont_line.drop(cont_line[cont_line["x"] == -1].index, inplace=True)

    return cont_line




[docs]
def create_polygon(data, crs="epsg:25830", sides=[]):
    """Create a closed polygon from coordinates and optional additional sides.
    
    Constructs a polygon geometry from a primary set of coordinates, optionally
    adding additional side segments. Useful for creating polygons from multiple
    line segments or closing open geometries.

    Parameters
    ----------
    data : pd.DataFrame
        Primary polygon coordinates containing columns 'x' and 'y'.
        These points define the main boundary of the polygon.
    crs : str, optional
        Coordinate reference system in EPSG format (e.g., 'epsg:25830' for
        ETRS89 UTM Zone 30N, 'epsg:4326' for WGS84). Default is 'epsg:25830'.
    sides : list of pd.DataFrame or pd.DataFrame, optional
        Additional side segment(s) to append to the polygon boundary.
        Each DataFrame should contain 'x' and 'y' coordinate columns.
        Coordinates are appended in the order provided. Default is empty list.

    Returns
    -------
    gpd.GeoDataFrame
        GeoDataFrame containing single polygon geometry with specified CRS.
        Has one row with the polygon geometry.

    Notes
    -----
    - Coordinates are concatenated in order: data, then each side in sides list
    - The shapely Polygon constructor automatically closes the polygon
    - Ensure coordinate order forms a valid polygon (no self-intersections)
    - All input DataFrames should use the same coordinate system

    Examples
    --------
    >>> import pandas as pd
    >>> import numpy as np
    >>> # Define main boundary
    >>> data = pd.DataFrame({'x': [0, 10, 10, 0], 'y': [0, 0, 10, 10]})
    >>> # Create closed polygon
    >>> poly = create_polygon(data, crs='epsg:4326')
    >>> print(poly.geometry[0])
    POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))
    
    >>> # Add additional side
    >>> side = pd.DataFrame({'x': [10, 5], 'y': [10, 15]})
    >>> poly = create_polygon(data, crs='epsg:4326', sides=[side])
    """
    # Ensure sides is a list
    if not isinstance(sides, list):
        sides = [sides]

    # Extract main boundary coordinates
    coords = data.loc[:, ["x", "y"]].values
    
    # Append additional sides
    for side in sides:
        coords = np.vstack([coords, side.loc[:, ["x", "y"]].values])

    # Create polygon geometry and GeoDataFrame
    polygon = Polygon(coords)
    polygon = gpd.GeoDataFrame(index=[0], crs=crs, geometry=[polygon])

    return polygon




[docs]
def confidence_interval_2d(data, loc0=None, angle=None, n_points=1000):
    """Calculate 2D confidence intervals and statistics for profile data.
    
    Computes mean, standard deviation, minimum, and maximum values across
    multiple 2D profiles by rotating, interpolating, and analyzing the data
    in a common coordinate system. Useful for analyzing variability in
    spatial transects or time series of profiles.

    Parameters
    ----------
    data : dict of pd.DataFrame
        Dictionary where keys identify different profiles and values are
        DataFrames containing 'x' and 'y' columns representing profile coordinates.
        All profiles should span similar spatial extent.
    loc0 : list of float, optional
        Origin point [x0, y0] for coordinate rotation. If None, uses the
        minimum x location from the first profile. Default is None.
    angle : float, optional
        Rotation angle in radians to align profiles. If None, automatically
        calculated from data geometry. Default is None.
    n_points : int, optional
        Number of points for the regular interpolation grid. Default is 1000.
        Higher values provide finer resolution but increase computation time.

    Returns
    -------
    pd.DataFrame
        Statistical summary with index representing x-coordinates and columns:
        - 'mean': Mean y-value across all profiles
        - 'std': Standard deviation of y-values
        - 'min': Minimum y-value across all profiles
        - 'max': Maximum y-value across all profiles
        - 'x': X coordinates (in original coordinate system)
        - 'ini': Y-values from first profile
        - 'end': Y-values from last profile

    Notes
    -----
    The function performs the following steps:
    1. Rotates all profiles to a common reference frame
    2. Interpolates each profile onto a regular n_points-grid
    3. Calculates statistics across all interpolated profiles
    4. Rotates results back to original coordinate system

    Requires all profiles to have comparable spatial extent for meaningful results.

    Examples
    --------
    >>> import pandas as pd
    >>> import numpy as np
    >>> # Create sample profile data
    >>> data = {
    ...     1: pd.DataFrame({'x': [0, 1, 2], 'y': [0, 1, 2]}),
    ...     2: pd.DataFrame({'x': [0, 1, 2], 'y': [0, 1.5, 3]}),
    ...     3: pd.DataFrame({'x': [0, 1, 2], 'y': [0, 0.5, 1]})
    ... }
    >>> stats = confidence_interval_2d(data, n_points=500)
    >>> print(stats[['mean', 'std']].head())
    """
    rot = data.copy()
    
    # Set origin point if not provided
    if loc0 == None:
        ind_min = data[1].x.argmin()
        loc0 = [data[1].loc[ind_min, "x"], data[1].loc[ind_min, "y"]]

    # Calculate rotation angle if not provided
    if angle == None:
        angle = np.arctan2(data.y, data.x)

    # Initialize array for interpolated y-values
    y = np.zeros([len(data), n_points])
    
    # Process each profile
    for ind_, key in enumerate(data.keys()):
        # Sort by x coordinate
        data[key].sort_values(by="x", inplace=True)
        
        # Rotate coordinates to common reference frame
        rot[key].x, rot[key].y = rotate_coords(
            data[key].x - loc0[0], data[key].y - loc0[1], -angle
        )

        # Define interpolation grid from first profile
        if ind_ == 0:
            x = np.linspace(rot[key].x.min(), rot[key].x.max(), n_points)
            init_key = key

        if ind_ == len(data) - 1:
            end_key = key

        # Interpolate profile onto regular grid
        y[ind_, :] = np.interp(x, rot[key].x, data[key].y)

    # Calculate statistics across all profiles
    mean_ = np.mean(y, axis=0)
    std_ = np.std(y, axis=0)
    min_ = np.min(y, axis=0)
    max_ = np.max(y, axis=0)
    
    # Create statistics DataFrame
    stats = pd.DataFrame(
        np.vstack([mean_, std_, min_, max_]).T,
        index=x,
        columns=["mean", "std", "min", "max"],
    )

    # Rotate statistics back to original coordinate system
    for cols_ in ["mean", "min", "max"]:
        stats["x"], stats[cols_] = rotate_coords(x, stats[cols_].values, angle)
        stats["x"], stats[cols_] = stats["x"] + loc0[0], stats[cols_] + loc0[1]

    # Add first and last profile to results
    _, stats["ini"] = rotate_coords(x, y[0, :], angle)
    stats["ini"] = stats["ini"] + loc0[1]
    _, stats["end"] = rotate_coords(x, y[-1, :], angle)
    stats["end"] = stats["end"] + loc0[1]
    
    return stats





[docs]
def generate_CVD(points, iterations, bounding_box):
    """Generate Centroidal Voronoi Diagram using Lloyd's algorithm.
    
    Iteratively computes a Centroidal Voronoi Diagram (CVD) where each Voronoi
    cell's generator point converges to the centroid of its cell. This creates
    a more uniform and optimized spatial tessellation compared to standard
    Voronoi diagrams.

    Parameters
    ----------
    points : np.ndarray
        Initial generator points as (N, 2) array of [x, y] coordinates
    iterations : int
        Number of Lloyd's algorithm iterations to perform. More iterations
        produce better convergence to true centroids. Typical values: 5-50.
    bounding_box : tuple or list
        Rectangular bounds as [xmin, xmax, ymin, ymax] defining the region
        for the Voronoi diagram

    Returns
    -------
    scipy.spatial.Voronoi
        Bounded Voronoi diagram object with additional attributes:
        - filtered_points: generator points within bounding box
        - filtered_regions: region indices corresponding to filtered points

    Notes
    -----
    Lloyd's algorithm (1982) iteratively:
    1. Constructs bounded Voronoi diagram from current points
    2. Calculates centroid of each Voronoi cell
    3. Moves generator points to cell centroids
    4. Repeats until convergence or max iterations reached

    Reference: https://www.py4u.net/discuss/21901

    Examples
    --------
    >>> import numpy as np
    >>> # Random initial points
    >>> points = np.random.rand(20, 2) * 100
    >>> # Generate CVD within 100x100 box
    >>> vor = generate_CVD(points, iterations=10, bounding_box=[0, 100, 0, 100])
    """
    p = copy.copy(points)

    # Perform Lloyd's algorithm iterations
    for i in range(iterations):
        # Generate bounded Voronoi diagram
        vor = bounded_voronoi(p, bounding_box)
        centroids = []

        # Calculate centroid of each Voronoi region
        for region in vor.filtered_regions:
            # Extract vertices for this region (close polygon by repeating first vertex)
            vertices = vor.vertices[region + [region[0]], :]
            centroid = centroid_region(vertices)
            centroids.append(list(centroid[0, :]))

        # Update points to region centroids
        p = np.array(centroids)

    # Return final bounded Voronoi diagram
    return bounded_voronoi(p, bounding_box)



# Machine epsilon for numerical stability
eps = sys.float_info.epsilon



[docs]
def in_box(towers, bounding_box):
    """Check if points are within rectangular bounding box.

    Parameters
    ----------
    towers : np.ndarray
        Point coordinates as (N, 2) array of [x, y] values
    bounding_box : tuple or list
        Rectangular bounds as [xmin, xmax, ymin, ymax]

    Returns
    -------
    np.ndarray
        Boolean array of length N indicating which points are inside the box
        (True if inside, False if outside)

    Examples
    --------
    >>> import numpy as np
    >>> points = np.array([[0, 0], [5, 5], [15, 15]])
    >>> bbox = [0, 10, 0, 10]
    >>> mask = in_box(points, bbox)
    >>> print(mask)
    [True, True, False]
    """
    return np.logical_and(
        np.logical_and(
            bounding_box[0] <= towers[:, 0], towers[:, 0] <= bounding_box[1]
        ),
        np.logical_and(
            bounding_box[2] <= towers[:, 1], towers[:, 1] <= bounding_box[3]
        ),
    )




[docs]
def bounded_voronoi(towers, bounding_box):
    """Generate bounded Voronoi diagram with finite regions.
    
    Creates a Voronoi diagram constrained to a rectangular bounding box by
    mirroring edge points outside the box. This ensures all Voronoi regions
    within the box are finite and bounded.

    Parameters
    ----------
    towers : np.ndarray
        Generator points as (N, 2) array of [x, y] coordinates
    bounding_box : tuple or list
        Rectangular bounds as [xmin, xmax, ymin, ymax]

    Returns
    -------
    scipy.spatial.Voronoi
        Voronoi diagram object with additional attributes:
        - filtered_points: generator points within bounding box
        - filtered_regions: list of region vertex indices for points in box

    Notes
    -----
    The algorithm mirrors points across each boundary (left, right, top, bottom)
    to create "virtual" points outside the box. This forces edge Voronoi cells
    to close at the boundaries rather than extending to infinity.

    Only the first 1/5 of regions (corresponding to original points) are retained
    in filtered_regions, as the remaining regions belong to mirrored points.

    Examples
    --------
    >>> import numpy as np
    >>> points = np.array([[25, 25], [75, 25], [50, 75]])
    >>> vor = bounded_voronoi(points, [0, 100, 0, 100])
    >>> print(f"Number of filtered regions: {len(vor.filtered_regions)}")
    """
    # Select points inside the bounding box
    i = in_box(towers, bounding_box)
    points_center = towers[i, :]

    # Mirror points across left boundary
    points_left = np.copy(points_center)
    points_left[:, 0] = bounding_box[0] - (points_left[:, 0] - bounding_box[0])

    # Mirror points across right boundary
    points_right = np.copy(points_center)
    points_right[:, 0] = bounding_box[1] + (bounding_box[1] - points_right[:, 0])

    # Mirror points across bottom boundary
    points_down = np.copy(points_center)
    points_down[:, 1] = bounding_box[2] - (points_down[:, 1] - bounding_box[2])

    # Mirror points across top boundary
    points_up = np.copy(points_center)
    points_up[:, 1] = bounding_box[3] + (bounding_box[3] - points_up[:, 1])

    # Combine all points: original + mirrored
    points = np.append(
        points_center,
        np.append(
            np.append(points_left, points_right, axis=0),
            np.append(points_down, points_up, axis=0),
            axis=0,
        ),
        axis=0,
    )

    # Compute Voronoi diagram
    vor = sp.spatial.Voronoi(points)

    # Add custom attributes for filtered results
    vor.filtered_points = points_center  # Original points within bounding box
    vor.filtered_regions = np.array(vor.regions)[
        vor.point_region[: vor.npoints // 5]
    ]  # Regions corresponding to original points (first 1/5 of all regions)

    return vor




[docs]
def centroid_region(vertices):
    """Calculate centroid of a polygon region.
    
    Computes the geometric centroid (center of mass) of a polygon defined
    by ordered vertices. Uses the standard polygon centroid formula based
    on signed area.

    Parameters
    ----------
    vertices : np.ndarray
        Polygon vertices as (N, 2) array of [x, y] coordinates.
        First and last vertex should be identical (closed polygon).

    Returns
    -------
    np.ndarray
        Centroid coordinates as (1, 2) array [[x_centroid, y_centroid]]

    Notes
    -----
    The formula computes the centroid using:
    
    .. math::
        C_x = \\frac{1}{6A} \\sum_{i=0}^{n-1} (x_i + x_{i+1})(x_i y_{i+1} - x_{i+1} y_i)
        
        C_y = \\frac{1}{6A} \\sum_{i=0}^{n-1} (y_i + y_{i+1})(x_i y_{i+1} - x_{i+1} y_i)
    
    where A is the signed polygon area.

    Examples
    --------
    >>> import numpy as np
    >>> # Square polygon
    >>> vertices = np.array([[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]])
    >>> centroid = centroid_region(vertices)
    >>> print(centroid)
    [[0.5, 0.5]]
    """
    # Initialize accumulation variables
    A = 0    # Signed area
    C_x = 0  # Centroid x-coordinate
    C_y = 0  # Centroid y-coordinate
    
    # Accumulate using trapezoidal formula
    for i in range(0, len(vertices) - 1):
        # Cross product term for this edge
        s = vertices[i, 0] * vertices[i + 1, 1] - vertices[i + 1, 0] * vertices[i, 1]
        A = A + s
        C_x = C_x + (vertices[i, 0] + vertices[i + 1, 0]) * s
        C_y = C_y + (vertices[i, 1] + vertices[i + 1, 1]) * s
    
    # Finalize calculations
    A = 0.5 * A
    C_x = (1.0 / (6.0 * A)) * C_x
    C_y = (1.0 / (6.0 * A)) * C_y
    
    return np.array([[C_x, C_y]])




[docs]
def triangulation(x, y):
    """Create Delaunay triangulation from coordinate arrays.

    Parameters
    ----------
    x : array-like
        X coordinates of points
    y : array-like
        Y coordinates of points

    Returns
    -------
    matplotlib.tri.Triangulation
        Delaunay triangulation object
    """

    # Create the Triangulation; no triangles specified so Delaunay triangulation created
    triangles = tri.Triangulation(x, y)
    xmid = x.values[triangles.triangles].mean(axis=1)

    # Code for removing specific triangles (currently disabled)
    # triangles2remove = [0, 1, ...]
    # mask = np.ones(len(xmid))
    # for i in range(len(xmid)):
    #     if i not in triangles2remove:
    #         mask[i] = False
    # triangles.set_mask(mask)
    
    return triangles