Source code for environmentaltools.common.save

import json

import numpy as np
import pandas as pd


from pyproj import CRS
import rasterio
from affine import Affine
import shapefile



[docs]
def npy2json(params: dict):
    """Convert a parameter dictionary holding numpy data to JSON and save it.

    The dictionary is normalised in place so that every value is JSON
    serialisable, and is then written with :func:`to_json` to the path stored
    under ``params["fname"]``.

    Args:
        params (dict): Parameters to transform. The keys ``'fname'`` (output
            file name), ``'mode'`` and ``'fun'`` are required; ``'all'`` is
            optional. The following conversions are applied:

            - every top-level ``np.ndarray`` value becomes a (nested) list of
              native Python scalars;
            - each item of ``'mode'`` is cast to ``int``;
            - each item of ``'all'`` becomes ``[str, float, list]`` built from
              its first three elements;
            - each non-string item of ``'fun'`` is replaced by its ``name``
              attribute.

    Returns:
        None

    Raises:
        KeyError: If ``'mode'``, ``'fun'`` or ``'fname'`` is missing.
    """
    # ``ndarray.tolist()`` converts recursively and yields native Python
    # scalars. ``list(ndarray)`` would keep numpy integer scalars (and inner
    # arrays for 2-D input), which the json module refuses to encode.
    for key, value in params.items():
        if isinstance(value, np.ndarray):
            params[key] = value.tolist()

    params["mode"] = [int(mode) for mode in params["mode"]]

    if "all" in params:
        # ``np.asarray`` lets the third element be an array, a list or a scalar.
        params["all"] = [
            [str(entry[0]), float(entry[1]), np.asarray(entry[2]).tolist()]
            for entry in params["all"]
        ]

    params["fun"] = [
        fun if isinstance(fun, str) else fun.name for fun in params["fun"]
    ]

    to_json(params, params["fname"])

    return




[docs]
def to_json(params: dict, file_name: str, numpy_array_serialization: bool = False):
    """Save a dictionary to a UTF-8 encoded JSON file.

    Args:
        params (dict): Data dictionary to save.
        file_name (str): Output file path.
        numpy_array_serialization (bool): If True, every value exposing a
            ``tolist()`` method (numpy arrays and scalars, for instance) is
            replaced by the result of that call, descending into nested
            dictionaries at any depth. The replacement is done in place, so
            ``params`` is modified. Defaults to False.

    Returns:
        None

    Raises:
        TypeError: If ``params`` still contains a value that JSON cannot
            encode. The output file is not created or touched in that case.
    """
    if numpy_array_serialization:
        _tolist_in_place(params)

    # Serialise before opening the file: a failure here must not leave a
    # truncated, invalid JSON file on disk.
    payload = json.dumps(params, ensure_ascii=False, indent=4)

    # ``ensure_ascii=False`` emits raw non-ASCII characters, so the encoding is
    # pinned instead of depending on the platform default.
    with open(str(file_name), "w", encoding="utf-8") as f:
        f.write(payload)

    return


def _tolist_in_place(mapping: dict):
    """Replace, in place, each value that has ``tolist()``; recurse into dicts."""
    for key, value in mapping.items():
        if isinstance(value, dict):
            _tolist_in_place(value)
            continue
        try:
            mapping[key] = value.tolist()
        except (AttributeError, TypeError):
            # Plain Python values have no usable ``tolist``; keep them as is.
            pass




[docs]
def to_csv(data: pd.DataFrame, file_name: str, compression: str = "infer"):
    """Save a DataFrame to a CSV file with optional compression.

    Args:
        data (pd.DataFrame): Data to save.
        file_name (str): Output file path. ``pathlib.Path`` objects are
            accepted as well.
        compression (str): Compression type ('infer', 'zip', 'gzip', etc.).
            Defaults to 'infer' (auto-detect from extension). A file name
            ending in ``.zip`` always forces zip compression, whatever value
            is given here.

    Returns:
        None
    """
    target = str(file_name)

    # Only the file extension decides: a ".zip" fragment elsewhere in the path
    # (a directory name, for instance) must not trigger zip compression.
    if target.endswith(".zip"):
        compression = "zip"

    data.to_csv(target, compression=compression)

    return




[docs]
def to_npy(data: np.ndarray, file_name: str):
    """Write a numpy array to disk in the binary ``.npy`` format.

    Args:
        data (np.ndarray): Array to store.
        file_name (str): Output path without extension; ``.npy`` is always
            appended to it.

    Returns:
        None
    """
    target = str(file_name) + ".npy"
    np.save(target, data)
    return




[docs]
def to_xlsx(data: pd.DataFrame, file_name: str):
    """Save a DataFrame to an Excel file with styled header and striped rows.

    The first sheet row holds the index name (or "Index" when the index is
    unnamed) followed by the column labels. Each following row holds one
    index label and its values, alternating between the 'odd' and 'even'
    formats. Cells keep their own type: rows are not coerced to a common
    dtype, so numbers stay numeric even when the index holds strings.

    Args:
        data (pd.DataFrame): Data to save.
        file_name (str): Output Excel file path.

    Returns:
        None
    """
    wbook, wsheet = cwriter(str(file_name))

    try:
        # Build each format once instead of once per cell/row.
        header_fmt = formats(wbook, "header")
        # Indexed by ``row_number % 2``: 0 -> even rows, 1 -> odd rows.
        row_fmts = (formats(wbook, "even"), formats(wbook, "odd"))

        # Writing the header
        index_label = data.index.name if data.index.name is not None else "Index"
        wsheet.write(0, 0, index_label, header_fmt)
        for col_num, value in enumerate(data.columns.values):
            wsheet.write(0, col_num + 1, value, header_fmt)

        # Adding data. Iterating positionally (rather than ``data.loc[label]``)
        # keeps one sheet row per frame row even with duplicated index labels.
        for row_num, record in enumerate(
            data.itertuples(index=True, name=None), start=1
        ):
            label, values = record[0], record[1:]
            # A MultiIndex label is a tuple; spread it over leading cells.
            labels = label if isinstance(label, tuple) else (label,)
            wsheet.write_row(row_num, 0, [*labels, *values], row_fmts[row_num % 2])
    finally:
        # Always release the workbook, even if writing a cell failed.
        wbook.close()
    return




[docs]
def cwriter(file_out: str):
    """Open an xlsxwriter-backed Excel file and return its book and sheet.

    Args:
        file_out (str): Output file path.

    Returns:
        tuple: (workbook, worksheet) - the writer's ``book`` and its
        "Sheet1" worksheet, to be written to and closed by the caller.
    """
    sheet_name = "Sheet1"
    # With this option xlsxwriter writes NaN/Inf as Excel errors instead of
    # raising.
    xlsx_options = {"options": {"nan_inf_to_errors": True}}
    excel_writer = pd.ExcelWriter(
        file_out, engine="xlsxwriter", engine_kwargs=xlsx_options
    )

    # Writing a one-cell frame makes pandas create the worksheet. The
    # placeholder value 0 lands in the second row, first column.
    placeholder = pd.DataFrame([0])
    placeholder.to_excel(
        excel_writer,
        index=False,
        sheet_name=sheet_name,
        startrow=1,
        header=False,
    )

    return excel_writer.book, excel_writer.sheets[sheet_name]




[docs]
def formats(wbook, style):
    """Register one of the predefined cell styles on a workbook.

    Args:
        wbook (xlsxwriter.Workbook): Workbook on which ``add_format`` is
            called.
        style (str): Preset name: 'header', 'even' or 'odd'.

    Returns:
        xlsxwriter.Format: Whatever ``wbook.add_format`` returns for the
        selected preset.

    Raises:
        KeyError: If ``style`` is not one of the preset names.
    """

    def _row_style(background):
        # Data rows share everything except the fill colour.
        return {
            "bold": False,
            "text_wrap": False,
            "valign": "center",
            "fg_color": background,
            "border": 1,
        }

    presets = {
        "header": {
            "bold": True,
            "text_wrap": True,
            "valign": "center",
            "font_color": "#ffffff",
            "fg_color": "#5983B0",
            "border": 1,
        },
        "even": _row_style("#DEE6EF"),
        "odd": _row_style("#FFFFFF"),
    }

    chosen = presets[style]
    return wbook.add_format(chosen)




[docs]
def to_esriascii(
    data: np.ndarray,
    ncols: int,
    nrows: int,
    cellsize: float,
    file_name: str,
    x0: float = 0,
    y0: float = 0,
    nodata_value: float = -9999
):
    """Save gridded data to ESRI ASCII raster format.

    Writes the six-line header (ncols, nrows, xllcorner, yllcorner, cellsize,
    NODATA_value) followed by the grid values formatted as ``%8.3f``.

    Args:
        data (np.ndarray): 2D array of grid values to save.
        ncols (int): Number of columns written to the header.
        nrows (int): Number of rows written to the header.
        cellsize (float): Cell size (resolution) in spatial units.
        file_name (str): Output file path.
        x0 (float): X-coordinate of lower-left corner. Defaults to 0.
        y0 (float): Y-coordinate of lower-left corner. Defaults to 0.
        nodata_value (float): Value declared in the header as missing/no
            data. It is only written to the header; ``data`` is saved as
            given. Defaults to -9999.

    Returns:
        None
    """
    header = (
        f"ncols    {ncols}\n"
        f"nrows    {nrows}\n"
        f"xllcorner    {x0}\n"
        f"yllcorner    {y0}\n"
        f"cellsize    {cellsize}\n"
        f"NODATA_value    {nodata_value}\n"
    )

    # One text-mode handle for header and data: the file is closed even if
    # writing fails, and line endings are the same throughout the file.
    with open(str(file_name), "w") as fid:
        fid.write(header)
        np.savetxt(fid, data, fmt="%8.3f", newline="\n")
    return




[docs]
def as_float_bool(obj: dict):
    """Convert string values in a dictionary to numbers or booleans.

    Each value is handled as follows, and the dictionary is updated in place:

    - ``bool`` values are kept untouched (they are not turned into 1/0);
    - the strings ``'True'`` and ``'False'`` become the matching ``bool``;
    - anything ``float()`` accepts becomes a ``float``, or an ``int`` when it
      is a finite whole number (``inf`` and ``nan`` stay floats);
    - every other value is left as it is.

    Args:
        obj (dict): Dictionary whose values are to be converted.

    Returns:
        dict: The same dictionary object, with converted values.
    """
    for key, value in obj.items():
        # ``float(True)`` is 1.0, so real booleans must be skipped explicitly.
        if isinstance(value, bool):
            continue

        # Convert string representations of booleans
        if isinstance(value, str) and value in ("True", "False"):
            obj[key] = value == "True"
            continue

        try:
            number = float(value)
        except (ValueError, TypeError):
            continue

        # ``is_integer`` is False for inf/nan, which ``int()`` cannot convert.
        obj[key] = int(number) if number.is_integer() else number

    return obj




[docs]
def to_geotiff(
    data: np.ndarray,
    file_name: str,
    profile: dict = None,
    transform: Affine = None,
    auxiliary: dict = None
):
    """Save georeferenced raster data to a single-band float32 GeoTIFF.

    The raster profile is either given directly or built from ``auxiliary``.
    Whatever its origin, the profile used for writing is forced to
    ``dtype=float32``, ``count=1`` and ``compress="lzw"``, and ``data`` is cast
    to float32 and written to band 1.

    Args:
        data (np.ndarray): 2D array of raster values.
        file_name (str): Output GeoTIFF file path.
        profile (dict, optional): Rasterio profile dictionary with metadata
            (driver, dtype, nodata, dimensions, CRS, transform). It is copied,
            not modified. If None, the profile is built from ``auxiliary``.
        transform (Affine, optional): Affine transformation matrix used when
            the profile is built from ``auxiliary``. If None, it is derived
            from the 'corners', 'dx', 'dy' and 'angle' entries. Ignored if
            ``profile`` is provided. Defaults to None.
        auxiliary (dict, optional): Dictionary with keys: 'corners' (origin
            [x, y]), 'dx', 'dy' (cell sizes), 'angle' (rotation), 'driver',
            'dtype', 'nodata', 'nodesx', 'nodesy' (dimensions), 'count'
            (bands), 'crsno' (EPSG code). Required if profile is None.

    Returns:
        None

    Raises:
        ValueError: If both ``profile`` and ``auxiliary`` are None.
    """
    if profile is None:
        if auxiliary is None:
            raise ValueError("Either 'profile' or 'auxiliary' must be provided.")

        if transform is None:
            # Build affine transform from auxiliary parameters
            transform = (
                Affine.translation(auxiliary["corners"][0], auxiliary["corners"][1])
                * Affine.scale(auxiliary["dx"], auxiliary["dy"])
                * Affine.rotation(auxiliary["angle"])
            )

        # Construct profile from auxiliary dictionary
        # NOTE(review): width is taken from 'nodesy' and height from 'nodesx';
        # confirm this matches the grid convention used by the callers.
        profile = {
            "driver": auxiliary["driver"],
            "dtype": auxiliary["dtype"],
            "nodata": auxiliary["nodata"],
            "width": auxiliary["nodesy"],
            "height": auxiliary["nodesx"],
            "count": auxiliary["count"],
            "crs": CRS.from_epsg(auxiliary["crsno"]),
            "transform": transform,
            "tiled": False,
            "interleave": "band",
        }
    else:
        # Work on a copy so the caller's profile is not altered below.
        profile = dict(profile)

    # Output specifications override whatever the profile carried.
    profile.update(dtype=rasterio.float32, count=1, compress="lzw")

    with rasterio.Env():
        with rasterio.open(str(file_name), "w", **profile) as dst:
            dst.write(data.astype(rasterio.float32), 1)
    return




[docs]
def to_txt(data: pd.DataFrame, file_name: str, fmt: str = "%9.3f"):
    """Dump tabular data to a plain text file through ``numpy.savetxt``.

    Values are written with an empty delimiter, so column separation comes
    only from the field width in ``fmt``.

    Args:
        data (pd.DataFrame): Data to save.
        file_name (str): Output file path.
        fmt (str): Format applied to each value (e.g. '%9.3f' for a
            9-character field with 3 decimals). Defaults to "%9.3f".

    Returns:
        None
    """
    target = str(file_name)
    np.savetxt(fname=target, X=data, fmt=fmt, delimiter="")
    return




[docs]
def to_shp(
    file_name: str,
    lon: pd.Series,
    lat: pd.Series,
    geometry_type: str = "point",
    values: pd.Series = None,
):
    """Save spatial data to ESRI shapefile format.

    Creates shapefiles with point, multi-point, line, or multi-line geometries
    from coordinate data. Every shape gets a single text attribute ``id``.
    Coordinates are paired by position, so ``lon`` and ``lat`` may be lists,
    arrays or Series with any index.

    Args:
        file_name (str): Output shapefile path (without .shp extension).
        lon (pd.Series or list): Longitude or X coordinates.
        lat (pd.Series or list): Latitude or Y coordinates.
        geometry_type (str): Geometry type to create. Options:
            - 'point': A single point when ``lon``/``lat`` are scalars,
              otherwise one point per coordinate pair (ids start at 1)
            - 'multi-point': One point per coordinate pair (ids start at 0)
            - 'line': Single polyline through all coordinates (id "1")
            - 'multi-line': Multiple polylines (requires values parameter)
            Defaults to 'point'.
        values (pd.Series, optional): Values to group coordinates for multi-line
            geometries. Each unique value creates a separate line whose id is
            that value. Defaults to None.

    Returns:
        None

    Raises:
        ValueError: If geometry_type is not recognized, or if it is
            'multi-line' and ``values`` is None.
    """
    # Validate before creating the writer so bad input does not leave an
    # open (and eventually empty) shapefile behind.
    if geometry_type not in ("point", "multi-point", "line", "multi-line"):
        raise ValueError(
            "Geometry type '{}' not implemented. Options are: point, multi-point, line, or multi-line.".format(
                geometry_type
            )
        )
    if geometry_type == "multi-line" and values is None:
        raise ValueError("'values' is required when geometry_type is 'multi-line'.")

    # The context manager closes the writer even if a shape fails to write.
    with shapefile.Writer(str(file_name)) as iofile:
        iofile.field("id")

        if geometry_type == "point" and np.ndim(lon) == 0:
            # Scalar coordinates: one single point.
            iofile.point(lon, lat)
            iofile.record("1")
        elif geometry_type == "point":
            for number, (x, y) in enumerate(zip(lon, lat), start=1):
                iofile.point(x, y)
                iofile.record(str(number))
        elif geometry_type == "multi-point":
            for number, (x, y) in enumerate(zip(lon, lat)):
                iofile.point(x, y)
                iofile.record(str(number))
        elif geometry_type == "line":
            iofile.line([[[x, y] for x, y in zip(lon, lat)]])
            iofile.record("1")
        else:  # "multi-line"
            lon_arr = np.asarray(lon)
            lat_arr = np.asarray(lat)
            groups = np.asarray(values)
            for label in pd.unique(groups):
                # The same mask must select both coordinates; indexing the
                # latitudes by position inside the group would pair each
                # longitude with a latitude of the first rows instead.
                mask = groups == label
                xs = lon_arr[mask].tolist()
                ys = lat_arr[mask].tolist()
                iofile.line([[[x, y] for x, y in zip(xs, ys)]])
                iofile.record(str(label))

    return




[docs]
def to_netcdf(data: pd.DataFrame, file_path: str):
    """Save data to a NetCDF file.

    Objects that already provide ``to_netcdf`` (such as xarray datasets) are
    written directly. pandas objects have no such method, so they are first
    converted with ``to_xarray()``, which requires the xarray package.

    Args:
        data (pd.DataFrame): Time series or gridded data to save. Any object
            exposing ``to_netcdf`` or ``to_xarray`` is accepted.
        file_path (str): Output file path (without .nc extension); ``.nc`` is
            always appended to it.

    Returns:
        None

    Raises:
        AttributeError: If ``data`` has neither ``to_netcdf`` nor
            ``to_xarray``.
    """
    target = str(file_path) + ".nc"
    writer = data if hasattr(data, "to_netcdf") else data.to_xarray()
    writer.to_netcdf(target)
    return