Source code for environmentaltools.download.google_earth_engine

"""Google Earth Engine satellite data download utilities.

This module provides functions to download Sentinel-2 satellite imagery and calculate
vegetation indices using Google Earth Engine API. Designed for salt marsh and coastal
vegetation analysis.

Features:
    - Sentinel-2 image collection filtering and download
    - Vegetation indices calculation (SAVI, MSI, LSWI)
    - Robust error handling and retry mechanisms
    - Memory-efficient batch processing
    - Progress tracking and validation
"""

import datetime
import os
from pathlib import Path
from typing import Any
from loguru import logger

# Set HOME environment variable for cross-platform compatibility
if 'HOME' not in os.environ:
    os.environ['HOME'] = os.path.expanduser("~")

# Optional dependencies for Google Earth Engine
import ee
import geemap
HAS_GEE = True



[docs] def initialize_earth_engine(project_id: str) -> None: """Initialize Google Earth Engine with authentication. Attempts to initialize Earth Engine with existing credentials. If that fails, triggers the authentication flow and then initializes. Args: project_id (str): Google Earth Engine project ID. Raises: RuntimeError: If Earth Engine initialization fails after authentication. Example: >>> initialize_earth_engine("my-gee-project") Earth Engine initialized successfully """ logger.info("Initializing Google Earth Engine...") try: # Try to initialize with existing credentials ee.Initialize(project=project_id) logger.info(f"Earth Engine initialized with project: {project_id}") return except Exception as e: logger.info("Authentication required...") # Check for common error types and provide helpful messages error_str = str(e) if "not registered to use Earth Engine" in error_str: logger.error(f"❌ Project '{project_id}' is not registered for Earth Engine") logger.error("📋 To fix this:") logger.error(f" 1. Visit: https://console.cloud.google.com/earth-engine/configuration?project={project_id}") logger.error(" 2. Register your project for Earth Engine access") logger.error(" 3. See: https://developers.google.com/earth-engine/guides/access") raise RuntimeError(f"Earth Engine project not registered: {project_id}") if "PERMISSION_DENIED" in error_str: logger.error(f"❌ Permission denied for project '{project_id}'") logger.error("📋 To fix this:") logger.error(" 1. Check if you have access to this Google Cloud project") logger.error(" 2. Ensure Earth Engine API is enabled") logger.error(" 3. Run: earthengine authenticate") raise RuntimeError(f"Permission denied for Earth Engine project: {project_id}") # If initialization fails, try authentication first try: logger.info("Attempting authentication...") ee.Authenticate() ee.Initialize(project=project_id) logger.info(f"Earth Engine authenticated and initialized: {project_id}") except Exception as e: error_str = str(e) if "not registered to use Earth Engine" in error_str: logger.error(f"❌ Project '{project_id}' is not registered for Earth Engine") logger.error("📋 To fix this:") logger.error(f" 1. Visit: https://console.cloud.google.com/earth-engine/configuration?project={project_id}") logger.error(" 2. Register your project for Earth Engine access") raise RuntimeError(f"Earth Engine project not registered: {project_id}") raise RuntimeError(f"Earth Engine initialization failed: {e}")
[docs] def create_study_area_geometry(coordinates: list[list[list[float]]]) -> ee.Geometry: """Create Earth Engine geometry from coordinate list. Args: coordinates (list): Nested list of coordinates defining polygon vertices. Format: [[[lon1, lat1], [lon2, lat2], ...]] Returns: ee.Geometry: Earth Engine polygon geometry. Example: >>> coords = [[[-6.1, 36.8], [-6.0, 36.8], [-6.0, 36.9], [-6.1, 36.9]]] >>> geometry = create_study_area_geometry(coords) """ return ee.Geometry.Polygon(coordinates)
[docs] def calculate_vegetation_indices(image: ee.Image) -> ee.Image: """Calculate vegetation and moisture indices for salt marsh analysis. Computes three key indices from Sentinel-2 bands: - SAVI: Soil-Adjusted Vegetation Index (reduces soil background effects) - MSI: Moisture Stress Index (indicates plant water stress) - LSWI: Land Surface Water Index (detects surface water content) Args: image (ee.Image): Sentinel-2 image with B4 (Red), B8 (NIR), and B11 (SWIR1) bands. Returns: ee.Image: Image with selected bands (B4, B8, B11) plus calculated indices (SAVI, MSI, LSWI) and QA60 quality band. Note: SAVI uses L=0.5 for moderate vegetation coverage typical in salt marshes. Example: >>> sentinel_image = ee.Image("COPERNICUS/S2_HARMONIZED/...") >>> with_indices = calculate_vegetation_indices(sentinel_image) """ # SAVI (Soil-Adjusted Vegetation Index) # Formula: ((NIR - RED) / (NIR + RED + L)) * (1 + L) # L = 0.5 for moderate vegetation coverage L = 0.5 savi = image.expression( "((NIR - RED) / (NIR + RED + L)) * (1 + L)", { "NIR": image.select("B8"), "RED": image.select("B4"), "L": L } ).rename("SAVI") # MSI (Moisture Stress Index) # Formula: SWIR1 / NIR # Higher values indicate greater moisture stress msi = image.expression( "SWIR1 / NIR", { "SWIR1": image.select("B11"), "NIR": image.select("B8") } ).rename("MSI") # LSWI (Land Surface Water Index) # Formula: (NIR - SWIR1) / (NIR + SWIR1) # Higher values indicate more surface water lswi = image.normalizedDifference(["B8", "B11"]).rename("LSWI") # Add quality assessment band qa = image.select('QA60') # Return image with essential bands only to reduce file size return image.select(['B4', 'B8', 'B11']).addBands([savi, msi, lswi, qa])
[docs] def create_sentinel2_collection( study_area: ee.Geometry, start_date: str = "2015-01-01", end_date: str = "2024-01-01", cloud_percentage: float = 15.0, ) -> ee.ImageCollection: """Create filtered Sentinel-2 image collection with vegetation indices. Filters Sentinel-2 imagery based on spatial extent, date range, and cloud cover, then calculates vegetation indices for each image. Args: study_area (ee.Geometry): Geographic area of interest. start_date (str, optional): Start date in 'YYYY-MM-DD' format. Defaults to "2015-01-01". end_date (str, optional): End date in 'YYYY-MM-DD' format. Defaults to "2024-01-01". cloud_percentage (float, optional): Maximum cloud cover percentage. Defaults to 15.0. Returns: ee.ImageCollection: Filtered collection with vegetation indices. Raises: ValueError: If no images match the filtering criteria. Example: >>> area = ee.Geometry.Point([-6.0, 36.8]).buffer(10000) >>> collection = create_sentinel2_collection(area, "2020-01-01", "2020-12-31") """ logger.info("Creating Sentinel-2 image collection...") # Create base collection with filters collection = ( ee.ImageCollection("COPERNICUS/S2_HARMONIZED") .filterBounds(study_area) .filterDate(start_date, end_date) .filterMetadata("CLOUDY_PIXEL_PERCENTAGE", "less_than", cloud_percentage) .select(['B4', 'B8', 'B11', 'QA60']) # Select only needed bands ) collection_size = collection.size().getInfo() logger.info(f"Found {collection_size} images matching criteria") if collection_size == 0: raise ValueError("No images found matching the criteria") # Calculate vegetation indices for all images logger.info("Calculating vegetation indices...") indices_collection = collection.map(calculate_vegetation_indices) return indices_collection
[docs] def download_image_with_geemap( image: ee.Image, output_file: Path, study_area: ee.Geometry, scale: int = 10, ) -> bool: """Download Earth Engine image to GeoTIFF using geemap. Uses geemap's download function to export an Earth Engine image to a local file. Validates that the downloaded file exists and has reasonable size. Args: image (ee.Image): Earth Engine image to download. output_file (Path): Output file path for the GeoTIFF. study_area (ee.Geometry): Study area for clipping the image. scale (int, optional): Spatial resolution in meters. Defaults to 10 (Sentinel-2 native resolution). Returns: bool: True if download successful and validated, False otherwise. Example: >>> img = ee.Image("COPERNICUS/S2_HARMONIZED/20200601T105619_20200601T110145_T30STG") >>> area = ee.Geometry.Point([-6.0, 36.8]).buffer(10000) >>> success = download_image_with_geemap(img, Path("output.tif"), area) """ try: # Download using geemap geemap.download_ee_image( image, str(output_file), scale=scale, region=study_area, ) # Verify file was created and has reasonable size if output_file.exists() and output_file.stat().st_size > 1000: # At least 1KB file_size_mb = output_file.stat().st_size / (1024 * 1024) logger.info(f"Successfully downloaded: {output_file.name} ({file_size_mb:.2f} MB)") return True else: logger.warning(f"File created but appears incomplete: {output_file}") if output_file.exists(): output_file.unlink() return False except Exception as e: logger.error(f"Download failed: {e}") return False
[docs] def download_single_sentinel2_image( image_info: dict[str, Any], image_index: int, total_images: int, collection: ee.ImageCollection, study_area: ee.Geometry, output_directory: Path, scale: int = 10, ) -> bool: """Download a single Sentinel-2 image with vegetation indices. Extracts an image from a collection, clips it to the study area, and downloads it using geemap. Skips download if file already exists. Args: image_info (dict): Image metadata from Earth Engine containing properties like 'system:index' and 'system:time_start'. image_index (int): Current image index (for progress tracking). total_images (int): Total number of images to process. collection (ee.ImageCollection): Pre-created collection containing the image. study_area (ee.Geometry): Study area geometry for clipping. output_directory (Path): Directory where files will be saved. scale (int, optional): Spatial resolution in meters. Defaults to 10. Returns: bool: True if download successful or file exists, False otherwise. Example: >>> info = {'properties': {'system:index': 'T30STG_20200601', ...}} >>> success = download_single_sentinel2_image( ... info, 0, 10, collection, area, Path("./output") ... ) """ # Extract image metadata image_id = image_info['properties']['system:index'] acquisition_date = image_info['properties'].get('system:time_start', 'unknown') # Convert timestamp to readable date if acquisition_date != 'unknown': try: date_readable = datetime.datetime.fromtimestamp( acquisition_date / 1000 ).strftime('%Y-%m-%d') except Exception: date_readable = str(acquisition_date) else: date_readable = 'unknown' # Create output filename output_file = output_directory / f"sentinel2_indices_{image_id}.tif" # Skip if file already exists if output_file.exists() and output_file.stat().st_size > 1000: file_size_mb = output_file.stat().st_size / (1024 * 1024) logger.info( f"[{image_index+1}/{total_images}] Skipping existing: {image_id} " f"({file_size_mb:.2f} MB)" ) return True logger.info(f"[{image_index+1}/{total_images}] Downloading: {image_id}") logger.info(f"Acquisition date: {date_readable}") # Reconstruct image from collection try: # Filter collection for specific image target_image = ee.Image( collection.filter(ee.Filter.eq('system:index', image_id)).first() ) # Clip to study area to reduce size target_image = target_image.clip(study_area) # Verify image exists image_properties = target_image.getInfo() if not image_properties: logger.error(f"Could not retrieve image: {image_id}") return False except Exception as e: logger.error(f"Error reconstructing image {image_id}: {e}") return False # Download using geemap return download_image_with_geemap(target_image, output_file, study_area, scale)
[docs] def download_sentinel2_images( config: dict[str, Any] ) -> dict[str, int]: """Download time series of Sentinel-2 imagery with vegetation indices. Main function that orchestrates the complete download workflow: initialization, collection creation, and batch downloading of images. Args: config (dict): Configuration dictionary containing: - project_id (str): Google Earth Engine project ID - study_area_coords (list): Coordinates defining the study area polygon - output_directory (str or Path): Directory for output files - scale (int, optional): Spatial resolution in meters. Default: 10 - start_date (str, optional): Start date 'YYYY-MM-DD'. Default: "2015-01-01" - end_date (str, optional): End date 'YYYY-MM-DD'. Default: "2024-01-01" - cloud_percentage (float, optional): Max cloud cover %. Default: 15.0 Returns: dict: Summary statistics with keys: - successful (int): Number of successful downloads - failed (int): Number of failed downloads - total (int): Total number of images processed Raises: RuntimeError: If Earth Engine initialization fails. ValueError: If no images match the filtering criteria. Example: >>> config = { ... 'project_id': 'my-gee-project', ... 'study_area_coords': [[[-6.1, 36.8], [-6.0, 36.8], ...]], ... 'output_directory': './satellite_images', ... 'scale': 10, ... 'start_date': '2020-01-01', ... 'end_date': '2020-12-31', ... 'cloud_percentage': 10.0 ... } >>> results = download_sentinel2_images(config) >>> print(f"Downloaded {results['successful']} images") """ # Extract configuration project_id = config['project_id'] study_area_coords = config['study_area_coords'] output_directory = Path(config['output_directory']) scale = config.get('scale', 10) start_date = config.get('start_date', '2015-01-01') end_date = config.get('end_date', '2024-01-01') cloud_percentage = config.get('cloud_percentage', 15.0) logger.info("=" * 70) logger.info("SENTINEL-2 SATELLITE DATA DOWNLOAD") logger.info("=" * 70) # Initialize Earth Engine initialize_earth_engine(project_id) # Create study area geometry study_area = create_study_area_geometry(study_area_coords) area_km2 = study_area.area().divide(1000000).getInfo() logger.info(f"Study area: {area_km2:.2f} km²") # Create output directory output_directory.mkdir(parents=True, exist_ok=True) logger.info(f"Output directory: {output_directory}") # Create image collection collection = create_sentinel2_collection( study_area, start_date, end_date, cloud_percentage ) # Get collection metadata collection_list = collection.toList(collection.size()).getInfo() total_images = len(collection_list) logger.info("Collection Summary:") logger.info(f" • Total images: {total_images}") logger.info(f" • Date range: {start_date} to {end_date}") logger.info(f" • Max cloud cover: {cloud_percentage}%") logger.info(f" • Spatial resolution: {scale}m") logger.info(f" • Indices: SAVI, MSI, LSWI") if total_images == 0: raise ValueError("No images found. Check date range and study area.") # Download images logger.info("Starting download process...") successful_downloads = 0 for i, image_info in enumerate(collection_list): if download_single_sentinel2_image( image_info, i, total_images, collection, study_area, output_directory, scale ): successful_downloads += 1 # Print summary logger.info("=" * 70) logger.info("DOWNLOAD COMPLETED") logger.info(f"Successful downloads: {successful_downloads}") logger.info(f"Files saved to: {output_directory}") logger.info("=" * 70) return { 'successful': successful_downloads, 'total': total_images, }