Source code for ecoscope.analysis.percentile

import os
import typing
from dataclasses import dataclass, field

import geopandas as gpd
import numpy as np
import rasterio
import rasterio.features
from shapely.geometry import shape
from shapely.geometry.multipolygon import MultiPolygon



[docs]
@dataclass
class PercentileAreaProfile:
    input_raster: typing.Union[str, bytes, os.PathLike]
    percentile_levels: typing.List = field(default_factory=[50.0])
    subject_id: str = ""




[docs]
class PercentileArea:

[docs]
    @staticmethod
    def _multipolygon(shapes, percentile):
        return MultiPolygon([shape(geom) for geom, value in shapes if np.isclose(value, percentile)])



[docs]
    @classmethod
    def calculate_percentile_area(cls, profile: PercentileAreaProfile):
        """
        Parameters
        ----------
        profile:  PercentileAreaProfile
            dataclass object with information for percentile-area calculation

        Returns
        -------
        GeodataFrame
        """

        assert type(profile) is PercentileAreaProfile
        shapes = []

        # open raster
        with rasterio.open(profile.input_raster) as src:
            crs = src.crs.to_wkt()

            for percentile in profile.percentile_levels:
                data_array = src.read(1).astype(np.float32)

                # Mask no-data values
                data_array[data_array == src.nodata] = np.nan

                # calculate percentile value
                # percentile_val = np.percentile(data_array[~np.isnan(data_array)], 100.0 - percentile)
                values = np.sort(data_array[~np.isnan(data_array)]).flatten()
                csum = np.cumsum(values)
                percentile_val = values[np.argmin(np.abs(csum[-1] * (1 - percentile / 100) - csum))]

                # TODO: make a more explicit comparison for less than and greater than

                # Set any vals less than the cutoff to be nan
                data_array[data_array < percentile_val] = np.nan

                # Mask any vals that are less than the cutoff percentile
                data_array[data_array >= percentile_val] = percentile

                shapes.extend(rasterio.features.shapes(data_array, transform=src.transform))

        return gpd.GeoDataFrame(
            [
                [profile.subject_id, percentile, cls._multipolygon(shapes, percentile)]
                for percentile in sorted(profile.percentile_levels, reverse=True)
            ],
            columns=["subject_id", "percentile", "geometry"],
            crs=crs,
        )





[docs]
def get_percentile_area(
    percentile_levels: typing.List,
    raster_path: typing.Union[str, bytes, os.PathLike],
    subject_id: str = "",
):
    """
    Parameters
    ----------
    percentile_levels: Typing.List[Int]
        list of k-th percentile scores.
    raster_path: str or os.PathLike
        file path to where the raster is stored.
    subject_id: str
        unique identifier for the subject

    Returns
    -------
    GeoDataFrame

    """
    percentile_profile = PercentileAreaProfile(
        percentile_levels=percentile_levels,
        input_raster=raster_path,
        subject_id=subject_id,
    )
    return PercentileArea.calculate_percentile_area(profile=percentile_profile)