Source code for ecoscope.analysis.percentile
import os
import typing
from dataclasses import dataclass, field
import geopandas as gpd
import numpy as np
import rasterio
import rasterio.features
from shapely.geometry import shape
from shapely.geometry.multipolygon import MultiPolygon
[docs]@dataclass
class PercentileAreaProfile:
input_raster: typing.Union[str, bytes, os.PathLike]
percentile_levels: typing.List = field(default_factory=[50.0])
subject_id: str = ""
[docs]class PercentileArea:
[docs] @staticmethod
def _multipolygon(shapes, percentile):
return MultiPolygon([shape(geom) for geom, value in shapes if np.isclose(value, percentile)])
[docs] @classmethod
def calculate_percentile_area(cls, profile: PercentileAreaProfile):
"""
Parameters
----------
profile: PercentileAreaProfile
dataclass object with information for percentile-area calculation
Returns
-------
GeodataFrame
"""
assert type(profile) is PercentileAreaProfile
shapes = []
# open raster
with rasterio.open(profile.input_raster) as src:
crs = src.crs.to_wkt()
for percentile in profile.percentile_levels:
data_array = src.read(1).astype(np.float32)
# Mask no-data values
data_array[data_array == src.nodata] = np.nan
# calculate percentile value
# percentile_val = np.percentile(data_array[~np.isnan(data_array)], 100.0 - percentile)
values = np.sort(data_array[~np.isnan(data_array)]).flatten()
csum = np.cumsum(values)
percentile_val = values[np.argmin(np.abs(csum[-1] * (1 - percentile / 100) - csum))]
# TODO: make a more explicit comparison for less than and greater than
# Set any vals less than the cutoff to be nan
data_array[data_array < percentile_val] = np.nan
# Mask any vals that are less than the cutoff percentile
data_array[data_array >= percentile_val] = percentile
shapes.extend(rasterio.features.shapes(data_array, transform=src.transform))
return gpd.GeoDataFrame(
[
[profile.subject_id, percentile, cls._multipolygon(shapes, percentile)]
for percentile in sorted(profile.percentile_levels, reverse=True)
],
columns=["subject_id", "percentile", "geometry"],
crs=crs,
)
[docs]def get_percentile_area(
percentile_levels: typing.List,
raster_path: typing.Union[str, bytes, os.PathLike],
subject_id: str = "",
):
"""
Parameters
----------
percentile_levels: Typing.List[Int]
list of k-th percentile scores.
raster_path: str or os.PathLike
file path to where the raster is stored.
subject_id: str
unique identifier for the subject
Returns
-------
GeoDataFrame
"""
percentile_profile = PercentileAreaProfile(
percentile_levels=percentile_levels,
input_raster=raster_path,
subject_id=subject_id,
)
return PercentileArea.calculate_percentile_area(profile=percentile_profile)