Source code for ecoscope.analysis.percentile
import typing
import geopandas as gpd
import numpy as np
import rasterio
import rasterio.features
from shapely.geometry import shape
from shapely.geometry.multipolygon import MultiPolygon
from ecoscope.io import raster
[docs]
def _multipolygon(shapes, percentile):
return MultiPolygon([shape(geom) for geom, value in shapes if np.isclose(value, percentile)])
[docs]
def get_percentile_area(
percentile_levels: typing.List,
raster_data: raster.RasterData,
subject_id: str = "",
) -> gpd.GeoDataFrame:
"""
Parameters
----------
percentile_levels: Typing.List[Int]
list of k-th percentile scores.
raster_data: raster.RasterData
array of raster values
subject_id: str
unique identifier for the subject
Returns
-------
GeoDataFrame
"""
shapes = []
for percentile in percentile_levels:
data_array = raster_data.data.copy()
# calculate percentile value
values = np.sort(data_array[~np.isnan(data_array)]).flatten()
if len(values) == 0:
continue
csum = np.cumsum(values)
percentile_val = values[np.argmin(np.abs(csum[-1] * (1 - percentile / 100) - csum))]
# TODO: make a more explicit comparison for less than and greater than
# Set any vals less than the cutoff to be nan
data_array[data_array < percentile_val] = np.nan
# Mask any vals that are less than the cutoff percentile
data_array[data_array >= percentile_val] = percentile
shapes.extend(rasterio.features.shapes(data_array, transform=raster_data.transform))
return gpd.GeoDataFrame(
[
[subject_id, percentile, _multipolygon(shapes, percentile)]
for percentile in sorted(percentile_levels, reverse=True)
],
columns=["subject_id", "percentile", "geometry"],
crs=raster_data.crs,
)