Source code for ecoscope.analysis.percentile

import typing

import geopandas as gpd
import numpy as np
import rasterio
import rasterio.features
from shapely.geometry import shape
from shapely.geometry.multipolygon import MultiPolygon

from ecoscope.io import raster


[docs] def _multipolygon(shapes, percentile): return MultiPolygon([shape(geom) for geom, value in shapes if np.isclose(value, percentile)])
[docs] def get_percentile_area( percentile_levels: typing.List, raster_data: raster.RasterData, subject_id: str = "", ) -> gpd.GeoDataFrame: """ Parameters ---------- percentile_levels: Typing.List[Int] list of k-th percentile scores. raster_data: raster.RasterData array of raster values subject_id: str unique identifier for the subject Returns ------- GeoDataFrame """ shapes = [] for percentile in percentile_levels: data_array = raster_data.data.copy() # calculate percentile value values = np.sort(data_array[~np.isnan(data_array)]).flatten() if len(values) == 0: continue csum = np.cumsum(values) percentile_val = values[np.argmin(np.abs(csum[-1] * (1 - percentile / 100) - csum))] # TODO: make a more explicit comparison for less than and greater than # Set any vals less than the cutoff to be nan data_array[data_array < percentile_val] = np.nan # Mask any vals that are less than the cutoff percentile data_array[data_array >= percentile_val] = percentile shapes.extend(rasterio.features.shapes(data_array, transform=raster_data.transform)) return gpd.GeoDataFrame( [ [subject_id, percentile, _multipolygon(shapes, percentile)] for percentile in sorted(percentile_levels, reverse=True) ], columns=["subject_id", "percentile", "geometry"], crs=raster_data.crs, )