#!/usr/bin/env python
# -*- coding: utf-8 -*-
# diversity.py
# definitions of diversity characters
import numpy as np
import pandas as pd
import scipy as sp
from tqdm import tqdm # progress bar
__all__ = ["Range", "Theil", "Simpson", "Gini"]
class Range:
    """
    Calculates the range of values within neighbours defined in ``spatial_weights``.

    For every row, the values of its neighbours (as listed in
    ``spatial_weights.neighbors``) together with the row's own value are passed
    to ``scipy.stats.iqr``.

    Parameters
    ----------
    gdf : GeoDataFrame
        GeoDataFrame containing morphological tessellation
    values : str, list, np.array, pd.Series
        the name of the dataframe column, np.array, or pd.Series where is stored character value.
    spatial_weights : libpysal.weights
        spatial weights matrix
    unique_id : str
        name of the column with unique id used as spatial_weights index
    rng : Two-element sequence containing floats in range of [0,100], optional
        Percentiles over which to compute the range. Each must be
        between 0 and 100, inclusive. The order of the elements is not important.
    **kwargs : keyword arguments
        optional arguments for ``scipy.stats.iqr``

    Attributes
    ----------
    series : Series
        Series containing resulting values
    gdf : GeoDataFrame
        original GeoDataFrame
    values : Series
        Series containing used values
    sw : libpysal.weights
        spatial weights matrix
    id : Series
        Series containing used unique ID
    rng : tuple
        range
    kwargs : dict
        kwargs

    References
    ----------
    Dibble J, Prelorendjos A, Romice O, et al. (2017) On the origin of spaces: Morphometric foundations of urban form evolution.
    Environment and Planning B: Urban Analytics and City Science 46(4): 707–730.

    Examples
    --------
    >>> sw = momepy.sw_high(k=3, gdf=tessellation_df, ids='uID')
    >>> tessellation_df['area_IQR_3steps'] = mm.Range(tessellation_df, 'area', sw, 'uID', rng=(25, 75)).series
    100%|██████████| 144/144 [00:00<00:00, 722.50it/s]
    """

    def __init__(self, gdf, values, spatial_weights, unique_id, rng=(0, 100), **kwargs):
        # Import the submodule explicitly: a bare ``import scipy`` does not
        # guarantee that ``scipy.stats`` is reachable as an attribute.
        from scipy import stats

        self.gdf = gdf
        self.sw = spatial_weights
        self.id = gdf[unique_id]
        self.rng = rng
        self.kwargs = kwargs

        # Work on a copy so that the temporary "mm_v" column never leaks into
        # the caller's GeoDataFrame.
        data = gdf.copy()
        if values is not None:
            if not isinstance(values, str):
                data["mm_v"] = values
                values = "mm_v"
        self.values = data[values]

        # Re-index by unique id so neighbour ids can be used as row labels.
        data = data.set_index(unique_id)
        column = data[values]

        results_list = []
        for index in tqdm(data.index, total=data.shape[0]):
            # The neighbourhood always includes the observation itself; copying
            # into a new list keeps the weights object untouched.
            neighbours = list(spatial_weights.neighbors[index]) + [index]
            values_list = column.loc[neighbours]
            results_list.append(stats.iqr(values_list, rng=rng, **kwargs))

        # Results follow row order, so they are aligned to the original index.
        self.series = pd.Series(results_list, index=gdf.index)
class Theil:
    """
    Calculates the Theil measure of inequality of values within neighbours defined in ``spatial_weights``.

    Uses ``inequality.theil.Theil`` under the hood. Requires 'inequality' or 'pysal' package.
    For every row, the values of its neighbours together with the row's own
    value are used.

    Parameters
    ----------
    gdf : GeoDataFrame
        GeoDataFrame containing morphological tessellation
    values : str, list, np.array, pd.Series
        the name of the dataframe column, np.array, or pd.Series where is stored character value.
    spatial_weights : libpysal.weights
        spatial weights matrix
    unique_id : str
        name of the column with unique id used as spatial_weights index
    rng : Two-element sequence containing floats in range of [0,100], optional
        Percentiles, each between 0 and 100 inclusive. When given, the values of
        each neighbourhood are passed through ``momepy.limit_range`` with this
        ``rng`` before the index is computed. If None (default), all values are used.

    Attributes
    ----------
    series : Series
        Series containing resulting values
    gdf : GeoDataFrame
        original GeoDataFrame
    values : Series
        Series containing used values
    sw : libpysal.weights
        spatial weights matrix
    id : Series
        Series containing used unique ID
    rng : tuple, optional
        range

    Raises
    ------
    ImportError
        If neither the 'inequality' nor the 'pysal' package can be imported.

    Examples
    --------
    >>> sw = momepy.sw_high(k=3, gdf=tessellation_df, ids='uID')
    >>> tessellation_df['area_Theil'] = mm.Theil(tessellation_df, 'area', sw, 'uID').series
    100%|██████████| 144/144 [00:00<00:00, 597.37it/s]
    """

    def __init__(self, gdf, values, spatial_weights, unique_id, rng=None):
        # Aliased on import so the estimator does not shadow this class's name.
        try:
            from inequality.theil import Theil as TheilIndex
        except ImportError:
            try:
                from pysal.explore.inequality.theil import Theil as TheilIndex
            except ImportError:
                raise ImportError("The 'inequality' or 'pysal' package is required.")

        # Resolve the optional helper once instead of on every loop iteration.
        if rng:
            from momepy import limit_range

        self.gdf = gdf
        self.sw = spatial_weights
        self.id = gdf[unique_id]
        self.rng = rng

        # Work on a copy so that the temporary "mm_v" column never leaks into
        # the caller's GeoDataFrame.
        data = gdf.copy()
        if values is not None:
            if not isinstance(values, str):
                data["mm_v"] = values
                values = "mm_v"
        self.values = data[values]

        # Re-index by unique id so neighbour ids can be used as row labels.
        data = data.set_index(unique_id)
        column = data[values]

        results_list = []
        for index in tqdm(data.index, total=data.shape[0]):
            # The neighbourhood always includes the observation itself.
            neighbours = list(spatial_weights.neighbors[index]) + [index]
            values_list = column.loc[neighbours]
            if rng:
                values_list = limit_range(values_list, rng=rng)
            results_list.append(TheilIndex(values_list).T)

        # Results follow row order, so they are aligned to the original index.
        self.series = pd.Series(results_list, index=gdf.index)
class Simpson:
    """
    Calculates the Simpson's diversity index of values within neighbours defined in ``spatial_weights``.

    Uses ``mapclassify.classifiers`` under the hood for binning. Requires
    ``mapclassify>=2.1.0`` dependency or ``pysal``.

    Bins are generated once from all values. For every row, the values of its
    neighbours together with the row's own value are counted per bin and the
    index is computed as ``sum((n_i / N) ** 2)``, where ``n_i`` is the count in
    bin ``i`` and ``N`` is the total count.

    Parameters
    ----------
    gdf : GeoDataFrame
        GeoDataFrame containing morphological tessellation
    values : str, list, np.array, pd.Series
        the name of the dataframe column, np.array, or pd.Series where is stored character value.
    spatial_weights : libpysal.weights
        spatial weights matrix
    unique_id : str
        name of the column with unique id used as spatial_weights index
    binning : str
        One of mapclassify classification schemes (matched case-insensitively).
        Options are BoxPlot, EqualInterval, FisherJenks,
        FisherJenksSampled, HeadTailBreaks, JenksCaspall,
        JenksCaspallForced, JenksCaspallSampled, MaxPClassifier,
        MaximumBreaks, NaturalBreaks, Quantiles, Percentiles, StdMean,
        UserDefined
    **classification_kwds : dict
        Keyword arguments for classification scheme
        For details see mapclassify documentation:
        https://pysal.org/mapclassify

    Attributes
    ----------
    series : Series
        Series containing resulting values
    gdf : GeoDataFrame
        original GeoDataFrame
    values : Series
        Series containing used values
    sw : libpysal.weights
        spatial weights matrix
    id : Series
        Series containing used unique ID
    binning : str
        binning method (lower-cased)
    bins : array-like
        bins generated by the selected classifier from all values
    classification_kwds : dict
        classification_kwds

    Raises
    ------
    ImportError
        If neither the 'mapclassify' nor the 'pysal' package can be imported.
    ValueError
        If ``binning`` is not one of the available classification schemes.

    References
    ----------
    Feliciotti A (2018) RESILIENCE AND URBAN DESIGN:A SYSTEMS APPROACH TO THE STUDY OF RESILIENCE
    IN URBAN FORM. LEARNING FROM THE CASE OF GORBALS. Glasgow.

    Examples
    --------
    >>> sw = momepy.sw_high(k=3, gdf=tessellation_df, ids='uID')
    >>> tessellation_df['area_Simpson'] = mm.Simpson(tessellation_df, 'area', sw, 'uID').series
    100%|██████████| 144/144 [00:00<00:00, 455.83it/s]
    """

    def __init__(
        self,
        gdf,
        values,
        spatial_weights,
        unique_id,
        binning="HeadTailBreaks",
        **classification_kwds
    ):
        try:
            import mapclassify.classifiers as classifiers
        except ImportError:
            try:
                import pysal.viz.mapclassify.classifiers as classifiers
            except ImportError:
                raise ImportError("The 'mapclassify' or 'pysal' package is required")

        # Lower-cased lookup table so the scheme name is case-insensitive.
        schemes = {
            name.lower(): getattr(classifiers, name)
            for name in classifiers.CLASSIFIERS
        }
        binning = binning.lower()
        if binning not in schemes:
            raise ValueError(
                "Invalid binning. Binning must be in the set: %r" % schemes.keys()
            )

        self.gdf = gdf
        self.sw = spatial_weights
        self.id = gdf[unique_id]
        self.binning = binning
        self.classification_kwds = classification_kwds

        # Work on a copy so that the temporary "mm_v" column never leaks into
        # the caller's GeoDataFrame.
        data = gdf.copy()
        if values is not None:
            if not isinstance(values, str):
                data["mm_v"] = values
                values = "mm_v"
        self.values = data[values]

        # Bins come from the full set of values so that every neighbourhood is
        # counted against the same class breaks.
        self.bins = schemes[binning](data[values], **classification_kwds).bins

        # Re-index by unique id so neighbour ids can be used as row labels.
        data = data.set_index(unique_id)
        column = data[values]

        results_list = []
        for index in tqdm(data.index, total=data.shape[0]):
            # The neighbourhood always includes the observation itself.
            neighbours = list(spatial_weights.neighbors[index]) + [index]
            values_list = column.loc[neighbours]

            sample_bins = classifiers.UserDefined(values_list, self.bins)
            counts = dict(zip(self.bins, sample_bins.counts))
            results_list.append(self._simpson_di(counts))

        # Results follow row order, so they are aligned to the original index.
        self.series = pd.Series(results_list, index=gdf.index)

    def _simpson_di(self, data):
        """
        Given a hash { 'species': count }, returns the Simpson Diversity Index.

        The index is the sum of squared relative abundances of all entries
        with a non-zero count; an empty or all-zero hash yields 0.

        >>> Simpson._simpson_di(None, {'a': 10, 'b': 20, 'c': 30})
        0.3888888888888889

        https://gist.github.com/martinjc/f227b447791df8c90568
        """
        total = sum(data.values())
        # Zero counts are skipped, so an all-zero hash never divides by zero.
        return sum((float(n) / total) ** 2 for n in data.values() if n != 0)
class Gini:
    """
    Calculates the Gini index of values within neighbours defined in ``spatial_weights``.

    Uses ``inequality.gini.Gini`` under the hood. Requires 'inequality' or 'pysal' package.
    For every row with at least one neighbour, the values of its neighbours
    together with the row's own value are used. Rows without neighbours get 0.

    Parameters
    ----------
    gdf : GeoDataFrame
        GeoDataFrame containing morphological tessellation
    values : str, list, np.array, pd.Series
        the name of the dataframe column, np.array, or pd.Series where is stored character value.
    spatial_weights : libpysal.weights
        spatial weights matrix
    unique_id : str
        name of the column with unique id used as spatial_weights index
    rng : Two-element sequence containing floats in range of [0,100], optional
        Percentiles, each between 0 and 100 inclusive. When given, the values of
        each neighbourhood are passed through ``momepy.limit_range`` with this
        ``rng`` before the index is computed. If None (default), all values are used.

    Attributes
    ----------
    series : Series
        Series containing resulting values
    gdf : GeoDataFrame
        original GeoDataFrame
    values : Series
        Series containing used values
    sw : libpysal.weights
        spatial weights matrix
    id : Series
        Series containing used unique ID
    rng : tuple
        range

    Raises
    ------
    ImportError
        If neither the 'inequality' nor the 'pysal' package can be imported.
    ValueError
        If the values contain negative numbers.

    Examples
    --------
    >>> sw = momepy.sw_high(k=3, gdf=tessellation_df, ids='uID')
    >>> tessellation_df['area_Gini'] = mm.Gini(tessellation_df, 'area', sw, 'uID').series
    100%|██████████| 144/144 [00:00<00:00, 597.37it/s]
    """

    def __init__(self, gdf, values, spatial_weights, unique_id, rng=None):
        # Aliased on import so the estimator does not shadow this class's name.
        try:
            from inequality.gini import Gini as GiniIndex
        except ImportError:
            try:
                from pysal.explore.inequality.gini import Gini as GiniIndex
            except ImportError:
                raise ImportError("The 'inequality' or 'pysal' package is required.")

        # Resolve the optional helper once instead of on every loop iteration.
        if rng:
            from momepy import limit_range

        self.gdf = gdf
        self.sw = spatial_weights
        self.id = gdf[unique_id]
        self.rng = rng

        # Work on a copy so that the temporary "mm_v" column never leaks into
        # the caller's GeoDataFrame.
        data = gdf.copy()
        if values is not None:
            if not isinstance(values, str):
                data["mm_v"] = values
                values = "mm_v"
        self.values = data[values]

        if self.values.min() < 0:
            raise ValueError(
                "Values contain negative numbers. Normalise data before "
                "using momepy.Gini."
            )

        # Re-index by unique id so neighbour ids can be used as row labels.
        data = data.set_index(unique_id)
        column = data[values]

        results_list = []
        for index in tqdm(data.index, total=data.shape[0]):
            # Copy so that appending the observation itself does not modify the
            # weights object.
            neighbours = list(spatial_weights.neighbors[index])
            if not neighbours:
                # Observations without neighbours are assigned 0.
                results_list.append(0)
                continue

            neighbours.append(index)
            values_list = column.loc[neighbours].values
            if rng:
                values_list = np.array(limit_range(values_list, rng=rng))
            results_list.append(GiniIndex(values_list).g)

        # Results follow row order, so they are aligned to the original index.
        self.series = pd.Series(results_list, index=gdf.index)