Source code for pygeogrids.shapefile

# Copyright (c) 2022, TU Wien, Department of Geodesy and Geoinformation
# All rights reserved.

# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#    * Redistributions of source code must retain the above copyright
#      notice, this list of conditions and the following disclaimer.
#    * Redistributions in binary form must reproduce the above copyright
#      notice, this list of conditions and the following disclaimer in the
#      documentation and/or other materials provided with the distribution.
#    * Neither the name of TU Wien, Department of Geodesy and Geoinformation
#      nor the names of its contributors may be used to endorse or promote
#      products derived from this software without specific prior written
#      permission.

# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL TU WIEN, DEPARTMENT OF GEODESY AND
# GEOINFORMATION BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""
Module for extracting grid points from global administrative areas
"""
import os
import numpy as np
from typing import Union, Optional
import pandas as pd
from pygeogrids.grids import CellGrid

try:
    from osgeo import ogr
    ogr_installed = True
except ImportError:
    ogr_installed = False

path_shp_countries = os.path.join(
    os.path.dirname(__file__), 'shapefiles', 'ne_110m_admin_0_countries.shp')

[docs]def get_gad_grid_points(grid, gadm_shp_path, level, name=None, oid=None):
    """
    Returns all grid points located in a administrative area. For this
    function the files from
    http://biogeo.ucdavis.edu/data/gadm2.8/gadm28_levels.shp.zip
    need to be available in the folder gadm_shp_path
    Optinal as coordinates. Currently only works in WGS84.

    Parameters
    ----------
    grid: object
    gadm_shp_path: path
        Location to GADM28 shapefiles
    level: int
        Global Administrative Database Level
        0 : country
        1 : province/county/state/region/municipality/...
        2 : municipality/District/county/...
    name: str
        name of region at indicated level. For countries the english name
    oid: int
        OBJECTID of feature. This only works with the correct level shp.

    Returns
    -------
    grid : BasicGrid
            Subgrid.

    Raises
    ------
    ValueError: If name or oid are not found in shapefile of given level
    ImportError: If gdal or osgeo are not installed
    """
    if ogr_installed:
        drv = ogr.GetDriverByName("ESRI Shapefile")
        ds_in = drv.Open(os.path.join(gadm_shp_path, f"gadm28_adm{level}.shp"))
        lyr_in = ds_in.GetLayer(0)
        if name:
            if level == 0:
                lyr_in.SetAttributeFilter("NAME_ENGLI = '%s'" % (name))
            else:
                lyr_in.SetAttributeFilter("NAME_%s = '%s'" % (level, name))

        if oid:
            lyr_in.SetAttributeFilter("OBJECTID = '%s'" % (oid))
        if lyr_in.GetFeatureCount() > 0:
            feature = lyr_in.GetNextFeature()
            ply = feature.GetGeometryRef()
            return grid.get_shp_grid_points(ply)
        else:
            raise ValueError("Requested Object not found in shapefile.")

    else:
        raise ImportError("Could not import ogr from osgeo. "
                          "Please install them via `conda install geos gdal` first.")


[docs]class ShpReader:
    """
    Wrapper around gdal to read geometries from passed shapefile
    and filter them by fields for certain attributes (e.g. country names).
    NOTE: The default implementation using `fields=None` will load ALL
    available metadata from the shapefile. This can be slow!
    If you know the field that contains the values you want to filter,
    pass it as the `field`, this will also speed up the search.
    """
    def __init__(
            self,
            shp_path: str,
            fields: Optional[Union[list, str]] = None,
            driver: str = 'ESRI Shapefile'
    ):
        """
        Read shp-file and create feature table

        Parameters
        ----------
        shp_path: str
            Path to shapefile
        fields: list[str], str or None
            Shapefile fields to read from attribute table. If None is passed,
            all fields are read.
        driver: str, optional
            Driver to use for reading shapefile. Default is ESRI Shapefile.

        Attributes
        ----------
        features: pd.DataFrame
            Feature table where each row is a polygon in the shp file and
            each column represents a field. Fields contain attributes used
            to filter out the relevant polygons.
        """
        if not ogr_installed:
            raise ImportError("Could not import ogr from osgeo. "
                              "Please install them via `conda install geos gdal` first.")
        self.shp_path = shp_path
        self.driver = driver
        self._init_open_shp()

        self.fields = None if fields is None else np.atleast_1d(fields)
        all_fields = [field.name for field in self.layer.schema]

        if self.fields is None:
            self.fields = all_fields
        else:
            # check each element of fields is in all_fields
            for field in self.fields:
                if field not in all_fields:
                    raise ValueError(f"Field {field} not in shapefile")

        # The feature table contains all geometries in the shp file and
        # their attributes from shp fields as columns.
        # Attributes are used to select relevant features, e.g. countries
        # by name.
        self.features = self._init_build_feature_table()

    def __repr__(self):
        name = self.__class__.__name__
        return f"shp_path: {self.shp_path}\n" \
               f"See `{name}.features` for the full feature table.\n" \
               f"----------------------------------------------------\n" \
               f"{len(self.features.index)} features with Fields: " \
               f"{self.fields}"

    def _init_open_shp(self):
        """
        Open shapefile and get layer
        """
        self.driver = ogr.GetDriverByName(self.driver)
        self.ds = self.driver.Open(self.shp_path)
        self.layer = self.ds.GetLayer()
        self.srs = self.layer.GetSpatialRef()

    def _init_build_feature_table(self):
        """
        Extract names of features in the relevant fields and stores them
        in a pandas dataframe.
        Each row in the data frame refers to the same feature in the shapefile.
        The index is the id under which the feature is found.
        Columns can be used to find the id(s) for a given name.

        Returns
        -------
        df: pd.DataFrame
            Dataframe with feature ids and names for features in passed fields

        """
        ids = []
        features = {}

        for n in range(self.layer.GetFeatureCount()):
            feature = self.layer.GetFeature(n)
            for field in self.fields:
                if field not in features:
                    features[field] = []
                features[field].append(feature.GetField(field))
            ids.append(n)

        return pd.DataFrame(index=ids, data=features)

[docs]    def lookup_id(self, names: Union[str, list, np.ndarray]) -> np.ndarray:
        """
        Lookup ids for passed names in passed field
        """
        names = np.atleast_1d(names)
        rows = np.unique(np.where(np.isin(self.features.values, names))[0])
        return self.features.index.values[rows]

[docs]    def geom(self, id) -> 'ogr.Geometry':
        """
        Get geometry of feature with passed id
        """
        feature = self.layer.GetFeature(id)
        geom = feature.geometry().Clone()
        return geom

[docs]def subgrid_for_shp(grid, values, shp_path=path_shp_countries,
                    field=None, shp_driver='ESRI Shapefile',
                    verbose=False):
    """
    Cut grid to selected shape(s) from passed shapefile.

    Parameters
    ----------
    grid: CellGrid
        Grid that should be cut to shape(s) in passed shapefile
    values: np.ndarray or list
        Values in field that are used to select the shape(s) to cut the grid
        to. Usually e.g. a list of country names or continent names.
        The passed values are looked up in all loaded fields, i.e. in all
        columns of the feature table. A polygon is selected if the value
        appears in ANY of the columns (fields) of the feature table.
    shp_path: str, optional (default: ./shapefiles/ne_10m_admin_0_countries.shp)
        Path to shapefile. By default we use the 110m resolution country
        shape file provided in this package. In theory any shapefile should
        work as long as the structure is the same (the GADM shapefiles
        should work as well).
        For the default shp file it is recommended to filter by the fields
        "NAME" (full country name) and "CONTINENT" (continent name).
        ----------------------------------------------------------------------
        The default file contains the following country names (field: 'NAME'):
        .......................................................................
        'Afghanistan', 'Albania', 'Algeria', 'Angola', 'Antarctica',
        'Argentina', 'Armenia', 'Australia', 'Austria', 'Azerbaijan',
        'Bahamas', 'Bangladesh','Belarus', 'Belgium', 'Belize', 'Benin',
        'Bhutan', 'Bolivia', 'Bosnia and Herz.', 'Botswana', 'Brazil',
        'Brunei', 'Bulgaria', 'Burkina Faso', 'Burundi', 'Cambodia',
        'Cameroon', 'Canada', 'Central African Rep.', 'Chad', 'Chile','China',
        'Colombia', 'Congo', 'Costa Rica', 'Croatia', 'Cuba', 'Cyprus',
        'Czechia', "Côte d'Ivoire", 'Dem. Rep. Congo', 'Denmark', 'Djibouti',
        'Dominican Rep.', 'Ecuador', 'Egypt', 'El Salvador', 'Eq. Guinea',
        'Eritrea', 'Estonia', 'Ethiopia', 'Falkland Is.', 'Fiji', 'Finland',
        'Fr. S. Antarctic Lands', 'France', 'French Guiana', 'Gabon', 'Gambia',
        'Georgia', 'Germany', 'Ghana', 'Greece', 'Greenland', 'Guatemala',
        'Guinea', 'Guinea-Bissau', 'Guyana', 'Haiti', 'Honduras', 'Hungary',
        'Iceland', 'India', 'Indonesia', 'Iran', 'Iraq', 'Ireland', 'Israel',
        'Italy', 'Jamaica', 'Japan', 'Jordan', 'Kazakhstan', 'Kenya', 'Kosovo',
        'Kuwait', 'Kyrgyzstan', 'Laos', 'Latvia', 'Lebanon', 'Lesotho',
        'Liberia', 'Libya', 'Lithuania', 'Luxembourg', 'Macedonia',
        'Madagascar', 'Malawi', 'Malaysia', 'Mali', 'Mauritania', 'Mexico',
        'Moldova', 'Mongolia', 'Montenegro', 'Morocco', 'Mozambique',
        'Myanmar', 'N. Cyprus', 'Namibia', 'Nepal', 'Netherlands',
        'New Caledonia', 'New Zealand', 'Nicaragua', 'Niger', 'Nigeria',
        'North Korea', 'Norway', 'Oman', 'Pakistan', 'Palestine', 'Panama',
        'Papua New Guinea', 'Paraguay', 'Peru', 'Philippines', 'Poland',
        'Portugal', 'Puerto Rico', 'Qatar', 'Romania', 'Russia',
        'Rwanda', 'S. Sudan', 'Saudi Arabia', 'Senegal',
        'Serbia', 'Sierra Leone', 'Slovakia', 'Slovenia', 'Solomon Is.',
        'Somalia', 'Somaliland', 'South Africa', 'South Korea', 'Spain',
        'Sri Lanka', 'Sudan', 'Suriname', 'Sweden', 'Switzerland', 'Syria',
        'Taiwan', 'Tajikistan', 'Tanzania', 'Thailand', 'Timor-Leste', 'Togo',
        'Trinidad and Tobago', 'Tunisia', 'Turkey', 'Turkmenistan', 'Uganda',
        'Ukraine', 'United Arab Emirates', 'United Kingdom',
        'United States of America', 'Uruguay', 'Uzbekistan', 'Vanuatu',
        'Venezuela', 'Vietnam', 'W. Sahara', 'Yemen', 'Zambia', 'Zimbabwe',
        'eSwatini'
        ----------------------------------------------------------------------
        and the following continent names (field: 'CONTINENT'):
        .......................................................................
        'Africa', 'Antarctica', 'Asia', 'Europe', 'North America', 'Oceania',
        'Seven seas (open ocean)', 'South America'
    field: str or list[str], optional (default: None)
        Shapefile field(s) to use for value search. Each field is a column
        in the feature table. If None is passed, all fields are used (slow).
        Limiting the fields leads to faster lookup times and is recommended
        especially for complex shapefiles.
        For the default countries shp file it is suggested to use the fields:
        "NAME" and "CONTINENT" to search for countries and continents by their
        names.
    shp_driver: str, optional (default: 'ESRI Shapefile')
        Driver to use for reading vector shapefile. Default is ESRI Shapefile.
    verbose: bool, optional (default: False)
        If True, print some information while processing. This also prints
        all available fields and the attribute table after loading the file.

    Returns
    -------
    subgrid: CellGrid
        Subgrid that is cut to the shape(s) in the shapefile
    """
    shp_reader = ShpReader(shp_path, fields=field, driver=shp_driver)

    if verbose:
        print(shp_reader)

    if verbose:
        print("--------------")
        print("Feature table:")
        print(shp_reader.features)

    ids = np.unique(shp_reader.lookup_id(values))

    if len(ids) == 0:
        raise ValueError(f"No features found for {values} in "
                         f"fields {shp_reader.fields}")

    gpis = np.array([], dtype=int)
    for i, id in enumerate(ids):
        if verbose:
            print('Creating subset {} of {} ... to speed this up '
                  'improve pygeogrids.grids.get_shp_grid_points()'.format(
                   i + 1, ids.size))

        subgrid = grid.get_shp_grid_points(shp_reader.geom(id))
        if subgrid is not None:
            poly_gpis = subgrid.activegpis
            gpis = np.append(gpis, poly_gpis)
        else:
            pass

    if gpis.size == 0:
        empty_arr = np.array([])
        return CellGrid(lon=empty_arr, lat=empty_arr, cells=empty_arr)
    else:
        return grid.subgrid_from_gpis(np.unique(gpis))