# Copyright (c) 2022, TU Wien, Department of Geodesy and Geoinformation
# All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of TU Wien, Department of Geodesy and Geoinformation
# nor the names of its contributors may be used to endorse or promote
# products derived from this software without specific prior written
# permission.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL TU WIEN, DEPARTMENT OF GEODESY AND
# GEOINFORMATION BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
Module for saving grid to netCDF.
"""
from netCDF4 import Dataset
import numpy as np
import os
from datetime import datetime
from pygeogrids import CellGrid, BasicGrid
[docs]def save_lonlat(
filename,
arrlon,
arrlat,
geodatum,
arrcell=None,
gpis=None,
subsets={},
global_attrs=None,
format="NETCDF4",
zlib=False,
complevel=4,
shuffle=True,
):
"""
saves grid information to netCDF file
Parameters
----------
filename : string
name of file
arrlon : numpy.array
array of longitudes
arrlat : numpy.array
array of latitudes
geodatum : object
pygeogrids.geodetic_datum.GeodeticDatum object associated with lon/lat
arrcell : numpy.array, optional
array of cell numbers
gpis : numpy.array, optional
gpi numbers if not index of arrlon, arrlat
subsets : dict of dicts, optional
keys : long_name of the netcdf variables
values : dict with the following keys: points, meaning
e.g. subsets = {'subset_flag': {'points': numpy.array,
'value': int,
'meaning': 'water, land'}}
global_attrs : dict, optional
if given will be written as global attributs into netCDF file
format: string, optional
choose either from one of these NetCDF formats
'NETCDF4'
'NETCDF4_CLASSIC'
'NETCDF3_CLASSIC'
'NETCDF3_64BIT_OFFSET'
zlib: boolean, optional
see netCDF documentation
shuffle: boolean, optional
see netCDF documentation
complevel: int, opational
see netCDF documentation
"""
with Dataset(filename, "w", format=format) as ncfile:
if (
global_attrs is not None
and "shape" in global_attrs
and type(global_attrs["shape"]) is not int
and len(global_attrs["shape"]) == 2
):
latsize = global_attrs["shape"][0]
lonsize = global_attrs["shape"][1]
ncfile.createDimension("lat", latsize)
ncfile.createDimension("lon", lonsize)
gpisize = global_attrs["shape"][0] * global_attrs["shape"][1]
if gpis is None:
gpivalues = np.arange(gpisize, dtype=np.int32).reshape(latsize, lonsize)
else:
gpivalues = gpis.reshape(latsize, lonsize)
lons = arrlon.reshape(latsize, lonsize)
lats = arrlat.reshape(latsize, lonsize)
# sort arrlon, arrlat and gpis
arrlon_sorted, arrlat_sorted, gpivalues = sort_for_netcdf(
lons, lats, gpivalues
)
# sorts arrlat descending
arrlat_store = np.unique(arrlat_sorted)[::-1]
arrlon_store = np.unique(arrlon_sorted)
else:
ncfile.createDimension("gp", arrlon.size)
gpisize = arrlon.size
if gpis is None:
gpivalues = np.arange(arrlon.size, dtype=np.int32)
else:
gpivalues = gpis
arrlon_store = arrlon
arrlat_store = arrlat
dim = list(ncfile.dimensions.keys())
crs = ncfile.createVariable(
"crs",
np.dtype("int32").char,
shuffle=shuffle,
zlib=zlib,
complevel=complevel,
)
setattr(crs, "grid_mapping_name", "latitude_longitude")
setattr(crs, "longitude_of_prime_meridian", 0.0)
setattr(crs, "semi_major_axis", geodatum.geod.a)
setattr(crs, "inverse_flattening", 1.0 / geodatum.geod.f)
setattr(crs, "ellipsoid_name", geodatum.name)
gpi = ncfile.createVariable(
"gpi",
np.dtype("int32").char,
dim,
shuffle=shuffle,
zlib=zlib,
complevel=complevel,
)
if gpis is None:
gpi[:] = gpivalues
setattr(gpi, "long_name", "Grid point index")
setattr(gpi, "units", "")
setattr(gpi, "valid_range", [0, gpisize])
gpidirect = 0x1B
else:
gpi[:] = gpivalues
setattr(gpi, "long_name", "Grid point index")
setattr(gpi, "units", "")
setattr(gpi, "valid_range", [np.min(gpivalues), np.max(gpivalues)])
gpidirect = 0x0B
latitude = ncfile.createVariable(
"lat",
np.dtype("float64").char,
dim[0],
shuffle=shuffle,
zlib=zlib,
complevel=complevel,
)
latitude[:] = arrlat_store
setattr(latitude, "long_name", "Latitude")
setattr(latitude, "units", "degree_north")
setattr(latitude, "standard_name", "latitude")
setattr(latitude, "valid_range", [-90.0, 90.0])
if len(dim) == 2:
londim = dim[1]
else:
londim = dim[0]
longitude = ncfile.createVariable(
"lon",
np.dtype("float64").char,
londim,
shuffle=shuffle,
zlib=zlib,
complevel=complevel,
)
longitude[:] = arrlon_store
setattr(longitude, "long_name", "Longitude")
setattr(longitude, "units", "degree_east")
setattr(longitude, "standard_name", "longitude")
setattr(longitude, "valid_range", [-180.0, 180.0])
if arrcell is not None:
cell = ncfile.createVariable(
"cell",
np.dtype("int32").char,
dim,
shuffle=shuffle,
zlib=zlib,
complevel=complevel,
)
if len(dim) == 2:
arrcell = arrcell.reshape(latsize, lonsize)
_, _, arrcell = sort_for_netcdf(lons, lats, arrcell)
cell[:] = arrcell
setattr(cell, "long_name", "Cell")
setattr(cell, "units", "")
setattr(cell, "valid_range", [np.min(arrcell), np.max(arrcell)])
if subsets:
for subset_name in subsets.keys():
flag = ncfile.createVariable(
subset_name,
np.dtype("int8").char,
dim,
shuffle=shuffle,
zlib=zlib,
complevel=complevel,
)
# create flag array based on shape of data
lf = np.zeros_like(gpivalues)
if len(dim) == 2:
lf = lf.flatten()
value = subsets[subset_name]["value"]
lf[subsets[subset_name]["points"]] = value
if len(dim) == 2:
lf = lf.reshape(latsize, lonsize)
_, _, lf = sort_for_netcdf(lons, lats, lf)
flag[:] = lf
setattr(flag, "long_name", subset_name)
setattr(flag, "units", "")
setattr(flag, "coordinates", "lat lon")
setattr(flag, "flag_values", np.arange(2, dtype=np.int8))
setattr(flag, "flag_meanings", subsets[subset_name]["meaning"])
setattr(flag, "valid_range", [0, value])
s = "%Y-%m-%d %H:%M:%S"
date_created = datetime.now().strftime(s)
attr = {
"Conventions": "CF-1.6",
"id": os.path.split(filename)[1], # file name
"date_created": date_created,
"geospatial_lat_min": np.round(np.min(arrlat), 4),
"geospatial_lat_max": np.round(np.max(arrlat), 4),
"geospatial_lon_min": np.round(np.min(arrlon), 4),
"geospatial_lon_max": np.round(np.max(arrlon), 4),
"gpidirect": gpidirect,
}
ncfile.setncatts(attr)
if global_attrs is not None:
ncfile.setncatts(global_attrs)
[docs]def sort_for_netcdf(lons, lats, values):
"""
Sort an 2D array for storage in a netCDF file.
This mans that the latitudes are stored from
90 to -90 and the longitudes from -180 to 180.
Input arrays have to have shape latdim, londim
which would mean for a global 10 degree grid (18, 36).
Parameters
----------
lons: numpy.ndarray
2D numpy array of longitudes
lats: numpy.ndarray
2D numpy array of latitudes
values: numpy.ndarray
2D numpy array of values to sort
Returns
-------
lons: numpy.ndarray
2D numpy array of longitudes, sorted
lats: numpy.ndarray
2D numpy array of latitudes, sorted
values: numpy.ndarray
2D numpy array of values to sort, sorted
"""
arrlat = lats.flatten()
arrlon = lons.flatten()
arrval = values.flatten()
idxlatsrt = np.argsort(arrlat)[::-1]
idxlat = np.argsort(arrlat[idxlatsrt].reshape(lats.shape), axis=0)[::-1]
idxlon = np.argsort(
arrlon[idxlatsrt].reshape(lons.shape)[idxlat, np.arange(lons.shape[1])], axis=1
)
values = arrval[idxlatsrt].reshape(*lons.shape)[idxlat, np.arange(lons.shape[1])][
np.arange(lons.shape[0])[:, None], idxlon
]
lons = arrlon[idxlatsrt].reshape(*lons.shape)[idxlat, np.arange(lons.shape[1])][
np.arange(lons.shape[0])[:, None], idxlon
]
lats = arrlat[idxlatsrt].reshape(*lons.shape)[idxlat, np.arange(lons.shape[1])][
np.arange(lons.shape[0])[:, None], idxlon
]
return lons, lats, values
[docs]def save_grid(
filename,
grid,
subset_name="subset_flag",
subset_value=1.0,
subset_meaning="water land",
global_attrs=None,
):
"""
save a BasicGrid or CellGrid to netCDF
it is assumed that a subset should be used as land_points
Parameters
----------
filename : string
name of file
grid : BasicGrid or CellGrid object
grid whose definition to save to netCDF
subset_name : string, optional (default: 'subset_flag')
long_name of the netcdf variable
if the subset symbolises something other than a land/sea mask
subset_value: float, optional (default: 1.)
Value that that the subgrid is written down as in the file.
subset_meaning : string, optional (default: 'water land')
will be written into flag_meanings metadata of variable 'subset_name'
global_attrs : dict, optional
if given will be written as global attributes into netCDF file
"""
try:
arrcell = grid.arrcell
except AttributeError:
arrcell = None
gpis = grid.gpis
if grid.shape is not None:
if global_attrs is None:
global_attrs = {}
global_attrs["shape"] = grid.shape
if grid.subset is not None:
subsets = {
subset_name: {
"points": grid.subset,
"meaning": subset_meaning,
"value": subset_value,
}
}
else:
subsets = None
save_lonlat(
filename,
grid.arrlon,
grid.arrlat,
grid.geodatum,
arrcell=arrcell,
gpis=gpis,
subsets=subsets,
zlib=True,
global_attrs=global_attrs,
)
[docs]def load_grid(
filename,
subset_flag="subset_flag",
subset_value=1,
location_var_name="gpi",
**grid_kwargs
):
"""
load a grid from netCDF file
Parameters
----------
filename : string
filename
subset_flag : string, optional (default: 'subset_flag')
name of the subset to load.
subset_value : int or list, optional (default: 1)
Value(s) of the subset variable that points are loaded for.
location_var_name: string, optional (default: 'gpi')
variable name under which the grid point locations
are stored
**grid_kwargs: additional kwargs that are passed to BasicGrid or CellGrid
Returns
-------
grid : BasicGrid or CellGrid instance
grid instance initialized with the loaded data
"""
with Dataset(filename, "r") as nc_data:
# determine if it is a cell grid or a basic grid
arrcell = None
if "cell" in nc_data.variables.keys():
arrcell = np.array(nc_data.variables["cell"][:].flatten())
gpis = np.array(nc_data.variables[location_var_name][:].flatten())
shape = None
if hasattr(nc_data, "shape"):
try:
shape = tuple(nc_data.shape)
except TypeError as e:
try:
length = len(nc_data.shape)
except TypeError:
length = nc_data.shape.size
if length == 1:
shape = tuple([nc_data.shape])
else:
raise e
subset = None
# some old grid do not have a shape attribute
# this meant that they had shape of len 1
if shape is None:
shape = tuple([len(nc_data.variables["lon"][:])])
# check if grid has regular shape
if len(shape) == 2:
lons, lats = np.meshgrid(
np.array(nc_data.variables["lon"][:]),
np.array(nc_data.variables["lat"][:]),
)
lons = lons.flatten()
lats = lats.flatten()
if subset_flag in nc_data.variables.keys():
subset = np.where(
np.isin(nc_data.variables[subset_flag][:].flatten(), subset_value)
)[0]
elif len(shape) == 1:
lons = np.array(nc_data.variables["lon"][:])
lats = np.array(nc_data.variables["lat"][:])
# determine if it has a subset
if subset_flag in nc_data.variables.keys():
subset = np.where(
np.isin(
np.array(nc_data.variables[subset_flag][:].flatten()),
subset_value,
)
)[0]
if "crs" in nc_data.variables:
geodatumName = nc_data.variables["crs"].getncattr("ellipsoid_name")
else:
# ellipsoid information is missing, use WGS84 by default
geodatumName = "WGS84"
if arrcell is None:
# BasicGrid
return BasicGrid(
lons,
lats,
gpis=gpis,
geodatum=geodatumName,
subset=subset,
shape=shape,
**grid_kwargs
)
else:
# CellGrid
return CellGrid(
lons,
lats,
arrcell,
gpis=gpis,
geodatum=geodatumName,
subset=subset,
shape=shape,
**grid_kwargs
)