diff --git a/plasticparcels/constructors.py b/plasticparcels/constructors.py index 0281d21..caf6e97 100644 --- a/plasticparcels/constructors.py +++ b/plasticparcels/constructors.py @@ -220,6 +220,14 @@ def create_particleset_from_map(fieldset, settings): # Load release type information release_type = settings['release']['initialisation_type'] # TODO: MAKE THIS PART BETTER! + + ### DEALING WITH GRID vs POINT + if 'release_style' in settings['release'].keys(): + release_style = settings['release']['release_style'] + else: + release_style = 'point' + + ### - can we use the fieldset.U.grid? And do the rotations ourself? we have the xi and yi for the grid release_quantity_names = { 'coastal': 'MPW_Cell', diff --git a/plasticparcels/scripts/create_release_maps.py b/plasticparcels/scripts/create_release_maps.py index 621daf8..62d106e 100644 --- a/plasticparcels/scripts/create_release_maps.py +++ b/plasticparcels/scripts/create_release_maps.py @@ -17,6 +17,7 @@ from utils import distance, get_coords_from_polygon +# Function definitions # Function definitions def create_coastal_mpw_jambeck_release_map(mask_coast_filepath, coords_filepath, gpw_filepath, distance_threshhold=50., grid_range=0.083, @@ -83,12 +84,16 @@ def create_coastal_mpw_jambeck_release_map(mask_coast_filepath, coords_filepath, data_mask_coast = xr.open_dataset(mask_coast_filepath) coords = xr.open_dataset(coords_filepath, decode_cf=False) - lats_coast = data_mask_coast['lat'].data[np.where(data_mask_coast['mask_coast'])] - lons_coast = data_mask_coast['lon'].data[np.where(data_mask_coast['mask_coast'])] + # Indices of all coastal grid cells + coast_grid_id = np.asarray(np.where(data_mask_coast['mask_coast'])) + + # Longitudes and latitudes of coastal grid cells + lats_coast = data_mask_coast['lat'].data[coast_grid_id[0],coast_grid_id[1]] + lons_coast = data_mask_coast['lon'].data[coast_grid_id[0],coast_grid_id[1]] # Compute the area of each grid cell cell_areas = coords['e1t'][0] * coords['e2t'][0]/10e6 # in km**2 - coastal_cell_areas = cell_areas.data[np.where(data_mask_coast['mask_coast'])] + coastal_cell_areas = cell_areas.data[coast_grid_id[0],coast_grid_id[1]] # Loop through all countries from Natural Earth dataset coastal_density_list = [] @@ -119,6 +124,7 @@ def create_coastal_mpw_jambeck_release_map(mask_coast_filepath, coords_filepath, coastal_id = all_coastal_indices[i] coastal_point = [lons_coast[coastal_id], lats_coast[coastal_id]] coastal_cell_area = coastal_cell_areas[coastal_id] + coastal_grid_cell_id = coast_grid_id[:, coastal_id] # Compute the population density as the maximum population density around the coastal cell # Because the grid is ordered longitude ascending, latitude descending, the slice ordering is swapped for lat @@ -134,6 +140,7 @@ def create_coastal_mpw_jambeck_release_map(mask_coast_filepath, coords_filepath, 'Country': country_name, 'Longitude': coastal_point[0], 'Latitude': coastal_point[1], + 'GridCellID': coastal_grid_cell_id, 'Area[km2]': coastal_cell_area, 'PopulationDensity': population_density}) @@ -236,8 +243,13 @@ def create_rivers_meijer_release_map(mask_coast_filepath, river_filepath): # Load in coast mask data_mask_coast = xr.open_dataset(mask_coast_filepath) - lats_coast = data_mask_coast['lat'].data[np.where(data_mask_coast['mask_coast'])] - lons_coast = data_mask_coast['lon'].data[np.where(data_mask_coast['mask_coast'])] + + # Indices of all coastal grid cells + coast_grid_id = np.asarray(np.where(data_mask_coast['mask_coast'])) + + # Longitudes and latitudes of coastal grid cells + lats_coast = data_mask_coast['lat'].data[coast_grid_id[0],coast_grid_id[1]] + lons_coast = data_mask_coast['lon'].data[coast_grid_id[0],coast_grid_id[1]] # Load Natural Earth dataset for attaching country information to river source shpfilename = shpreader.natural_earth(resolution='50m', @@ -286,6 +298,7 @@ def create_rivers_meijer_release_map(mask_coast_filepath, river_filepath): 'Country': coastal_df['Country'].iloc[closest_country_id], 'Longitude': lons_coast[closest_coast_id], 'Latitude': lats_coast[closest_coast_id], + 'GridCellID': coast_grid_id[:, closest_coast_id], 'Emissions': output}) river_emissions_df = pd.DataFrame.from_records(river_emissions_list) @@ -372,8 +385,12 @@ def create_fisheries_gfwv2_release_map(fisheries_filepath, mask_land_filepath): model_agg_data_fisheries_info = agg_data_fisheries_info.copy(deep=True) data_mask_land = xr.open_dataset(mask_land_filepath) - lats_ocean = data_mask_land['lat'].data[np.where(~data_mask_land['mask_land'])] - lons_ocean = data_mask_land['lon'].data[np.where(~data_mask_land['mask_land'])] + # Indices of all coastal grid cells + ocean_grid_id = np.asarray(np.where(~data_mask_land['mask_land'])) + + # Longitudes and latitudes of ocean grid cells + lats_ocean = data_mask_land['lat'].data[ocean_grid_id[0], ocean_grid_id[1]] + lons_ocean = data_mask_land['lon'].data[ocean_grid_id[0], ocean_grid_id[1]] # A list of ocean points ocean_points = np.array([lons_ocean, lats_ocean]).T @@ -383,13 +400,15 @@ def create_fisheries_gfwv2_release_map(fisheries_filepath, mask_land_filepath): distances_deg, indices = spatial.KDTree(ocean_points).query(fishing_points) mapped_ocean_points = ocean_points[indices] + mapped_ocean_grid_ids = ocean_grid_id[:, indices] # Add these mapped points to the fisheries data as extra columns model_agg_data_fisheries_info.insert(0, "ModelLongitude", mapped_ocean_points[:, 0]) model_agg_data_fisheries_info.insert(1, "ModelLatitude", mapped_ocean_points[:, 1]) + model_agg_data_fisheries_info.insert(2, "GridCellID", mapped_ocean_grid_ids) # Create a data set where longitude and latitude are from the model - model_agg_data_fisheries_info = model_agg_data_fisheries_info.groupby(['ModelLongitude', 'ModelLatitude', 'Flag', 'Geartype', 'Month', 'Continent', 'Region', 'Subregion', 'Country'])['fishing_hours'].agg('sum').reset_index() + model_agg_data_fisheries_info = model_agg_data_fisheries_info.groupby(['ModelLongitude', 'ModelLatitude', 'GridCellID', 'Flag', 'Geartype', 'Month', 'Continent', 'Region', 'Subregion', 'Country'])['fishing_hours'].agg('sum').reset_index() model_agg_data_fisheries_info = model_agg_data_fisheries_info.rename(columns={'ModelLatitude': 'Latitude', 'ModelLongitude': 'Longitude'}) # Return both raw and model datasets @@ -415,8 +434,12 @@ def create_global_concentrations_kaandorp_release_map(mask_land_filepath, mask_c # Load in coast mask and model coordinates data_mask_coast = xr.open_dataset(mask_coast_filepath) - lats_coast = data_mask_coast['lat'].data[np.where(data_mask_coast['mask_coast'])] - lons_coast = data_mask_coast['lon'].data[np.where(data_mask_coast['mask_coast'])] + # Indices of all coastal grid cells + coast_grid_id = np.asarray(np.where(data_mask_coast['mask_coast'])) + + # Longitudes and latitudes of coastal grid cells + lats_coast = data_mask_coast['lat'].data[coast_grid_id[0],coast_grid_id[1]] + lons_coast = data_mask_coast['lon'].data[coast_grid_id[0],coast_grid_id[1]] # Tackle the beach concentrations first: # Create a list of lons, lats, concentration values @@ -474,6 +497,7 @@ def create_global_concentrations_kaandorp_release_map(mask_land_filepath, mask_c 'Country': coastal_df['Country'].iloc[closest_country_id], 'Longitude': lon, 'Latitude': lat, + 'GridCellID': coast_grid_id[:,i], 'Concentration': conc_beach[closest_beach_id], 'ConcentrationType': 'Beach'}) @@ -482,9 +506,14 @@ def create_global_concentrations_kaandorp_release_map(mask_land_filepath, mask_c # Now tackle the surface ocean concentrations: conc_ocean = np.power(10, ocean.values) # Values are in log10 space + # Indices of all coastal grid cells data_mask_land = xr.open_dataset(mask_land_filepath) - lats_ocean = data_mask_land['lat'].data[np.where(~data_mask_land['mask_land'])] - lons_ocean = data_mask_land['lon'].data[np.where(~data_mask_land['mask_land'])] + + ocean_grid_id = np.asarray(np.where(~data_mask_land['mask_land'])) + + # Longitudes and latitudes of ocean grid cells + lats_ocean = data_mask_land['lat'].data[ocean_grid_id[0], ocean_grid_id[1]] + lons_ocean = data_mask_land['lon'].data[ocean_grid_id[0], ocean_grid_id[1]] # Function to interpolate the ocean concentrations f_interp_conc_ocean = RegularGridInterpolator((ocean.lon, ocean.lat), conc_ocean.T, method='nearest', bounds_error=False, fill_value=None) @@ -502,6 +531,7 @@ def create_global_concentrations_kaandorp_release_map(mask_land_filepath, mask_c 'Country': 'N/A', 'Longitude': lons_ocean[i], 'Latitude': lats_ocean[i], + 'GridCellID': ocean_grid_id[:,i], 'Concentration': interp_conc_ocean[i], 'ConcentrationType': 'Ocean'}) ocean_concentration_df = pd.DataFrame.from_records(ocean_concentration_list) diff --git a/testing.ipynb b/testing.ipynb new file mode 100644 index 0000000..ce286f9 --- /dev/null +++ b/testing.ipynb @@ -0,0 +1,254 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import xarray as xr\n", + "import numpy as np\n", + "import pandas as pd\n", + "import urllib.request as urlr\n", + "import cartopy.io.shapereader as shpreader\n", + "import geopandas as gpd\n", + "import glob\n", + "from scipy import spatial\n", + "from scipy.interpolate import RegularGridInterpolator\n", + "\n", + "from plasticparcels.utils import distance, get_coords_from_polygon" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Function definitions\n", + "def create_coastal_mpw_jambeck_release_map(mask_coast_filepath, coords_filepath, gpw_filepath,\n", + " distance_threshhold=50., grid_range=0.083,\n", + " gpw_column_name='Population Density, v4.11 (2000, 2005, 2010, 2015, 2020): 2.5 arc-minutes',\n", + " gpw_raster_number=4,\n", + " jambeck_filepath=None,\n", + " jambeck_url='https://www.science.org/doi/suppl/10.1126/science.1260352/suppl_file/1260352_supportingfile_suppl._excel_seq1_v1.xlsx',\n", + " jambeck_url_backup='http://jambeck.engr.uga.edu/wp-content/uploads/2015/01/JambeckSData.xlsx'\n", + " ):\n", + " \"\"\"\n", + " Description\n", + " ----------\n", + "\n", + " A function to create a particle release map based on the Mismanaged Plastice Waste data from Jambeck et al. (2015).\n", + " We first load the Natural Earth coastline vector dataset. For each vertex in the vector dataset, we identify any\n", + " associated coastal cell within distance_threshhold km. We then assign a population density from the GPW dataset,\n", + " computed as the maximum population density within grid_range degrees. We then left-join the Jambeck mismanaged\n", + " plastic waste data to compute the total mismanaged plastic waste in kg/day per coastal grid cell.\n", + "\n", + "\n", + " Parameters\n", + " ----------\n", + "\n", + " mask_coast_filepath : str\n", + " File path to the coastal mask generated in ...py TODO\n", + " coords_filepath : str\n", + " File path to the model grid coordinates.\n", + " gpw_filepath : str\n", + " File path to the 'Gridded Population of the World (GPW)' dataset.\n", + " distance_threshhold : float, optional\n", + " The threshhold distance used to find coastal grid cells associated to Natural Earth vector vertices, by default 50\n", + " grid_range : float, optional\n", + " The approximate grid width to search for the maximum population density surrounding a coastal grid cell, by default 0.083\n", + " gpw_column_name : str, optional\n", + " The column name of the GPW dataset that corresponds to the population density, by default 'Population Density, v4.11 (2000, 2005, 2010, 2015, 2020): 2.5 arc-minutes'\n", + " gpw_raster_number : int, optional\n", + " The raster number of the dataset to use. When using the GPW v4.11 data, raster = 4 provides the 2020 population density.\n", + " jambeck_filepath : str, optional\n", + " The filepath to the Jambeck dataset if downloaded manually.\n", + " jambeck_url : str, optional\n", + " The URL of the Jambeck dataset from the Science article.\n", + " jambeck_url_backup : str, optional\n", + " The URL of the Jambeck dataset from Jenna Jambeck's website.\n", + "\n", + " Returns\n", + " -------\n", + " coastal_density_mpw_df\n", + " A pandas dataframe containing the coastal grid cells, associated countries, and associated mismanaged plastic waste in kg/day.\n", + " \"\"\"\n", + "\n", + " # Open the Natural Earth coastline vector dataset\n", + " shpfilename = shpreader.natural_earth(resolution='50m',\n", + " category='cultural',\n", + " name='admin_0_countries')\n", + "\n", + " reader = shpreader.Reader(shpfilename)\n", + " countries = reader.records()\n", + "\n", + " # Open the GPW dataset, and set NaNs to zeros (i.e. zero population density in the ocean)\n", + " gpw = xr.open_dataset(gpw_filepath)\n", + " gpw = gpw.fillna(0.)\n", + "\n", + " # Load in coast mask and model coordinates\n", + " data_mask_coast = xr.open_dataset(mask_coast_filepath)\n", + " coords = xr.open_dataset(coords_filepath, decode_cf=False)\n", + "\n", + " lats_coast = data_mask_coast['lat'].data[np.where(data_mask_coast['mask_coast'])]\n", + " lons_coast = data_mask_coast['lon'].data[np.where(data_mask_coast['mask_coast'])]\n", + "\n", + " # Compute the area of each grid cell\n", + " cell_areas = coords['e1t'][0] * coords['e2t'][0]/10e6 # in km**2\n", + " coastal_cell_areas = cell_areas.data[np.where(data_mask_coast['mask_coast'])]\n", + "\n", + " # Loop through all countries from Natural Earth dataset\n", + " coastal_density_list = []\n", + " for country in countries:\n", + " # Country information\n", + " continent = country.attributes['CONTINENT']\n", + " region_un = country.attributes['REGION_UN']\n", + " subregion = country.attributes['SUBREGION']\n", + " country_name = country.attributes['NAME_LONG']\n", + "\n", + " # Extract longitude and latitude of the coastal point\n", + " country_coords = get_coords_from_polygon(country.geometry)\n", + " country_lons, country_lats = country_coords[:, 0], country_coords[:, 1]\n", + "\n", + " # Loop through country points to find coastal points within distance_threshhold km\n", + " all_coastal_indices = []\n", + " for i in range(len(country_lons)):\n", + " distances = distance(np.repeat(country_lons[i], len(lons_coast)), np.repeat(country_lats[i], len(lats_coast)), lons_coast, lats_coast)\n", + " coastal_indices = np.where(distances <= distance_threshhold)[0] # Coastal indices are those that are within the thresshold distance\n", + " all_coastal_indices.append(coastal_indices)\n", + "\n", + " # Concatenate into one list and identify the unique coastal cells\n", + " all_coastal_indices = np.unique(np.hstack(all_coastal_indices))\n", + "\n", + " # For all coastal points assigned to the country, find the maximum population density around that point\n", + " country_coastal_density_list = []\n", + " for i in range(len(all_coastal_indices)):\n", + " coastal_id = all_coastal_indices[i]\n", + " coastal_point = [lons_coast[coastal_id], lats_coast[coastal_id]]\n", + " coastal_cell_area = coastal_cell_areas[coastal_id]\n", + "\n", + " # Compute the population density as the maximum population density around the coastal cell\n", + " # Because the grid is ordered longitude ascending, latitude descending, the slice ordering is swapped for lat\n", + " population_density = gpw[gpw_column_name].sel(longitude=slice(coastal_point[0] - grid_range, coastal_point[0] + grid_range),\n", + " latitude=slice(coastal_point[1] + grid_range, coastal_point[1] - grid_range),\n", + " raster=gpw_raster_number).max()\n", + "\n", + " population_density = np.float32(population_density)\n", + "\n", + " country_coastal_density_list.append({'Continent': continent,\n", + " 'Region': region_un,\n", + " 'Subregion': subregion,\n", + " 'Country': country_name,\n", + " 'Longitude': coastal_point[0],\n", + " 'Latitude': coastal_point[1],\n", + " 'Area[km2]': coastal_cell_area,\n", + " 'PopulationDensity': population_density})\n", + "\n", + " country_coastal_density_df = pd.DataFrame.from_records(country_coastal_density_list)\n", + " coastal_density_list.append(country_coastal_density_df)\n", + "\n", + " # Concatenate all the information into one dataframe\n", + " coastal_density_df = pd.concat(coastal_density_list)\n", + "\n", + " # Open the Jambeck dataset\n", + " if jambeck_filepath is not None:\n", + " MPW = pd.read_excel(jambeck_filepath, 'Jambeck et al. (2014)')\n", + " else:\n", + " try:\n", + " socket = urlr.urlopen(jambeck_url)\n", + " except:\n", + " socket = urlr.urlopen(jambeck_url_backup)\n", + " spreadsheet_from_url = pd.ExcelFile(socket.read())\n", + " MPW = pd.read_excel(spreadsheet_from_url, 'Jambeck et al. (2014)')\n", + "\n", + " # Perform some data cleaning steps\n", + " # 1. Rename the columns to make it easier to work with\n", + " MPW = MPW.rename(columns={'Economic status1': 'Economic status',\n", + " 'Mismanaged plastic waste [kg/person/day]7': 'Mismanaged plastic waste [kg/person/day]'})\n", + "\n", + " # 2. Set the bottom rows with black info to blank strings and only keep data with valid country names\n", + " MPW = MPW.fillna('')\n", + " MPW = MPW[MPW['Country'] != '']\n", + "\n", + " # 3. Remove all sub/superscripts and standardise the 'and' and ampersands.\n", + " MPW['Country'] = MPW['Country'].replace(regex='[0-9]', value='').str.replace('&', 'and')\n", + "\n", + " # 4. Manually rename some countries to match the Natural Earth Dataset\n", + " MPW['Country'] = MPW['Country'].replace('Brunei', 'Brunei Darussalam')\n", + " MPW['Country'] = MPW['Country'].replace('Curacao', 'Curaçao')\n", + " MPW['Country'] = MPW['Country'].replace('Congo, Dem rep. of', 'Democratic Republic of the Congo')\n", + " MPW['Country'] = MPW['Country'].replace('Korea, North', 'Dem. Rep. Korea')\n", + " MPW['Country'] = MPW['Country'].replace('Korea, South (Republic of Korea)', 'Republic of Korea')\n", + " MPW['Country'] = MPW['Country'].replace('Burma/Myanmar', 'Myanmar')\n", + " MPW['Country'] = MPW['Country'].replace('Micronesia', 'Federated States of Micronesia')\n", + " MPW['Country'] = MPW['Country'].replace('Faroe Islands', 'Faeroe Islands')\n", + " MPW['Country'] = MPW['Country'].replace('Falkland Islands', 'Falkland Islands / Malvinas')\n", + " MPW['Country'] = MPW['Country'].replace('Cote d\\'Ivoire', 'Côte d\\'Ivoire')\n", + " MPW['Country'] = MPW['Country'].replace('East Timor', 'Timor-Leste')\n", + " MPW['Country'] = MPW['Country'].replace('Russia', 'Russian Federation')\n", + " MPW['Country'] = MPW['Country'].replace('Saint Pierre', 'Saint Pierre and Miquelon')\n", + " MPW['Country'] = MPW['Country'].replace('Congo Rep of', 'Republic of the Congo')\n", + " MPW['Country'] = MPW['Country'].replace('Palestine (Gaza Strip is only part on the coast)', 'Palestine')\n", + " MPW['Country'] = MPW['Country'].replace('Saint Maarten, DWI', 'Sint Maarten')\n", + " MPW['Country'] = MPW['Country'].replace('USVI', 'United States Virgin Islands')\n", + " MPW['Country'] = MPW['Country'].replace('Sao Tome and Principe', 'São Tomé and Principe')\n", + "\n", + " # TODO: Sort out these countries...\n", + " # Below commented out because it obviously doubles up rows when left-joining.\n", + " # MPW['Country'] = MPW['Country'].replace('Northern Cyprus', 'Cyprus') # Combines the two sets\n", + " # MPW['Country'] = MPW['Country'].replace('Gibraltar', 'United Kingdom') # Add to UK\n", + " # MPW['Country'] = MPW['Country'].replace('French Guiana', 'France')\n", + " # MPW['Country'] = MPW['Country'].replace('Guadeloupe', 'France')\n", + " # MPW['Country'] = MPW['Country'].replace('Martinique', 'France')\n", + " # MPW['Country'] = MPW['Country'].replace('Christmas Island', 'Australia')\n", + " # MPW['Country'] = MPW['Country'].replace('Reunion', 'France')\n", + " # MPW['Country'] = MPW['Country'].replace('Netherlands Antilles', 'Netherlands')\n", + "\n", + " # Combine the coastal cell density data with the mismanaged plastic waste data, performing a left-join on the country column\n", + " coastal_density_mpw_df = pd.merge(coastal_density_df, MPW[['Country', 'Economic status', 'Mismanaged plastic waste [kg/person/day]']], on=\"Country\", how=\"left\")\n", + "\n", + " # Compute the mismanaged plastic waste in units of kg/day\n", + " coastal_density_mpw_df['MPW_Cell'] = coastal_density_mpw_df['Area[km2]']*coastal_density_mpw_df['PopulationDensity']*coastal_density_mpw_df['Mismanaged plastic waste [kg/person/day]']\n", + "\n", + " return coastal_density_mpw_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mask_land_filepath = '/Users/denes001/Research/Projects/PlasticParcels/PlasticParcels/data/output_data/masks/mask_land_NEMO0083.nc'\n", + "mask_coast_filepath = '/Users/denes001/Research/Projects/PlasticParcels/PlasticParcels/data/output_data/masks/mask_coast_NEMO0083.nc'\n", + "coords_filepath = '/Users/denes001/Research/Projects/PlasticParcels/PlasticParcels/data/input_data/MOi/domain_ORCA0083-N006/coordinates.nc'\n", + "\n", + "# Create coastal release data\n", + "gpw_filepath = '/Users/denes001/Research/Projects/PlasticParcels/PlasticParcels/data/release/GPWv4/gpw-v4-population-density-rev11_totpop_2pt5_min_nc/gpw_v4_population_density_rev11_2pt5_min.nc'" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "parcels-v3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}