single_member_statistics_temp.py

#!/usr/bin/env python
# coding: utf-8
#%%
import numpy as np
import xarray as xr
from tqdm import tqdm
import pickle
import sys
sys.path.append('../functions')
import hexbin_functions as hexfunc


def entropy(Pdf):
    """Return the Shannon entropy, in bits, of a probability distribution."""
    # Pdf = Pdf / np.nansum(Pdf)  # Normalize Pdf to sum to 1, ignoring NaNs
    # Replace zeros with a very small number to avoid log(0)
    Pdf_safe = np.where(Pdf > 0, Pdf, np.finfo(float).eps)
    return -np.nansum(Pdf_safe * np.log2(Pdf_safe))
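
# Quick illustrative check (not part of the pipeline): a uniform distribution
# over four bins should give log2(4) = 2 bits.
# >>> entropy(np.array([0.25, 0.25, 0.25, 0.25]))
# 2.0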


def calculate_probability_and_entropy(pset, hexbin_grid, entropy_function):
    """
    Calculate the occupation probability and entropy of a particle set on a hexagonal grid.

    Parameters
    ----------
    pset : xarray.Dataset
        Particle dataset containing longitude and latitude variables.
    hexbin_grid : hexGrid object
        Hexagonal grid object with a count_2d method that counts particles within each hexbin.
    entropy_function : function
        Function that returns the entropy of a given probability distribution.

    Returns
    -------
    probability_set : numpy.ndarray
        Probability array with dimensions (n_hex, obs_length).
    entropy_set : numpy.ndarray
        Entropy values with dimension (obs_length,).
    """
    obs_length = len(pset.obs)
    n_hex = hexbin_grid.n_hex

    probability_set = np.zeros((n_hex, obs_length))
    entropy_set = np.zeros(obs_length)

    lons, lats = pset['lon'][:, :].values, pset['lat'][:, :].values

    for t in range(obs_length):
        probability_set[:, t] = hexbin_grid.count_2d(lons[:, t], lats[:, t], normalize=True)
        entropy_set[t] = entropy_function(probability_set[:, t])

    return probability_set, entropy_set
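
# Illustrative usage (pset and hexbin_grid are loaded further down in this script):
# >>> P, H = calculate_probability_and_entropy(pset, hexbin_grid, entropy)
# >>> P.shape   # (hexbin_grid.n_hex, len(pset.obs))
# >>> H.shape   # (len(pset.obs),)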


def create_dataframe(probability_set, entropy_set, hexints, time_range):
    """
    Create an xarray Dataset containing the probability and entropy data.

    Parameters
    ----------
    probability_set : numpy.ndarray
        Probability array with dimensions (n_hex, obs_length).
    entropy_set : numpy.ndarray
        Entropy values with dimension (obs_length,).
    hexints : list
        List of hexagonal bin indices.
    time_range : array-like
        Time (observation) coordinate for the dataset.

    Returns
    -------
    ds : xarray.Dataset
        The dataset containing the probability and entropy data.
    """
    ds = xr.Dataset(
        {
            'probability': xr.DataArray(
                probability_set,
                dims=['hexint', 'time'],
                coords={
                    'hexint': hexints,
                    'time': time_range
                },
                attrs={
                    'description': 'Probability of occurrence in each hexagonal bin at each observation time',
                    'units': 'probability'
                }
            ),
            'entropy': xr.DataArray(
                entropy_set,
                dims=['time'],
                coords={
                    'time': time_range
                },
                attrs={
                    'description': 'Entropy of the probability distribution at each observation time',
                    'units': 'bits'
                }
            )
        }
    )
    return ds
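
# Illustrative usage (continuing the names from the comment above; the filename
# here is a placeholder, not the output path used below):
# >>> ds = create_dataframe(P, H, hexbin_grid.hexint, pset.obs.values)
# >>> ds.to_netcdf('probability_entropy_example.nc')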

#%%
location = 'Cape_Hatteras'
member = 1  # Ensemble member
week = 4  # Number of weeks
traj_considered = 7400  # Number of trajectories to subsample
path = f"/storage/shared/oceanparcels/output_data/data_Claudio/NEMO_Ensemble/{location}/temporal_long/W_{week:01d}/{location}_W{week:01d}_m{member:03d}.zarr"
pset = xr.open_zarr(path)
obs_range = pset.obs.values  # Observation time steps, used as the time coordinate

# Load the hexbin grid for the domain
with open('../data/hexgrid_no_coast.pkl', 'rb') as f:
    hexbin_grid = pickle.load(f)

hexbin_grid = hexfunc.hexGrid(hexbin_grid, h3_res=3)
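
# Illustrative check of the grid (count_2d as used in calculate_probability_and_entropy;
# the lon/lat values are made-up points near Cape Hatteras, not data from the run):
# >>> p = hexbin_grid.count_2d(np.array([-75.0]), np.array([35.0]), normalize=True)
# >>> p.shape   # (hexbin_grid.n_hex,)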

###### Calculate for all members and week ranges ######
week_ranges = [4, 12, 20]  # np.arange(1, 7)
location = 'Cape_Hatteras'
members = np.arange(49, 51)

#%%
for member in tqdm(members):
    for week in week_ranges:
        print(f"\U0001F914 Member: {member:03d}, Week: {week}")

        path = f"/storage/shared/oceanparcels/output_data/data_Claudio/NEMO_Ensemble/{location}/temporal_long/W_{week:01d}/{location}_W{week:01d}_m{member:03d}.zarr"
        pset = xr.open_zarr(path)
        print("--", len(pset.trajectory))

        # Subsample a fixed number of trajectories by position (not by trajectory ID)
        random_idx = np.random.choice(len(pset.trajectory), traj_considered, replace=False)
        pset = pset.isel(trajectory=random_idx)
        print("++", len(pset.trajectory))

        P_m, Ent_m = calculate_probability_and_entropy(pset, hexbin_grid, entropy)
        DF_m = create_dataframe(P_m, Ent_m, hexbin_grid.hexint, obs_range)

        save_path = f"/storage/shared/oceanparcels/output_data/data_Claudio/NEMO_Ensemble/analysis/prob_distribution/{location}_temporal_long/P_W{week:01d}_m{member:03d}.nc"
        DF_m.to_netcdf(save_path)

# %%