#IORG is a python module for calculating the Organization Index (Tompkins and Semie, 2017) as used in RCEMIP (Wing et al., 2020). #This code, updated February 1, 2025, fixes an error in iorg where theoretical CDFs do not extend to x=1 if the largest NND is too small. #This caused underestimation of Iorg in some of the most organized scenes. #This code also better handles cases with only a single large organized cluster, # returning Iorg=nan in such cases rather than Iorg=0. #The ability to output the theoretical and observed nearest-neighbor distributions has also been added. #Code Authors: Catherine Stauffer, Graham O'Donnell #Contact: Allison Wing (awing@fsu.edu) #GLO def ecdf(x_inputs): """ Calculates the empirical cumulative distribution function of x_inputs Arguments x_inputs: 1D array, that the ECDF is calculated from Returns y_values: 1D array, the ECDF x_values: 1D array, the values ECDF is evaluated at using x_inputs """ import numpy as np x_sorted = np.sort(x_inputs) x_values = np.linspace(start=min(x_sorted), stop=max(x_sorted), num=len(x_sorted)) y_values = np.zeros(np.shape(x_values)) for i in range(len(x_values)): temp = x_inputs[x_inputs <= x_values[i]] value = np.float64(len(temp))/np.float64(len(x_inputs)) y_values[i] = value return(x_values,y_values) def iorg_crm(w,wthreshold=0, includeCDF=True): """ Calculates the Organization Index for a cartesian domain Arguments w: 2-D array of demensions (y,x) of OLR or 500hPa vertical acceleration RCEMIP (Wing et al., 2018) uses OLR wthreshold: integer, 0 for w being an array of OLR, 1 for vertical acceleration includeCDF: if set to True, also returns several other variables Returns iorg: IOrg (Optional, if includeCDF==True): y_values_ecdf: NNCDF values x_values_ecdf: NNCDF distances poisson: theoretical NN distances corresponding to x_values_ecdf """ from sklearn.neighbors import NearestNeighbors from scipy.ndimage import measurements from scipy.ndimage import label import numpy as np # create a masked array of convective entities if wthreshold != 1: if wthreshold != 0: print('incorrect threshold flag, using default (OLR<173)') wmask = (w<173)*1 if wthreshold == 1: wmask = (w>0.5)*1 # duplicates domain in all directions to account for convection on borders wmaskbig = np.concatenate([wmask,wmask,wmask],axis=0) wmaskbig = np.concatenate([wmaskbig,wmaskbig,wmaskbig],axis=1) # four point convective connection sss = [[0,1,0], [1,1,1], [0,1,0]] # finds connected convective entities, the number of clusters, # and the centroid of each cluster Conn = label(wmaskbig,structure=sss)[0] nentities = label(wmaskbig)[1] centroids = measurements.center_of_mass(wmaskbig,Conn,range(1,nentities+1)) nnd,IDX,num = [],[],0 #This line said nentities > 1 before edits. Some cases where a single cluster #spans the entire domain in a dimension exist in RCEMIP models (WRF-GCM only) #and result in nentities=3 (three copies of the same cluster after tiling) #with extremely large NND, causing iorg~0. if nentities > 4: # finds the nearest neighbor of each convective cluster classifier = NearestNeighbors(n_neighbors=1) for i in range(0,nentities): if centroids[i][0] >= np.shape(w)[0] \ and centroids[i][0] <= (np.shape(w)[0]*2)-1 \ and centroids[i][1] >= np.shape(w)[1] \ and centroids[i][1] <= (np.shape(w)[1]*2)-1: num += 1 classifier.fit(np.array(centroids[0:i]+centroids[i+1:])) m,n = classifier.kneighbors(np.reshape(centroids[i],(1,2))) IDX.append(n) nnd.append(m) if len(nnd) > 1: IDX = np.squeeze(np.array(IDX)) nnd = np.squeeze(np.array(nnd)) if len(nnd) <= 1: IDX = np.array(IDX) nnd = np.array(nnd) if len(nnd) > 0: # calculates ECDF of the nearest neighbors idstances x_values_ecdf,y_values_ecdf = ecdf(nnd) # calculates the poisson distribution of w lam = num/(np.shape(w)[0]*np.shape(w)[1]) dd = np.array(x_values_ecdf) poisson = 1-np.exp(-1*lam*np.pi*dd**2) cdf_theory = poisson #Edited from earlier versions: appending (1,1) ensures the integral is complete. #Earlier versions of this code did not account for cases where the observed CDF # reached 1 before the theoretical CDF, allowing the integral to be over the # interval (0, max_theoretical_cdf) rather than (0, 1) as it should poisson = np.append(poisson, [1]) y_values_ecdf = np.append(y_values_ecdf, [1]) # calculates the area under the plot of the poisson vs ECDF iorg = np.trapz(y_values_ecdf,poisson) #Edited from earlier versions: use np.nan rather than 0 in the event # of failures in the code, which are either caused by existence of a single # cluster (unlikely, but would cause Iorg to be poorly defined) or # more likely data errors (in which case a 0 should be excluded from # the averaging step) else: iorg = 0 y_values_ecdf,x_values_ecdf,poisson,iorg = [],[],[],np.nan else: iorg = 0 y_values_ecdf,x_values_ecdf,poisson,iorg = [],[],[],np.nan if includeCDF: return (y_values_ecdf,x_values_ecdf,poisson,iorg) else: return(iorg) def iorg_gcm(w,wthreshold=0,includeCDF=True): """ Calculates the Organization Index for a speherical domain Arguments w: 2-D array of demensions (y,x) of OLR or 500hPa vertical acceleration RCEMIP (Wing et al., 2018) uses OLR -Best used with data along a tropical band to ~30N/S, as it does not correct for grid cell size wthreshold: integer, 0 for w being an array of OLR, 1 for vertical acceleration includeCDF: if set to True, also returns the theoretical and observed nearest-neighbor CDFs Returns iorg: IOrg (Optional, if includeCDF==True): y_values_ecdf: NNCDF values x_values_ecdf: NNCDF distances poisson: theoretical NN distances corresponding to x_values_ecdf """ from sklearn.neighbors import NearestNeighbors from scipy.ndimage import measurements from scipy.ndimage import label import numpy as np # create a masked array of convective entities if wthreshold != 1: if wthreshold != 0: print('incorrect threshold flag, using default (OLR<173)') wmask = (w<173)*1 if wthreshold == 1: wmask = (w>0.5)*1 # duplicates domain around world # different from iorg_crm, as the GCM spherical geometry should not connect N and S poles wmaskbig = np.concatenate([wmask,wmask,wmask],axis=1) # four point convective connection sss = [[0,1,0], [1,1,1], [0,1,0]] # finds connected convective entities, the number of clusters, # and the centroid of each cluster Conn = label(wmaskbig,structure=sss)[0] nentities = label(wmaskbig)[1] centroids = measurements.center_of_mass(wmaskbig,Conn,range(1,nentities+1)) nnd,IDX,num = [],[],0 #This line said nentities > 1 before edits. Some cases where a single cluster #spans the entire domain in a dimension exist in RCEMIP models (WRF-GCM only) #and result in nentities=3 (three copies of the same cluster after tiling) #with extremely large NND, causing iorg~0. if nentities > 4: # finds the nearest neighbor of each convective cluster classifier = NearestNeighbors(n_neighbors=1) for i in range(0,nentities): if centroids[i][1] >= np.shape(w)[1] \ and centroids[i][1] <= (np.shape(w)[1]*2)-1: num += 1 classifier.fit(np.array(centroids[0:i]+centroids[i+1:])) m,n = classifier.kneighbors(np.reshape(centroids[i],(1,2))) IDX.append(n) nnd.append(m) if len(nnd) > 1: IDX = np.squeeze(np.array(IDX)) nnd = np.squeeze(np.array(nnd)) if len(nnd) <= 1: IDX = np.array(IDX) nnd = np.array(nnd) if len(nnd) > 0: # calculates ECDF of the nearest neighbors idstances x_values_ecdf,y_values_ecdf = ecdf(nnd) # calculates the poisson distribution of w lam = num/(np.shape(w)[0]*np.shape(w)[1]) dd = np.array(x_values_ecdf) poisson = 1-np.exp(-1*lam*np.pi*dd**2) cdf_theory = poisson # calculates the area under the plot of the poisson vs ECDF poisson = np.append(poisson, [1]) y_values_ecdf = np.append(y_values_ecdf, [1]) iorg = np.trapz(y_values_ecdf,poisson) #Edited from earlier versions: use np.nan rather than 0 in the event # of failures in the code, which are either caused by existence of a single # cluster (unlikely, but would cause Iorg to be poorly defined) or # more likely data errors (in which case a 0 should be excluded from # the averaging step). Both are set else: y_values_ecdf,x_values_ecdf,poisson,iorg = [],[],[],np.nan else: y_values_ecdf,x_values_ecdf,poisson,iorg = [],[],[],np.nan if includeCDF: return (y_values_ecdf,x_values_ecdf,poisson,iorg) else: return(iorg) def calc_iorg(w_f,geometry,threshold=0,tbeg=-999,tend=-999,includeCDF=False): """ Loops through a time range to calculate Iorg Arguments w_f: 3-D array (time, y, x) of the convection data geometry: string to determine which IORG calculation to use, acceptable input is 'cartesian' or 'spherical' threshold: optional integer (default=0) determining which type of convection data is being used, 0 for olr or 1 for vertical velocity tbeg: optional integer (default=-999) determining which time integer to start at, -999 uses 25 days from the end tend: optional integer (default=-999) determining which time integer to end at, -999 uses the end of the data includeCDF: if set to True, also returns the theoretical and observed nearest-neighbor CDFs Returns iorg_f: 1-D array of Iorg data from time index tbeg to tend If includeCDF is set to true, also returns: len(iorg_f): number of timesteps calculated d_f: 1-D array of nearest-neighbor distances cdf_nnd_f: CDF of nearest-neighbor distances less than or equal to d_f cdf_theory_f: theoretical CDF corresponding to the values of d_f indices: for space compression and avoidance of jagged arrays, this is a 1-D array of where each timestep's d_f, cdf_nnd_f, and cdf_theory_f begin e.g. if indices = [0, 8, 15, 27, ...], then: -NNDCDF for the first timestep is cdf_nnd_f[0:8] -NNDCDF for the second timestep is cdf_nnd_f[8:15] -NNDCDF for the third timestep is cdf_nnd_f[15:27], etc. Note len(indices) = len(iorg_f)+1, as it includes the end index of the final timestep """ import numpy as np if geometry != 'cartesian' and geometry != 'spherical': print('inapproriate geometry entry, choices are ``cartesian`` or ``spherical``') return(np.nan) else: if tend == -999: tend = np.shape(w_f)[0] if tbeg == -999: tbeg = np.shape(w_f)[0]-600 # loop through time range cdf_nnd_f,d_f,cdf_theory_f,iorg_f = [],[],[],[] for v in range(tbeg,tend): if v%24 == 0: print(' processing day %d of %i\r'%(v/24,int(tend/24)), end='') # calculate Iorg if geometry == 'cartesian': result = iorg_crm(w_f[v,:,:],threshold,includeCDF) if geometry == 'spherical': result = iorg_gcm(w_f[v,:,:],threshold,includeCDF) if includeCDF: cdf_nnd_f.append(result[0]) d = result[1] if np.array(d).ndim > 1: d = [np.squeeze(d)] d_f.append(d) cdf_theory_f.append(result[2]) iorg_f.append(result[3]) else: iorg_f.append(result) iorg_f = np.array(iorg_f) if includeCDF: indices = [0] maxLen = 0 for a in cdf_nnd_f: l = len(a) indices.append(indices[-1]+l) if l > maxLen: maxLen = l cdf_nnd_f = np.concatenate(cdf_nnd_f) d_f = np.concatenate(d_f) cdf_theory_f = np.concatenate(cdf_theory_f) if includeCDF: return(iorg_f,len(iorg_f),d_f,cdf_nnd_f, cdf_theory_f, indices) else: return(iorg_f) ###