Saving MHW Results to NetCDF
Robert W Schlegel
2024-08-30
Source:vignettes/MHW_to_nc.Rmd
MHW_to_nc.Rmd
Overview
In the previous vignette we saw how to detect marine heatwaves (MHWs) in gridded data. In this vignette we will use those gridded MHW results to see how to save them as a NetCDF file.
Loading data
Please see the downloading and preparing OISST data and detecting marine heatwaves (MHWs) in gridded data vignettes first if you have not yet worked through them. We will be using the MHW results created from those two vignettes below.
MHW_res_grid <- readRDS("~/Desktop/MHW_result.Rds")
Prepare the data
The main sticking point between MHW results and the NetCDF file
format in R is that NetCDF files will only accept the file type in R
known as “arrays”, and most R outputs are data.frames. So the majority
of what we need to do is to convert our data from data.frames to arrays.
There are many ways to do this and a search of the interwebs will
produce a plethora of results. Over the years I have settled into an
approach that I use operationally for the MHW Tracker that I will also
use here. It is not necessarily the fastest method, nor does it use the
fewest lines of code possible, but it does follow the
tidyverse
approach to programming, and I
think it is about as transparent as this process can be. We will create
two functions below that will help us along in this process.
# Function for creating arrays from data.frames
df_acast <- function(df, lon_lat){
# Force grid
res <- df %>%
right_join(lon_lat, by = c("lon", "lat")) %>%
arrange(lon, lat)
# Convert date values to integers if they are present
if(lubridate::is.Date(res[1,4])) res[,4] <- as.integer(res[,4])
# Create array
res_array <- base::array(res[,4], dim = c(length(unique(lon_lat$lon)), length(unique(lon_lat$lat))))
dimnames(res_array) <- list(lon = unique(lon_lat$lon),
lat = unique(lon_lat$lat))
return(res_array)
}
# Wrapper function for last step before data are entered into NetCDF files
df_proc <- function(df, col_choice){
# Determine the correct array dimensions
lon_step <- mean(diff(sort(unique(df$lon))))
lat_step <- mean(diff(sort(unique(df$lat))))
lon <- seq(min(df$lon), max(df$lon), by = lon_step)
lat <- seq(min(df$lat), max(df$lat), by = lat_step)
# Create full lon/lat grid
lon_lat <- expand.grid(lon = lon, lat = lat) %>%
data.frame()
# Acast only the desired column
dfa <- plyr::daply(df[c("lon", "lat", "event_no", col_choice)],
c("event_no"), df_acast, .parallel = T, lon_lat = lon_lat)
return(dfa)
}
# We must now run this function on each column of data we want to add to the NetCDF file
doParallel::registerDoParallel(cores = 7)
prep_dur <- df_proc(MHW_res_grid, "duration")
prep_max_int <- df_proc(MHW_res_grid, "intensity_max")
prep_cum_int <- df_proc(MHW_res_grid, "intensity_cumulative")
prep_peak <- df_proc(MHW_res_grid, "date_peak")
Create NetCDF shell
With our data prepared into a series of arrays, we now need to set the stage for our NetCDF file. The important thing here is that the dimensions of the arrays we created match up to the dimensions of the NetCDF file.
# Get file attributes
lon_step <- mean(diff(sort(unique(MHW_res_grid$lon))))
lat_step <- mean(diff(sort(unique(MHW_res_grid$lat))))
lon <- seq(min(MHW_res_grid$lon), max(MHW_res_grid$lon), by = lon_step)
lat <- seq(min(MHW_res_grid$lat), max(MHW_res_grid$lat), by = lat_step)
event_no <- seq(min(MHW_res_grid$event_no), max(MHW_res_grid$event_no), by = 1)
tunits <- "days since 1970-01-01"
# Length of each attribute
nlon <- length(lon)
nlat <- length(lat)
nen <- length(event_no)
Create NetCDF file
The last step in this process is to add the prepared data to the
NetCDF shell we have constructed. The
ncdf4
package helps to make this all a
relatively straight forward process.
# Path and file name
# NB: We are net setting time dimensions here because we are using event_no as our "time" dimension
ncpath <- "~/Desktop/"
ncname <- "MHW_results"
ncfname <- paste0(ncpath, ncname, ".nc")
# dname <- "tmp"
# Define dimensions
londim <- ncdim_def("lon", "degrees_east", lon)
latdim <- ncdim_def("lat", "degrees_north", lat)
endim <- ncdim_def("event_no", "event_number", event_no)
# timedim <- ncdim_def("time", tunits, as.double(time))
# Define variables
fillvalue <- 1e32
def_dur <- ncvar_def(name = "duration", units = "days", dim = list(endim, latdim, londim),
missval = fillvalue, longname = "duration of MHW", prec = "double")
def_max_int <- ncvar_def(name = "max_int", units = "deg_c", dim = list(endim, latdim, londim),
missval = fillvalue, longname = "maximum intensity during MHW", prec = "double")
def_cum_int <- ncvar_def(name = "cum_int", units = "deg_c days", dim = list(endim, latdim, londim),
missval = fillvalue, longname = "cumulative intensity during MHW", prec = "double")
def_peak <- ncvar_def(name = "date_peak", units = tunits, dim = list(endim, latdim, londim),
missval = 0, longname = "date of peak temperature anomaly during MHW", prec = "integer")
# Create netCDF file and prepare space for our arrays
ncout <- nc_create(ncfname, list(def_peak, def_dur, def_max_int, def_cum_int), force_v4 = TRUE)
# Add the actual data
ncvar_put(ncout, def_dur, prep_dur)
ncvar_put(ncout, def_max_int, prep_max_int)
ncvar_put(ncout, def_cum_int, prep_cum_int)
ncvar_put(ncout, def_peak, prep_peak)
# Put additional attributes into dimension and data variables
ncatt_put(ncout, "lon", "axis", "X") #,verbose=FALSE) #,definemode=FALSE)
ncatt_put(ncout, "lat", "axis", "Y")
ncatt_put(ncout, "event_no", "axis", "event_no")
# Add global attributes
# These are useful for whomever else may want to use your NetCDF file
ncatt_put(ncout, 0, "title", paste0("MHW results from lon: ",
min(lon)," to ",max(lon),
" and lat: ",min(lat)," to ",max(lat)))
ncatt_put(ncout, 0, "institution", "Your institution here!")
ncatt_put(ncout, 0, "source", "NOAA OISST v2.1")
ncatt_put(ncout,0, "references", "Banzon et al. (2020) J. Atmos. Oce. Tech. 37:341-349")
history <- paste0("Your name here!, ", date())
ncatt_put(ncout, 0, "history", history)
ncatt_put(ncout, 0, "Conventions", "Hobday et al. (2016)") # Assuming one has used the Hobday definition
# Get a summary of the created file:
ncout
Visualising the results
Let’s not stop there. To ensure that the NetCDF file was created correctly we want to load it back into our workspace and plot the results.
# Convenience function for comparing files
quick_grid <- function(df, var_choice){
df %>%
filter(event_no == 13) %>%
ggplot(aes(x = lon, y = lat)) +
geom_raster(aes_string(fill = var_choice)) +
coord_cartesian(expand = F) +
scale_fill_viridis_c() +
theme(legend.position = "bottom")
}
# Thanks to the tidync package, loading the gridded data is very simple
MHW_res_nc <- tidync("~/Desktop/MHW_results.nc") %>%
hyper_tibble() %>%
mutate(date_peak = as.Date(date_peak, origin = "1970-01-01"))
# Plot the duration results
quick_grid(MHW_res_grid, "duration")
quick_grid(MHW_res_nc, "duration")
# Cumulative intensity
quick_grid(MHW_res_grid, "intensity_cumulative")
quick_grid(MHW_res_nc, "cum_int")