Gulf of Maine BT Interpolation Testing

Processing Regional Timeseries of Surface and Bottom Temperatures

Published

August 22, 2024

About: Using the Gulf of Maine for Regional FVCOM Evaluation

For this project we need to use FVCOM for point-value interpolations (what was the temperature at this location, at this time), and for zonal summaries (what was the average temperature over time for this area).

This document steps through the approach used to get zonal summaries for our areas of interest.

Code
####  Packages  ####
library(gmRi)
library(tidyverse)
library(sf)
library(rnaturalearth)
library(fvcom)
library(ncdf4)

# Set the theme
theme_set(theme_bw() + map_theme())

# Project paths
lob_ecol_path <- cs_path("mills", "Projects/Lobster ECOL")
fvcom_path <- cs_path("res", "FVCOM/Lobster-ECOL")
poly_paths <- cs_path("mills", "Projects/Lobster ECOL/Spatial_Defs")

# Shapefiles
new_england <- ne_states("united states of america", returnclass = "sf") %>% 
  filter(postal %in% c("VT", "ME", "RI", "MA", "CT", "NH", "NY", "MD", "VA", "NJ", "DE", "NC", "PA", "WV"))
canada <- ne_states("canada", returnclass = "sf")
Code
# Use GMRI style
use_gmri_style_rmd()

Load Regional Shapefiles

This work considers two scales: a nearshore scale within 12 nm of the coast, further sub-divided by NMFS statistical areas, and an offshore scale extending from 12 nm out to the shelf break. The offshore area is split into a Southern New England region and a northern Gulf of Maine + Georges Bank region.

Code
# Load inshore
inshore_areas <- map(
  setNames(
    list.files(str_c(poly_paths, "inshore_areas"), full.names = T),
    str_remove(list.files(str_c(poly_paths, "inshore_areas")), ".geojson")),
  function(x){read_sf(x)})

# Load offshore
offshore_areas <- map(
  setNames(
    list.files(str_c(poly_paths, "offshore_areas"), full.names = T),
    str_remove(list.files(str_c(poly_paths, "offshore_areas")), ".geojson")),
  function(x){read_sf(x)})


# Map everything
ggplot() +
  geom_sf(data = bind_rows(inshore_areas), aes(fill = area_id), alpha = 0.4) +
  geom_sf(data = bind_rows(offshore_areas), aes(fill = Region), alpha = 0.4) +
  geom_sf(data = new_england) +
  geom_sf(data = canada) +
  theme(legend.position = "right") +
  scale_fill_gmri() +
  coord_sf(xlim = c(-78, -66), ylim = c(35.5, 45)) +
  theme_bw() + map_theme() +
  labs(fill = "Area")

Open Daily Data

Daily surface and bottom temperatures were obtained through direct communication with Dr. Chen’s group at UMASS Dartmouth, with special thanks to Drs. Wang & Li for their help and continued correspondence.

From these daily-averaged NetCDF files, and using the {fvcom} R package, we can load the GOM3 triangular mesh into R as a simple features dataframe.

Code
# Here are the files we have, loop over them later
fvcom_surfbot_files <- setNames(
  list.files(fvcom_path, full.names = T, pattern = ".nc"),
  str_remove(list.files(fvcom_path, full.names = F, pattern = ".nc"), ".nc"))


# Test File: GOM3 1978
# Load one of the daily FVCOM files that we downloaded and averaged
fvcom_yrx <- nc_open(fvcom_surfbot_files["gom3_1978"])



# Get the mesh itself as a simple feature collection
gom3_mesh <- get_mesh_geometry(fvcom_yrx, what = 'lonlat')


# Map everything
ggplot() +
  geom_sf(data = gom3_mesh, alpha = 0.4, linewidth = 0.1) +
  geom_sf(data = bind_rows(inshore_areas), aes(fill = area_id), alpha = 0.4) +
  geom_sf(data = bind_rows(offshore_areas), aes(fill = Region), alpha = 0.4) +
  geom_sf(data = new_england) +
  geom_sf(data = canada) +
  theme(legend.position = "right") +
  scale_fill_gmri() +
  coord_sf(xlim = c(-78, -65), ylim = c(35.5, 45)) +
  theme_bw() + map_theme() +
  labs(
    title = "Coverage overlap of FVCOM and study regions",
    fill = "Area")

Overlay Regions

For each region we will need to perform an intersection with the FVCOM mesh. From that geoprocessing operation we can determine the node & element IDs to pull out of the NetCDF files.

At this step we can also calculate the relative areas of the resulting triangles (and triangle fractions) to use as weights for the zonal-summary averages.

Code
# Nodes can be acquired this way
node_ids <- fvcom_nodes(fvcom_yrx) %>% 
  rename(node_id = node) %>% 
  st_as_sf(coords = c("lon", "lat"), crs = 4326)

# Element center IDs can be obtained this way
elem_ids <- fvcom_elems(fvcom_yrx) %>% 
  st_as_sf(coords = c("lon", "lat"), crs = 4326)

# We want both sets of IDs for joining, and the polygons in a consistent CRS for cropping
inshore_trans <- map(inshore_areas, ~st_transform(.x, crs = 6318))
offshore_trans <- map(offshore_areas, ~st_transform(.x, crs = 6318))


# Demonstrate the intersection for a single area;
# everything below walks through the process one piece at a time
crop_t <- st_intersection(gom3_mesh, inshore_trans$`511-Eastern_Maine`) 


# Map what is happening:
ggplot() +
  geom_sf(data = crop_t, aes(color = "FVCOM Mesh"), 
          alpha = 0.5) +
  geom_sf(data = inshore_trans$`511-Eastern_Maine`, 
          aes(color = "511 - Eastern Maine"), linewidth = 1,
          alpha = 0.5, fill = "transparent") +
  scale_color_gmri() +
  labs(
    title = "Assigning node/elements and area-weights to regions",
    subtitle = "st_intersection(mesh, area_poly)",
       color = "Geometry Source")

Perform Intersections on All Polygons

Code
# Run them all and save tables in case we need to operate in Python
inshore_intersections <- map_dfr(inshore_trans, function(x){
  # Run intersection
  mesh_clip <- st_intersection(gom3_mesh, x) 
  
  # Get areas
  # Pull necessary information out
  mesh_clip_df <- mesh_clip %>% 
    mutate(rel_area = st_area(mesh_clip)) %>% 
    select(SHORT_NAME, Id_1, FULL_NAME, elem, p1, p2, p3, rel_area) %>% 
    st_drop_geometry()
  
  # Return
  return(mesh_clip_df)
  
})

# Run them all and save tables in case we need to operate in Python
offshore_intersections <- map_dfr(offshore_trans, function(x){
  
  # Run intersection
  mesh_clip <- st_intersection(gom3_mesh, x) 
  
  # Get areas
  # Pull necessary information out
  mesh_clip_df <- mesh_clip %>% 
    mutate(rel_area = st_area(mesh_clip)) %>% 
    select(Region, elem, p1, p2, p3, rel_area) %>% 
    st_drop_geometry()
  
  # Return
  return(mesh_clip_df)
  
})


# # Save these out for lookup tables
# write_csv(inshore_intersections, here::here("local_data/inshore_areas_mesh_weights.csv"))
# write_csv(offshore_intersections, here::here("local_data/offshore_areas_mesh_weights.csv"))
Code
# Load what we did above
inshore_zones <- read_csv(here::here("local_data/inshore_areas_mesh_weights.csv"))
offshore_zones <- read_csv(here::here("local_data/offshore_areas_mesh_weights.csv"))


# Put the inshore and offshore together
zonal_assignments <- bind_rows(
    dplyr::select(inshore_zones, area_id = SHORT_NAME, elem, p1, p2, p3, rel_area),
    dplyr::select(offshore_zones, area_id = Region, elem, p1, p2, p3, rel_area)) %>% 
  mutate(rel_area = as.numeric(rel_area))


# From intersection we need to add areas
# mutate(crop_t, rel_area = st_area(crop_t), .before = "Join_Count") %>% 
#   select(SHORT_NAME, Id_1, FULL_NAME, elem, p1, p2, p3, rel_area) %>% 
zonal_assignments %>% 
  st_drop_geometry() %>% 
  head() %>% 
  gt::gt() %>% gt::tab_header(
    title = "Intersection Insight gained:",
    subtitle = "Information needed to subset from NetCDF and weight appropriately for zonal statistics")
Intersection Insight gained:
Information needed to subset from NetCDF and weight appropriately for zonal statistics

area_id        elem   p1     p2     p3     rel_area
Eastern Maine  53546  26971  27320  27319       156.5751
Eastern Maine  53559  26978  27326  27325   1352508.0688
Eastern Maine  53560  27326  26978  26979    143082.1057
Eastern Maine  53561  27327  27326  26979    717469.8470
Eastern Maine  54232  27681  27680  27318       228.3860
Eastern Maine  54233  27681  27318  27319       927.9917

Get triangle-specific averages of nodal values

For surface and bottom temperatures the values are stored/calculated at the triangle vertices, or nodes. For zonal statistics like the regional average temperature, we need to average the three nodes to get a value representative of the interior space.

This step loops through all mesh elements that fall within any of our regions and computes that node-averaged value once for each. The results are stored in a named list, keyed by element ID, that can be accessed later.
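
In other words, for a mesh element $e$ with vertex nodes $p_1$, $p_2$, $p_3$, the element-level timeseries is simply:

$$\bar{T}_e(t) = \frac{T_{p_1}(t) + T_{p_2}(t) + T_{p_3}(t)}{3}$$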

Code
# Now we want to pull all values along the time dimension 
# for the key nodes we need: the ones that surround each unique
# element remaining after the st_intersection()



#' @title FVCOM Element Average from Vertices
#' 
#' @description Get the average value from the relevant vertices, along  
#' the time dimension, for FVCOM mesh triangles.
#'
#' @param fvcom_mesh_trios Table of mesh elements and their vertex node IDs (elem, p1, p2, p3)
#' @param nc Open ncdf4 connection to a daily FVCOM file
#' @param nc_varname Name of the NetCDF variable to average (e.g. "surface_t")
#'
#' @return Named list of element-averaged timeseries, keyed by element ID
#' @export
#'
#' @examples
zonal_from_nodes <- function(fvcom_mesh_trios, nc, nc_varname = "surface_t"){
  
  # Take the unique elements from our table:
  unique_elems <- distinct(fvcom_mesh_trios, elem) %>% pull(elem)
  
  # Make a named list so results can be looked up by element ID
  unique_elems <- setNames(unique_elems, unique_elems)
  
  # Iterate through each unique element (triangle element, p1, p2, p3)
  # Average the nodes to get a value for each element
  triangle_temps <- map(unique_elems, function(elem_id){
    
    # Slice the row for that element, get distinct in case >1
    elem_df <- filter(fvcom_mesh_trios, elem == elem_id) %>% 
      distinct(elem, p1, p2, p3)
    
    # Pull the full timeseries of nc_varname for the three surrounding nodes
    p1_ts <- ncvar_get(
      nc, 
      varid = nc_varname, 
      start = c(elem_df[[1, "p1"]], 1), 
      count = c(1, -1))
    p2_ts <- ncvar_get(
      nc, 
      varid = nc_varname, 
      start = c(elem_df[[1, "p2"]], 1), 
      count = c(1, -1))
    p3_ts <- ncvar_get(
      nc, 
      varid = nc_varname, 
      start = c(elem_df[[1, "p3"]], 1), 
      count = c(1, -1))
    
    # Average the three node timeseries
    elem_avg_var <- (p1_ts + p2_ts + p3_ts) / 3
    
    # Return the element average as a plain numeric vector
    return(elem_avg_var)
    
  })
  
  # Return the named list of element-averaged timeseries
  return(triangle_temps)
  
}
Code
# Run it on the 1978 test file; the full loop over all years comes later
triangle_avg_1978 <- zonal_from_nodes(
  fvcom_mesh_trios = zonal_assignments, 
  nc = fvcom_yrx, 
  nc_varname = "surface_t")
# Sys.time()

Area-weighted Zonal Averages

Within each polygon overlay, we want to weight the averages of each triangle we just estimated by their relative areas. This will give us the average value for the entire polygon, appropriately weighting each of the component areas.
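
Concretely, if $A_i$ is the clipped area of triangle $i$ inside a polygon and $\bar{T}_i(t)$ is its node-averaged timeseries from the previous step, the zonal average computed below is:

$$\bar{T}_{poly}(t) = \frac{\sum_i A_i \, \bar{T}_i(t)}{\sum_i A_i}$$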

Code
# Take the code above and calculate the weighted averages:
# function should take:
# 1. region overlay dataframe, containing relative areas for weights
# 2. triangle-specific values





#' @title Zonal Averages from FVCOM-Polygon Intersection
#' 
#' @description Compute the area-weighted average timeseries for a polygon from
#' element-level timeseries and the relative areas produced by the mesh/polygon
#' intersection.
#'
#' @param mesh_poly_intersection Intersection table for one polygon (elem IDs and rel_area weights)
#' @param zonal_means Named list of element-averaged timeseries from zonal_from_nodes()
#' @param nc Open ncdf4 connection, used to pull the matching time dimension
#'
#' @return Dataframe with columns time and zonal_mu
#' @export
#'
#' @examples
fvcom_zonal_averages <- function(mesh_poly_intersection, zonal_means, nc){
  
  # Get the elements
  poly_elems <- as.character(mesh_poly_intersection$elem)

  # pull out the weights, name them
  poly_wts <- as.numeric(mesh_poly_intersection$rel_area) %>% 
    setNames(poly_elems)
  
  # Weight each timeseries by area, then sum them all
  # Get the timeseries that go with the triangles
  # multiply the relevant relative areas
  poly_wtd_ts <- purrr::map(poly_elems, function(x){
    wtd_vals <- zonal_means[[x]] * poly_wts[[x]]
    return(wtd_vals)}) %>% 
    reduce(.x = ., .f = `+`)
  
  # Divide by total area
  poly_tot_area <- sum(as.numeric(mesh_poly_intersection$rel_area))
  
  # Add the time dimension and return as dataframe
  poly_zonal <- data.frame(
    "time"     = as.Date(ncvar_get(nc, "Times")),
    "zonal_mu" = poly_wtd_ts / poly_tot_area)
  return(poly_zonal)
  
}
Code
#------

# Test the zonal-average function on a single region's table before looping over all areas

fvcom_zonal_averages(
      mesh_poly_intersection = filter(zonal_assignments, area_id == "Eastern Maine"),
      zonal_means = triangle_avg_1978, 
      nc = fvcom_yrx)




# Now we loop over areas
poly_avgs_1978 <-  map_dfr(
  .x = zonal_assignments %>% split(.$area_id),
  .f = ~fvcom_zonal_averages(
    mesh_poly_intersection = .x,
    zonal_means = triangle_avg_1978, 
    nc = fvcom_yrx),
  .id = "area_id"
)


mutate(poly_avgs_1978, time = as.Date(time)) %>% 
ggplot(aes(time, zonal_mu, color = area_id)) +
  geom_line(alpha = 0.4, linewidth = 0.5) +
  labs(title = "Zonal Average SST for Lob-ECOL Polygons: 1978 Test")

Run for All Years, Save.

Before this step, we need to have performed the st_intersection step above, which gathers the information on which mesh triangles to average and how to weight them appropriately.

Then we simply loop over the files for each year, and over the area assignment tables within each file.

Everything below could be done in one step with xarray instead of a loop.

Surface Temperature Processing

Code
# Process Surface Temperature for all the regions:
fvcom_var_name <- "surface_t"


# Time it
Sys.time()
all_region_surface_temp <- map_dfr(fvcom_surfbot_files, function(nc_x){
  
  # 1. Open the netcdf:
  yr_x_fvcom <- nc_open(nc_x)
  
  # 2. Get the average values for the triangles
  # (node-averaged timeseries for every unique element in this year's file)
  triangle_avgs <- zonal_from_nodes(
    fvcom_mesh_trios = zonal_assignments, 
    nc = yr_x_fvcom, 
    nc_varname = fvcom_var_name)
  
  
  # Get the zonal averages for each region/group
  poly_avgs_yr_x <-  zonal_assignments %>% 
    split(.$area_id) %>% 
    map_dfr(
    .x = .,
    .f = ~fvcom_zonal_averages(
      mesh_poly_intersection = .x,
      zonal_means = triangle_avgs, 
      nc = yr_x_fvcom),
    .id = "area_id"
  )
  
  # Close the netcdf connection
  nc_close(yr_x_fvcom)
  
  # Return the zonal averages
  return(poly_avgs_yr_x)
  
})

# time completion
Sys.time()


# # Save them individually
# fvcom_regional_surface_temps %>% 
# # all_region_surface_temp %>%
#   # rename(surface_temp = zonal_mu) %>%
#   split(.$area_id) %>%
#   iwalk(
#     function(.x,.y){
#       write_csv(.x, str_c(lob_ecol_path, "FVCOM_processed/area_timeseries/surface_temperature/", .y, ".csv"))
#     }
#     )


# # Plot
# all_region_surface_temp %>% 
#   mutate(time = as.Date(time)) %>% 
#   ggplot() +
#   geom_line(aes(time, zonal_mu, color = area_id),
#             linewidth = 0.4, alpha = 0.4)

Bottom Temperature Processing

Code
# Process Bottom Temperature for all the regions:
fvcom_var_name <- "bottom_t"


# Time it
Sys.time()
all_region_bottom_temp <- map_dfr(fvcom_surfbot_files, function(nc_x){
  
  # 1. Open the netcdf:
  yr_x_fvcom <- nc_open(nc_x)
  
  # 2. Get the average values for the triangles
  # (node-averaged timeseries for every unique element in this year's file)
  triangle_avgs <- zonal_from_nodes(
    fvcom_mesh_trios = zonal_assignments, 
    nc = yr_x_fvcom, 
    nc_varname = fvcom_var_name)
  
  
  # Get the zonal averages for each region/group
  poly_avgs_yr_x <-  zonal_assignments %>% 
    split(.$area_id) %>% 
    map_dfr(
    .x = .,
    .f = ~fvcom_zonal_averages(
      mesh_poly_intersection = .x,
      zonal_means = triangle_avgs, 
      nc = yr_x_fvcom),
    .id = "area_id"
  )
  
  # Close the netcdf connection
  nc_close(yr_x_fvcom)
  
  # Return the zonal averages
  return(poly_avgs_yr_x)
  
})

# time completion
Sys.time()





# # Save them individually
# fvcom_regional_bottom_temps %>% 
# #all_region_bottom_temp %>%
#   #rename(bottom_temp = zonal_mu) %>%
#   split(.$area_id) %>%
#   iwalk(
#     function(.x,.y){
#       write_csv(.x, str_c(lob_ecol_path, "FVCOM_processed/area_timeseries/bottom_temperature/", .y, ".csv"))
#     }
#     )




# Plot
all_region_bottom_temp %>% 
  mutate(time = as.Date(time)) %>% 
  filter(area_id %in% c("GOM_GBK", "SNE")) %>% 
  ggplot() +
  geom_line(aes(time, zonal_mu, color = area_id),
            linewidth = 0.4, alpha = 0.4)

Running 2012 Because It Was Missed

This is a good example of what the process looks like for a single NetCDF file.

Code
# Run 2012
fvcom_2012_file <- str_c(fvcom_path, "gom3_2012.nc")

# 1. Open the netcdf:
fvcom_2012_nc <- nc_open(fvcom_2012_file)

  
# 2. Get the average values for the triangles
# (node-averaged timeseries for every unique element in the 2012 file)
stemp_triangle_avgs <- zonal_from_nodes(
  fvcom_mesh_trios = zonal_assignments, 
  nc = fvcom_2012_nc, 
  nc_varname = "surface_t")

# bottom as well
btemp_triangle_avgs <- zonal_from_nodes(
  fvcom_mesh_trios = zonal_assignments, 
  nc = fvcom_2012_nc, 
  nc_varname = "bottom_t")


# 3. Get the zonal averages for each region/group
stemp_poly_avgs <-  zonal_assignments %>% 
  split(.$area_id) %>% 
  map_dfr(
  .x = .,
  .f = ~fvcom_zonal_averages(
    mesh_poly_intersection = .x,
    zonal_means = stemp_triangle_avgs, 
    nc = fvcom_2012_nc),
  .id = "area_id")

# Btemp
btemp_poly_avgs <-  zonal_assignments %>% 
  split(.$area_id) %>% 
  map_dfr(
  .x = .,
  .f = ~fvcom_zonal_averages(
    mesh_poly_intersection = .x,
    zonal_means = btemp_triangle_avgs, 
    nc = fvcom_2012_nc),
  .id = "area_id")



########

# Join them back in with the surface and bottom

# Surface Temperatures
fvcom_regional_surface_temps <- bind_rows(
  all_region_surface_temp,
  stemp_poly_avgs) %>% 
  mutate(time = as.Date(time)) %>% 
  rename(surface_t = zonal_mu)




# Bottom Temperatures
fvcom_regional_bottom_temps <- bind_rows(
  all_region_bottom_temp,
  btemp_poly_avgs) %>% 
  mutate(time = as.Date(time)) %>% 
  rename(bottom_t = zonal_mu)

# Plot
fvcom_regional_surface_temps %>% 
  filter(area_id %in% c("GOM_GBK", "SNE")) %>% 
  ggplot() +
  geom_line(aes(time, surface_t, color = area_id),
            linewidth = 0.4, alpha = 0.4)

# Plot
fvcom_regional_bottom_temps %>% 
  filter(area_id %in% c("GOM_GBK", "SNE")) %>% 
  ggplot() +
  geom_line(aes(time, bottom_t, color = area_id),
            linewidth = 0.4, alpha = 0.4)


# Put them into one table and save them:
fvcom_regional_temperatures <- left_join(
  fvcom_regional_surface_temps, 
  fvcom_regional_bottom_temps, 
  by = c("area_id", "time"))

# write_csv(
#   fvcom_regional_temperatures, 
#   str_c(lob_ecol_path, "FVCOM_processed/area_timeseries/all_regions_fvcom_temperatures_daily.csv"))

Inspecting Timeseries

Prior to any interpolations, timeseries for both inshore and offshore areas are evaluated for long-term trends (Mann-Kendall test) and for structural breaks in means/trends (changepoint/breakpoint tests).

These tests determine the following (a minimal sketch of both checks follows the data-loading chunk below):
- A significant trend indicates non-stationarity, so a global kriging model should not be used
- Breakpoints determine which groups of years should be used for the seasonal kriging models

Code
# Load and add inshore/offshore labels on the daily data
fvcom_regional_temperatures <- read_csv(
  str_c(lob_ecol_path, "FVCOM_processed/area_timeseries/all_regions_fvcom_temperatures_daily.csv")) %>% 
  mutate(depth_type = if_else(area_id %in% c("GOM_GBK", "SNE"), "offshore", "nearshore"))
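
Below is a minimal sketch of the two checks described above, run on a single area's bottom-temperature series. It assumes the {Kendall} and {changepoint} packages, which are not loaded elsewhere in this document; the exact tests and settings used for the final analysis may differ.

Code
# Sketch of the two checks, assuming {Kendall} and {changepoint} are installed
library(Kendall)
library(changepoint)

# One area's daily bottom temperature, in time order
sne_bt <- fvcom_regional_temperatures %>% 
  filter(area_id == "SNE") %>% 
  arrange(time)

# Long-term monotonic trend: Mann-Kendall test
MannKendall(sne_bt$bottom_t)

# Structural break(s) in the mean: PELT changepoint search
cpt.mean(sne_bt$bottom_t, method = "PELT", penalty = "MBIC")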