Detailing common methods and preprocessing steps for regime shift detection methods

Published

August 19, 2025

Code
# Packages
{
  library(here)
  library(tidyverse)
  library(rshift)
  library(gmRi)
  library(mgcv)
  library(EnvCpt)
  library(showtext)
  library(patchwork)
  library(degday)
}

# Make sure lag is done correctly
conflicted::conflict_prefer("lag", "stats")
conflicted::conflict_prefer("filter", "dplyr")
conflicted::conflict_prefer("select", "dplyr")

# Path to timeseries
lob_ecol_path <- cs_path("mills", "Projects/Lobster ECOL")


# Plot theme
theme_set(theme_gmri_simple(legend.position = "bottom"))


# Factor Levels

# New areas
areas_northsouth <- c(
  "eastern maine",  
  "central maine",  
  "western maine",  
  "eastern mass",
  "southern mass",  
  "rhode island shore",
  "long island sound", 
  "new jersey shore",  
  "five fathom bank",  
  "virginia shore",
  "gom_gbk",         
  "sne")


deg_c <- "\u00b0C"
Code
# Use GMRI style for fonts
use_gmri_style_rmd()
Code
# ------- New Regions

# Load the data for the new regions
daily_fvcom_temps <- read_csv(
  str_c(lob_ecol_path, "FVCOM_processed/area_timeseries/new_regions_fvcom_temperatures_daily.csv")
) %>% 
  mutate(
    yday = lubridate::yday(time),
    year = lubridate::year(time),
    month = lubridate::month(time),
    month_lab = factor(month.abb[month(time)], levels = month.abb), 
    depth_type = if_else(area_id %in% c("gom_gbk", "SNE"), "offshore", "nearshore"),
    area_id = factor(area_id, levels = areas_northsouth)
  )

# Monthly Salinity
monthly_fvcom_sal <- read_csv(
  str_c(lob_ecol_path, "FVCOM_processed/area_timeseries/new_regions_fvcom_salinity_monthly_gom3.csv")) %>% 
  mutate(
    month = lubridate::month(time),
    month_lab = factor(month.abb[month(time)], levels = month.abb),
    depth_type = if_else(area_id %in% c("gom_gbk", "SNE"), "offshore", "nearshore"),
    area_id = factor(area_id, levels = areas_northsouth)
  )

STARS Regime Shifts - LobECOL Areas

{rstars} Implementation

The following results extend the detailed approach of Stirnimann et al. 2019’s {rstars} repository.

Code
# Stirnimann used these values in their paper:
# l = 5,10,15,17.5 years, with monthly data
# Huber = 1
# Subsampling = (l + 1)/3


# Load the function(s)
source(here::here("rstars-master","rSTARS.R"))

Data Pre-Processing

For our results we’ve expanded on their approach with two additional pre-procesing steps recommended by Rodionov (pretty sure) & in the recent review by Lund et al. 2023’s.

These two additional steps include removing the seasonal cycle using a GAM and a cyclic cubic regression spline to model the seasonal cycle. The other additional pre-processing step is the removal of the long-term trend. To illustrate these pre-processing steps, a daily bottome temperature timeseries for Gulf of Maine / Georges Bank will be used below:

Code
# Test timeseries
test_area <- "gom_gbk"
tester   <- filter(daily_fvcom_temps, area_id == test_area) 
test_sal <- filter(monthly_fvcom_sal, area_id == test_area) 

# Plot Surface and Bottom

p1 <- ggplot(tester, aes(time)) +
  geom_line(aes(y = surface_t, color = "Surface Temperature"), alpha = 0.8) +
  geom_line(aes(y = bottom_t, color = "Bottom Temperature"), alpha = 0.8) + 
  scale_color_gmri() +
  labs(y = "Temperature", title = test_area, subtitle =  "Temperatures")

p2 <- ggplot(test_sal, aes(time)) +
  geom_line(aes(y = surface_salinity, color = "Surface Salinity"), alpha = 0.8) +
  geom_line(aes(y = bottom_salinity, color = "Bottom Salinity"), alpha = 0.8) + 
  scale_color_gmri() +
  labs(y = "Salinity", subtitle = "Salinity")

p1/p2

Removing Seasonal Cycle

We can model this periodicity to get a sense of the average temperature based on the day of the year. As a pre-processing step

Seasonal temperatures are at their lowest during February-March, and they peak during July-August. Each individual year will vary, but the seasonal period is quite consistent.

Code
# Model a linear trend and the seasonal cycle
st_seasonality_gam <- gam(
  surface_t ~ s(yday, bs = "cc"), 
  data = tester,
  method = "REML")
bt_seasonality_gam <- gam(
  bottom_t ~ s(yday, bs = "cc"), 
  data = tester,
  method = "REML")

# Add the predictions and residuals
bt_season_test <- broom::augment(x = bt_seasonality_gam) %>% 
  rename(bt_seasonal_fit = .fitted) %>% 
  distinct(yday, bt_seasonal_fit)

# Add the predictions and residuals
st_season_test <- broom::augment(x = st_seasonality_gam) %>% 
  rename(st_seasonal_fit = .fitted) %>% 
  distinct(yday, st_seasonal_fit) 

# Put them together
season_test <- left_join(bt_season_test, st_season_test)
Code
# Plot what that looke like on an annual time scale
p1 <- ggplot(tester, aes(x = yday)) +
  geom_line(
    aes(y = bottom_t, group = year(time), color = "Bottom Temperature"), 
    linewidth = 0.5, alpha = 0.25) +
  geom_smooth(
    data = tester,
    aes(y = bottom_t, color = "Bottom Temperature"), 
    method = "gam", se = F,
    alpha = 0.75, linewidth = 1.5) +
  geom_line(
    aes(y = surface_t, group = year(time), color = "Surface Temperature"), 
    linewidth = 0.5, alpha = 0.25) +
  geom_smooth(
    data = tester,
    aes(y = surface_t, color = "Surface Temperature"), 
    method = "gam", se = F,
    alpha = 0.75, linewidth = 1.5) +
  scale_color_gmri() +
  theme(legend.position = "bottom") +
  labs(color = "Data",
       y = "Temperature",
       x = "Day of Year",
       title = test_area, 
       subtitle = "Temperature Seasonality")



p2 <- test_sal  %>% 
  pivot_longer(cols = ends_with("salinity"), names_to = "var", values_to = "salinity") %>% 
  group_by(var, month_lab) %>% 
  summarise(avg_monthly = mean(salinity, na.rm = T)) %>% 
  ggplot() +
  geom_point(
    data = test_sal,
    aes(x = month_lab, y = bottom_salinity, color = "bottom_salinity"),
    position = position_jitter(height = 0, width = 0.1),
    alpha = 0.35) +
  geom_point(
    data = test_sal,
    aes(x = month_lab, y = surface_salinity, color = "surface_salinity"),
    position = position_jitter(height = 0, width = 0.1),
    alpha = 0.35) +
  geom_point(
    aes(month_lab, avg_monthly, color = var),
    shape = "-", size = 20) +
  scale_color_gmri() +
  labs(color = "Data",
       y = "Salinity",
       x = "Month",
       subtitle = "Salinity Seasonality")

p1/p2

Removal of seasonal (monthly) means results in timeseries that look like this:

Code
# Remove the Daily Seasonality
tester %>%
  left_join(season_test, by = join_by("yday")) %>% 
  mutate(
    st_resid = surface_t - st_seasonal_fit,
    bt_resid = bottom_t - bt_seasonal_fit)  %>% 
  ggplot() +
  geom_line(aes(time, st_resid, color = "Surface Temperature"), alpha = 0.75) +
  geom_line(aes(time, bt_resid, color = "Bottom Temperature"), alpha = 0.75) +
  geom_hline(yintercept = 0, linewidth = 2, alpha = 0.75) +
  scale_color_gmri() +
  labs(y = "Seasonal Temperature Anomaly",
       title = "Removal of Seasonal Temperature Cycle")

Code
# Remove mean monthly salinity too
test_sal %>% 
  group_by(month_lab) %>% 
  summarise(surf_sal_mean = mean(surface_salinity),
            bot_sal_mean = mean(bottom_salinity),
            .groups = "drop") %>% 
  right_join(test_sal, join_by(month_lab)) %>% 
  mutate(
    surface_resid = surface_salinity - surf_sal_mean,
    bottom_resid = bottom_salinity - bot_sal_mean) %>% 
  ggplot() +
  geom_line(aes(time, surface_resid, color = "Surface Salinity"), alpha = 0.75, linewidth = 1) +
  geom_line(aes(time, bottom_resid, color = "Bottom Salinity"), alpha = 0.75, linewidth = 1) +
  geom_hline(yintercept = 0, linewidth = 2, alpha = 0.75) +
  scale_color_gmri() +
  labs(
    y = "Seasonal Salinity Anomaly",
    title = "Removal of Long-Term Monthly Mean Salinity")

RSTARS Daily with Full Pre-Processing Routine

The last step is to run it through the RSTARS algorithm and specify a few more arguments for how to treat the timeseries.

We need to set arguments for regime length, outlier weighting, and for our regime probability cutoff. There are also options for prewhitening methods which help address autocorrelation in the timeseries.

Code
# Run it with the same presets that were uses in Stirnimann paper

# Stirnimann used these values in their paper:
# l = 5,10,15,17.5 years, with monthly data
# Huber = 1
# Subsampling = (l + 1)/3

# He also recommended using prewhitening, and recommended MPK and IP4

# Or do the prewhitening on the detrended data?
rstars_test <- rstars(
  data.timeseries = as.data.frame(season_trend_results[,c("time", "seasonal_resid")]), 
  l.cutoff = 365*7, 
  pValue = 0.05, 
  Huber = 1, 
  Endfunction = T,    # Some behavior at the end of the timeseries
  preWhitening = T,   # Apply prewhitening T/F
  OLS = F,            # OLSprewhitening method T/F
  MPK = T,            # Marriott-Pope + Kennedy prewhitening method T/F
  IP4 = F,            # IP4 prewhitening method T/F
  SubsampleSize = (365*7 + 1) / 3, # subsampling rate for hubers + prewhitening
  returnResults = T) # Return the results as a dataframe

  


# Results now look like this:
ggplot() +
  geom_line(
    data = season_trend_results,
    aes(time, seasonal_resid), 
    linewidth = 0.5, alpha = 0.5) +
  geom_line(
    data = rstars_test,
    aes(time, regime_mu), linewidth = 1) +
  geom_vline(
    data = filter(rstars_test, RSI != 0),
    aes(xintercept = time), color = "red", linewidth = 1) +
  labs(x = "", 
       y = "Detrended Bottom Temp Anomaly",
       title = "STARS Results - After full pre-processing steps",
       subtitle = test_area)

RSTARS Daily

If we apply the above steps to each daily bottom temperature timeseries we can return the following results:

Code
# # Run them all
# bot_temp_shifts <- daily_fvcom_temps %>%
#   mutate(yday = lubridate::yday(time)) %>%
#   split(.$area_id) %>%
#   map_dfr(function(.x){
# 
#     # Fit the model to detrend and remove seasons
#     # Model a linear trend and the seasonal cycle
#     season_trend_model <- gam(
#       bottom_t ~ s(yday, bs = "cc") + time,
#       data = .x,
#       method = "REML")
# 
#     # save the results
#     preprocessed_results <- broom::augment(x = season_trend_model) %>%
#       rename(
#         model_fit = .fitted,
#         model_resid = .resid)
# 
# 
#     # Get the results from that
#     x_rstars <- rstars(
#       data.timeseries = as.data.frame(preprocessed_results[,c("time", "model_resid")]),
#       l.cutoff = 365*7,
#       pValue = 0.05,
#       Huber = 1,
#       Endfunction = T,
#       preWhitening = T,
#       OLS = F,
#       MPK = T,
#       IP4 = F,
#       SubsampleSize = (365*7 + 1)/3,
#       returnResults = T)
# 
#     return(x_rstars)
# 
#     }, .id = "area_id")
# 
# 
# # Save it
# write_csv(bot_temp_shifts, here::here("rstars_results/lobecol_btemp_shifts_detrended.csv"))

Repeating the process for daily sea surface temperatures gives us these RSI results:

Code
# # Run them all
# surf_temp_shifts <- daily_fvcom_temps %>%
#   mutate(yday = lubridate::yday(time)) %>%
#   split(.$area_id) %>%
#   map_dfr(function(.x){
# 
#     # Fit the model to detrend and remove seasons
#     # Model a linear trend and the seasonal cycle
#     season_trend_model <- gam(
#       surface_t ~ s(yday, bs = "cc") + time,
#       data = .x,
#       method = "REML")
# 
#     # save the results
#     preprocessed_results <- broom::augment(x = season_trend_model) %>%
#       rename(
#         model_fit = .fitted,
#         model_resid = .resid)
# 
# 
#     # Get the results from that
#     x_rstars <- rstars(
#       data.timeseries = as.data.frame(preprocessed_results[,c("time", "model_resid")]),
#       l.cutoff = 365*7,
#       pValue = 0.05,
#       Huber = 1,
#       Endfunction = T,
#       preWhitening = T,
#       OLS = F,
#       MPK = T,
#       IP4 = F,
#       SubsampleSize = (365*7 + 1)/3,
#       returnResults = T)
# 
#     return(x_rstars)
# 
#     }, .id = "area_id")
# 
# 
# # Save it
# write_csv(surf_temp_shifts, here::here("rstars_results/lobecol_stemp_shifts_detrended.csv"))

Daily Temperatures Shifts

The following results show the regime shift index results from detrended dailt temperatures:

Code
# Load that data again
bot_temp_shifts <- read_csv(here::here("rstars_results/lobecol_btemp_shifts_detrended.csv"))

# Load that data again
surf_temp_shifts <- read_csv(here::here("rstars_results/lobecol_stemp_shifts_detrended.csv"))


# # Plot the RSI
# p1 <- surf_temp_shifts %>% 
#  ggplot() +
#   geom_line(aes(time, RSI), linewidth = 0.5, alpha = 0.35) +
#   geom_hline(yintercept = 0) +
#   facet_grid() +
#   labs(
#     x = "Time",
#     y = "RSI",
#     title = "All Areas, Surface Temperature RSI")
# 
# # Plot the RSI
# p2 <- bot_temp_shifts %>% 
#  ggplot() +
#   geom_line(aes(time, RSI), linewidth = 0.5, alpha = 0.35) +
#   geom_hline(yintercept = 0) +
#   facet_grid() +
#   labs(
#     x = "Time",
#     y = "RSI",
#     title = "All Areas, Bottom Temperature RSI")
# 
# p1/p2
Code
# better daily plots



# Summarise them and put them together
mst_daily <- surf_temp_shifts %>% filter(RSI != 0) %>% 
  mutate(var = "daily surface temperature") %>% 
  dplyr::select(area_id, time, RSI, var)
mbt_daily <- bot_temp_shifts %>% filter(RSI != 0) %>% 
  mutate(var = "daily bottom temperature") %>% 
  dplyr::select(area_id, time, RSI, var)


# Combine results for plotting
results_daily_summ <- bind_rows(list(mst_daily, mbt_daily)) %>% 
  mutate(
    shift_direction = if_else(RSI > 0, "Shift to Warm", "Shift to Cool"), 
    area_id = factor(area_id, levels = areas_northsouth),
    year = year(time))


# Get Monthly temperatures in long form
temps_daily_long <- daily_fvcom_temps %>%
  rename(
    `daily surface temperature` = surface_t,
    `daily bottom temperature` = bottom_t) %>% 
  pivot_longer(
    names_to = "var", 
    values_to = "temperature", cols = ends_with("temperature"))



# Plot the breaks over the timeseries
ggplot() +
  geom_line(
    data = temps_daily_long,
    aes(time, temperature),
    linewidth = 0.2, alpha = 0.5) +
  # Add a vertical line using geom_segment with an arrow
  geom_segment(
    data = filter(
      results_daily_summ, 
      str_detect(shift_direction, "Warm")),
    linewidth = 0.8,
    aes(x = time, xend = time, y = -Inf, , yend = Inf, 
        color = shift_direction),  
    arrow = arrow(length = unit(0.3, "cm"), ends = "last")) +
  geom_segment(
    data = filter(
      results_daily_summ, 
      !str_detect(shift_direction, "Warm")),
    linewidth = 0.8,
    aes(x = time, xend = time, y = -Inf, , yend = Inf, 
        color = shift_direction),  
    arrow = arrow(length = unit(0.3, "cm"), ends = "first")) +
  scale_color_gmri() +
  scale_y_continuous(
    labels = scales::label_number(suffix = deg_c)) +
  facet_grid(
    area_id~var, 
    labeller = label_wrap_gen(),
    scales = "free") +
  theme(
    legend.position = "bottom",
    strip.text.y = element_text(angle = 0)) + 
  labs(
    title = "FVCOM Hindcast Temperatures",
    subtitle = "STARS Regime Shift Index - Detrended Daily Data",
    y = "Temperature",
    color = "")

RSTARS on Monthly Temperatures

Other than temperature we have very few daily environmental timeseries.

We also are less interested in the time within a year that changes ocurred, and more interested in the general year/month that changes are ocurring in.

For these reasons and for consistency with the other environmental drivers, I will repeat these steps using monthly mean SSt & BT.

Code
# Take the input data,
# Get monthly averages
# remove trend and long-term monthly means
# run STARS routine again


# Run the monthly versions
monthly_fvcom_temps <- daily_fvcom_temps  %>%
  group_by(area_id, year, month) %>%
  summarise(
    surface_t = mean(surface_t, na.rm = T),
    bottom_t  = mean(bottom_t, na.rm = T),
    .groups = "drop") %>%
  mutate(
    month = factor(month),
    yr_num = as.numeric(as.character(year)),
    time = as.Date(
      str_c(
        year,
        str_pad(month, side = "left", pad = "0", width = 2),
        "15", sep = "-")))


# Fit the model (by area_id) to detrend and remove seasons
# Model a linear trend and the seasonal cycle
monthly_fvcom_temps <- monthly_fvcom_temps %>% 
  split(.$area_id) %>% 
  map_dfr(function(area_x_monthly){
    
    monthly_stemp_model <- gam(
      surface_t ~ month + yr_num,
      data = area_x_monthly,
      method = "REML")
    monthly_btemp_model <- gam(
      bottom_t ~ month + yr_num,
      data = area_x_monthly,
      method = "REML")
    
    # Add back the results to the model data
    detrend_out <- area_x_monthly %>% 
      mutate(
        stemp_model_fit   = monthly_stemp_model$fitted.values,
        stemp_model_resid = monthly_stemp_model$residuals,
        btemp_model_fit   = monthly_btemp_model$fitted.values,
        btemp_model_resid = monthly_btemp_model$residuals)
    return(detrend_out)
    
  }, .id = "area_id") %>% 
  mutate(area_id = factor(area_id, levels = areas_northsouth))

Recording Rates of Change

It is usefull to be aware on how much each area is warming as a long-term rate and whether or not these are significant baseline levels of warming or not. The following table reports the annual warming rate and whether or not this is significant.

Code
library(trend)

# # Mann-Kendall Test
# # Kendall::MannKendall(tester$surface_t)$sl
# str(trend::mk.test(tester$surface_t))
# trend::mk.test(tester$surface_t)$p.value < 0.05

# # Seasonal mann-kendall
# str(trend::smk.test(ts(tester$surface_t, frequency = 365)))
# trend::smk.test(ts(tester$surface_t, frequency = 365))$p.value < 0.05

# # sens slope
# trend::sens.slope(tester$surface_t) # sens slope

daily_fvcom_temps %>% 
  split(.$area_id) %>% 
  map_dfr(function(x){
    surface_warming <- trend::smk.test(ts(x$surface_t, frequency = 365))$p.value < 0.05
    bottom_warming <- trend::smk.test(ts(x$bottom_t, frequency = 365))$p.value < 0.05
    surface_annual_rate <- ifelse(
      surface_warming, 
      coef(lm(surface_t ~ time, data = x))[[2]] * 365 * 10,
      NA)
    bottom_annual_rate <- ifelse(
      bottom_warming, 
      coef(lm(surface_t ~ time, data = x))[[2]] * 365 * 10,
      NA)
    return(tibble(
      "Surface Warming" = surface_warming,
      "Decadal Surface Warming" = round(surface_annual_rate, 2),
      "Bottom Warming" = bottom_warming,
      "Decadal Bottom Warming"  = round(bottom_annual_rate, 2)))
  }, .id = "area_id") %>% 
  gt::gt() %>% 
  gt::tab_header(
    title = "Long Term Regional Warming Rates",
    subtitle = "Trend Significance Evaluated with Seasonal Mann-Kendall Trend Test (Hirsch-Slack Test)

")
Long Term Regional Warming Rates
Trend Significance Evaluated with Seasonal Mann-Kendall Trend Test (Hirsch-Slack Test)
area_id Surface Warming Decadal Surface Warming Bottom Warming Decadal Bottom Warming
eastern maine TRUE 0.28 TRUE 0.28
central maine TRUE 0.33 TRUE 0.33
western maine TRUE 0.49 TRUE 0.49
eastern mass TRUE 0.56 TRUE 0.56
southern mass TRUE 0.33 TRUE 0.33
rhode island shore TRUE 0.28 TRUE 0.28
long island sound TRUE 0.21 TRUE 0.21
new jersey shore TRUE 0.43 TRUE 0.43
five fathom bank TRUE 0.15 TRUE 0.15
virginia shore TRUE 0.39 TRUE 0.39
gom_gbk TRUE 0.41 TRUE 0.41
sne TRUE 0.43 TRUE 0.43

Raw Temperatures

Without removing the seasonal cycle or the long-term warming trend we get the following breakpoints:

Code
# Run them all
surf_temp_monthly_shifts_raw <- monthly_fvcom_temps %>%
  split(.$area_id) %>%
  map_dfr(function(.x_monthly){

    # Get the results from that
    x_rstars <- rstars(
      data.timeseries = as.data.frame(
        .x_monthly[,c("time", "surface_t")]),
      l.cutoff = 12*7,
      pValue = 0.05,
      Huber = 1,
      Endfunction = T,
      preWhitening = T,
      OLS = F,
      MPK = T,
      IP4 = F,
      SubsampleSize = (12*7 + 1)/3,
      returnResults = T)

    return(x_rstars)

    }, .id = "area_id")


#Bottom temperature
bot_temp_monthly_shifts_raw <- monthly_fvcom_temps %>%
  split(.$area_id) %>%
  map_dfr(function(.x_monthly){

    # Get the results from that
    x_rstars <- rstars(
      data.timeseries = as.data.frame(
        .x_monthly[,c("time", "bottom_t")]),
      l.cutoff = 12*7,
      pValue = 0.05,
      Huber = 1,
      Endfunction = T,
      preWhitening = T,
      OLS = F,
      MPK = T,
      IP4 = F,
      SubsampleSize = (12*7 + 1)/3,
      returnResults = T)

    return(x_rstars)

    }, .id = "area_id")



# # Save them
# write_csv(surf_temp_monthly_shifts_raw, here::here("rstars_results/lobecol_stemp_monthly_shifts_raw.csv"))
# write_csv(bot_temp_monthly_shifts_raw, here::here("rstars_results/lobecol_btemp_monthly_shifts_raw.csv"))

Raw Temp Shifts

If we pass monthly temperatures to the algorithm it does not detect any noticeable regime change in our regional timeseries.

Code
# Load that data again
raw_surf_temp_monthly_shifts <- read_csv(here::here("rstars_results/lobecol_stemp_monthly_shifts_raw.csv"))

raw_bot_temp_monthly_shifts <- read_csv(here::here("rstars_results/lobecol_btemp_monthly_shifts_raw.csv"))



# Summarise them and put them together
mst_raw <- raw_surf_temp_monthly_shifts %>% filter(RSI != 0) %>% 
  mutate(var = "raw monthly surface temperature") %>% 
  dplyr::select(area_id, time, RSI, var)
mbt_raw <- raw_bot_temp_monthly_shifts %>% filter(RSI != 0) %>% 
  mutate(var = "raw monthly bottom temperature") %>% 
  dplyr::select(area_id, time, RSI, var)


# Combine results for plotting
stars_results_summ_raw <- bind_rows(list(mst_raw, mbt_raw)) %>% 
  mutate(
    shift_direction = if_else(RSI > 0, "Shift to Warm", "Shift to Cool"), 
    area_id = factor(area_id, levels = areas_northsouth),
    year = year(time))


# Get Monthly temperatures in long form
temps_monthly_long <- monthly_fvcom_temps %>%
  rename(
    `monthly surface temperature` = surface_t,
    `monthly bottom temperature` = bottom_t) %>% 
  pivot_longer(
    names_to = "var", 
    values_to = "temperature", cols = ends_with("temperature"))



# Plot the breaks over the timeseries
ggplot() +
  geom_line(
    data = temps_monthly_long,
    aes(time, temperature),
    linewidth = 0.2, alpha = 0.5) +
  # Add a vertical line using geom_segment with an arrow
  geom_segment(
    data = filter(
      stars_results_summ_raw, 
      str_detect(var, "monthly"),
      str_detect(shift_direction, "Warm")),
    linewidth = 0.8,
    aes(x = time, xend = time, y = -Inf, , yend = Inf, 
        color = shift_direction),  
    arrow = arrow(length = unit(0.3, "cm"), ends = "last")) +
  geom_segment(
    data = filter(
      stars_results_summ_raw, 
      !str_detect(shift_direction, "Warm")),
    linewidth = 0.8,
    aes(x = time, xend = time, y = -Inf, , yend = Inf, 
        color = shift_direction),  
    arrow = arrow(length = unit(0.3, "cm"), ends = "first")) +
  scale_color_gmri() +
  scale_y_continuous(
    labels = scales::label_number(suffix = deg_c)) +
  facet_grid(
    area_id~var, 
    labeller = label_wrap_gen(),
    scales = "free") +
  theme(
    legend.position = "bottom",
    strip.text.y = element_text(angle = 0)) + 
  labs(
    title = "FVCOM Hindcast Temperatures",
    subtitle = "STARS Regime Shift Index - Raw Monthly Data",
    y = "Temperature",
    color = "")

Detrended Temperatures

For comparison, I’m also running the routine on timeseries where the long-term trend and the average monthly temperatures are removed.

Code
# Run them all
surf_temp_monthly_shifts <- monthly_fvcom_temps %>%
  split(.$area_id) %>%
  map_dfr(function(.x_monthly){

    # Get the results from that
    x_rstars <- rstars(
      data.timeseries = as.data.frame(
        .x_monthly[,c("time", "stemp_model_resid")]),
      l.cutoff = 12*7,
      pValue = 0.05,
      Huber = 1,
      Endfunction = T,
      preWhitening = T,
      OLS = F,
      MPK = T,
      IP4 = F,
      SubsampleSize = (12*7 + 1)/3,
      returnResults = T)

    return(x_rstars)

    }, .id = "area_id")



#Bottom temperature
bot_temp_monthly_shifts <- monthly_fvcom_temps %>%
  split(.$area_id) %>%
  map_dfr(function(.x_monthly){

    # Get the results from that
    x_rstars <- rstars(
      data.timeseries = as.data.frame(
        .x_monthly[,c("time", "btemp_model_resid")]),
      l.cutoff = 12*7,
      pValue = 0.05,
      Huber = 1,
      Endfunction = T,
      preWhitening = T,
      OLS = F,
      MPK = T,
      IP4 = F,
      SubsampleSize = (12*7 + 1)/3,
      returnResults = T)

    return(x_rstars)

    }, .id = "area_id")



# Save them
write_csv(surf_temp_monthly_shifts, here::here("rstars_results/lobecol_stemp_monthly_shifts_detrended.csv"))
write_csv(bot_temp_monthly_shifts, here::here("rstars_results/lobecol_btemp_monthly_shifts_detrended.csv"))

Detrended Temp Shifts

Code
# Load that data again
surf_temp_monthly_shifts <- read_csv(here::here("rstars_results/lobecol_stemp_monthly_shifts_detrended.csv"))

bot_temp_monthly_shifts <- read_csv(here::here("rstars_results/lobecol_btemp_monthly_shifts_detrended.csv"))

# Summarise them and put them together
mst <- surf_temp_monthly_shifts %>% filter(RSI != 0) %>% 
  mutate(var = "monthly surface temperature") %>% 
  dplyr::select(area_id, time, RSI, var)
mbt <- bot_temp_monthly_shifts %>% filter(RSI != 0) %>% 
  mutate(var = "monthly bottom temperature") %>% 
  dplyr::select(area_id, time, RSI, var)


# Combine results for plotting
stars_results_summ <- bind_rows(list(mst, mbt)) %>% 
  mutate(
    shift_direction = if_else(RSI>0, "Shift to Warm", "Shift to Cool"), 
    area_id = factor(area_id, levels = areas_northsouth),
    year = year(time))


# Get Monthly Residuals in long form
resids_monthly_long <- monthly_fvcom_temps %>%
  rename(
    `monthly surface temperature` = stemp_model_resid,
    `monthly bottom temperature` = btemp_model_resid) %>% 
  pivot_longer(
    names_to = "var", 
    values_to = "temperature", 
    cols = ends_with("temperature"))



# Plot the breaks over the detrended timesiers
ggplot() +
  geom_line(
    data = resids_monthly_long,
    aes(time, temperature),
    linewidth = 0.2, alpha = 0.5) +
  # Add a vertical line using geom_segment with an arrow
  geom_segment(
    data = filter(
      stars_results_summ, 
      str_detect(var, "monthly"),
      str_detect(shift_direction, "Warm")),
    linewidth = 0.8,
    aes(x = time, xend = time, y = -Inf, , yend = Inf, 
        color = shift_direction),  
    arrow = arrow(length = unit(0.3, "cm"), ends = "last")) +
  geom_segment(
    data = filter(
      stars_results_summ, 
      str_detect(var, "monthly"),
      !str_detect(shift_direction, "Warm")),
    linewidth = 0.8,
    aes(x = time, xend = time, y = -Inf, , yend = Inf, 
        color = shift_direction),  
    arrow = arrow(length = unit(0.3, "cm"), ends = "first")) +
  scale_color_gmri() +
  scale_y_continuous(
    labels = scales::label_number(suffix = deg_c)) +
  facet_grid(
    area_id~var, 
    labeller = label_wrap_gen(),
    scales = "free") +
  theme(
    legend.position = "bottom",
    strip.text.y = element_text(angle = 0)) + 
  labs(
    title = "FVCOM Hindcast Temperatures",
    subtitle = "STARS Regime Shift Index - Detrended Temperatures",
    y = "Temperature Anomaly",
    color = "")

Monthly Salinity

Salinity was processed separately and is only available as monthly data. Regime shift checks will only be performed on the detrended timeseries based on how the temperature results went. Without removing seasonality the regime change is lost in the noise of within-year variability.

Code
# Fit the model (by area_id) to detrend and remove seasons
# Model a linear trend and the seasonal cycle
monthly_fvcom_sal <- monthly_fvcom_sal %>% 
  mutate(month = month(time),
         yr_num = year(time)) %>% 
  split(.$area_id) %>% 
  map_dfr(function(area_x_monthly){
    
    surface_model <- gam(
      surface_salinity ~ month + yr_num,
      data = area_x_monthly,
      method = "REML")
    bottom_model <- gam(
      bottom_salinity ~ month + yr_num,
      data = area_x_monthly,
      method = "REML")
    
    # Add back the results to the model data
    detrend_out <- area_x_monthly %>% 
      mutate(
        ssal_model_fit   = surface_model$fitted.values,
        ssal_model_resid = surface_model$residuals,
        bsal_model_fit   = bottom_model$fitted.values,
        bsal_model_resid = bottom_model$residuals)
    return(detrend_out)
    
  }, .id = "area_id") %>% 
  mutate(area_id = factor(area_id, levels = areas_northsouth))
Code
# Take the input data,
# Get monthly averages
# remove trend and long-term monthly means
# run STARS routine again


# Run the monthly versions
surf_sal_monthly_shifts <- monthly_fvcom_sal %>%
  split(.$area_id) %>%
  map_dfr(function(.x){

    # Get the results from the detrended surface salinities
    x_rstars <- rstars(
      data.timeseries = as.data.frame(
        .x[,c("time", "ssal_model_resid")]),
      l.cutoff = 12*7,
      pValue = 0.05,
      Huber = 1,
      Endfunction = T,
      preWhitening = T,
      OLS = F,
      MPK = T,
      IP4 = F,
      SubsampleSize = (12*7 + 1)/3,
      returnResults = T)

    return(x_rstars)

    }, .id = "area_id")



#Bottom Salinity
bot_sal_monthly_shifts <- monthly_fvcom_sal %>%
  split(.$area_id) %>%
  map_dfr(function(.x){

    # Get the results from the detrended bottom salinities
    x_rstars <- rstars(
      data.timeseries = as.data.frame(
        .x[,c("time", "bsal_model_resid")]),
      l.cutoff = 12*7,
      pValue = 0.05,
      Huber = 1,
      Endfunction = T,
      preWhitening = T,
      OLS = F,
      MPK = T,
      IP4 = F,
      SubsampleSize = (12*7 + 1)/3,
      returnResults = T)

    return(x_rstars)

    }, .id = "area_id")





# Save them
write_csv(surf_sal_monthly_shifts, here::here("rstars_results/lobecol_ssal_monthly_shifts_detrended.csv"))
write_csv(bot_sal_monthly_shifts, here::here("rstars_results/lobecol_bsal_monthly_shifts_detrended.csv"))

Plotting Salinity Breaks

Code
surf_sal_monthly_shifts <- read_csv(here::here("rstars_results/lobecol_ssal_monthly_shifts_detrended.csv"))
bot_sal_monthly_shifts <- read_csv(here::here("rstars_results/lobecol_bsal_monthly_shifts_detrended.csv"))

mss <- surf_sal_monthly_shifts %>% 
  filter(RSI != 0) %>% 
  mutate(var = "surface salinity") %>% 
  dplyr::select(area_id, time, RSI, var)

mbs <- bot_sal_monthly_shifts %>% 
  filter(RSI != 0) %>% 
  mutate(var = "bottom salinity") %>% 
  dplyr::select(area_id, time, RSI, var)


# Combine results for plotting
sal_results_summ <- bind_rows(list(mss, mbs)) %>% 
  mutate(
    shift_direction = if_else(RSI > 0, "Shift to Salt", "Shift to Fresh"),
    area_id = factor(area_id, levels = areas_northsouth),
    year = year(time))

# Plot the breaks over the
monthly_fvcom_sal  %>% 
  select(-c(bottom_salinity, surface_salinity)) %>% 
  rename(
    `bottom salinity` = bsal_model_resid,
    `surface salinity` = ssal_model_resid) %>% 
  pivot_longer(
    cols = ends_with("salinity"), 
    names_to = "var", 
    values_to = "salinity") %>% 
  mutate(var = str_replace_all(var, "_", " "),
         area_id = factor(area_id, levels = areas_northsouth)) %>% 
  ggplot() +
  geom_line(
    aes(time, salinity),
    linewidth = 0.2, alpha = 0.5) +
  geom_segment(
    data = filter(
      sal_results_summ, 
      str_detect(shift_direction, "Salt")),
    linewidth = 0.8,
    aes(x = time, xend = time, y = -Inf, , yend = Inf, 
        color = shift_direction),  
    arrow = arrow(length = unit(0.3, "cm"), ends = "last")) +
  geom_segment(
    data = filter(
      sal_results_summ, 
      !str_detect(shift_direction, "Salt")),
    linewidth = 0.8,
    aes(x = time, xend = time, y = -Inf, , yend = Inf, 
        color = shift_direction),  
    arrow = arrow(length = unit(0.3, "cm"), ends = "first")) +
  scale_color_gmri() +
  facet_grid(area_id~var, labeller = label_wrap_gen(), scales = "free") +
  theme(
    panel.grid.major = element_blank(),
    legend.position = "bottom",
    strip.text.y = element_text(angle = 0)) + 
  labs(color = "",
       y = "Salinity Anomaly",
       title = "FVCOM Hindcast Salinity",
       subtitle = "STARS Regime Shift Index - Detrended Salinity"
       )


Days In/Out Temperature Preferences

Things we were thinking about

days over 20 is too high for 12 degrees is the magic number for larvae

Metabolism/Stress at 18C

Code
# # The degday functions were built with the ability to model daily cycles
# # the sine and triangle methods accomodate this
# # since we only have daily data the simple average is probably the way to do it
# thresh_low <- 10
# thresh_up  <- 18
# lobecol_dd <- daily_fvcom_temps %>%
#   group_by(area_id) %>% 
#   mutate(
#     opt_btemp_dd = dd_calc(
#       daily_min = bottom_t, 
#       daily_max = bottom_t, 
#       thresh_low = thresh_low, 
#       thresh_up = thresh_up, 
#       method = "simp_avg"),
#     opt_btemp  = if_else(between(bottom_t, 10,18), 1, 0),
#     stress_btemp  = if_else(bottom_t > 18, 1, NA),
#     cold_btemp  = if_else(bottom_t < 10, 1, NA))
# 
# 
# lobecol_dd %>% 
#   mutate(year = lubridate::year(time)) %>% 
#   group_by(area_id, year) %>% 
#   summarise(
#     across(
#       ends_with("temp"), 
#       ~sum(.x, na.rm = T)),
#     .groups = "drop") %>% 
#   pivot_longer(
#     cols = ends_with("temp"), 
#     names_to = "var", 
#     values_to = "totals") %>% 
#   mutate(area_id = factor(area_id, levels = areas_northsouth),
#          var = case_when(
#            var == "opt_btemp" ~ "Preferred Bottom Temperatures 10-18C", 
#            var == "stress_btemp" ~ "Heat Stress Conditions >18C",
#            var == "cold_btemp" ~ "Below Preferred Conditions <10C")) %>% 
#   ggplot() +
#   geom_col(aes(year, y = totals, fill = var), 
#            color = "white", width = 1, linewidth = 0.1) + 
#    facet_grid(area_id~.) +
#   scale_x_continuous(expand = expansion(add = c(0.15,0.15))) +
#   scale_fill_manual(values = c("lightblue",  "#ea4f12", "#057872")) +
#   theme(strip.text.y = element_text(angle = 0),
#       legend.position = "bottom") +
#   guides(fill = guide_legend(
#     nrow = 3, 
#     title.position = "top",
#     title.hjust = 0.5))+
#   labs(y = "Days in Range",
#        fill = "Daily Temperature Conditions", color = "",
#        title = "FVCOM Bottom Temperature Suitability")
# 

Other Environmental Features

In addition to temperature, we have the additional environmental covariates as well.

Code
# list(
#   "PPD" = ecodata::chl_pp,
#   "Z" = ecodata::chl_pp)
# 
# 
# ecodata::plot_nao()
# ecodata::plot_gsi()