Detailing common methods and preprocessing steps for regime shift detection methods
Published
August 19, 2025
Code
# Packages{library(here)library(tidyverse)library(rshift)library(gmRi)library(mgcv)library(EnvCpt)library(showtext)library(patchwork)library(degday)}# Make sure lag is done correctlyconflicted::conflict_prefer("lag", "stats")conflicted::conflict_prefer("filter", "dplyr")conflicted::conflict_prefer("select", "dplyr")# Path to timeserieslob_ecol_path <-cs_path("mills", "Projects/Lobster ECOL")# Plot themetheme_set(theme_gmri_simple(legend.position ="bottom"))# Factor Levels# New areasareas_northsouth <-c("eastern maine", "central maine", "western maine", "eastern mass","southern mass", "rhode island shore","long island sound", "new jersey shore", "five fathom bank", "virginia shore","gom_gbk", "sne")deg_c <-"\u00b0C"
Code
# Use GMRI style for fontsuse_gmri_style_rmd()
Code
# ------- New Regions# Load the data for the new regionsdaily_fvcom_temps <-read_csv(str_c(lob_ecol_path, "FVCOM_processed/area_timeseries/new_regions_fvcom_temperatures_daily.csv")) %>%mutate(yday = lubridate::yday(time),year = lubridate::year(time),month = lubridate::month(time),month_lab =factor(month.abb[month(time)], levels = month.abb), depth_type =if_else(area_id %in%c("gom_gbk", "SNE"), "offshore", "nearshore"),area_id =factor(area_id, levels = areas_northsouth) )# Monthly Salinitymonthly_fvcom_sal <-read_csv(str_c(lob_ecol_path, "FVCOM_processed/area_timeseries/new_regions_fvcom_salinity_monthly_gom3.csv")) %>%mutate(month = lubridate::month(time),month_lab =factor(month.abb[month(time)], levels = month.abb),depth_type =if_else(area_id %in%c("gom_gbk", "SNE"), "offshore", "nearshore"),area_id =factor(area_id, levels = areas_northsouth) )
STARS Regime Shifts - LobECOL Areas
{rstars} Implementation
The following results extend the detailed approach of Stirnimann et al. 2019’s {rstars} repository.
Code
# Stirnimann used these values in their paper:# l = 5,10,15,17.5 years, with monthly data# Huber = 1# Subsampling = (l + 1)/3# Load the function(s)source(here::here("rstars-master","rSTARS.R"))
Data Pre-Processing
For our results we’ve expanded on their approach with two additional pre-procesing steps recommended by Rodionov (pretty sure) & in the recent review by Lund et al. 2023’s.
These two additional steps include removing the seasonal cycle using a GAM and a cyclic cubic regression spline to model the seasonal cycle. The other additional pre-processing step is the removal of the long-term trend. To illustrate these pre-processing steps, a daily bottome temperature timeseries for Gulf of Maine / Georges Bank will be used below:
Code
# Test timeseriestest_area <-"gom_gbk"tester <-filter(daily_fvcom_temps, area_id == test_area) test_sal <-filter(monthly_fvcom_sal, area_id == test_area) # Plot Surface and Bottomp1 <-ggplot(tester, aes(time)) +geom_line(aes(y = surface_t, color ="Surface Temperature"), alpha =0.8) +geom_line(aes(y = bottom_t, color ="Bottom Temperature"), alpha =0.8) +scale_color_gmri() +labs(y ="Temperature", title = test_area, subtitle ="Temperatures")p2 <-ggplot(test_sal, aes(time)) +geom_line(aes(y = surface_salinity, color ="Surface Salinity"), alpha =0.8) +geom_line(aes(y = bottom_salinity, color ="Bottom Salinity"), alpha =0.8) +scale_color_gmri() +labs(y ="Salinity", subtitle ="Salinity")p1/p2
Removing Seasonal Cycle
We can model this periodicity to get a sense of the average temperature based on the day of the year. As a pre-processing step
Seasonal temperatures are at their lowest during February-March, and they peak during July-August. Each individual year will vary, but the seasonal period is quite consistent.
Code
# Model a linear trend and the seasonal cyclest_seasonality_gam <-gam( surface_t ~s(yday, bs ="cc"), data = tester,method ="REML")bt_seasonality_gam <-gam( bottom_t ~s(yday, bs ="cc"), data = tester,method ="REML")# Add the predictions and residualsbt_season_test <- broom::augment(x = bt_seasonality_gam) %>%rename(bt_seasonal_fit = .fitted) %>%distinct(yday, bt_seasonal_fit)# Add the predictions and residualsst_season_test <- broom::augment(x = st_seasonality_gam) %>%rename(st_seasonal_fit = .fitted) %>%distinct(yday, st_seasonal_fit) # Put them togetherseason_test <-left_join(bt_season_test, st_season_test)
Code
# Plot what that looke like on an annual time scalep1 <-ggplot(tester, aes(x = yday)) +geom_line(aes(y = bottom_t, group =year(time), color ="Bottom Temperature"), linewidth =0.5, alpha =0.25) +geom_smooth(data = tester,aes(y = bottom_t, color ="Bottom Temperature"), method ="gam", se = F,alpha =0.75, linewidth =1.5) +geom_line(aes(y = surface_t, group =year(time), color ="Surface Temperature"), linewidth =0.5, alpha =0.25) +geom_smooth(data = tester,aes(y = surface_t, color ="Surface Temperature"), method ="gam", se = F,alpha =0.75, linewidth =1.5) +scale_color_gmri() +theme(legend.position ="bottom") +labs(color ="Data",y ="Temperature",x ="Day of Year",title = test_area, subtitle ="Temperature Seasonality")p2 <- test_sal %>%pivot_longer(cols =ends_with("salinity"), names_to ="var", values_to ="salinity") %>%group_by(var, month_lab) %>%summarise(avg_monthly =mean(salinity, na.rm = T)) %>%ggplot() +geom_point(data = test_sal,aes(x = month_lab, y = bottom_salinity, color ="bottom_salinity"),position =position_jitter(height =0, width =0.1),alpha =0.35) +geom_point(data = test_sal,aes(x = month_lab, y = surface_salinity, color ="surface_salinity"),position =position_jitter(height =0, width =0.1),alpha =0.35) +geom_point(aes(month_lab, avg_monthly, color = var),shape ="-", size =20) +scale_color_gmri() +labs(color ="Data",y ="Salinity",x ="Month",subtitle ="Salinity Seasonality")p1/p2
Removal of seasonal (monthly) means results in timeseries that look like this:
Code
# Remove the Daily Seasonalitytester %>%left_join(season_test, by =join_by("yday")) %>%mutate(st_resid = surface_t - st_seasonal_fit,bt_resid = bottom_t - bt_seasonal_fit) %>%ggplot() +geom_line(aes(time, st_resid, color ="Surface Temperature"), alpha =0.75) +geom_line(aes(time, bt_resid, color ="Bottom Temperature"), alpha =0.75) +geom_hline(yintercept =0, linewidth =2, alpha =0.75) +scale_color_gmri() +labs(y ="Seasonal Temperature Anomaly",title ="Removal of Seasonal Temperature Cycle")
The RSTARS approach is a sequential evaluation of whether new data falls within a probable range of values based on the mean/variance of data from some starting “regime”. For cases when there exists is a long-term trend, this approach can sometimes treat a gradual transition as regime shift, which we also don’t want.
De-trending can be done quickly with lm
Code
# linear model with timetester_trend_mod <-lm(bottom_t ~ time, tester)# residuals are the detrended timeseries# we can re-add global mean if we want it in original unitstrend_tester <- broom::augment(x = tester_trend_mod) %>%rename(trend_fit = .fitted,trend_resid = .resid)# Plot detrended timeseries??geom_labellinetrend_tester %>%ggplot() +geom_line(aes(time, bottom_t),color ="gray60", alpha =0.45) +geom_line(aes(time, trend_resid +coef(tester_trend_mod)[[1]]), color ="gray20", alpha =0.65) + geomtextpath::geom_labelsmooth(aes(time, bottom_t, color ="Bottom Temperature", label ="Original"),method ="lm", se = F,color ="gray50") + geomtextpath::geom_labelsmooth(method ="lm", se = F,aes(time, trend_resid +coef(tester_trend_mod)[[1]], label ="Trend-Removed"),color ="gray10") +labs(title ="Detrending Daily Timeseries", color ="Data")
Removing Seasonalality and Long-Term Trends
We can easily implement the linear long-term trend and the seasonal periodicity using the GAM framework. Residuals from this model are what we will pass along to the RSTARS package with the recommended prewhitening routines applied.
Code
# Model a linear trend and the seasonal cycleseason_trend_gam <-gam( bottom_t ~s(yday, bs ="cc") + time, data = tester,method ="REML")# save the resultsseason_trend_results <- broom::augment(x = season_trend_gam) %>%rename(seasonal_fit = .fitted,seasonal_resid = .resid)
This is what that seasonal cycle looks like with the long-term trend compared to
Code
# Plot the fit of that processseason_trend_results %>%ggplot(aes(x = time)) +geom_line(aes(y = seasonal_fit, color ="Seasonal Cycle + Trend to Remove"), alpha =0.8, linewidth =1) +geom_line(aes(y = bottom_t, color ="Original Timeseries"), alpha =0.8, linewidth =1) +scale_color_gmri() +labs(color ="Data", y ="Temperature", title ="Seasonal Cycle + Warming Trend",subtitle ="Removed to isolate regime change unattributable to known cycles")
And this is the timeseries that would go into rstars for regime shift detection:
Code
season_trend_results %>%ggplot(aes(x = time)) +geom_line(aes(y = seasonal_resid), alpha =0.7) +labs(color ="Data", y ="Temperature Anomaly", title =str_c(test_area, " - Detrended Timeseries"))
RSTARS Daily with Full Pre-Processing Routine
The last step is to run it through the RSTARS algorithm and specify a few more arguments for how to treat the timeseries.
We need to set arguments for regime length, outlier weighting, and for our regime probability cutoff. There are also options for prewhitening methods which help address autocorrelation in the timeseries.
Code
# Run it with the same presets that were uses in Stirnimann paper# Stirnimann used these values in their paper:# l = 5,10,15,17.5 years, with monthly data# Huber = 1# Subsampling = (l + 1)/3# He also recommended using prewhitening, and recommended MPK and IP4# Or do the prewhitening on the detrended data?rstars_test <-rstars(data.timeseries =as.data.frame(season_trend_results[,c("time", "seasonal_resid")]), l.cutoff =365*7, pValue =0.05, Huber =1, Endfunction = T, # Some behavior at the end of the timeseriespreWhitening = T, # Apply prewhitening T/FOLS = F, # OLSprewhitening method T/FMPK = T, # Marriott-Pope + Kennedy prewhitening method T/FIP4 = F, # IP4 prewhitening method T/FSubsampleSize = (365*7+1) /3, # subsampling rate for hubers + prewhiteningreturnResults = T) # Return the results as a dataframe# Results now look like this:ggplot() +geom_line(data = season_trend_results,aes(time, seasonal_resid), linewidth =0.5, alpha =0.5) +geom_line(data = rstars_test,aes(time, regime_mu), linewidth =1) +geom_vline(data =filter(rstars_test, RSI !=0),aes(xintercept = time), color ="red", linewidth =1) +labs(x ="", y ="Detrended Bottom Temp Anomaly",title ="STARS Results - After full pre-processing steps",subtitle = test_area)
RSTARS Daily
If we apply the above steps to each daily bottom temperature timeseries we can return the following results:
Code
# # Run them all# bot_temp_shifts <- daily_fvcom_temps %>%# mutate(yday = lubridate::yday(time)) %>%# split(.$area_id) %>%# map_dfr(function(.x){# # # Fit the model to detrend and remove seasons# # Model a linear trend and the seasonal cycle# season_trend_model <- gam(# bottom_t ~ s(yday, bs = "cc") + time,# data = .x,# method = "REML")# # # save the results# preprocessed_results <- broom::augment(x = season_trend_model) %>%# rename(# model_fit = .fitted,# model_resid = .resid)# # # # Get the results from that# x_rstars <- rstars(# data.timeseries = as.data.frame(preprocessed_results[,c("time", "model_resid")]),# l.cutoff = 365*7,# pValue = 0.05,# Huber = 1,# Endfunction = T,# preWhitening = T,# OLS = F,# MPK = T,# IP4 = F,# SubsampleSize = (365*7 + 1)/3,# returnResults = T)# # return(x_rstars)# # }, .id = "area_id")# # # # Save it# write_csv(bot_temp_shifts, here::here("rstars_results/lobecol_btemp_shifts_detrended.csv"))
Repeating the process for daily sea surface temperatures gives us these RSI results:
Code
# # Run them all# surf_temp_shifts <- daily_fvcom_temps %>%# mutate(yday = lubridate::yday(time)) %>%# split(.$area_id) %>%# map_dfr(function(.x){# # # Fit the model to detrend and remove seasons# # Model a linear trend and the seasonal cycle# season_trend_model <- gam(# surface_t ~ s(yday, bs = "cc") + time,# data = .x,# method = "REML")# # # save the results# preprocessed_results <- broom::augment(x = season_trend_model) %>%# rename(# model_fit = .fitted,# model_resid = .resid)# # # # Get the results from that# x_rstars <- rstars(# data.timeseries = as.data.frame(preprocessed_results[,c("time", "model_resid")]),# l.cutoff = 365*7,# pValue = 0.05,# Huber = 1,# Endfunction = T,# preWhitening = T,# OLS = F,# MPK = T,# IP4 = F,# SubsampleSize = (365*7 + 1)/3,# returnResults = T)# # return(x_rstars)# # }, .id = "area_id")# # # # Save it# write_csv(surf_temp_shifts, here::here("rstars_results/lobecol_stemp_shifts_detrended.csv"))
Daily Temperatures Shifts
The following results show the regime shift index results from detrended dailt temperatures:
Code
# Load that data againbot_temp_shifts <-read_csv(here::here("rstars_results/lobecol_btemp_shifts_detrended.csv"))# Load that data againsurf_temp_shifts <-read_csv(here::here("rstars_results/lobecol_stemp_shifts_detrended.csv"))# # Plot the RSI# p1 <- surf_temp_shifts %>% # ggplot() +# geom_line(aes(time, RSI), linewidth = 0.5, alpha = 0.35) +# geom_hline(yintercept = 0) +# facet_grid() +# labs(# x = "Time",# y = "RSI",# title = "All Areas, Surface Temperature RSI")# # # Plot the RSI# p2 <- bot_temp_shifts %>% # ggplot() +# geom_line(aes(time, RSI), linewidth = 0.5, alpha = 0.35) +# geom_hline(yintercept = 0) +# facet_grid() +# labs(# x = "Time",# y = "RSI",# title = "All Areas, Bottom Temperature RSI")# # p1/p2
Code
# better daily plots# Summarise them and put them togethermst_daily <- surf_temp_shifts %>%filter(RSI !=0) %>%mutate(var ="daily surface temperature") %>% dplyr::select(area_id, time, RSI, var)mbt_daily <- bot_temp_shifts %>%filter(RSI !=0) %>%mutate(var ="daily bottom temperature") %>% dplyr::select(area_id, time, RSI, var)# Combine results for plottingresults_daily_summ <-bind_rows(list(mst_daily, mbt_daily)) %>%mutate(shift_direction =if_else(RSI >0, "Shift to Warm", "Shift to Cool"), area_id =factor(area_id, levels = areas_northsouth),year =year(time))# Get Monthly temperatures in long formtemps_daily_long <- daily_fvcom_temps %>%rename(`daily surface temperature`= surface_t,`daily bottom temperature`= bottom_t) %>%pivot_longer(names_to ="var", values_to ="temperature", cols =ends_with("temperature"))# Plot the breaks over the timeseriesggplot() +geom_line(data = temps_daily_long,aes(time, temperature),linewidth =0.2, alpha =0.5) +# Add a vertical line using geom_segment with an arrowgeom_segment(data =filter( results_daily_summ, str_detect(shift_direction, "Warm")),linewidth =0.8,aes(x = time, xend = time, y =-Inf, , yend =Inf, color = shift_direction), arrow =arrow(length =unit(0.3, "cm"), ends ="last")) +geom_segment(data =filter( results_daily_summ, !str_detect(shift_direction, "Warm")),linewidth =0.8,aes(x = time, xend = time, y =-Inf, , yend =Inf, color = shift_direction), arrow =arrow(length =unit(0.3, "cm"), ends ="first")) +scale_color_gmri() +scale_y_continuous(labels = scales::label_number(suffix = deg_c)) +facet_grid( area_id~var, labeller =label_wrap_gen(),scales ="free") +theme(legend.position ="bottom",strip.text.y =element_text(angle =0)) +labs(title ="FVCOM Hindcast Temperatures",subtitle ="STARS Regime Shift Index - Detrended Daily Data",y ="Temperature",color ="")
RSTARS on Monthly Temperatures
Other than temperature we have very few daily environmental timeseries.
We also are less interested in the time within a year that changes ocurred, and more interested in the general year/month that changes are ocurring in.
For these reasons and for consistency with the other environmental drivers, I will repeat these steps using monthly mean SSt & BT.
Code
# Take the input data,# Get monthly averages# remove trend and long-term monthly means# run STARS routine again# Run the monthly versionsmonthly_fvcom_temps <- daily_fvcom_temps %>%group_by(area_id, year, month) %>%summarise(surface_t =mean(surface_t, na.rm = T),bottom_t =mean(bottom_t, na.rm = T),.groups ="drop") %>%mutate(month =factor(month),yr_num =as.numeric(as.character(year)),time =as.Date(str_c( year,str_pad(month, side ="left", pad ="0", width =2),"15", sep ="-")))# Fit the model (by area_id) to detrend and remove seasons# Model a linear trend and the seasonal cyclemonthly_fvcom_temps <- monthly_fvcom_temps %>%split(.$area_id) %>%map_dfr(function(area_x_monthly){ monthly_stemp_model <-gam( surface_t ~ month + yr_num,data = area_x_monthly,method ="REML") monthly_btemp_model <-gam( bottom_t ~ month + yr_num,data = area_x_monthly,method ="REML")# Add back the results to the model data detrend_out <- area_x_monthly %>%mutate(stemp_model_fit = monthly_stemp_model$fitted.values,stemp_model_resid = monthly_stemp_model$residuals,btemp_model_fit = monthly_btemp_model$fitted.values,btemp_model_resid = monthly_btemp_model$residuals)return(detrend_out) }, .id ="area_id") %>%mutate(area_id =factor(area_id, levels = areas_northsouth))
Recording Rates of Change
It is usefull to be aware on how much each area is warming as a long-term rate and whether or not these are significant baseline levels of warming or not. The following table reports the annual warming rate and whether or not this is significant.
Code
library(trend)# # Mann-Kendall Test# # Kendall::MannKendall(tester$surface_t)$sl# str(trend::mk.test(tester$surface_t))# trend::mk.test(tester$surface_t)$p.value < 0.05# # Seasonal mann-kendall# str(trend::smk.test(ts(tester$surface_t, frequency = 365)))# trend::smk.test(ts(tester$surface_t, frequency = 365))$p.value < 0.05# # sens slope# trend::sens.slope(tester$surface_t) # sens slopedaily_fvcom_temps %>%split(.$area_id) %>%map_dfr(function(x){ surface_warming <- trend::smk.test(ts(x$surface_t, frequency =365))$p.value <0.05 bottom_warming <- trend::smk.test(ts(x$bottom_t, frequency =365))$p.value <0.05 surface_annual_rate <-ifelse( surface_warming, coef(lm(surface_t ~ time, data = x))[[2]] *365*10,NA) bottom_annual_rate <-ifelse( bottom_warming, coef(lm(surface_t ~ time, data = x))[[2]] *365*10,NA)return(tibble("Surface Warming"= surface_warming,"Decadal Surface Warming"=round(surface_annual_rate, 2),"Bottom Warming"= bottom_warming,"Decadal Bottom Warming"=round(bottom_annual_rate, 2))) }, .id ="area_id") %>% gt::gt() %>% gt::tab_header(title ="Long Term Regional Warming Rates",subtitle ="Trend Significance Evaluated with Seasonal Mann-Kendall Trend Test (Hirsch-Slack Test)")
Long Term Regional Warming Rates
Trend Significance Evaluated with Seasonal Mann-Kendall Trend Test (Hirsch-Slack Test)
area_id
Surface Warming
Decadal Surface Warming
Bottom Warming
Decadal Bottom Warming
eastern maine
TRUE
0.28
TRUE
0.28
central maine
TRUE
0.33
TRUE
0.33
western maine
TRUE
0.49
TRUE
0.49
eastern mass
TRUE
0.56
TRUE
0.56
southern mass
TRUE
0.33
TRUE
0.33
rhode island shore
TRUE
0.28
TRUE
0.28
long island sound
TRUE
0.21
TRUE
0.21
new jersey shore
TRUE
0.43
TRUE
0.43
five fathom bank
TRUE
0.15
TRUE
0.15
virginia shore
TRUE
0.39
TRUE
0.39
gom_gbk
TRUE
0.41
TRUE
0.41
sne
TRUE
0.43
TRUE
0.43
Raw Temperatures
Without removing the seasonal cycle or the long-term warming trend we get the following breakpoints:
Code
# Run them allsurf_temp_monthly_shifts_raw <- monthly_fvcom_temps %>%split(.$area_id) %>%map_dfr(function(.x_monthly){# Get the results from that x_rstars <-rstars(data.timeseries =as.data.frame( .x_monthly[,c("time", "surface_t")]),l.cutoff =12*7,pValue =0.05,Huber =1,Endfunction = T,preWhitening = T,OLS = F,MPK = T,IP4 = F,SubsampleSize = (12*7+1)/3,returnResults = T)return(x_rstars) }, .id ="area_id")#Bottom temperaturebot_temp_monthly_shifts_raw <- monthly_fvcom_temps %>%split(.$area_id) %>%map_dfr(function(.x_monthly){# Get the results from that x_rstars <-rstars(data.timeseries =as.data.frame( .x_monthly[,c("time", "bottom_t")]),l.cutoff =12*7,pValue =0.05,Huber =1,Endfunction = T,preWhitening = T,OLS = F,MPK = T,IP4 = F,SubsampleSize = (12*7+1)/3,returnResults = T)return(x_rstars) }, .id ="area_id")# # Save them# write_csv(surf_temp_monthly_shifts_raw, here::here("rstars_results/lobecol_stemp_monthly_shifts_raw.csv"))# write_csv(bot_temp_monthly_shifts_raw, here::here("rstars_results/lobecol_btemp_monthly_shifts_raw.csv"))
Raw Temp Shifts
If we pass monthly temperatures to the algorithm it does not detect any noticeable regime change in our regional timeseries.
Code
# Load that data againraw_surf_temp_monthly_shifts <-read_csv(here::here("rstars_results/lobecol_stemp_monthly_shifts_raw.csv"))raw_bot_temp_monthly_shifts <-read_csv(here::here("rstars_results/lobecol_btemp_monthly_shifts_raw.csv"))# Summarise them and put them togethermst_raw <- raw_surf_temp_monthly_shifts %>%filter(RSI !=0) %>%mutate(var ="raw monthly surface temperature") %>% dplyr::select(area_id, time, RSI, var)mbt_raw <- raw_bot_temp_monthly_shifts %>%filter(RSI !=0) %>%mutate(var ="raw monthly bottom temperature") %>% dplyr::select(area_id, time, RSI, var)# Combine results for plottingstars_results_summ_raw <-bind_rows(list(mst_raw, mbt_raw)) %>%mutate(shift_direction =if_else(RSI >0, "Shift to Warm", "Shift to Cool"), area_id =factor(area_id, levels = areas_northsouth),year =year(time))# Get Monthly temperatures in long formtemps_monthly_long <- monthly_fvcom_temps %>%rename(`monthly surface temperature`= surface_t,`monthly bottom temperature`= bottom_t) %>%pivot_longer(names_to ="var", values_to ="temperature", cols =ends_with("temperature"))# Plot the breaks over the timeseriesggplot() +geom_line(data = temps_monthly_long,aes(time, temperature),linewidth =0.2, alpha =0.5) +# Add a vertical line using geom_segment with an arrowgeom_segment(data =filter( stars_results_summ_raw, str_detect(var, "monthly"),str_detect(shift_direction, "Warm")),linewidth =0.8,aes(x = time, xend = time, y =-Inf, , yend =Inf, color = shift_direction), arrow =arrow(length =unit(0.3, "cm"), ends ="last")) +geom_segment(data =filter( stars_results_summ_raw, !str_detect(shift_direction, "Warm")),linewidth =0.8,aes(x = time, xend = time, y =-Inf, , yend =Inf, color = shift_direction), arrow =arrow(length =unit(0.3, "cm"), ends ="first")) +scale_color_gmri() +scale_y_continuous(labels = scales::label_number(suffix = deg_c)) +facet_grid( area_id~var, labeller =label_wrap_gen(),scales ="free") +theme(legend.position ="bottom",strip.text.y =element_text(angle =0)) +labs(title ="FVCOM Hindcast Temperatures",subtitle ="STARS Regime Shift Index - Raw Monthly Data",y ="Temperature",color ="")
Detrended Temperatures
For comparison, I’m also running the routine on timeseries where the long-term trend and the average monthly temperatures are removed.
Code
# Run them allsurf_temp_monthly_shifts <- monthly_fvcom_temps %>%split(.$area_id) %>%map_dfr(function(.x_monthly){# Get the results from that x_rstars <-rstars(data.timeseries =as.data.frame( .x_monthly[,c("time", "stemp_model_resid")]),l.cutoff =12*7,pValue =0.05,Huber =1,Endfunction = T,preWhitening = T,OLS = F,MPK = T,IP4 = F,SubsampleSize = (12*7+1)/3,returnResults = T)return(x_rstars) }, .id ="area_id")#Bottom temperaturebot_temp_monthly_shifts <- monthly_fvcom_temps %>%split(.$area_id) %>%map_dfr(function(.x_monthly){# Get the results from that x_rstars <-rstars(data.timeseries =as.data.frame( .x_monthly[,c("time", "btemp_model_resid")]),l.cutoff =12*7,pValue =0.05,Huber =1,Endfunction = T,preWhitening = T,OLS = F,MPK = T,IP4 = F,SubsampleSize = (12*7+1)/3,returnResults = T)return(x_rstars) }, .id ="area_id")# Save themwrite_csv(surf_temp_monthly_shifts, here::here("rstars_results/lobecol_stemp_monthly_shifts_detrended.csv"))write_csv(bot_temp_monthly_shifts, here::here("rstars_results/lobecol_btemp_monthly_shifts_detrended.csv"))
Detrended Temp Shifts
Code
# Load that data againsurf_temp_monthly_shifts <-read_csv(here::here("rstars_results/lobecol_stemp_monthly_shifts_detrended.csv"))bot_temp_monthly_shifts <-read_csv(here::here("rstars_results/lobecol_btemp_monthly_shifts_detrended.csv"))# Summarise them and put them togethermst <- surf_temp_monthly_shifts %>%filter(RSI !=0) %>%mutate(var ="monthly surface temperature") %>% dplyr::select(area_id, time, RSI, var)mbt <- bot_temp_monthly_shifts %>%filter(RSI !=0) %>%mutate(var ="monthly bottom temperature") %>% dplyr::select(area_id, time, RSI, var)# Combine results for plottingstars_results_summ <-bind_rows(list(mst, mbt)) %>%mutate(shift_direction =if_else(RSI>0, "Shift to Warm", "Shift to Cool"), area_id =factor(area_id, levels = areas_northsouth),year =year(time))# Get Monthly Residuals in long formresids_monthly_long <- monthly_fvcom_temps %>%rename(`monthly surface temperature`= stemp_model_resid,`monthly bottom temperature`= btemp_model_resid) %>%pivot_longer(names_to ="var", values_to ="temperature", cols =ends_with("temperature"))# Plot the breaks over the detrended timesiersggplot() +geom_line(data = resids_monthly_long,aes(time, temperature),linewidth =0.2, alpha =0.5) +# Add a vertical line using geom_segment with an arrowgeom_segment(data =filter( stars_results_summ, str_detect(var, "monthly"),str_detect(shift_direction, "Warm")),linewidth =0.8,aes(x = time, xend = time, y =-Inf, , yend =Inf, color = shift_direction), arrow =arrow(length =unit(0.3, "cm"), ends ="last")) +geom_segment(data =filter( stars_results_summ, str_detect(var, "monthly"),!str_detect(shift_direction, "Warm")),linewidth =0.8,aes(x = time, xend = time, y =-Inf, , yend =Inf, color = shift_direction), arrow =arrow(length =unit(0.3, "cm"), ends ="first")) +scale_color_gmri() +scale_y_continuous(labels = scales::label_number(suffix = deg_c)) +facet_grid( area_id~var, labeller =label_wrap_gen(),scales ="free") +theme(legend.position ="bottom",strip.text.y =element_text(angle =0)) +labs(title ="FVCOM Hindcast Temperatures",subtitle ="STARS Regime Shift Index - Detrended Temperatures",y ="Temperature Anomaly",color ="")
Monthly Salinity
Salinity was processed separately and is only available as monthly data. Regime shift checks will only be performed on the detrended timeseries based on how the temperature results went. Without removing seasonality the regime change is lost in the noise of within-year variability.
Code
# Fit the model (by area_id) to detrend and remove seasons# Model a linear trend and the seasonal cyclemonthly_fvcom_sal <- monthly_fvcom_sal %>%mutate(month =month(time),yr_num =year(time)) %>%split(.$area_id) %>%map_dfr(function(area_x_monthly){ surface_model <-gam( surface_salinity ~ month + yr_num,data = area_x_monthly,method ="REML") bottom_model <-gam( bottom_salinity ~ month + yr_num,data = area_x_monthly,method ="REML")# Add back the results to the model data detrend_out <- area_x_monthly %>%mutate(ssal_model_fit = surface_model$fitted.values,ssal_model_resid = surface_model$residuals,bsal_model_fit = bottom_model$fitted.values,bsal_model_resid = bottom_model$residuals)return(detrend_out) }, .id ="area_id") %>%mutate(area_id =factor(area_id, levels = areas_northsouth))
Code
# Take the input data,# Get monthly averages# remove trend and long-term monthly means# run STARS routine again# Run the monthly versionssurf_sal_monthly_shifts <- monthly_fvcom_sal %>%split(.$area_id) %>%map_dfr(function(.x){# Get the results from the detrended surface salinities x_rstars <-rstars(data.timeseries =as.data.frame( .x[,c("time", "ssal_model_resid")]),l.cutoff =12*7,pValue =0.05,Huber =1,Endfunction = T,preWhitening = T,OLS = F,MPK = T,IP4 = F,SubsampleSize = (12*7+1)/3,returnResults = T)return(x_rstars) }, .id ="area_id")#Bottom Salinitybot_sal_monthly_shifts <- monthly_fvcom_sal %>%split(.$area_id) %>%map_dfr(function(.x){# Get the results from the detrended bottom salinities x_rstars <-rstars(data.timeseries =as.data.frame( .x[,c("time", "bsal_model_resid")]),l.cutoff =12*7,pValue =0.05,Huber =1,Endfunction = T,preWhitening = T,OLS = F,MPK = T,IP4 = F,SubsampleSize = (12*7+1)/3,returnResults = T)return(x_rstars) }, .id ="area_id")# Save themwrite_csv(surf_sal_monthly_shifts, here::here("rstars_results/lobecol_ssal_monthly_shifts_detrended.csv"))write_csv(bot_sal_monthly_shifts, here::here("rstars_results/lobecol_bsal_monthly_shifts_detrended.csv"))
Plotting Salinity Breaks
Code
surf_sal_monthly_shifts <-read_csv(here::here("rstars_results/lobecol_ssal_monthly_shifts_detrended.csv"))bot_sal_monthly_shifts <-read_csv(here::here("rstars_results/lobecol_bsal_monthly_shifts_detrended.csv"))mss <- surf_sal_monthly_shifts %>%filter(RSI !=0) %>%mutate(var ="surface salinity") %>% dplyr::select(area_id, time, RSI, var)mbs <- bot_sal_monthly_shifts %>%filter(RSI !=0) %>%mutate(var ="bottom salinity") %>% dplyr::select(area_id, time, RSI, var)# Combine results for plottingsal_results_summ <-bind_rows(list(mss, mbs)) %>%mutate(shift_direction =if_else(RSI >0, "Shift to Salt", "Shift to Fresh"),area_id =factor(area_id, levels = areas_northsouth),year =year(time))# Plot the breaks over themonthly_fvcom_sal %>%select(-c(bottom_salinity, surface_salinity)) %>%rename(`bottom salinity`= bsal_model_resid,`surface salinity`= ssal_model_resid) %>%pivot_longer(cols =ends_with("salinity"), names_to ="var", values_to ="salinity") %>%mutate(var =str_replace_all(var, "_", " "),area_id =factor(area_id, levels = areas_northsouth)) %>%ggplot() +geom_line(aes(time, salinity),linewidth =0.2, alpha =0.5) +geom_segment(data =filter( sal_results_summ, str_detect(shift_direction, "Salt")),linewidth =0.8,aes(x = time, xend = time, y =-Inf, , yend =Inf, color = shift_direction), arrow =arrow(length =unit(0.3, "cm"), ends ="last")) +geom_segment(data =filter( sal_results_summ, !str_detect(shift_direction, "Salt")),linewidth =0.8,aes(x = time, xend = time, y =-Inf, , yend =Inf, color = shift_direction), arrow =arrow(length =unit(0.3, "cm"), ends ="first")) +scale_color_gmri() +facet_grid(area_id~var, labeller =label_wrap_gen(), scales ="free") +theme(panel.grid.major =element_blank(),legend.position ="bottom",strip.text.y =element_text(angle =0)) +labs(color ="",y ="Salinity Anomaly",title ="FVCOM Hindcast Salinity",subtitle ="STARS Regime Shift Index - Detrended Salinity" )
Days In/Out Temperature Preferences
Things we were thinking about
days over 20 is too high for 12 degrees is the magic number for larvae
Metabolism/Stress at 18C
Code
# # The degday functions were built with the ability to model daily cycles# # the sine and triangle methods accomodate this# # since we only have daily data the simple average is probably the way to do it# thresh_low <- 10# thresh_up <- 18# lobecol_dd <- daily_fvcom_temps %>%# group_by(area_id) %>% # mutate(# opt_btemp_dd = dd_calc(# daily_min = bottom_t, # daily_max = bottom_t, # thresh_low = thresh_low, # thresh_up = thresh_up, # method = "simp_avg"),# opt_btemp = if_else(between(bottom_t, 10,18), 1, 0),# stress_btemp = if_else(bottom_t > 18, 1, NA),# cold_btemp = if_else(bottom_t < 10, 1, NA))# # # lobecol_dd %>% # mutate(year = lubridate::year(time)) %>% # group_by(area_id, year) %>% # summarise(# across(# ends_with("temp"), # ~sum(.x, na.rm = T)),# .groups = "drop") %>% # pivot_longer(# cols = ends_with("temp"), # names_to = "var", # values_to = "totals") %>% # mutate(area_id = factor(area_id, levels = areas_northsouth),# var = case_when(# var == "opt_btemp" ~ "Preferred Bottom Temperatures 10-18C", # var == "stress_btemp" ~ "Heat Stress Conditions >18C",# var == "cold_btemp" ~ "Below Preferred Conditions <10C")) %>% # ggplot() +# geom_col(aes(year, y = totals, fill = var), # color = "white", width = 1, linewidth = 0.1) + # facet_grid(area_id~.) +# scale_x_continuous(expand = expansion(add = c(0.15,0.15))) +# scale_fill_manual(values = c("lightblue", "#ea4f12", "#057872")) +# theme(strip.text.y = element_text(angle = 0),# legend.position = "bottom") +# guides(fill = guide_legend(# nrow = 3, # title.position = "top",# title.hjust = 0.5))+# labs(y = "Days in Range",# fill = "Daily Temperature Conditions", color = "",# title = "FVCOM Bottom Temperature Suitability")#
Other Environmental Features
In addition to temperature, we have the additional environmental covariates as well.
---title: "RSTARS FVCOM"description: | Detailing common methods and preprocessing steps for regime shift detection methodsdate: "Updated on: `r Sys.Date()`"format: html: code-fold: true code-tools: true df-print: kable self-contained: trueexecute: echo: true warning: false message: false fig.align: "center" comment: ""---```{r}# Packages{library(here)library(tidyverse)library(rshift)library(gmRi)library(mgcv)library(EnvCpt)library(showtext)library(patchwork)library(degday)}# Make sure lag is done correctlyconflicted::conflict_prefer("lag", "stats")conflicted::conflict_prefer("filter", "dplyr")conflicted::conflict_prefer("select", "dplyr")# Path to timeserieslob_ecol_path <-cs_path("mills", "Projects/Lobster ECOL")# Plot themetheme_set(theme_gmri_simple(legend.position ="bottom"))# Factor Levels# New areasareas_northsouth <-c("eastern maine", "central maine", "western maine", "eastern mass","southern mass", "rhode island shore","long island sound", "new jersey shore", "five fathom bank", "virginia shore","gom_gbk", "sne")deg_c <-"\u00b0C"``````{r}#| label: style-sheet#| results: asis# Use GMRI style for fontsuse_gmri_style_rmd()``````{r}#| label: fonts-config#| echo: false# Path to the directory containing the font file (replace with your actual path)font_dir <-paste0(system.file("stylesheets", package ="gmRi"), "/GMRI_fonts/Avenir/")# Register the fontfont_add(family ="Avenir",file.path(font_dir, "LTe50342.ttf"),bold =file.path(font_dir, "LTe50340.ttf"),italic =file.path(font_dir, "LTe50343.ttf"),bolditalic =file.path(font_dir, "LTe50347.ttf"))# Load the fontshowtext::showtext_auto()``````{r}#| label: load timeseries# ------- New Regions# Load the data for the new regionsdaily_fvcom_temps <-read_csv(str_c(lob_ecol_path, "FVCOM_processed/area_timeseries/new_regions_fvcom_temperatures_daily.csv")) %>%mutate(yday = lubridate::yday(time),year = lubridate::year(time),month = lubridate::month(time),month_lab =factor(month.abb[month(time)], levels = month.abb), depth_type =if_else(area_id %in%c("gom_gbk", "SNE"), "offshore", "nearshore"),area_id =factor(area_id, levels = areas_northsouth) )# Monthly Salinitymonthly_fvcom_sal <-read_csv(str_c(lob_ecol_path, "FVCOM_processed/area_timeseries/new_regions_fvcom_salinity_monthly_gom3.csv")) %>%mutate(month = lubridate::month(time),month_lab =factor(month.abb[month(time)], levels = month.abb),depth_type =if_else(area_id %in%c("gom_gbk", "SNE"), "offshore", "nearshore"),area_id =factor(area_id, levels = areas_northsouth) )```# STARS Regime Shifts - LobECOL Areas# {rstars} ImplementationThe following results extend the detailed approach of Stirnimann et al. 2019's [{rstars}](https://github.com/LStirnimann/rstars) repository.```{r}# Stirnimann used these values in their paper:# l = 5,10,15,17.5 years, with monthly data# Huber = 1# Subsampling = (l + 1)/3# Load the function(s)source(here::here("rstars-master","rSTARS.R"))```# Data Pre-ProcessingFor our results we've expanded on their approach with two additional pre-procesing steps recommended by Rodionov (pretty sure) & in the recent review by Lund et al. 2023's.These two additional steps include removing the seasonal cycle using a GAM and a cyclic cubic regression spline to model the seasonal cycle. The other additional pre-processing step is the removal of the long-term trend. To illustrate these pre-processing steps, a daily bottome temperature timeseries for `Gulf of Maine / Georges Bank` will be used below:```{r}# Test timeseriestest_area <-"gom_gbk"tester <-filter(daily_fvcom_temps, area_id == test_area) test_sal <-filter(monthly_fvcom_sal, area_id == test_area) # Plot Surface and Bottomp1 <-ggplot(tester, aes(time)) +geom_line(aes(y = surface_t, color ="Surface Temperature"), alpha =0.8) +geom_line(aes(y = bottom_t, color ="Bottom Temperature"), alpha =0.8) +scale_color_gmri() +labs(y ="Temperature", title = test_area, subtitle ="Temperatures")p2 <-ggplot(test_sal, aes(time)) +geom_line(aes(y = surface_salinity, color ="Surface Salinity"), alpha =0.8) +geom_line(aes(y = bottom_salinity, color ="Bottom Salinity"), alpha =0.8) +scale_color_gmri() +labs(y ="Salinity", subtitle ="Salinity")p1/p2```### Removing Seasonal CycleWe can model this periodicity to get a sense of the average temperature based on the day of the year. As a pre-processing stepSeasonal temperatures are at their lowest during February-March, and they peak during July-August. Each individual year will vary, but the seasonal period is quite consistent.```{r}# Model a linear trend and the seasonal cyclest_seasonality_gam <-gam( surface_t ~s(yday, bs ="cc"), data = tester,method ="REML")bt_seasonality_gam <-gam( bottom_t ~s(yday, bs ="cc"), data = tester,method ="REML")# Add the predictions and residualsbt_season_test <- broom::augment(x = bt_seasonality_gam) %>%rename(bt_seasonal_fit = .fitted) %>%distinct(yday, bt_seasonal_fit)# Add the predictions and residualsst_season_test <- broom::augment(x = st_seasonality_gam) %>%rename(st_seasonal_fit = .fitted) %>%distinct(yday, st_seasonal_fit) # Put them togetherseason_test <-left_join(bt_season_test, st_season_test)``````{r}# Plot what that looke like on an annual time scalep1 <-ggplot(tester, aes(x = yday)) +geom_line(aes(y = bottom_t, group =year(time), color ="Bottom Temperature"), linewidth =0.5, alpha =0.25) +geom_smooth(data = tester,aes(y = bottom_t, color ="Bottom Temperature"), method ="gam", se = F,alpha =0.75, linewidth =1.5) +geom_line(aes(y = surface_t, group =year(time), color ="Surface Temperature"), linewidth =0.5, alpha =0.25) +geom_smooth(data = tester,aes(y = surface_t, color ="Surface Temperature"), method ="gam", se = F,alpha =0.75, linewidth =1.5) +scale_color_gmri() +theme(legend.position ="bottom") +labs(color ="Data",y ="Temperature",x ="Day of Year",title = test_area, subtitle ="Temperature Seasonality")p2 <- test_sal %>%pivot_longer(cols =ends_with("salinity"), names_to ="var", values_to ="salinity") %>%group_by(var, month_lab) %>%summarise(avg_monthly =mean(salinity, na.rm = T)) %>%ggplot() +geom_point(data = test_sal,aes(x = month_lab, y = bottom_salinity, color ="bottom_salinity"),position =position_jitter(height =0, width =0.1),alpha =0.35) +geom_point(data = test_sal,aes(x = month_lab, y = surface_salinity, color ="surface_salinity"),position =position_jitter(height =0, width =0.1),alpha =0.35) +geom_point(aes(month_lab, avg_monthly, color = var),shape ="-", size =20) +scale_color_gmri() +labs(color ="Data",y ="Salinity",x ="Month",subtitle ="Salinity Seasonality")p1/p2```Removal of seasonal (monthly) means results in timeseries that look like this:```{r}# Remove the Daily Seasonalitytester %>%left_join(season_test, by =join_by("yday")) %>%mutate(st_resid = surface_t - st_seasonal_fit,bt_resid = bottom_t - bt_seasonal_fit) %>%ggplot() +geom_line(aes(time, st_resid, color ="Surface Temperature"), alpha =0.75) +geom_line(aes(time, bt_resid, color ="Bottom Temperature"), alpha =0.75) +geom_hline(yintercept =0, linewidth =2, alpha =0.75) +scale_color_gmri() +labs(y ="Seasonal Temperature Anomaly",title ="Removal of Seasonal Temperature Cycle")# Remove mean monthly salinity tootest_sal %>%group_by(month_lab) %>%summarise(surf_sal_mean =mean(surface_salinity),bot_sal_mean =mean(bottom_salinity),.groups ="drop") %>%right_join(test_sal, join_by(month_lab)) %>%mutate(surface_resid = surface_salinity - surf_sal_mean,bottom_resid = bottom_salinity - bot_sal_mean) %>%ggplot() +geom_line(aes(time, surface_resid, color ="Surface Salinity"), alpha =0.75, linewidth =1) +geom_line(aes(time, bottom_resid, color ="Bottom Salinity"), alpha =0.75, linewidth =1) +geom_hline(yintercept =0, linewidth =2, alpha =0.75) +scale_color_gmri() +labs(y ="Seasonal Salinity Anomaly",title ="Removal of Long-Term Monthly Mean Salinity")```### Removing Long-Term TrendsThe RSTARS approach is a sequential evaluation of whether new data falls within a probable range of values based on the mean/variance of data from some starting "regime". For cases when there exists is a long-term trend, this approach can sometimes treat a gradual transition as regime shift, which we also don't want.De-trending can be done quickly with `lm````{r}# linear model with timetester_trend_mod <-lm(bottom_t ~ time, tester)# residuals are the detrended timeseries# we can re-add global mean if we want it in original unitstrend_tester <- broom::augment(x = tester_trend_mod) %>%rename(trend_fit = .fitted,trend_resid = .resid)# Plot detrended timeseries??geom_labellinetrend_tester %>%ggplot() +geom_line(aes(time, bottom_t),color ="gray60", alpha =0.45) +geom_line(aes(time, trend_resid +coef(tester_trend_mod)[[1]]), color ="gray20", alpha =0.65) + geomtextpath::geom_labelsmooth(aes(time, bottom_t, color ="Bottom Temperature", label ="Original"),method ="lm", se = F,color ="gray50") + geomtextpath::geom_labelsmooth(method ="lm", se = F,aes(time, trend_resid +coef(tester_trend_mod)[[1]], label ="Trend-Removed"),color ="gray10") +labs(title ="Detrending Daily Timeseries", color ="Data")```## Removing Seasonalality and Long-Term TrendsWe can easily implement the linear long-term trend and the seasonal periodicity using the GAM framework. Residuals from this model are what we will pass along to the RSTARS package with the recommended prewhitening routines applied.```{r}# Model a linear trend and the seasonal cycleseason_trend_gam <-gam( bottom_t ~s(yday, bs ="cc") + time, data = tester,method ="REML")# save the resultsseason_trend_results <- broom::augment(x = season_trend_gam) %>%rename(seasonal_fit = .fitted,seasonal_resid = .resid) ```This is what that seasonal cycle looks like with the long-term trend compared to ```{r}# Plot the fit of that processseason_trend_results %>%ggplot(aes(x = time)) +geom_line(aes(y = seasonal_fit, color ="Seasonal Cycle + Trend to Remove"), alpha =0.8, linewidth =1) +geom_line(aes(y = bottom_t, color ="Original Timeseries"), alpha =0.8, linewidth =1) +scale_color_gmri() +labs(color ="Data", y ="Temperature", title ="Seasonal Cycle + Warming Trend",subtitle ="Removed to isolate regime change unattributable to known cycles")```And this is the timeseries that would go into rstars for regime shift detection:```{r}season_trend_results %>%ggplot(aes(x = time)) +geom_line(aes(y = seasonal_resid), alpha =0.7) +labs(color ="Data", y ="Temperature Anomaly", title =str_c(test_area, " - Detrended Timeseries"))```# RSTARS Daily with Full Pre-Processing RoutineThe last step is to run it through the RSTARS algorithm and specify a few more arguments for how to treat the timeseries.We need to set arguments for regime length, outlier weighting, and for our regime probability cutoff. There are also options for prewhitening methods which help address autocorrelation in the timeseries.```{r}# Run it with the same presets that were uses in Stirnimann paper# Stirnimann used these values in their paper:# l = 5,10,15,17.5 years, with monthly data# Huber = 1# Subsampling = (l + 1)/3# He also recommended using prewhitening, and recommended MPK and IP4# Or do the prewhitening on the detrended data?rstars_test <-rstars(data.timeseries =as.data.frame(season_trend_results[,c("time", "seasonal_resid")]), l.cutoff =365*7, pValue =0.05, Huber =1, Endfunction = T, # Some behavior at the end of the timeseriespreWhitening = T, # Apply prewhitening T/FOLS = F, # OLSprewhitening method T/FMPK = T, # Marriott-Pope + Kennedy prewhitening method T/FIP4 = F, # IP4 prewhitening method T/FSubsampleSize = (365*7+1) /3, # subsampling rate for hubers + prewhiteningreturnResults = T) # Return the results as a dataframe# Results now look like this:ggplot() +geom_line(data = season_trend_results,aes(time, seasonal_resid), linewidth =0.5, alpha =0.5) +geom_line(data = rstars_test,aes(time, regime_mu), linewidth =1) +geom_vline(data =filter(rstars_test, RSI !=0),aes(xintercept = time), color ="red", linewidth =1) +labs(x ="", y ="Detrended Bottom Temp Anomaly",title ="STARS Results - After full pre-processing steps",subtitle = test_area)```### RSTARS DailyIf we apply the above steps to each daily bottom temperature timeseries we can return the following results:```{r}#| eval: false#| label: bottom temperature processing# # Run them all# bot_temp_shifts <- daily_fvcom_temps %>%# mutate(yday = lubridate::yday(time)) %>%# split(.$area_id) %>%# map_dfr(function(.x){# # # Fit the model to detrend and remove seasons# # Model a linear trend and the seasonal cycle# season_trend_model <- gam(# bottom_t ~ s(yday, bs = "cc") + time,# data = .x,# method = "REML")# # # save the results# preprocessed_results <- broom::augment(x = season_trend_model) %>%# rename(# model_fit = .fitted,# model_resid = .resid)# # # # Get the results from that# x_rstars <- rstars(# data.timeseries = as.data.frame(preprocessed_results[,c("time", "model_resid")]),# l.cutoff = 365*7,# pValue = 0.05,# Huber = 1,# Endfunction = T,# preWhitening = T,# OLS = F,# MPK = T,# IP4 = F,# SubsampleSize = (365*7 + 1)/3,# returnResults = T)# # return(x_rstars)# # }, .id = "area_id")# # # # Save it# write_csv(bot_temp_shifts, here::here("rstars_results/lobecol_btemp_shifts_detrended.csv"))```Repeating the process for daily sea surface temperatures gives us these RSI results:```{r}#| eval: false#| label: surface temperature processing# # Run them all# surf_temp_shifts <- daily_fvcom_temps %>%# mutate(yday = lubridate::yday(time)) %>%# split(.$area_id) %>%# map_dfr(function(.x){# # # Fit the model to detrend and remove seasons# # Model a linear trend and the seasonal cycle# season_trend_model <- gam(# surface_t ~ s(yday, bs = "cc") + time,# data = .x,# method = "REML")# # # save the results# preprocessed_results <- broom::augment(x = season_trend_model) %>%# rename(# model_fit = .fitted,# model_resid = .resid)# # # # Get the results from that# x_rstars <- rstars(# data.timeseries = as.data.frame(preprocessed_results[,c("time", "model_resid")]),# l.cutoff = 365*7,# pValue = 0.05,# Huber = 1,# Endfunction = T,# preWhitening = T,# OLS = F,# MPK = T,# IP4 = F,# SubsampleSize = (365*7 + 1)/3,# returnResults = T)# # return(x_rstars)# # }, .id = "area_id")# # # # Save it# write_csv(surf_temp_shifts, here::here("rstars_results/lobecol_stemp_shifts_detrended.csv"))```### Daily Temperatures ShiftsThe following results show the regime shift index results from detrended dailt temperatures:```{r}#| eval: true# Load that data againbot_temp_shifts <-read_csv(here::here("rstars_results/lobecol_btemp_shifts_detrended.csv"))# Load that data againsurf_temp_shifts <-read_csv(here::here("rstars_results/lobecol_stemp_shifts_detrended.csv"))# # Plot the RSI# p1 <- surf_temp_shifts %>% # ggplot() +# geom_line(aes(time, RSI), linewidth = 0.5, alpha = 0.35) +# geom_hline(yintercept = 0) +# facet_grid() +# labs(# x = "Time",# y = "RSI",# title = "All Areas, Surface Temperature RSI")# # # Plot the RSI# p2 <- bot_temp_shifts %>% # ggplot() +# geom_line(aes(time, RSI), linewidth = 0.5, alpha = 0.35) +# geom_hline(yintercept = 0) +# facet_grid() +# labs(# x = "Time",# y = "RSI",# title = "All Areas, Bottom Temperature RSI")# # p1/p2``````{r}# better daily plots# Summarise them and put them togethermst_daily <- surf_temp_shifts %>%filter(RSI !=0) %>%mutate(var ="daily surface temperature") %>% dplyr::select(area_id, time, RSI, var)mbt_daily <- bot_temp_shifts %>%filter(RSI !=0) %>%mutate(var ="daily bottom temperature") %>% dplyr::select(area_id, time, RSI, var)# Combine results for plottingresults_daily_summ <-bind_rows(list(mst_daily, mbt_daily)) %>%mutate(shift_direction =if_else(RSI >0, "Shift to Warm", "Shift to Cool"), area_id =factor(area_id, levels = areas_northsouth),year =year(time))# Get Monthly temperatures in long formtemps_daily_long <- daily_fvcom_temps %>%rename(`daily surface temperature`= surface_t,`daily bottom temperature`= bottom_t) %>%pivot_longer(names_to ="var", values_to ="temperature", cols =ends_with("temperature"))# Plot the breaks over the timeseriesggplot() +geom_line(data = temps_daily_long,aes(time, temperature),linewidth =0.2, alpha =0.5) +# Add a vertical line using geom_segment with an arrowgeom_segment(data =filter( results_daily_summ, str_detect(shift_direction, "Warm")),linewidth =0.8,aes(x = time, xend = time, y =-Inf, , yend =Inf, color = shift_direction), arrow =arrow(length =unit(0.3, "cm"), ends ="last")) +geom_segment(data =filter( results_daily_summ, !str_detect(shift_direction, "Warm")),linewidth =0.8,aes(x = time, xend = time, y =-Inf, , yend =Inf, color = shift_direction), arrow =arrow(length =unit(0.3, "cm"), ends ="first")) +scale_color_gmri() +scale_y_continuous(labels = scales::label_number(suffix = deg_c)) +facet_grid( area_id~var, labeller =label_wrap_gen(),scales ="free") +theme(legend.position ="bottom",strip.text.y =element_text(angle =0)) +labs(title ="FVCOM Hindcast Temperatures",subtitle ="STARS Regime Shift Index - Detrended Daily Data",y ="Temperature",color ="")```# RSTARS on Monthly TemperaturesOther than temperature we have very few daily environmental timeseries. We also are less interested in the time within a year that changes ocurred, and more interested in the general year/month that changes are ocurring in. For these reasons and for consistency with the other environmental drivers, I will repeat these steps using monthly mean SSt & BT.```{r}#| label: process monthly mean SST + BT# Take the input data,# Get monthly averages# remove trend and long-term monthly means# run STARS routine again# Run the monthly versionsmonthly_fvcom_temps <- daily_fvcom_temps %>%group_by(area_id, year, month) %>%summarise(surface_t =mean(surface_t, na.rm = T),bottom_t =mean(bottom_t, na.rm = T),.groups ="drop") %>%mutate(month =factor(month),yr_num =as.numeric(as.character(year)),time =as.Date(str_c( year,str_pad(month, side ="left", pad ="0", width =2),"15", sep ="-")))# Fit the model (by area_id) to detrend and remove seasons# Model a linear trend and the seasonal cyclemonthly_fvcom_temps <- monthly_fvcom_temps %>%split(.$area_id) %>%map_dfr(function(area_x_monthly){ monthly_stemp_model <-gam( surface_t ~ month + yr_num,data = area_x_monthly,method ="REML") monthly_btemp_model <-gam( bottom_t ~ month + yr_num,data = area_x_monthly,method ="REML")# Add back the results to the model data detrend_out <- area_x_monthly %>%mutate(stemp_model_fit = monthly_stemp_model$fitted.values,stemp_model_resid = monthly_stemp_model$residuals,btemp_model_fit = monthly_btemp_model$fitted.values,btemp_model_resid = monthly_btemp_model$residuals)return(detrend_out) }, .id ="area_id") %>%mutate(area_id =factor(area_id, levels = areas_northsouth))```### Recording Rates of ChangeIt is usefull to be aware on how much each area is warming as a long-term rate and whether or not these are significant baseline levels of warming or not. The following table reports the annual warming rate and whether or not this is significant.```{r}library(trend)# # Mann-Kendall Test# # Kendall::MannKendall(tester$surface_t)$sl# str(trend::mk.test(tester$surface_t))# trend::mk.test(tester$surface_t)$p.value < 0.05# # Seasonal mann-kendall# str(trend::smk.test(ts(tester$surface_t, frequency = 365)))# trend::smk.test(ts(tester$surface_t, frequency = 365))$p.value < 0.05# # sens slope# trend::sens.slope(tester$surface_t) # sens slopedaily_fvcom_temps %>%split(.$area_id) %>%map_dfr(function(x){ surface_warming <- trend::smk.test(ts(x$surface_t, frequency =365))$p.value <0.05 bottom_warming <- trend::smk.test(ts(x$bottom_t, frequency =365))$p.value <0.05 surface_annual_rate <-ifelse( surface_warming, coef(lm(surface_t ~ time, data = x))[[2]] *365*10,NA) bottom_annual_rate <-ifelse( bottom_warming, coef(lm(surface_t ~ time, data = x))[[2]] *365*10,NA)return(tibble("Surface Warming"= surface_warming,"Decadal Surface Warming"=round(surface_annual_rate, 2),"Bottom Warming"= bottom_warming,"Decadal Bottom Warming"=round(bottom_annual_rate, 2))) }, .id ="area_id") %>% gt::gt() %>% gt::tab_header(title ="Long Term Regional Warming Rates",subtitle ="Trend Significance Evaluated with Seasonal Mann-Kendall Trend Test (Hirsch-Slack Test)")```### Raw TemperaturesWithout removing the seasonal cycle or the long-term warming trend we get the following breakpoints:```{r}#| label: run monthly temps raw# Run them allsurf_temp_monthly_shifts_raw <- monthly_fvcom_temps %>%split(.$area_id) %>%map_dfr(function(.x_monthly){# Get the results from that x_rstars <-rstars(data.timeseries =as.data.frame( .x_monthly[,c("time", "surface_t")]),l.cutoff =12*7,pValue =0.05,Huber =1,Endfunction = T,preWhitening = T,OLS = F,MPK = T,IP4 = F,SubsampleSize = (12*7+1)/3,returnResults = T)return(x_rstars) }, .id ="area_id")#Bottom temperaturebot_temp_monthly_shifts_raw <- monthly_fvcom_temps %>%split(.$area_id) %>%map_dfr(function(.x_monthly){# Get the results from that x_rstars <-rstars(data.timeseries =as.data.frame( .x_monthly[,c("time", "bottom_t")]),l.cutoff =12*7,pValue =0.05,Huber =1,Endfunction = T,preWhitening = T,OLS = F,MPK = T,IP4 = F,SubsampleSize = (12*7+1)/3,returnResults = T)return(x_rstars) }, .id ="area_id")# # Save them# write_csv(surf_temp_monthly_shifts_raw, here::here("rstars_results/lobecol_stemp_monthly_shifts_raw.csv"))# write_csv(bot_temp_monthly_shifts_raw, here::here("rstars_results/lobecol_btemp_monthly_shifts_raw.csv"))```### Raw Temp ShiftsIf we pass monthly temperatures to the algorithm it does not detect any noticeable regime change in our regional timeseries.```{r}#| eval: true#| label: raw monthly temp rsi plot# Load that data againraw_surf_temp_monthly_shifts <-read_csv(here::here("rstars_results/lobecol_stemp_monthly_shifts_raw.csv"))raw_bot_temp_monthly_shifts <-read_csv(here::here("rstars_results/lobecol_btemp_monthly_shifts_raw.csv"))# Summarise them and put them togethermst_raw <- raw_surf_temp_monthly_shifts %>%filter(RSI !=0) %>%mutate(var ="raw monthly surface temperature") %>% dplyr::select(area_id, time, RSI, var)mbt_raw <- raw_bot_temp_monthly_shifts %>%filter(RSI !=0) %>%mutate(var ="raw monthly bottom temperature") %>% dplyr::select(area_id, time, RSI, var)# Combine results for plottingstars_results_summ_raw <-bind_rows(list(mst_raw, mbt_raw)) %>%mutate(shift_direction =if_else(RSI >0, "Shift to Warm", "Shift to Cool"), area_id =factor(area_id, levels = areas_northsouth),year =year(time))# Get Monthly temperatures in long formtemps_monthly_long <- monthly_fvcom_temps %>%rename(`monthly surface temperature`= surface_t,`monthly bottom temperature`= bottom_t) %>%pivot_longer(names_to ="var", values_to ="temperature", cols =ends_with("temperature"))# Plot the breaks over the timeseriesggplot() +geom_line(data = temps_monthly_long,aes(time, temperature),linewidth =0.2, alpha =0.5) +# Add a vertical line using geom_segment with an arrowgeom_segment(data =filter( stars_results_summ_raw, str_detect(var, "monthly"),str_detect(shift_direction, "Warm")),linewidth =0.8,aes(x = time, xend = time, y =-Inf, , yend =Inf, color = shift_direction), arrow =arrow(length =unit(0.3, "cm"), ends ="last")) +geom_segment(data =filter( stars_results_summ_raw, !str_detect(shift_direction, "Warm")),linewidth =0.8,aes(x = time, xend = time, y =-Inf, , yend =Inf, color = shift_direction), arrow =arrow(length =unit(0.3, "cm"), ends ="first")) +scale_color_gmri() +scale_y_continuous(labels = scales::label_number(suffix = deg_c)) +facet_grid( area_id~var, labeller =label_wrap_gen(),scales ="free") +theme(legend.position ="bottom",strip.text.y =element_text(angle =0)) +labs(title ="FVCOM Hindcast Temperatures",subtitle ="STARS Regime Shift Index - Raw Monthly Data",y ="Temperature",color ="")```## Detrended TemperaturesFor comparison, I'm also running the routine on timeseries where the long-term trend and the average monthly temperatures are removed.```{r}#| label: run monthly prewhitened temps# Run them allsurf_temp_monthly_shifts <- monthly_fvcom_temps %>%split(.$area_id) %>%map_dfr(function(.x_monthly){# Get the results from that x_rstars <-rstars(data.timeseries =as.data.frame( .x_monthly[,c("time", "stemp_model_resid")]),l.cutoff =12*7,pValue =0.05,Huber =1,Endfunction = T,preWhitening = T,OLS = F,MPK = T,IP4 = F,SubsampleSize = (12*7+1)/3,returnResults = T)return(x_rstars) }, .id ="area_id")#Bottom temperaturebot_temp_monthly_shifts <- monthly_fvcom_temps %>%split(.$area_id) %>%map_dfr(function(.x_monthly){# Get the results from that x_rstars <-rstars(data.timeseries =as.data.frame( .x_monthly[,c("time", "btemp_model_resid")]),l.cutoff =12*7,pValue =0.05,Huber =1,Endfunction = T,preWhitening = T,OLS = F,MPK = T,IP4 = F,SubsampleSize = (12*7+1)/3,returnResults = T)return(x_rstars) }, .id ="area_id")# Save themwrite_csv(surf_temp_monthly_shifts, here::here("rstars_results/lobecol_stemp_monthly_shifts_detrended.csv"))write_csv(bot_temp_monthly_shifts, here::here("rstars_results/lobecol_btemp_monthly_shifts_detrended.csv"))```### Detrended Temp Shifts```{r}#| eval: true#| label: detrended monthly temp rsi# Load that data againsurf_temp_monthly_shifts <-read_csv(here::here("rstars_results/lobecol_stemp_monthly_shifts_detrended.csv"))bot_temp_monthly_shifts <-read_csv(here::here("rstars_results/lobecol_btemp_monthly_shifts_detrended.csv"))# Summarise them and put them togethermst <- surf_temp_monthly_shifts %>%filter(RSI !=0) %>%mutate(var ="monthly surface temperature") %>% dplyr::select(area_id, time, RSI, var)mbt <- bot_temp_monthly_shifts %>%filter(RSI !=0) %>%mutate(var ="monthly bottom temperature") %>% dplyr::select(area_id, time, RSI, var)# Combine results for plottingstars_results_summ <-bind_rows(list(mst, mbt)) %>%mutate(shift_direction =if_else(RSI>0, "Shift to Warm", "Shift to Cool"), area_id =factor(area_id, levels = areas_northsouth),year =year(time))# Get Monthly Residuals in long formresids_monthly_long <- monthly_fvcom_temps %>%rename(`monthly surface temperature`= stemp_model_resid,`monthly bottom temperature`= btemp_model_resid) %>%pivot_longer(names_to ="var", values_to ="temperature", cols =ends_with("temperature"))# Plot the breaks over the detrended timesiersggplot() +geom_line(data = resids_monthly_long,aes(time, temperature),linewidth =0.2, alpha =0.5) +# Add a vertical line using geom_segment with an arrowgeom_segment(data =filter( stars_results_summ, str_detect(var, "monthly"),str_detect(shift_direction, "Warm")),linewidth =0.8,aes(x = time, xend = time, y =-Inf, , yend =Inf, color = shift_direction), arrow =arrow(length =unit(0.3, "cm"), ends ="last")) +geom_segment(data =filter( stars_results_summ, str_detect(var, "monthly"),!str_detect(shift_direction, "Warm")),linewidth =0.8,aes(x = time, xend = time, y =-Inf, , yend =Inf, color = shift_direction), arrow =arrow(length =unit(0.3, "cm"), ends ="first")) +scale_color_gmri() +scale_y_continuous(labels = scales::label_number(suffix = deg_c)) +facet_grid( area_id~var, labeller =label_wrap_gen(),scales ="free") +theme(legend.position ="bottom",strip.text.y =element_text(angle =0)) +labs(title ="FVCOM Hindcast Temperatures",subtitle ="STARS Regime Shift Index - Detrended Temperatures",y ="Temperature Anomaly",color ="")```# Monthly SalinitySalinity was processed separately and is only available as monthly data. Regime shift checks will only be performed on the detrended timeseries based on how the temperature results went. Without removing seasonality the regime change is lost in the noise of within-year variability.```{r}# Fit the model (by area_id) to detrend and remove seasons# Model a linear trend and the seasonal cyclemonthly_fvcom_sal <- monthly_fvcom_sal %>%mutate(month =month(time),yr_num =year(time)) %>%split(.$area_id) %>%map_dfr(function(area_x_monthly){ surface_model <-gam( surface_salinity ~ month + yr_num,data = area_x_monthly,method ="REML") bottom_model <-gam( bottom_salinity ~ month + yr_num,data = area_x_monthly,method ="REML")# Add back the results to the model data detrend_out <- area_x_monthly %>%mutate(ssal_model_fit = surface_model$fitted.values,ssal_model_resid = surface_model$residuals,bsal_model_fit = bottom_model$fitted.values,bsal_model_resid = bottom_model$residuals)return(detrend_out) }, .id ="area_id") %>%mutate(area_id =factor(area_id, levels = areas_northsouth))``````{r}#| label: process monthly salinity regimes#| eval: false# Take the input data,# Get monthly averages# remove trend and long-term monthly means# run STARS routine again# Run the monthly versionssurf_sal_monthly_shifts <- monthly_fvcom_sal %>%split(.$area_id) %>%map_dfr(function(.x){# Get the results from the detrended surface salinities x_rstars <-rstars(data.timeseries =as.data.frame( .x[,c("time", "ssal_model_resid")]),l.cutoff =12*7,pValue =0.05,Huber =1,Endfunction = T,preWhitening = T,OLS = F,MPK = T,IP4 = F,SubsampleSize = (12*7+1)/3,returnResults = T)return(x_rstars) }, .id ="area_id")#Bottom Salinitybot_sal_monthly_shifts <- monthly_fvcom_sal %>%split(.$area_id) %>%map_dfr(function(.x){# Get the results from the detrended bottom salinities x_rstars <-rstars(data.timeseries =as.data.frame( .x[,c("time", "bsal_model_resid")]),l.cutoff =12*7,pValue =0.05,Huber =1,Endfunction = T,preWhitening = T,OLS = F,MPK = T,IP4 = F,SubsampleSize = (12*7+1)/3,returnResults = T)return(x_rstars) }, .id ="area_id")# Save themwrite_csv(surf_sal_monthly_shifts, here::here("rstars_results/lobecol_ssal_monthly_shifts_detrended.csv"))write_csv(bot_sal_monthly_shifts, here::here("rstars_results/lobecol_bsal_monthly_shifts_detrended.csv"))```### Plotting Salinity Breaks```{r}surf_sal_monthly_shifts <-read_csv(here::here("rstars_results/lobecol_ssal_monthly_shifts_detrended.csv"))bot_sal_monthly_shifts <-read_csv(here::here("rstars_results/lobecol_bsal_monthly_shifts_detrended.csv"))mss <- surf_sal_monthly_shifts %>%filter(RSI !=0) %>%mutate(var ="surface salinity") %>% dplyr::select(area_id, time, RSI, var)mbs <- bot_sal_monthly_shifts %>%filter(RSI !=0) %>%mutate(var ="bottom salinity") %>% dplyr::select(area_id, time, RSI, var)# Combine results for plottingsal_results_summ <-bind_rows(list(mss, mbs)) %>%mutate(shift_direction =if_else(RSI >0, "Shift to Salt", "Shift to Fresh"),area_id =factor(area_id, levels = areas_northsouth),year =year(time))# Plot the breaks over themonthly_fvcom_sal %>%select(-c(bottom_salinity, surface_salinity)) %>%rename(`bottom salinity`= bsal_model_resid,`surface salinity`= ssal_model_resid) %>%pivot_longer(cols =ends_with("salinity"), names_to ="var", values_to ="salinity") %>%mutate(var =str_replace_all(var, "_", " "),area_id =factor(area_id, levels = areas_northsouth)) %>%ggplot() +geom_line(aes(time, salinity),linewidth =0.2, alpha =0.5) +geom_segment(data =filter( sal_results_summ, str_detect(shift_direction, "Salt")),linewidth =0.8,aes(x = time, xend = time, y =-Inf, , yend =Inf, color = shift_direction), arrow =arrow(length =unit(0.3, "cm"), ends ="last")) +geom_segment(data =filter( sal_results_summ, !str_detect(shift_direction, "Salt")),linewidth =0.8,aes(x = time, xend = time, y =-Inf, , yend =Inf, color = shift_direction), arrow =arrow(length =unit(0.3, "cm"), ends ="first")) +scale_color_gmri() +facet_grid(area_id~var, labeller =label_wrap_gen(), scales ="free") +theme(panel.grid.major =element_blank(),legend.position ="bottom",strip.text.y =element_text(angle =0)) +labs(color ="",y ="Salinity Anomaly",title ="FVCOM Hindcast Salinity",subtitle ="STARS Regime Shift Index - Detrended Salinity" )```---## Days In/Out Temperature PreferencesThings we were thinking aboutdays over 20 is too high for12 degrees is the magic number for larvaeMetabolism/Stress at 18C```{r}#| eval: false# # The degday functions were built with the ability to model daily cycles# # the sine and triangle methods accomodate this# # since we only have daily data the simple average is probably the way to do it# thresh_low <- 10# thresh_up <- 18# lobecol_dd <- daily_fvcom_temps %>%# group_by(area_id) %>% # mutate(# opt_btemp_dd = dd_calc(# daily_min = bottom_t, # daily_max = bottom_t, # thresh_low = thresh_low, # thresh_up = thresh_up, # method = "simp_avg"),# opt_btemp = if_else(between(bottom_t, 10,18), 1, 0),# stress_btemp = if_else(bottom_t > 18, 1, NA),# cold_btemp = if_else(bottom_t < 10, 1, NA))# # # lobecol_dd %>% # mutate(year = lubridate::year(time)) %>% # group_by(area_id, year) %>% # summarise(# across(# ends_with("temp"), # ~sum(.x, na.rm = T)),# .groups = "drop") %>% # pivot_longer(# cols = ends_with("temp"), # names_to = "var", # values_to = "totals") %>% # mutate(area_id = factor(area_id, levels = areas_northsouth),# var = case_when(# var == "opt_btemp" ~ "Preferred Bottom Temperatures 10-18C", # var == "stress_btemp" ~ "Heat Stress Conditions >18C",# var == "cold_btemp" ~ "Below Preferred Conditions <10C")) %>% # ggplot() +# geom_col(aes(year, y = totals, fill = var), # color = "white", width = 1, linewidth = 0.1) + # facet_grid(area_id~.) +# scale_x_continuous(expand = expansion(add = c(0.15,0.15))) +# scale_fill_manual(values = c("lightblue", "#ea4f12", "#057872")) +# theme(strip.text.y = element_text(angle = 0),# legend.position = "bottom") +# guides(fill = guide_legend(# nrow = 3, # title.position = "top",# title.hjust = 0.5))+# labs(y = "Days in Range",# fill = "Daily Temperature Conditions", color = "",# title = "FVCOM Bottom Temperature Suitability")# ```# Other Environmental FeaturesIn addition to temperature, we have the additional environmental covariates as well.```{r}#| eval: false# list(# "PPD" = ecodata::chl_pp,# "Z" = ecodata::chl_pp)# # # ecodata::plot_nao()# ecodata::plot_gsi()```