Exporatory Data Analysis of NEFSC Bottom Trawl Survey Data

Exploration of spatial and temporal patterns in abundance, and bodymass of fishes from the Northeast groundfish survey.

Build code containing data wrangling and conversions can be accessed here.

Data loaded for this markdown is pulled directly from the {targets} pipeline for data consistency with a developing codebase.

####  Load NEFSC Groundfish Data  ####

# Loading from targets:
# Load the area-stratified biomass/abundances that used species cutoffs
withr::with_dir(rprojroot::find_root('_targets.R'), 
                tar_load(nefsc_stratified))


# Do some text formatting
nefsc_stratified <- nefsc_stratified %>% 
  mutate(
    id = as.character(id),
    spec_class = case_when(
      spec_class == "dem" ~ "Demersal",
      spec_class == "gf" ~ "Groundfish",
      spec_class == "pel" ~ "Pelagic",
      spec_class == "el" ~ "Elasmobranch",
      TRUE ~ "Unclassified"
  ))

# Run Summary Functions
ann_means <- ss_annual_summary(nefsc_stratified, include_epu = F)
seasonals <- ss_seasonal_summary(nefsc_stratified, include_epu = F) 

# bind them so you can facet
summs <- bind_rows(ann_means, seasonals) %>% 
  mutate(season = factor(season, levels = c("Spring", "Fall", "Spring + Fall")))


#drop haddock to see if that changes the bump
haddock_ann  <- ss_annual_summary(filter(nefsc_stratified, comname != "haddock"))
haddock_seas <- ss_seasonal_summary(filter(nefsc_stratified, comname != "haddock"))
no_haddockn  <- bind_rows(haddock_ann, haddock_seas) %>% 
  mutate(season = factor(season, levels = c("Spring", "Fall", "Spring + Fall")))

Prominant Species Timelines

This section is for species of prominent importance either for their significance as a commercial species, or their proliferance in the survey.

Highlighting Single Species

# What do we want to show for these highlight species
species_timeline <- function(common_name){
  annual_totals <- nefsc_stratified %>% 
    filter(comname == common_name) %>% 
    group_by(est_year) %>% 
    summarize(total_strat_biom = sum(strat_total_lwbio_s), 
              avg_ind_mass = weighted.mean(ind_weight_kg, numlen_adj),
              .groups = "drop")
  
  biomass_timeline <- ggplot(annual_totals, aes(est_year, total_strat_biom)) +
    geom_line(group = 1) +
    geom_point() +
    scale_y_continuous(labels = scales::comma_format()) +
    labs(x = "", y = "Stratified Total Biomass (kg)",
         title = str_to_title(common_name),
         subtitle = "Total Biomass Over Time")
  
  bodysize_timeline <- ggplot(annual_totals, aes(est_year, avg_ind_mass)) +
    geom_line(group = 1) +
    geom_point() +
    labs(x = "Year", y = "Individual Bodymass (kg)",
         subtitle = "Individual Bodymass Over Time")
  
  biomass_timeline / bodysize_timeline
}

Atlantic Cod

species_timeline("atlantic cod")

Haddock

species_timeline("haddock")

Spiny Dogfish

species_timeline("spiny dogfish")

Acadian Redfish

species_timeline("acadian redfish")

Winter Skate

species_timeline("winter skate")

Silver Hake

species_timeline("silver hake")

Spatial Patterns

For large regions like Georges Bank and the Gulf of Maine, what kind of patterns are we seeing.

# Load the strata
survey_strata <- read_sf(str_c(res_path, "Shapefiles/BottomTrawlStrata/BTS_Strata.shp"))  %>% 
  clean_names() %>% 
  filter(strata >= 01010 ,
         strata <= 01760,
         strata != 1310,
         strata != 1320,
         strata != 1330,
         strata != 1350,
         strata != 1410,
         strata != 1420,
         strata != 1490) 


# Key to which strata = which regions
strata_key <- list(
  "Georges Bank"          = as.character(13:23),
  "Gulf of Maine"         = as.character(24:40),
  "Southern New England"  = str_pad(as.character(1:12), width = 2, pad = "0", side = "left"),
  "Mid-Atlantic Bight"    = as.character(61:76))


# Assign Areas
survey_strata <- survey_strata %>% 
  mutate(
    strata = str_pad(strata, width = 5, pad = "0", side = "left"),
    strata_num = str_sub(strata, 3, 4),
    area = case_when(
      strata_num %in% strata_key$`Georges Bank` ~ "Georges Bank",
      strata_num %in% strata_key$`Gulf of Maine` ~ "Gulf of Maine",
      strata_num %in% strata_key$`Southern New England` ~ "Southern New England",
      strata_num %in% strata_key$`Mid-Atlantic Bight` ~ "Mid-Atlantic Bight",
    TRUE ~ "Outside Major Study Areas"
  )) %>% 
  select(finstr_id, strata, strata_num, area, a2, str2, set, stratuma, str3, geometry)


# Make trawl data an sf dataset
trawl_sf <- nefsc_stratified %>% st_as_sf(coords = c("decdeg_beglon", "decdeg_beglat"), crs = 4326)

Map of Trawl Regions

For this project we are using different regions composed of collections of individual trawl survey strata.

For comparison sake the areas of the different ecological production units has also been included.

# Plot to check
strata_plot <- ggplot() +
  geom_sf(data = new_england) +
  geom_sf(data = canada) +
  geom_sf(data = survey_strata, aes(fill = area)) +
  coord_sf(xlim = c(-77, -65.5), ylim = c(34, 45.75), expand = FALSE) +
  guides(fill = guide_legend(nrow = 2)) +
  theme_bw() +
  theme(legend.position = "bottom", legend.title = element_blank()) +
  labs(subtitle = "Survey Regions")


# load EPU shapes
epu_sf <- ecodata::epu_sf
epu_plot <- ggplot() +
  geom_sf(data = new_england) +
  geom_sf(data = canada) +
  geom_sf(data = epu_sf, aes(fill = EPU)) +
  coord_sf(xlim = c(-77, -65.5), ylim = c(34, 45.75), expand = FALSE) +
  guides(fill = guide_legend(nrow = 2)) +
  theme_bw() +
  theme(legend.position = "bottom", legend.title = element_blank())+
  labs(subtitle = "Ecological Production Units")

strata_plot | epu_plot

Regional Summaries

# Just Area, all seasons
area_summs <- nefsc_stratified %>% 
  group_by(survey_area) %>% 
  summarise(
    season = "Spring + Fall",
    lw_biomass_kg = sum(sum_weight_kg, na.rm = T),
    n_stations = n_distinct(id),
    lw_biomass_per_station = lw_biomass_kg / n_stations,
    mean_ind_bodymass = weighted.mean(ind_weight_kg, weights = numlen_adj),
    mean_ind_length = weighted.mean(length_cm, weights = numlen_adj),
    .groups = "keep"
  ) %>% 
  ungroup()

# Area x Season
seas_area <- nefsc_stratified %>% 
  group_by(survey_area, season) %>% 
  summarise(
    lw_biomass_kg = sum(sum_weight_kg, na.rm = T),
    n_stations = n_distinct(id),
    lw_biomass_per_station = lw_biomass_kg / n_stations,
    mean_ind_bodymass = weighted.mean(ind_weight_kg, weights = numlen_adj),
    mean_ind_length = weighted.mean(length_cm, weights = numlen_adj),
    .groups = "keep"
  ) %>% 
  ungroup()


# Combine those two
summs_combined <- bind_rows(area_summs, seas_area) %>% 
  mutate(season = factor(season, levels = c("Spring", "Fall", "Spring + Fall")))

summs_combined %>% 
  mutate_if(is.numeric,round, 2) %>% 
  arrange(survey_area,season) %>% 
  knitr::kable()
survey_area season lw_biomass_kg n_stations lw_biomass_per_station mean_ind_bodymass mean_ind_length
GB Spring 438226.3 2996 146.27 0.88 40.33
GB Fall 450348.8 3124 144.16 0.73 37.69
GB Spring + Fall 888575.1 6120 145.19 0.80 38.94
GoM Spring 348424.0 3631 95.96 0.63 35.06
GoM Fall 619849.5 3738 165.82 0.70 36.53
GoM Spring + Fall 968273.5 7369 131.40 0.67 35.84
MAB Spring 752939.2 2544 295.97 0.83 44.44
MAB Fall 104074.9 2476 42.03 0.66 25.95
MAB Spring + Fall 857014.1 5020 170.72 0.78 38.68
SNE Spring 460508.7 2869 160.51 0.54 36.58
SNE Fall 210873.7 2743 76.88 0.45 32.75
SNE Spring + Fall 671382.4 5612 119.63 0.51 35.08
# Year x Area
area_summs_y <- nefsc_stratified %>% 
  group_by(est_year, survey_area) %>% 
  summarise(
    season                 = "Spring + Fall",
    lw_biomass_kg          = sum(sum_weight_kg, na.rm = T),
    n_stations             = n_distinct(id),
    lw_biomass_per_station = lw_biomass_kg / n_stations,
    mean_ind_bodymass      = weighted.mean(ind_weight_kg, weights = numlen_adj),
    mean_ind_length        = weighted.mean(length_cm, weights = numlen_adj),
    strat_total_abund         = sum(strat_total_abund_s),
    strat_total_lwbio         = sum(strat_total_lwbio_s),
    strat_total_biom          = sum(strat_total_biom_s),
    .groups = "keep"
  ) %>% 
  ungroup()

Total Biomass

Total biomass in the figures below is derived from weight at length relationships and reflects the mean expected biomass of the fish caught using the observed distribution and frequency of lengths.

area_summs_y %>% 
  ggplot(aes(est_year, lw_biomass_kg)) +
    geom_line() +
    facet_wrap(~survey_area, ncol = 2) +
    scale_y_continuous(labels = scales::comma_format()) +
    labs(x = "", y = "Total Biomass (kg)")

CPUE

CPUE displayed below is the mean total biomass, derived from weight at length relationships, per station for each region. These values have not been weighted to reflect the difference in area between regions.

area_summs_y %>% 
  ggplot(aes(est_year, lw_biomass_per_station)) +
    geom_line() +
    facet_wrap(~survey_area, ncol = 2) +
    labs(x = "", y = "Adjusted Biomass per Station (kg)")

Stratified Total Abundance

Stratified Total abundance is calculated by taking the catch per station rates for each length class of each species and extending that density to the total areas of the region. Catchability is assumed to be 1 for this calculation though it is certainly lower. This extension of catch rates is highly sensitive to large swings due to high variances and zero inflation of abundances.

area_summs_y %>% 
  ggplot(aes(est_year, strat_total_abund/1000000)) +
    geom_line() +
    scale_y_continuous(labels = scales::comma_format()) +
    facet_wrap(~survey_area, ncol = 2) +
    labs(x = "", y = "Stratified Total Abundance (millions)")

Stratified Total Biomass

The expected biomass estimates for the survdat$biomass data and the expected biomass from L-W regressions should be fairly similar or it would refute the accuracy of length-weight relationshihps.

There also should be no substantial jump in observations immediately following the transition between research vessels as the catch rates have been adjusted to account for that.

This plot should thus show very similar trends for the change in stratified biomass from the biomass weighed during the survey operation (left) and biomass derived from length weight relationships (right).

fscs <- area_summs_y %>% 
  ggplot() +
    geom_line(aes(est_year, strat_total_biom / 1000000, color = "Shipboard Weights")) +
    scale_y_continuous(labels = scales::comma_format()) +
    facet_wrap(~survey_area, ncol = 1) +
    scale_color_gmri(reverse = T) +
    labs(x = "", y = "Stratified Total Biomass FSCS (million kg)") +
    theme(legend.position = "bottom", legend.title = element_blank())

lw <- area_summs_y %>% 
  ggplot() +
    geom_line(aes(est_year, strat_total_lwbio / 1000000, color = "LW Regression Weights")) +
    scale_y_continuous(labels = scales::comma_format()) +
    scale_color_gmri(reverse = F) +
    facet_wrap(~survey_area, ncol = 1) +
    labs(x = "", y = "Stratified Total Biomass LW (million kg)") +
    theme(legend.position = "bottom", legend.title = element_blank())

fscs | lw

 

A work by Adam A. Kemberling

Akemberling@gmri.org