10  Puerto Rico tournament index

In this section we consider tournament data from the U.S. Caribbean as a potential predictive index for dolphin abundance in the South Atlantic. Because dolphin migrate through the Caribbean region in the months before arriving in South Atlantic waters, peaks in catch-per-unit-effort for fisheries operating in the Caribbean could indicate high abundances later in the year in waters farther along their migration route. Tournament data are available from Puerto Rico’s Departamento de Recursos Naturales y Ambientales, División de Pesquería Recreativa y Deportiva, going back to the year 2000. Tournament data were requested and received from the institution in June 2022.

10.1 Upload and clean data set

We first input the data set, parse out the dates to extract month and year, and standardize labeling of months.

# clear workspace
# Removes every non-hidden object from the current environment so this
# section starts from a clean slate. Attached packages and options() are
# not affected.
rm(list = ls())

if(!require("dplyr")) install.packages("dplyr")
if(!require("emmeans")) install.packages("emmeans")

library(dplyr)
library(emmeans) # Best for extracting standardized indices

# import data -------------------------------------
# The raw file does not appear to be UTF-8: with the default encoding the
# accented characters in the Location column come through as raw bytes
# (e.g. "\xe1"). Declaring the source encoding as Latin-1 lets read.csv()
# re-encode the text on input so the strings are valid downstream.
d <- read.csv("data/PRDNER-DolTournamentData.csv", fileEncoding = "latin1")
#apply(d, 2, table)
head(d)
      Date                       Location Total.Number.Particpants
1 May-6-00 Club N\xe1utico de La Parguera                      124
2 May-6-00 Club N\xe1utico de La Parguera                      124
3 May-6-00 Club N\xe1utico de La Parguera                      124
4 May-6-00 Club N\xe1utico de La Parguera                      124
5 May-6-00 Club N\xe1utico de La Parguera                      124
6 May-6-00 Club N\xe1utico de La Parguera                      124
  Total.Number.of.Boats Average.Time.Spent.Fishing Tournament.Duration
1                    31                       10.5                   2
2                    31                       10.5                   2
3                    31                       10.5                   2
4                    31                       10.5                   2
5                    31                       10.5                   2
6                    31                       10.5                   2
   Fish.Type   Fish.name Sex Boarded Bycatch Lenght..mm. Weight..Kg.
1 8835290101 Dolphinfish   F    TRUE   FALSE        1675        9.09
2 8835290101 Dolphinfish   F    TRUE   FALSE        1675        6.81
3 8835290101 Dolphinfish   M    TRUE   FALSE        1675       10.45
4 8835290101 Dolphinfish   F    TRUE   FALSE        1625       10.00
5 8835290101 Dolphinfish   F    TRUE   FALSE        1625       12.27
6 8835290101 Dolphinfish   F    TRUE   FALSE        1625       10.00
  Distance.to.coast Zone
1                      2
2                      2
3                      2
4                      2
5                      2
6                      2
# clean dates and extract month day year ---------
# Dates are stored as text such as "May-6-00". Split each one on "-" into
# month / day / year text fields.
d$Date <- as.character(d$Date)

date_parts <- strsplit(d$Date, "-")

# A well-formed date has three pieces. Some records appear to hold only a
# month name with no separators; that single piece is recycled into all
# three fields and the bogus day / year values are blanked out below.
# Empty or two-piece dates cannot be interpreted, so stop with a message
# naming the offending rows instead of mis-assigning them.
n_pieces <- lengths(date_parts)
if (any(n_pieces %in% c(0L, 2L))) {
  stop("Unparseable Date value(s) at row(s): ",
       paste(which(n_pieces %in% c(0L, 2L)), collapse = ", "),
       call. = FALSE)
}

# Pull the k-th piece of every date, recycling single-piece entries.
date_piece <- function(k) {
  vapply(date_parts, function(p) rep_len(p, 3L)[k], character(1))
}

# Assign by column name rather than by position, so the result does not
# depend on how many columns the CSV happens to contain.
d$month <- date_piece(1L)
d$day <- date_piece(2L)
d$year <- date_piece(3L)
#head(d)

# clean up errors and check outputs ---------------------
# Reduce month names to three letters and recode the variant spellings of
# April so every label matches an entry of month.abb.
d$month <- substr(d$month, 1, 3)
d$month[d$month %in% c("Abr", "Arp")] <- "Apr"

# Month-only dates leave the month name in the day and year fields; mark
# those as missing so the numeric conversion below is clean.
month_only <- c("November", "December")
d$day[d$day %in% month_only] <- NA
d$year[d$year %in% month_only] <- NA

d$day <- as.numeric(d$day)
d$year <- as.numeric(d$year)

# standardize year format
# Two-digit years below 83 are read as 20xx; the remaining two-digit years
# (83-99) are read as 19xx. The second test is limited to values below 100
# so that a year already written with four digits is left alone.
is_2000s <- !is.na(d$year) & d$year < 83
d$year[is_2000s] <- d$year[is_2000s] + 2000
is_1900s <- !is.na(d$year) & d$year < 100
d$year[is_1900s] <- d$year[is_1900s] + 1900

# numeric month (1-12) from the three-letter abbreviation
d$mon <- match(d$month, month.abb)

table(d$month)

 Apr  Aug  Dec  Feb  Jan  Jul  Jun  Mar  May  Nov  Oct  Sep 
7723   57  614 2360 2160   35   23 7995 1496 1036 1344  273 
# sanity check: distribution of the parsed day-of-month values
table(d$day)

   1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16 
 694  829  792  689  718 1123  618  753  733 1417  493  775 1061  936  278  517 
  17   18   19   20   21   22   23   24   25   26   27   28   29   30   31 
 894  859  524 1213  726  875  880  655  661  808  854 1192  850  563 1050 
# sanity check: distribution of the standardized four-digit years
table(d$year)

1983 1984 1985 1986 1987 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 
  80  153    5   53   19  843 1254 1413 1040 1303  944 1411 1418 1209 1190  538 
2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 
1263 1815 1321 1153 1512  916  944  808 1213   51   91 1070 

In keeping with the definition of seasons in other parts of the MSE, we group December with the following January and February in a winter season.

# fix so Dec is grouped with following year -----------------
# Flag the December rows once, then (a) recode their month to 0.5 so that
# December sorts ahead of January and (b) build year2, a season year in
# which December is shifted into the following calendar year.
is_dec <- d$mon %in% 12
d$mon[is_dec] <- 0.5
d$year2 <- ifelse(is_dec, d$year + 1, d$year)
# row 1: record counts by calendar year; row 2: counts by season year
rbind(table(d$year), table(d$year2))
     1983 1984 1985 1986 1987 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009
[1,]   80  153    5   53   19  843 1254 1413 1040 1303  944 1411 1418 1209 1190
[2,]   80  153    5   53   19  789 1220 1501 1030 1313  944 1411 1418  863 1536
     2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022
[1,]  538 1263 1815 1321 1153 1512  916  944  808 1213   51   91 1070
[2,]  538 1263 1815 1321 1153 1512  915  945  801 1190   81   85 1076
#head(d)
#tail(d)

10.2 Viewing the raw data

Let’s take a look at some of the data columns using barplots and histograms.

# look at data columns ----------------------------------

# Bar charts of record counts for each categorical field, on a 3 x 2 grid.
par(mfrow = c(3, 2))
count_fields <- c(mon = "Month", year = "Year", day = "Day",
                  Sex = "Sex", Zone = "Zone", Bycatch = "Bycatch")
for (field in names(count_fields)) {
  barplot(table(d[[field]]),
          main = paste("Number of observations -", count_fields[[field]]))
}

# Histograms of the tournament-level fields, on a 2 x 2 grid. The calls are
# written out one by one because hist() builds its default x-axis label
# from the expression passed to it.
par(mfrow = c(2, 2))
hist(d$Total.Number.Particpants, main = "Total number of participants")
hist(d$Total.Number.of.Boats, main = "Total number of boats")
hist(d$Average.Time.Spent.Fishing, main = "Average time spent fishing")
hist(d$Tournament.Duration, main = "Tournament duration")

There are a few NAs in the tournament data columns. We need to consider what details are available and what factors we can feasibly use in the standardization process.

# find NAs in tournament details -------------------
# TRUE counts the records that have no boat count
table(is.na(d$Total.Number.of.Boats))

FALSE  TRUE 
24331   785 
# TRUE counts the records that have no participant count
table(is.na(d$Total.Number.Particpants))

FALSE  TRUE 
24189   927 
# TRUE counts the records missing both the participant and the boat count
table(is.na(d$Total.Number.Particpants) & is.na(d$Total.Number.of.Boats))

FALSE  TRUE 
24332   784 
# TRUE counts the records that have no average fishing time
table(is.na(d$Average.Time.Spent.Fishing))

FALSE  TRUE 
23290  1826 
# Scatter participants against boats, then overlay the least-squares line
# from regressing participants on boats.
with(
  d,
  plot(Total.Number.of.Boats, Total.Number.Particpants,
       xlab = "total number of boats",
       ylab = "total number of participants")
)
out <- lm(d$Total.Number.Particpants ~ d$Total.Number.of.Boats)
abline(out, col = 8)

The total number of boats is highly correlated with the total number of participants, and the linear regression indicates that most boats have 4 participants.

10.3 Data preparation for standardization

In preparation for creating a standardized catch-per-unit-effort (CPUE) index that can serve as a proxy for abundance, we have to specify nuisance factors in the data set that have to be accounted for. We create a unique tournament ID number by combining the date (day, month and year) with the number of participants.

# create unique tournament ID number -------------------------
# Date key: day, month abbreviation and year run together (e.g. "10Feb2007").
d$dat <- paste0(d$day, d$month, d$year)
# Number of distinct IDs when the date key is combined with the number of
# participants.
length(unique(paste0(d$dat, d$Total.Number.Particpants)))
[1] 386
# Number of distinct IDs when the date key is combined with the location.
length(unique(paste0(d$dat, d$Location)))
[1] 387
# Number of distinct IDs when the date key is combined with the boat count.
length(unique(paste0(d$dat, d$Total.Number.of.Boats)))
[1] 386
# Number of distinct IDs when the date key is combined with the tournament
# duration.
length(unique(paste0(d$dat, d$Tournament.Duration)))
[1] 366
# Tournament ID used from here on: date key followed by the number of
# participants.
d$datID <- paste0(d$dat, d$Total.Number.Particpants)

#barplot(table(d$mon))

We remove all cases where dolphin is not listed as the target species; we will use only directed dolphin trips in the standardization. We also specify the months that we want to include in the index of abundance. Here we specify December to March. With this subset of months, we have 128 unique tournaments in the database across 121 different dates.

# remove non-target cases; use only directed trips -------------------
# keep an untouched copy of the full data set before filtering
dfull <- d
# retain the records where Bycatch is FALSE; missing entries are dropped
d <- d[d$Bycatch %in% FALSE, ]

# subset by season -------------------
# months are coded 0.5 = December, 1 = January, 2 = February, ..., so the
# range 0 to 3 keeps December through March; rows with no month are dropped
in_season <- !is.na(d$mon) & d$mon >= 0 & d$mon <= 3
d <- d[in_season, ]

#length(unique(d$Date))
# number of distinct tournament dates left after filtering
length(unique(d$dat))
[1] 121
# number of distinct tournament IDs left after filtering
length(unique(d$datID))
[1] 128

Next we calculate the total weight by tournament, by summing the reported weights across each tournament ID. We then set up a new data frame where each tournament is a row, and columns represent the attributes of each tournament (year, month, zone, participants, number of boats, fishing time, duration, total weight and total abundance). We also calculate the average weight by dividing total weight by total abundance.

# calculate total by tournament ----------------------------
# Per-tournament aggregates keyed by datID: summed fish weight (records
# with a missing weight are skipped), number of catch records, numeric
# month and season year.
totWT <- with(d, tapply(Weight..Kg., datID, sum, na.rm = TRUE))
totN <- table(d$datID)
mon <- with(d, tapply(mon, datID, mean, na.rm = TRUE))
year <- with(d, tapply(year2, datID, mean, na.rm = TRUE))

# checks on month and tournament assignments ----------------------
#table(d$year[d$mon == 0.5], d$year2[d$mon == 0.5])
#table(d$year[d$mon != 0.5], d$year2[d$mon != 0.5])

# Largest within-tournament standard deviation of the participant count.
# should all be zeros
max(with(d, tapply(Total.Number.Particpants, datID, sd)), na.rm = TRUE)
[1] 0
# largest within-tournament SD of the boat count; 0 means the value is
# constant within every tournament
max(tapply(d$Total.Number.of.Boats, d$datID, sd), na.rm = T)
[1] 0
# largest within-tournament SD of the average fishing time; 0 means the
# value is constant within every tournament
max(tapply(d$Average.Time.Spent.Fishing, d$datID, sd), na.rm = T)
[1] 0
# assign attributes of tournaments -------------------------------
# Collapse a record-level column to one value per tournament ID by taking
# its mean, ignoring missing entries.
tournament_mean <- function(x) tapply(x, d$datID, mean, na.rm = TRUE)

Npart <- tournament_mean(d$Total.Number.Particpants)
Nboat <- tournament_mean(d$Total.Number.of.Boats)
Ftime <- tournament_mean(d$Average.Time.Spent.Fishing)
dura <- tournament_mean(d$Tournament.Duration)
zone <- tournament_mean(d$Zone)

# new data frame for standardization ----------------------------------
# One row per tournament ID; the column names come from the object names.
dat <- data.frame(cbind(year, mon, zone, Npart, Nboat, Ftime, dura, totWT, totN))
# Average weight per catch record.
# NOTE(review): totWT skips records with a missing weight while totN counts
# every record, so avwt would be biased low for any tournament that has
# unweighed fish - confirm whether such records exist.
dat$avwt <- dat$totWT / dat$totN

head(dat)
             year mon zone Npart Nboat Ftime dura   totWT totN     avwt
10Feb2007140 2007   2    1   140    35   8.0    1  347.64   49 7.094694
10Feb200824  2008   2    1    24    15   7.0    1  264.71   33 8.021515
10Jan2009332 2009   1    1   332    83   8.0    2  770.44  147 5.241088
10Jan2014188 2014   1    1   188    47   7.5    2 1479.82  249 5.943052
10Jan2015136 2015   1    1   136    34   7.5    2  794.72  264 3.010303
10Mar2001192 2001   3    2   192    48   8.0    2 2499.33  272 9.188713

There are a number of missing values in this data frame; zeros recorded for the number of boats, number of participants or fishing time are also treated as missing. For fishing time, we impute the median of the known values into the missing values. For missing values for the number of boats, we take the number of participants and divide by 4 (given the relationship established above). If the number of participants is also missing, we impute the median number of boats into the unknown values.

# fill in NAs ---------------------------------------------------
# A recorded zero for boats, participants or fishing time is treated as a
# missing value.
dat$Nboat[dat$Nboat %in% 0] <- NA
dat$Npart[dat$Npart %in% 0] <- NA
dat$Ftime[dat$Ftime %in% 0] <- NA

#hist(dat$Ftime)
# rows with no fishing time
which(is.na(dat$Ftime))
[1]  15  18  68  72  86  94 101 127 128
# Fill the missing fishing times with the median of the known ones.
ftime_median <- median(dat$Ftime, na.rm = TRUE)
dat$Ftime <- replace(dat$Ftime, is.na(dat$Ftime), ftime_median)

#hist(dat$Nboat)
# rows with no boat count
which(is.na(dat$Nboat))
[1]  15  83  94 122 128
# rows with no participant count
which(is.na(dat$Npart))
[1]  11  12  15  94 101 110 122 128
# participant counts available for the rows that lack a boat count
dat$Npart[is.na(dat$Nboat)]
[1] NaN  33 NaN  NA NaN
#plot(dat$Nboat, dat$Npart)
#dat$Npart / dat$Nboat
# Step 1: where the boat count is missing, derive it as participants / 4.
no_boats <- is.na(dat$Nboat)
dat$Nboat[no_boats] <- dat$Npart[no_boats] / 4
#dat$Npart[is.na(dat$Nboat)]
# Step 2: rows still missing (no participant count either) take the median
# boat count, which at this point includes the values derived in step 1.
still_missing <- is.na(dat$Nboat)
dat$Nboat[still_missing] <- median(dat$Nboat, na.rm = TRUE)

# confirm nothing is left missing
which(is.na(dat$Nboat))
integer(0)
# confirm no fishing times are left missing after imputation
which(is.na(dat$Ftime))
integer(0)
# confirm every tournament has a catch count
which(is.na(dat$totN))
integer(0)

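Finally, we subset the years. Data are very sparse prior to 2000, so we only include data from 2000 onward; the year range is set by `yrs` in the code below (currently 2000 through 2020). We calculate the catch-per-unit-effort, defining catch as the total abundance, and effort as the number of boats multiplied by the average time spent fishing. A weight-based CPUE is calculated in the same way from the total weight.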

# subset years -----------------------------------

# analysis window (inclusive)
yrs <- 2000:2020

# overwrite every missing cell (NaN included) with a plain NA
dat[is.na(dat)] <- NA
# keep the tournaments whose season year lies inside the window; rows with
# an unknown year are dropped
in_window <- !is.na(dat$year) & dat$year >= min(yrs) & dat$year <= max(yrs)
dat <- dat[in_window, ]

# calculate CPUE -----------------------------------
# effort = number of boats x average time spent fishing
dat$eff <- dat$Nboat * dat$Ftime   # dat$dura
# catch per unit effort in numbers and in weight
dat$cpue <- dat$totN / dat$eff
dat$cpueW <- dat$totWT / dat$eff

# rows with an infinite CPUE in numbers; expect none
which(dat$cpue == Inf)
integer(0)
# rows with an infinite CPUE in weight; expect none
which(dat$cpueW == Inf)
integer(0)
#dat$cpue[which(dat$cpue == "Inf")] <- NA

# Bar chart of how many tournaments fall in each season year, with the
# axis labels drawn perpendicular to the axes (las = 2).
tournaments_per_year <- table(dat$year)
barplot(tournaments_per_year, las = 2, main = "Number of tournaments per year")

The plot shows the number of tournaments per year. We can see that there are relatively few events for each year.