VAST 5.2.0 estimation fail
I am working on getting VAST (V5.2.0) up and running with NWFSC survey data and am getting the following error during the estimation:
Error in solve.default(h, g) : system is computationally singular: reciprocal condition number = 1.50334e-16
This looks like a TMB problem?
The script is here:
# Set inputs
in_species <- "Anoplopoma fimbria"
in_maxyear <- 2017

# Logical if you want Pass as a catchability covariate
in_usepass <- TRUE

# Strata limits; run the model, then calculate area-specific indices
(strata.limits <- data.frame(
  "STRATA"         = c("Coastwide", "CA", "OR", "WA"),
  "north_border"   = c(49.0, 42.0, 46.0, 49.0),
  "south_border"   = c(32.0, 32.0, 42.0, 46.0),
  "shallow_border" = c(55, 55, 55, 55),
  "middle_border"  = c(183, 183, 183, 183),
  "deep_border"    = c(1280, 1280, 1280, 1280) ))

# Inputs you might want to change
setwd("C:\\Sablefish2019\\Data\\NWFSCSurveyIndex")
HomeDir <- getwd()

# Number of "knots" used when Method="Mesh"
n_x <- 250

# Distribution specification
ObsModel = c(2, 1)
# ObsModel <- c(2, 0)
# ObsModel <- c(1, 0)   # lognormal errors
# =============================================
require(VAST)
library(maps)
library(mapdata)
# install.packages("INLA")
library(INLA)

# Pull the survey data
Sablefish <- JRWToolBox::dataWareHouseTrawlCatch(Species = "Anoplopoma fimbria", project = c("WCGBTS.Combo"))
Data_Set <- Sablefish

# Version
(Version <- gsub("\\.cpp", "", tail(list.files(R.home(file.path("library", "VAST", "executables"))), 1)))

# Define the spatial resolution for the model, and whether to use a grid or mesh approximation
Method = c("Grid", "Mesh", "Spherical_mesh")[2]
grid_size_km = 25   # Value only matters if Method="Grid"
Kmeans_Config = list( "randomseed"=1, "nstart"=100, "iter.max"=1e3 )   # Controls the K-means algorithm
# Model settings
FieldConfig = c("Omega1"=1, "Epsilon1"=1, "Omega2"=1, "Epsilon2"=1)
RhoConfig = c("Beta1"=0, "Beta2"=0, "Epsilon1"=0, "Epsilon2"=0)
OverdispersionConfig = c("Delta1"=1, "Delta2"=1)

# Outputs calculated after the model runs; essentially, reports to create
Options = c("SD_site_density"=0, "SD_site_logdensity"=0, "Calculate_Range"=1, "Calculate_evenness"=0,
            "Calculate_effective_area"=1, "Calculate_Cov_SE"=0, "Calculate_Synchrony"=0, "Calculate_Coherence"=0)
setwd(HomeDir) # Make sure that the working directory is back where it started
# Region that tells the software which grid to use
Region = "California_current"
# Save-file settings
DateFile = paste0(getwd(), "/VAST_output/")   # Simple, but requires manually changing the directory to save different runs
(DateFile <- file.path(HomeDir, paste0("VAST_output_", Sys.Date(), "_", gsub(" ", "", in_species), "_nx=", n_x, .Platform$file.sep)))
dir.create(DateFile)
#save all settings
Record = ThorsonUtilities::bundlelist( c("Data_Set","Version","Method","grid_size_km","n_x","FieldConfig","RhoConfig","OverdispersionConfig","ObsModel","Kmeans_Config") )
save( Record, file=file.path(DateFile,"Record.RData"))
capture.output( Record, file=paste0(DateFile,"Record.txt"))
# Set up the data frame from the data set
Data_Geostat = data.frame(
  Catch_KG      = Data_Set[, grep("kg", colnames(Data_Set))],
  Year          = Data_Set$Year,
  Vessel        = paste(Data_Set$Vessel, Data_Set$Year, sep="_"),
  AreaSwept_km2 = Data_Set$Area_Swept_ha/100,
  Lat           = Data_Set$Latitude_dd,
  Lon           = Data_Set$Longitude_dd,
  Pass          = Data_Set$Pass - 1.5 )
Data_Geostat = na.omit( Data_Geostat )
# Shows the data being used; read this document
pander::pandoc.table( Data_Geostat[1:6,], digits=3 )
# Extrapolation grid
Extrapolation_List = SpatialDeltaGLMM::Prepare_Extrapolation_Data_Fn( Region=Region, strata.limits=strata.limits )

# Derived objects for spatio-temporal estimation
Spatial_List = SpatialDeltaGLMM::Spatial_Information_Fn( grid_size_km=grid_size_km, n_x=n_x, Method=Method,
  Lon=Data_Geostat[,"Lon"], Lat=Data_Geostat[,"Lat"], Extrapolation_List=Extrapolation_List,
  randomseed=Kmeans_Config[["randomseed"]], nstart=Kmeans_Config[["nstart"]], iter.max=Kmeans_Config[["iter.max"]],
  DirPath=DateFile, Save_Results=FALSE )
# Add knots to Data_Geostat
Data_Geostat = cbind( Data_Geostat, knot_i = Spatial_List$knot_i )
head(Data_Geostat)
# Build the model; this is where you could specify new covariates using Data_Fn...use Pass
DateFile <- gsub("nx", "Pass_nx", DateFile)
dir.create(DateFile)
Q_ik <- as.matrix(Data_Geostat[, "Pass", drop=FALSE])
# Note: removed the -1 after Vessel; why was this here?
TmbData = VAST::Data_Fn( "Version"=Version, "FieldConfig"=FieldConfig, "OverdispersionConfig"=OverdispersionConfig,
  "RhoConfig"=RhoConfig, "ObsModel"=ObsModel, "c_i"=rep(0, nrow(Data_Geostat)),
  "b_i"=Data_Geostat[,"Catch_KG.Total_sp_wt_kg"], "a_i"=Data_Geostat[,"AreaSwept_km2"],
  "v_i"=as.numeric(Data_Geostat[,"Vessel"])-1, "s_i"=Data_Geostat[,"knot_i"]-1, "t_i"=Data_Geostat[,"Year"],
  "a_xl"=Spatial_List$a_xl, "Q_ik"=Q_ik, "MeshList"=Spatial_List$MeshList, "GridList"=Spatial_List$GridList,
  "Method"=Spatial_List$Method, "Options"=Options )
################
# Do the estimation
################

# Build the TMB object
TmbList = VAST::Build_TMB_Fn( "TmbData"=TmbData, "RunDir"=DateFile, "Version"=Version, "RhoConfig"=RhoConfig,
  "loc_x"=Spatial_List$loc_x, "Method"=Method )
Obj = TmbList[["Obj"]]

# Run optimizer with Newton steps to improve convergence
Opt = TMBhelper::Optimize( obj=Obj, lower=TmbList[["Lower"]], upper=TmbList[["Upper"]], getsd=TRUE,
  newtonsteps=2, savedir=DateFile, bias.correct=TRUE,
  bias.correct.control=list(sd=FALSE, split=NULL, nsplit=1, vars_to_correct="Index_cyl") )
Could you please resend the script and the necessary RData file using the minimal-reproducible-example format specified here: https://github.com/nwfsc-assess/geostatistical_delta-GLMM/wiki/How-to-create-a-minimal-example...?
Was there ever a resolution found for this error message? I recently encountered it as well and was curious if the cause has already been identified...
It usually means that there's some estimability problem. Perhaps run fit_model( getsd=FALSE, newtonsteps=0 ) and explore the Hessian manually, or try TMBhelper::check_estimability.
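For example, something along these lines (an untested sketch reusing the Obj, TmbList, and DateFile objects from the script above; with this older Data_Fn/Build_TMB_Fn workflow the equivalent of the fit_model arguments is to pass getsd=FALSE and newtonsteps=0 to Optimize):

```r
# Untested sketch: refit without the SD report or Newton steps, so nothing has to
# invert the Hessian during the fit...
Opt0 = TMBhelper::Optimize( obj=Obj, lower=TmbList[["Lower"]], upper=TmbList[["Upper"]],
                            getsd=FALSE, newtonsteps=0, savedir=DateFile )

# ...then flag fixed effects that look non-estimable given the Hessian at those values
TMBhelper::check_estimability( Obj )
```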
Thanks Jim! This was indeed the issue. Interestingly, running the same model a second time gave the more familiar error message about the hessian not being positive definite.
Yeah, it's a question of whether the Newton step or the sdreport call is the first to exceed the machine tolerance for a singular Hessian, which presumably depends on the randomized starting values.
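For anyone else chasing this, a small untested sketch of the manual check (Obj and the no-Newton-step fit Opt0 are assumed from the sketch above; optimHess, rcond, and eigen are base R), showing which of the two failure modes the Hessian is closest to tripping:

```r
# Untested sketch: evaluate the fixed-effect Hessian at the optimum and compare it
# against the two error messages seen in this thread.
H = optimHess( Opt0$par, fn=Obj$fn, gr=Obj$gr )

rcond( H )                                   # near .Machine$double.eps (~2.2e-16) -> "computationally singular"
min( eigen( H, only.values=TRUE )$values )   # <= 0 -> "Hessian not positive definite"

# The parameter loading hardest on the weakest eigen-direction is the usual suspect
eig = eigen( H )
names( Opt0$par )[ which.max( abs( eig$vectors[ , which.min( abs( eig$values ) ) ] ) ) ]
```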