scorpio
scorpio copied to clipboard
A long ne30+FC5AV1C-L run fails with Scorpio on Compy
@JS-WRF-SBM reported this issue when running ne30 + FC5AV1C-L for 1+ years on Compy. The run fails with the following error:
1080: PIO: FATAL ERROR: Aborting... An error occured, Writing variables (number of variables = 374) to file (compy_FC5AV1C-L_ne30_SCREAM-github-16thMay20-wrk_scream_p3.cam.h0.2011-03.nc, ncid=459) using PIO_IOTYPE_PNETCDF iotype failed. Non blocking write for variable (WD_H2O2, varid=385) failed (Number of s. err=-60. Aborting since the error handler was set to PIO_INTERNAL_ERROR... (/qfs/people/shpu881/E3SM/SCREAM-github-16thMay20-wrk/externals/scorpio/src/clib/pio_darray_int.c: 389)
The script below (from @JS-WRF-SBM) reproduces the issue:
#!/bin/csh
# Reproducer driver: creates, sets up, builds and submits a case.
# Each stage further down is guarded by one of the flags set here.
date
# Set both csh tracing variables (echo and verbose) so commands are printed as they run.
set echo verbose
# Stage switches: 0 = skip the stage, >0 = run it.
set fetch_code = 0 # 0 = No, >0 = Yes; not referenced in the visible part of this script
set create_newcase = 1 # delete any existing case directory and run create_newcase
set case_setup = 1 # apply run directory, PE layout and job settings, then run case.setup
set case_build_all = 1 # 0 = No, >0 = Yes; set build options, then run case.build
set case_build_incremental = 0 # run case.build only, without touching build options
set case_run = 1 # 0 = No, >0 = Yes; set run options and submit the job
####################################################################
# Source tree: tag name and checkout location
# (no code is fetched here; these only name an existing checkout)
####################################################################
#setenv CCSMTAG E3SM_20190418
setenv CCSMTAG SCREAM-github-16thMay20-wrk
setenv CCSMROOT /qfs/people/shpu881/E3SM/${CCSMTAG}
#setenv CCSMROOT /compyfs/${USER}/${CCSMTAG}
####################################################################
# Machine, compset, PE layout etc.
####################################################################
# Trailing names on the next line are alternative compsets kept for reference.
setenv COMPSET FC5AV1C-L #F20TRC5-CMIP6 #F2010C5-CMIP6-LR #FSCREAM-LR
setenv RESOLUTION ne30_ne30
# Short resolution label; used below only to build the case name.
setenv MRES ne30
setenv MACH compy
# PTMP is not referenced in the visible part of this script.
setenv PTMP /compyfs/${USER}/bld
# Task and thread counts applied to every component in the case setup stage.
setenv ntasks 1600
setenv nthrds 1
# Optional source-mod directories; only used by the commented-out ln -s lines in the build stage.
#setenv MYSRC ${CCSMROOT}/mods_v1_p3_cmdv
#setenv MYCLM ${CCSMROOT}/mods_clm
# CASE and COMCASE are assembled from the same pieces and are identical.
# COMCASE is not referenced in the visible part of this script.
setenv CASE ${MACH}_${COMPSET}_${MRES}_${CCSMTAG}_scream_p3
setenv COMCASE ${MACH}_${COMPSET}_${MRES}_${CCSMTAG}_scream_p3
# The case directory lives inside the source tree; the run directory is under /compyfs.
setenv CASEROOT ${CCSMROOT}/cases/${CASE}
setenv RUNDIR /compyfs/${USER}/csmruns/${CASE}
####################################################################
# Create the case
####################################################################
if ($create_newcase > 0) then
  # Always start from scratch: any existing case directory is removed first.
  rm -rf "${CASEROOT}"
  cd "${CCSMROOT}/cime/scripts"
  ./create_newcase \
    --case "${CASEROOT}" \
    --project e3sm \
    --mach "${MACH}" \
    --res "${RESOLUTION}" \
    --compset "${COMPSET}"
endif
#====================================================================
# Set up case
#
# Points the case at RUNDIR, gives every component the same PE layout
# (NTASKS = ntasks, NTHRDS = nthrds, ROOTPE = 0), sets the wallclock
# limit and the job queue, then regenerates the case setup.
#====================================================================
if ($case_setup > 0) then
  cd ${CASEROOT}
  ./xmlchange -file env_run.xml -id RUNDIR -val ${RUNDIR}

  # All components share one layout, so loop over them instead of
  # repeating the same three settings eight times.
  foreach comp (ATM LND ROF ICE OCN GLC WAV CPL)
    ./xmlchange -file env_mach_pes.xml -id NTASKS_${comp} -val ${ntasks}
    ./xmlchange -file env_mach_pes.xml -id NTHRDS_${comp} -val ${nthrds}
    ./xmlchange -file env_mach_pes.xml -id ROOTPE_${comp} -val '0'
  end

  ./xmlchange -file env_workflow.xml -id JOB_WALLCLOCK_TIME -val '10:00:00'
  # The option below must use an ASCII hyphen. The original line carried a
  # typographic en-dash in front of val, which is not a valid option prefix.
  ./xmlchange -file env_workflow.xml -id JOB_QUEUE -val 'slurm'

  # Clean first so the setup is regenerated with the settings above.
  ./case.setup --clean
  ./case.setup
endif
#====================================================================
# Build the model
#====================================================================
# Full build: apply build options first, then compile.
if ($case_build_all > 0) then
  cd "${CASEROOT}"
  # Optional source mods; enable together with MYSRC / MYCLM above.
  # ln -s ${MYSRC}/* SourceMods/src.cam # put your mods in here
  # ln -s ${MYCLM}/* SourceMods/src.clm # put your mods in here
  ./xmlchange -file env_build.xml -id DEBUG -val 'FALSE'
  ./xmlchange -file env_build.xml -id CAM_CONFIG_OPTS -append -val ' -cosp'
  #./case.build --clean-all
  ./case.build
endif
# Incremental build: compile only, leaving the build options untouched.
if ($case_build_incremental > 0) then
  cd "${CASEROOT}"
  ./case.build
endif
#####################################################################
# Conduct simulation
#####################################################################
if ($case_run > 0) then
  cd "${CASEROOT}"

  #------------------
  # Run length and restart frequency
  #------------------
  ./xmlchange -file env_run.xml -id RUN_STARTDATE -val '2010-01-01'
  #./xmlchange -file env_run.xml -id RESUBMIT -val '0'
  ./xmlchange -file env_run.xml -id STOP_N -val '1825'
  ./xmlchange -file env_run.xml -id STOP_OPTION -val 'ndays'
  ./xmlchange -file env_run.xml -id REST_N -val '30'
  ./xmlchange -file env_run.xml -id REST_OPTION -val 'ndays'

  #------------------
  # Output handling
  #------------------
  ./xmlchange -file env_run.xml -id DOUT_S -val 'FALSE'

  #------------------
  # Disabled alternatives, kept for reference
  #------------------
  #./xmlchange -file env_run.xml -id PIO_TYPENAME -val 'netcdf'
  #./xmlchange -file env_run.xml -id PIO_NETCDF_FORMAT -val '64bit_data'
  #./xmlchange -file env_workflow.xml -id JOB_WALLCLOCK_TIME -val '12:00:00'
  #./xmlchange -file env_workflow.xml -id USER_REQUESTED_QUEUE -val 'slurm'

  # Submit the job from the case directory.
  ./case.submit
endif