scream icon indicating copy to clipboard operation
scream copied to clipboard

Some bugs and questions about running SCREAM on intel PVC GPU

Open lulu1599 opened this issue 4 months ago • 19 comments

I'm trying to run SCREAM on an Intel PVC GPU but am hitting some errors, and I'm looking for help. Thanks! Here's my test environment: SCREAM code: the latest code on the master branch (https://github.com/E3SM-Project/scream); I'm not sure whether this is the correct version to test on an Intel GPU. Machine: Intel CPU + Intel PVC GPU (1100). Compiler and MPI: Intel oneAPI 2024 (icx, ifx, icpx; mpiicx, mpiifx, mpiicpx).

Here's my config files:

  1. scream/cime_config/machines/config_machines.xml
  <!-- CIME machine entry "PVC1100": supported compilers and MPI libraries, case/input/archive paths, the MPI launch command, and environment variables grouped by compiler and MPI library. -->
  <machine MACH="PVC1100">
    <DESC>HPC, 6430 CPU + 1100 PVC(56core, 48GB)</DESC>
    <NODENAME_REGEX/>
    <OS>LINUX</OS>
    <COMPILERS>oneapi-ifx,oneapi-ifxgpu,gnu</COMPILERS>
    <MPILIBS>impi,openmpi, mpich</MPILIBS>
    <SAVE_TIMING_DIR> </SAVE_TIMING_DIR>
    <CIME_OUTPUT_ROOT>/home/lujingyu/E3SM/SCREAM/cases/scratch/$CASE</CIME_OUTPUT_ROOT>
    <DIN_LOC_ROOT>/home/lujingyu/E3SM/SCREAM/inputdata</DIN_LOC_ROOT>
    <DIN_LOC_ROOT_CLMFORC>/home/lujingyu/E3SM/SCREAM/inputdata/atm/datm7</DIN_LOC_ROOT_CLMFORC>
    <DOUT_S_ROOT>/home/lujingyu/E3SM/SCREAM/cases/archive/$CASE</DOUT_S_ROOT>
    <!--BASELINE_ROOT>/lus/gila/projects/CSC249ADSE15_CNDA/baselines/$COMPILER</BASELINE_ROOT-->
    <!--CCSM_CPRNC>/lus/gila/projects/CSC249ADSE15_CNDA/tools/cprnc/cprnc</CCSM_CPRNC-->
    <GMAKE_J>16</GMAKE_J>
    <TESTS>e3sm_developer</TESTS>
    <NTEST_PARALLEL_JOBS>4</NTEST_PARALLEL_JOBS>
    <BATCH_SYSTEM>none</BATCH_SYSTEM>
    <SUPPORTED_BY>e3sm</SUPPORTED_BY>
    <!-- Task limits: an unqualified default followed by per-compiler overrides. -->
    <MAX_TASKS_PER_NODE>56</MAX_TASKS_PER_NODE>
    <MAX_TASKS_PER_NODE compiler="oneapi-ifx">208</MAX_TASKS_PER_NODE>
    <MAX_TASKS_PER_NODE compiler="oneapi-ifxgpu">56</MAX_TASKS_PER_NODE>
    <MAX_MPITASKS_PER_NODE>56</MAX_MPITASKS_PER_NODE>
    <MAX_MPITASKS_PER_NODE compiler="oneapi-ifx">64</MAX_MPITASKS_PER_NODE>
    <MAX_MPITASKS_PER_NODE compiler="oneapi-ifxgpu">56</MAX_MPITASKS_PER_NODE>
    <PROJECT_REQUIRED>FALSE</PROJECT_REQUIRED>
    <!-- Launch command used when the MPI library is impi: mpirun -np <total_tasks>. -->
    <mpirun mpilib="impi">
      <executable>mpirun</executable>
      <arguments>
        <arg name="num_tasks"> -np {{ total_tasks }}</arg>
      </arguments>
    </mpirun>
    <module_system type="none"/>
     <RUNDIR>$CIME_OUTPUT_ROOT/$CASE/run</RUNDIR>
     <EXEROOT>$CIME_OUTPUT_ROOT/$CASE/bld</EXEROOT>
     <!-- Environment group with no compiler or mpilib filter: NetCDF install location and search paths. -->
     <environment_variables>
        <env name="NETCDF_PATH">/home/lujingyu/nc_pnc2023_intel2024</env>
        <!--env name="PNETCDF_PATH">/home/lujingyu/nc_pnc2023_intel2024</env-->
        <!--env name="MKL_PATH">/opt/intel/oneapi/mkl/2024.0/</env-->
        <env name="LD_LIBRARY_PATH">/home/lujingyu/nc_pnc2023_intel2024/lib:$ENV{LD_LIBRARY_PATH} </env> <!-- -lnetcdf -lnetcdff -lpnetcdf-->
        <env name="PATH">/home/lujingyu/nc_pnc2023_intel2024/bin:$ENV{PATH}</env>
     </environment_variables>
     <!-- Environment group selected by mpilib="impi". -->
     <environment_variables mpilib="impi">
        <env name="I_MPI_DEBUG">10</env> <!--debug level-->
        <env name="I_MPI_OFFLOAD">1</env>
        <!-- <env name="I_MPI_PIN_DOMAIN">omp</env> domain used by Intel MPI for process pinning -->
        <!-- <env name="I_MPI_PIN_ORDER">spread</env> ordering used when pinning processes; spread scatters the processes across the CPU cores -->
        <!-- <env name="I_MPI_PIN_CELL">unit</env> pin processes to the processor's basic execution unit (usually a CPU core) -->
     </environment_variables>
     <!-- Environment group selected by compiler="oneapi-ifxgpu". -->
     <environment_variables compiler="oneapi-ifxgpu"> 
        <env name="ONEAPI_DEVICE_SELECTOR">"opencl:gpu;level_zero:gpu"</env> 
        <!-- <env name="ONEAPI_MPICH_GPU">NO_GPU</env> the oneAPI MPICH library does not use the GPU -->
        <!-- <env name="MPIR_CVAR_ENABLE_GPU">0</env> disable GPU support in the MPICH library -->
        <!-- <env name="romio_cb_read">disable</env> ROMIO (MPI I/O library) setting; the author's note says it disables the "callback" feature (NOTE(review): romio_cb_* normally refers to collective buffering, confirm) -->
        <!-- <env name="romio_cb_write">disable</env> -->
        <env name="SYCL_CACHE_PERSISTENT">1</env> <!--persistence of the cache in the SYCL programming model: 1 enables it-->
        <env name="GATOR_INITIAL_MB">4000MB</env>
        <env name="GATOR_DISABLE">0</env>
        <!-- <env name="GPU_TILE_COMPACT">/soft/tools/mpi_wrapper_utils/gpu_tile_compact.sh</env> --> <!--author's note: manages the compactness of the GPU memory layout (NOTE(review): confirm what this script does)-->
        <env name="FI_CXI_DEFAULT_CQ_SIZE">131072</env>
        <env name="FI_CXI_CQ_FILL_PERCENT">20</env>
    </environment_variables>
    <!-- Environment group selected by compiler="oneapi-ifx". -->
    <environment_variables compiler="oneapi-ifx">
        <env name="LIBOMPTARGET_DEBUG">0</env><!--default 0, max 5 -->
        <env name="OMP_TARGET_OFFLOAD">DISABLED</env><!--default OMP_TARGET_OFFLOAD=MANDATORY-->
        <env name="FI_CXI_DEFAULT_CQ_SIZE">131072</env>
        <env name="FI_CXI_CQ_FILL_PERCENT">20</env>
        <env name="MPIR_CVAR_ENABLE_GPU">0</env>
        <env name="GPU_TILE_COMPACT"> </env>
    </environment_variables>
    <resource_limits>
        <resource name="RLIMIT_STACK">-1</resource>
    </resource_limits>
  </machine>
  1. scream/cime_config/machines/cmake_macros/oneapi-ifxgpu.cmake
# Compiler macros for the Intel oneAPI compilers (icx / icpx / ifx) with SYCL
# switched on for the Kokkos build. Statements are grouped by purpose; within
# each variable the order of appended flags is kept exactly as before.

# Serial compilers and the matching Intel MPI wrappers.
set(SCC "icx")
set(SCXX "icpx")
set(SFC "ifx")
set(MPICC "mpiicx")
set(MPICXX "mpiicpx")
set(MPIFC "mpiifx")
set(HAS_F2008_CONTIGUOUS "TRUE")

# Threaded builds add -qopenmp to all three languages and to the link line.
# This stays ahead of the unconditional appends below so that -qopenmp remains
# the first flag added to each of these variables.
if (compile_threaded)
  foreach(ifxgpu_flag_var CMAKE_C_FLAGS CMAKE_Fortran_FLAGS CMAKE_CXX_FLAGS CMAKE_EXE_LINKER_FLAGS)
    string(APPEND ${ifxgpu_flag_var} " -qopenmp")
  endforeach()
endif()

# Flags applied regardless of build type.
string(APPEND CMAKE_C_FLAGS " -traceback -fp-model precise -std=gnu99")
string(APPEND CMAKE_CXX_FLAGS " -traceback -fp-model precise")
string(APPEND CMAKE_Fortran_FLAGS " -traceback -convert big_endian -assume byterecl -assume realloc_lhs -fp-model precise ")
string(APPEND CMAKE_Fortran_FORMAT_FIXED_FLAG " -fixed -132")
string(APPEND CMAKE_Fortran_FORMAT_FREE_FLAG " -free")
string(APPEND CPPDEFS " -DFORTRANUNDERSCORE -DNO_R16 -DCPRINTEL -DHAVE_SLASHPROC -DHIDE_MPI")

# Release configuration: the same optimization level for every language.
foreach(ifxgpu_lang C Fortran CXX)
  string(APPEND CMAKE_${ifxgpu_lang}_FLAGS_RELEASE " -O2")
endforeach()

# Debug configuration: C and C++ share one flag set; Fortran adds -check/-fpe0
# options on top of it.
foreach(ifxgpu_lang C CXX)
  string(APPEND CMAKE_${ifxgpu_lang}_FLAGS_DEBUG " -O0 -g")
endforeach()
string(APPEND CMAKE_Fortran_FLAGS_DEBUG " -O0 -g -check uninit -check bounds -check pointers -fpe0 -check noarg_temp_created")

# Link-line flags added unconditionally (after -qopenmp when threaded).
string(APPEND CMAKE_EXE_LINKER_FLAGS " -fiopenmp -fopenmp-targets=spir64 ")

# SYCL / Kokkos configuration.
set(USE_SYCL "TRUE")
set(EAMXX_ENABLE_GPU TRUE CACHE BOOL "")
# Flags the author left commented out next to SYCL_FLAGS:
#   -linux-intel_gpu_pvc -Xsycl-target-backend Xe-MAX  -sycl-std=121
string(APPEND SYCL_FLAGS " -fsycl -fsycl-targets=spir64  ")
string(APPEND KOKKOS_OPTIONS " -DKokkos_ARCH_INTEL_PVC=On -DKokkos_ENABLE_SYCL=On -DCMAKE_CXX_STANDARD=17")
  1. scream/components/eamxx/cmake/machine-files/PVC1100.cmake
 # EAMxx machine file for PVC1100.
 # Include common.cmake from this file's directory, then call common_setup()
 # (presumably defined by that include; verify against common.cmake).
 include(${CMAKE_CURRENT_LIST_DIR}/common.cmake)
common_setup()
# Load all Kokkos settings from EKAT's machine file; EKAT_MACH_FILES_PATH must
# already be set by the time this line runs.
include (${EKAT_MACH_FILES_PATH}/kokkos/intel-pvc.cmake)
  1. scream/externals/ekat/cmake/machine-files/PVC1100.cmake
# Load the PVC architecture with the SYCL backend for Kokkos. The settings live
# in kokkos/intel-pvc.cmake, resolved relative to this file's directory.
include (${CMAKE_CURRENT_LIST_DIR}/kokkos/intel-pvc.cmake)

Here's my case (is F2000-SCREAM-SA at ne30pg2_ne30pg2 the best test case?):

./create_newcase --case test1 --compset F2000-SCREAM-SA --res ne30pg2_ne30pg2 --mach PVC1100 --compiler oneapi-ifxgpu --mpilib impi 

Here are all my log files (bld.zip); the error is:

/opt/intel/oneapi/compiler/2024.0/bin/compiler/../../include/sycl/types.hpp:2382:17: error: ambiguous partial specializations of 'is_device_copyable<const Kokkos::Experimental::Impl::SYCLFunctionWrapper<Kokkos::Impl::ViewCopy<Kokkos::View<double *****, Kokkos::LayoutRight, Kokkos::Device<Kokkos::Experimental::SYCL, Kokkos::AnonymousSpace>, Kokkos::MemoryTraits<0>>, Kokkos::View<const double *****, Kokkos::LayoutRight, Kokkos::Device<Kokkos::Serial, Kokkos::AnonymousSpace>, Kokkos::MemoryTraits<0>>, Kokkos::LayoutLeft, Kokkos::Experimental::SYCL, 5, long>, Kokkos::Experimental::Impl::SYCLInternal::USMObjectMem<sycl::usm::alloc::host>>>'
 2382 |   static_assert(is_device_copyable<FieldT>::value ||

I'm new to Kokkos and SYCL, so I'm unsure how Kokkos and SYCL relate to each other and which backend target to use for the Intel PVC GPU. Is there anything wrong in my config files that leads to this error? Looking forward to your reply, thanks again!

Also, I have found some small bugs:

  1. In scream/externals/ekat/extern/kokkos/core/src/../../tpls/desul/include/desul/atomics/SYCLConversions.hpp, line 23: with Intel oneAPI 2024 the namespace seems to be `::sycl` rather than `::sycl::ext::oneapi`;
  2. In scream/components/homme/src/share/compose/cedr_kokkos.hpp, line 21: there is an unexpected `>` in `typedef Kokkos::Experimental::SYCL> CedrGpuSpace;`.

lulu1599 avatar Feb 28 '24 03:02 lulu1599