quda
quda copied to clipboard
GPU_MULTIGRID_DOUBLE build fail
Enabling GPU_MULTIGRID_DOUBLE in the feature/generic_kernel branch causes a build failure with the following error:
/scratch/CPviolator/work/QUDA_COMPRESSED_TRLM/quda/lib/../include/kernels/coarse_op_preconditioned_mma.cuh(122): error: argument of type "double *" is incompatible with parameter of type "float *"
detected during:
instantiation of "void quda::mma::CalculateYhatMMA<Arg>::operator()() [with Arg=quda::mma::CalculateYhatMMAArg<const quda::CalculateYhatArg<double, quda::gauge::FieldOrder<double, 64, 1, (QudaGaugeFieldOrder)13, true, double>, quda::gauge::FieldOrder<double, 64, 1, (QudaGaugeFieldOrder)13, true, double>, quda::gauge::FieldOrder<float, 64, 1, (QudaGaugeFieldOrder)13, true, float>, 64, 4, 2, false>, 64, 64, 64, 32, 16, 1>]"
/scratch/CPviolator/work/QUDA_COMPRESSED_TRLM/quda/lib/../include/targets/cuda/kernel.h(74): here
instantiation of "void quda::raw_kernel<Functor,Arg,grid_stride>(Arg) [with Functor=quda::mma::CalculateYhatMMA, Arg=quda::mma::CalculateYhatMMAArg<const quda::CalculateYhatArg<double, quda::gauge::FieldOrder<double, 64, 1, (QudaGaugeFieldOrder)13, true, double>, quda::gauge::FieldOrder<double, 64, 1, (QudaGaugeFieldOrder)13, true, double>, quda::gauge::FieldOrder<float, 64, 1, (QudaGaugeFieldOrder)13, true, float>, 64, 4, 2, false>, 64, 64, 64, 32, 16, 1>, grid_stride=false]"
/scratch/CPviolator/work/QUDA_COMPRESSED_TRLM/quda/lib/../include/tunable_nd.h(58): here
instantiation of "void quda::TunableKernel::launch_cuda<Functor,Arg>(const quda::TuneParam &, const quda::qudaStream_t &, const Arg &) const [with Functor=quda::mma::CalculateYhatMMA, Arg=quda::mma::CalculateYhatMMAArg<const quda::CalculateYhatArg<double, quda::gauge::FieldOrder<double, 64, 1, (QudaGaugeFieldOrder)13, true, double>, quda::gauge::FieldOrder<double, 64, 1, (QudaGaugeFieldOrder)13, true, double>, quda::gauge::FieldOrder<float, 64, 1, (QudaGaugeFieldOrder)13, true, float>, 64, 4, 2, false>, 64, 64, 64, 32, 16, 1>]"
/scratch/CPviolator/work/QUDA_COMPRESSED_TRLM/quda/lib/coarse_op_preconditioned_mma_launch.h(47): here
instantiation of "std::enable_if<Arg::is_mma_compatible, void>::type quda::mma::launch_kernel<bM,bN,bK,block_y,block_z,min_block_cta,Arg,Tunable>(quda::TuneParam &, const quda::qudaStream_t &, Arg &, Tunable &) [with bM=64, bN=64, bK=64, block_y=32, block_z=16, min_block_cta=1, Arg=const quda::CalculateYhatArg<double, quda::gauge::FieldOrder<double, 64, 1, (QudaGaugeFieldOrder)13, true, double>, quda::gauge::FieldOrder<double, 64, 1, (QudaGaugeFieldOrder)13, true, double>, quda::gauge::FieldOrder<float, 64, 1, (QudaGaugeFieldOrder)13, true, float>, 64, 4, 2, false>, Tunable=const quda::CalculateYhat<(QudaFieldLocation)1, double, quda::gauge::FieldOrder<double, 64, 1, (QudaGaugeFieldOrder)13, true, double>, quda::gauge::FieldOrder<double, 64, 1, (QudaGaugeFieldOrder)13, true, double>, quda::gauge::FieldOrder<float, 64, 1, (QudaGaugeFieldOrder)13, true, float>, 64, 4, 2, false>]"
/scratch/CPviolator/work/QUDA_COMPRESSED_TRLM/quda/lib/coarse_op_preconditioned_mma_launch.h(99): here
instantiation of "std::enable_if<<expression>, int>::type quda::mma::launch_yhat_kernel(quda::TuneParam &, const quda::qudaStream_t &, Arg &, Tunable &) [with query_max=true, Arg=const quda::CalculateYhatArg<double, quda::gauge::FieldOrder<double, 64, 1, (QudaGaugeFieldOrder)13, true, double>, quda::gauge::FieldOrder<double, 64, 1, (QudaGaugeFieldOrder)13, true, double>, quda::gauge::FieldOrder<float, 64, 1, (QudaGaugeFieldOrder)13, true, float>, 64, 4, 2, false>, Tunable=const quda::CalculateYhat<(QudaFieldLocation)1, double, quda::gauge::FieldOrder<double, 64, 1, (QudaGaugeFieldOrder)13, true, double>, quda::gauge::FieldOrder<double, 64, 1, (QudaGaugeFieldOrder)13, true, double>, quda::gauge::FieldOrder<float, 64, 1, (QudaGaugeFieldOrder)13, true, float>, 64, 4, 2, false>]"
/scratch/CPviolator/work/QUDA_COMPRESSED_TRLM/quda/lib/coarse_op_preconditioned.cu(89): here
instantiation of "__nv_bool quda::CalculateYhat<location, Float_, PreconditionedGauge, Gauge, GaugeInv, n, M, N, compute_max>::advanceTuneParam(quda::TuneParam &) const [with location=(QudaFieldLocation)1, Float_=double, PreconditionedGauge=quda::gauge::FieldOrder<double, 64, 1, (QudaGaugeFieldOrder)13, true, double>, Gauge=quda::gauge::FieldOrder<double, 64, 1, (QudaGaugeFieldOrder)13, true, double>, GaugeInv=quda::gauge::FieldOrder<float, 64, 1, (QudaGaugeFieldOrder)13, true, float>, n=64, M=4, N=2, compute_max=false]"
/scratch/CPviolator/work/QUDA_COMPRESSED_TRLM/quda/lib/coarse_op_preconditioned.cu(59): here
instantiation of "quda::CalculateYhat<location, Float_, PreconditionedGauge, Gauge, GaugeInv, n, M, N, compute_max>::CalculateYhat(quda::GaugeField &, const quda::GaugeField &, const quda::GaugeField &, __nv_bool) [with location=(QudaFieldLocation)1, Float_=double, PreconditionedGauge=quda::gauge::FieldOrder<double, 64, 1, (QudaGaugeFieldOrder)13, true, double>, Gauge=quda::gauge::FieldOrder<double, 64, 1, (QudaGaugeFieldOrder)13, true, double>, GaugeInv=quda::gauge::FieldOrder<float, 64, 1, (QudaGaugeFieldOrder)13, true, float>, n=64, M=4, N=2, compute_max=false]"
/scratch/CPviolator/work/QUDA_COMPRESSED_TRLM/quda/lib/coarse_op_preconditioned.cu(203): here
instantiation of "void quda::calculateYhat<location,storeFloat,Float,N,gOrder>(quda::GaugeField &, quda::GaugeField &, const quda::GaugeField &, const quda::GaugeField &, __nv_bool) [with location=QUDA_CPU_FIELD_LOCATION, storeFloat=double, Float=double, N=64, gOrder=QUDA_QDP_GAUGE_ORDER]"
/scratch/CPviolator/work/QUDA_COMPRESSED_TRLM/quda/lib/coarse_op_preconditioned.cu(252): here
instantiation of "void quda::calculateYhat<storeFloat,Float,N>(quda::GaugeField &, quda::GaugeField &, const quda::GaugeField &, const quda::GaugeField &, __nv_bool) [with storeFloat=double, Float=double, N=64]"
/scratch/CPviolator/work/QUDA_COMPRESSED_TRLM/quda/lib/coarse_op_preconditioned.cu(268): here
instantiation of "void quda::calculateYhat<storeFloat,Float>(quda::GaugeField &, quda::GaugeField &, const quda::GaugeField &, const quda::GaugeField &, __nv_bool) [with storeFloat=double, Float=double]"
/scratch/CPviolator/work/QUDA_COMPRESSED_TRLM/quda/lib/coarse_op_preconditioned.cu(290): here
49 errors detected in the compilation of "/scratch/CPviolator/work/QUDA_COMPRESSED_TRLM/quda/lib/coarse_op_preconditioned.cu".
Compiled with QUDA_PRECISION=12
I will take a look at this. Currently my thought is to disable MG-MMA code when using double precision, since that hasn't really been tested and won't be of any practical use.
@cpviolator Just to make sure: is GPU_MULTIGRID_DOUBLE defined by compiling in HOSTDEBUG mode, or is it set explicitly?
@hummingtree Yeah, you have to set HOSTDEBUG so that CMake adds the correct flag. However, if you just edit these lines https://github.com/lattice/quda/blob/feature/generic_kernel/include/multigrid.h#L10, you can trigger GPU_MULTIGRID_DOUBLE without the debugging flag.