qiskit-qcgpu-provider icon indicating copy to clipboard operation
qiskit-qcgpu-provider copied to clipboard

Power9 RHEL 7.6 BUILD_PROGRAM_FAILURE

Open jwoehr opened this issue 6 years ago • 1 comments

Problem

qcgpu fails on Power9 RHEL 7.6 as follows with BUILD_PROGRAM_FAILURE for any program I try.

Here is the example qcgpu_qasm.py provided with the module

qiskit_dev_env) [jwoehr@power9-vm8 examples]$ python qcgpu_qasm.py 
QCGPU backends:  [<QCGPUStatevectorSimulator('statevector_simulator') from QCGPU Provider()>, <QCGPUQasmSimulator('qasm_simulator') from QCGPU Provider()>]
Choose platform:
[0] <pyopencl.Platform 'NVIDIA CUDA' at 0x2d77bce0>
Choice [0]:0
Set the environment variable PYOPENCL_CTX='0' to avoid being asked again.
Traceback (most recent call last):
  File "qcgpu_qasm.py", line 34, in <module>
    job_sim = execute(qc, backend_sim)
  File "/scratch/work/Qiskit/DEV/qiskit-terra/qiskit/execute.py", line 222, in execute
    return backend.run(qobj, **run_config)
  File "/scratch/work/Qiskit/DEV/qiskit-community-qcgpu-provider/qiskit_qcgpu_provider/qasm_simulator.py", line 136, in run
    qcgpu.backend._create_context()
  File "/home/jwoehr/work/Qiskit/qiskit_dev_env/lib64/python3.6/site-packages/qcgpu/backend.py", line 455, in _create_context
    program = cl.Program(context, kernel).build(options="-cl-no-signed-zeros -cl-mad-enable -cl-fast-relaxed-math")
  File "/home/jwoehr/work/Qiskit/qiskit_dev_env/lib64/python3.6/site-packages/pyopencl/__init__.py", line 510, in build
    options_bytes=options_bytes, source=self._source)
  File "/home/jwoehr/work/Qiskit/qiskit_dev_env/lib64/python3.6/site-packages/pyopencl/__init__.py", line 554, in _build_and_catch_errors
    raise err
pyopencl._cl.RuntimeError: clBuildProgram failed: BUILD_PROGRAM_FAILURE - clBuildProgram failed: BUILD_PROGRAM_FAILURE - clBuildProgram failed: BUILD_PROGRAM_FAILURE

Build on <pyopencl.Device 'Tesla V100-SXM2-16GB' on 'NVIDIA CUDA' at 0x2d77a400>:


(options: -cl-no-signed-zeros -cl-mad-enable -cl-fast-relaxed-math -I /scratch/work/Qiskit/qiskit_dev_env/lib/python3.6/site-packages/pyopencl/cl)
(source saved as /tmp/tmpq416129z.cl)
(qiskit_dev_env) [jwoehr@power9-vm8 examples]$ cat /tmp/tmpq416129z.cl

#include <pyopencl-complex.h>

/*
 * Returns the nth number where a given digit
 * is cleared in the binary representation of the number
 */
static int nth_cleared(int n, int target)
{
    int mask = (1 << target) - 1;
    int not_mask = ~mask;

    return (n & mask) | ((n & not_mask) << 1);
}

///////////////////////////////////////////////
// KERNELS
///////////////////////////////////////////////

/*
 * Applies a single qubit gate to the register.
 * The gate matrix must be given in the form:
 *
 *  A B
 *  C D
 */
__kernel void apply_gate(
    __global cfloat_t *amplitudes,
    int target,
    cfloat_t A,
    cfloat_t B,
    cfloat_t C,
    cfloat_t D)
{
    int const global_id = get_global_id(0);

    int const zero_state = nth_cleared(global_id, target);

    // int const zero_state = state & (~(1 << target)); // Could just be state
    int const one_state = zero_state | (1 << target);

    cfloat_t const zero_amp = amplitudes[zero_state];
    cfloat_t const one_amp = amplitudes[one_state];

    amplitudes[zero_state] = cfloat_add(cfloat_mul(A, zero_amp), cfloat_mul(B, one_amp));
    amplitudes[one_state] = cfloat_add(cfloat_mul(D, one_amp), cfloat_mul(C, zero_amp));
}

/*
 * Applies a controlled single qubit gate to the register.
 */
__kernel void apply_controlled_gate(
    __global cfloat_t *amplitudes,
    int control,
    int target,
    cfloat_t A,
    cfloat_t B,
    cfloat_t C,
    cfloat_t D)
{
    int const global_id = get_global_id(0);
    int const zero_state = nth_cleared(global_id, target);
    int const one_state = zero_state | (1 << target); // Set the target bit

    int const control_val_zero = (((1 << control) & zero_state) > 0) ? 1 : 0;
    int const control_val_one = (((1 << control) & one_state) > 0) ? 1 : 0;

    cfloat_t const zero_amp = amplitudes[zero_state];
    cfloat_t const one_amp = amplitudes[one_state];

    if (control_val_zero == 1)
    {
        amplitudes[zero_state] = cfloat_add(cfloat_mul(A, zero_amp), cfloat_mul(B, one_amp));
    }

    if (control_val_one == 1)
    {
        amplitudes[one_state] = cfloat_add(cfloat_mul(D, one_amp), cfloat_mul(C, zero_amp));
    }
}

/*
 * Applies a controlled-controlled single qubit gate to the register.
 */
__kernel void apply_controlled_controlled_gate(
    __global cfloat_t *amplitudes,
    int control,
    int control_2,
    int target,
    cfloat_t A,
    cfloat_t B,
    cfloat_t C,
    cfloat_t D)
{
    int const global_id = get_global_id(0);
    int const zero_state = nth_cleared(global_id, target);
    int const one_state = zero_state | (1 << target); // Set the target bit

    int const control_val_zero = (((1 << control) & zero_state) > 0) ? 1 : 0;
    int const control_val_one = (((1 << control) & one_state) > 0) ? 1 : 0;
    int const control_val_two_zero = (((1 << control_2) & zero_state) > 0) ? 1 : 0;
    int const control_val_two_one = (((1 << control_2) & one_state) > 0) ? 1 : 0;

    cfloat_t const zero_amp = amplitudes[zero_state];
    cfloat_t const one_amp = amplitudes[one_state];

    if (control_val_zero == 1 && control_val_two_zero == 1)
    {
        amplitudes[zero_state] = cfloat_add(cfloat_mul(A, zero_amp), cfloat_mul(B, one_amp));
    }

    if (control_val_one == 1 && control_val_two_one == 1)
    {
        amplitudes[one_state] = cfloat_add(cfloat_mul(D, one_amp), cfloat_mul(C, zero_amp));
    }
}

/*
 * Swaps the states of two qubits in the register
 * NOT MIGRATED
 */
// __kernel void swap(
//     __global cfloat_t *const amplitudes,
//     __global cfloat_t *amps,
//     int first_qubit,
//     int second_qubit)
// {
//     int const state = get_global_id(0);

//     int const first_bit_mask = 1 << first_qubit;
//     int const second_bit_mask = 1 << second_qubit;

//     int const new_second_bit = ((state & first_bit_mask) >> first_qubit) << second_qubit;
//     int const new_first_bit = ((state & second_bit_mask) >> second_qubit) << first_qubit;

//     int const new_state = (state & !first_bit_mask & !second_bit_mask) | new_first_bit | new_second_bit;

//     amps[new_state] = amplitudes[state];
// }


/**
 * Get a single amplitude
 */
__kernel void get_single_amplitude(
    __global cfloat_t *const amplitudes,
    __global cfloat_t *out,
    int i)
{
    out[0] = amplitudes[i];
}

/**
 * Calculates The Probabilities Of A State Vector
 */
__kernel void calculate_probabilities(
    __global cfloat_t *const amplitudes,
    __global float *probabilities)
{
    int const state = get_global_id(0);
    cfloat_t amp = amplitudes[state];

    probabilities[state] = cfloat_abs(cfloat_mul(amp, amp));
}

/**
 * Initializes a register to the value 1|0..100...0>
 *                                          ^ target
 */
__kernel void initialize_register(
    __global cfloat_t *amplitudes,
    int const target)
{
    int const state = get_global_id(0);
    if (state == target)
    {
        amplitudes[state] = cfloat_new(1, 0);
    }
    else
    {
        amplitudes[state] = cfloat_new(0, 0);
    }
}

/**
 * Collapses a qubit in the register
 */
__kernel void collapse(
    __global cfloat_t *amplitudes, 
    int const target,
    int const outcome, 
    float const norm)
{
    int const state = get_global_id(0);

    if (((state >> target) & 1) == outcome) {
        amplitudes[state] = cfloat_mul(amplitudes[state], cfloat_new(norm, 0.0));
    }
    else
    {
        amplitudes[state] = cfloat_new(0.0, 0.0);
    }
}

System info

(qiskit_dev_env) [jwoehr@power9-vm8 examples]$ nvidia-smi 
Sun Sep 22 17:50:34 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 418.87.00    Driver Version: 418.87.00    CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|===============================+======================+======================|
|   0  Tesla V100-SXM2...  Off  | 00000000:00:09.0 Off |                    0 |
| N/A   39C    P0    52W / 300W |      0MiB / 16130MiB |      3%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|=============================================================================|
|  No running processes found                                                 |
+-----------------------------------------------------------------------------+
(qiskit_dev_env) [jwoehr@power9-vm8 examples]$ uname -a
Linux power9-vm8.xxx.xxx.xxx  4.14.0-115.10.1.el7a.ppc64le #1 SMP Wed Jun 26 09:32:17 UTC 2019 ppc64le ppc64le ppc64le GNU/Linux
(qiskit_dev_env) [jwoehr@power9-vm8 examples]$ cat /etc/os-release 
NAME="Red Hat Enterprise Linux Server"
VERSION="7.6 (Maipo)"
ID="rhel"
ID_LIKE="fedora"
VARIANT="Server"
VARIANT_ID="server"
VERSION_ID="7.6"
PRETTY_NAME="Red Hat Enterprise Linux"
ANSI_COLOR="0;31"
CPE_NAME="cpe:/o:redhat:enterprise_linux:7.6:GA:server"
HOME_URL="https://www.redhat.com/"
BUG_REPORT_URL="https://bugzilla.redhat.com/"

REDHAT_BUGZILLA_PRODUCT="Red Hat Enterprise Linux 7"
REDHAT_BUGZILLA_PRODUCT_VERSION=7.6
REDHAT_SUPPORT_PRODUCT="Red Hat Enterprise Linux"
REDHAT_SUPPORT_PRODUCT_VERSION="7.6"

jwoehr avatar Sep 23 '19 00:09 jwoehr

I don't have a way to reproduce this, do you know of somewhere to rent a power9 VM?

adamisntdead avatar Sep 23 '19 21:09 adamisntdead