vegasflow icon indicating copy to clipboard operation
vegasflow copied to clipboard

`tensorflow-metal` is not supported: errors when using TensorFlow with the GPU on a Mac - some of the examples in the docs do not work

Open jackaraz opened this issue 5 months ago • 4 comments

Description

Hi, I'm trying to execute some of the examples from your documentation, but I'm running into device-related issues. I don't have an NVIDIA GPU (it's an M1 Mac), yet VegasFlow automatically places the computation on the GPU, even though the GPU entry in `sampler.devices` appears to be set to False:

print(sampler.devices)
# {'/device:GPU:0': False}

Is it possible to run on the CPU instead? I couldn't find a function for setting the device in VegasFlow. Should it be done through TensorFlow?

Thanks for your help. Cheers

Code example

# Minimal reproduction script for the device-placement error reported in this issue.
from vegasflow import VegasFlow, run_eager
import tensorflow as tf

# NOTE(review): presumably switches vegasflow to eager execution — confirm against the vegasflow docs
run_eager(True)

def my_complicated_fun(xarr, **kwargs):
  """Integrand: return the sum of ``xarr`` along axis 1; extra keyword arguments are accepted but unused."""
  return tf.reduce_sum(xarr, axis=1)

n_dim = 10
n_events = int(1e5)
sampler = VegasFlow(n_dim, n_events, verbose=False)
sampler.compile(my_complicated_fun)

# Now let's train the integrator for 10 iterations
# (this is the call at which the InvalidArgumentError in the traceback is raised)
_ = sampler.run_integration(10)

# Now we can use sampler to generate random numbers
rnds, _, px = sampler.generate_random_array(100)

Additional information

# Environment diagnostics: the comment under each print records the output the reporter observed.
import tensorflow as tf
import vegasflow

print(f"Vegasflow: {vegasflow.__version__}")
# Vegasflow: 1.3.0
print(f"Tensorflow: {tf.__version__}")
# Tensorflow: 2.14.0
# The MKL check below could not be evaluated: the attribute lookup itself fails (see recorded error).
print(f"tf-mkl: {tf.python.framework.test_util.IsMklEnabled()}")
# AttributeError: module 'tensorflow' has no attribute 'python'
print(f"tf-cuda: {tf.test.is_built_with_cuda()}")
# tf-cuda: False
# NOTE(review): a GPU is reported even though the build has no CUDA — presumably the
# Apple GPU exposed through tensorflow-metal (per the issue title); confirm.
print(f"GPU available: {tf.test.is_gpu_available()}")
# GPU available: True

Traceback

[INFO] (vegasflow.monte_carlo) Checking whether the integrand outputs the correct shape (note, this will run a very small number of events and potentially trigger a retrace)
---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
Cell In[103], line 15
     12 sampler.compile(my_complicated_fun)
     14 # Now let's train the integrator for 10 iterations
---> 15 _ = sampler.run_integration(10)
     17 # Now we can use sampler to generate random numbers
     18 rnds, _, px = sampler.generate_random_array(100)

File ~/packages/miniconda3/lib/python3.9/site-packages/vegasflow/monte_carlo.py:682, in MonteCarloFlow.run_integration(self, n_iter, log_time, histograms)
    679     start = time.time()
    681 # Run one single iteration and append results
--> 682 res, error = self._run_iteration()
    683 all_results.append((res, error))
    685 # If there is a histogram variable, store it and empty it

File ~/packages/miniconda3/lib/python3.9/site-packages/vegasflow/vflow.py:447, in VegasFlow._run_iteration(self)
    445 def _run_iteration(self):
    446     """Runs one iteration of the Vegas integrator"""
--> 447     return self._iteration_content()

File ~/packages/miniconda3/lib/python3.9/site-packages/vegasflow/vflow.py:442, in VegasFlow._iteration_content(self)
    440 # If training is active, act post integration
    441 if self.train:
--> 442     self.refine_grid(arr_res2)
    443 return res, sigma

File ~/packages/miniconda3/lib/python3.9/site-packages/vegasflow/vflow.py:363, in VegasFlow.refine_grid(self, arr_res2)
    361 for j in range(self.n_dim):
    362     new_divisions = refine_grid_per_dimension(arr_res2[j, :], self.divisions[j, :])
--> 363     self.divisions[j, :].assign(new_divisions)

File ~/packages/miniconda3/lib/python3.9/site-packages/tensorflow/python/ops/array_ops.py:1359, in strided_slice.<locals>.assign(val, name)
   1356 if name is None:
   1357   name = parent_name + "_assign"
-> 1359 return var._strided_slice_assign(
   1360     begin=begin,
   1361     end=end,
   1362     strides=strides,
   1363     value=val,
   1364     name=name,
   1365     begin_mask=begin_mask,
   1366     end_mask=end_mask,
   1367     ellipsis_mask=ellipsis_mask,
   1368     new_axis_mask=new_axis_mask,
   1369     shrink_axis_mask=shrink_axis_mask)

File ~/packages/miniconda3/lib/python3.9/site-packages/tensorflow/python/ops/resource_variable_ops.py:1523, in BaseResourceVariable._strided_slice_assign(self, begin, end, strides, value, name, begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask)
   1518 def _strided_slice_assign(self, begin, end, strides, value, name, begin_mask,
   1519                           end_mask, ellipsis_mask, new_axis_mask,
   1520                           shrink_axis_mask):
   1521   with _handle_graph(self.handle), self._assign_dependencies():
   1522     return self._lazy_read(
-> 1523         gen_array_ops.resource_strided_slice_assign(
   1524             ref=self.handle,
   1525             begin=begin,
   1526             end=end,
   1527             strides=strides,
   1528             value=ops.convert_to_tensor(value, dtype=self.dtype),
   1529             name=name,
   1530             begin_mask=begin_mask,
   1531             end_mask=end_mask,
   1532             ellipsis_mask=ellipsis_mask,
   1533             new_axis_mask=new_axis_mask,
   1534             shrink_axis_mask=shrink_axis_mask))

File ~/packages/miniconda3/lib/python3.9/site-packages/tensorflow/python/ops/gen_array_ops.py:8826, in resource_strided_slice_assign(ref, begin, end, strides, value, begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask, name)
   8824   return _result
   8825 except _core._NotOkStatusException as e:
-> 8826   _ops.raise_from_not_ok_status(e, name)
   8827 except _core._FallbackException:
   8828   pass

File ~/packages/miniconda3/lib/python3.9/site-packages/tensorflow/python/framework/ops.py:5888, in raise_from_not_ok_status(e, name)
   5886 def raise_from_not_ok_status(e, name) -> NoReturn:
   5887   e.message += (" name: " + str(name if name is not None else ""))
-> 5888   raise core._status_to_exception(e) from None

InvalidArgumentError: Cannot assign a device for operation ResourceStridedSliceAssign: Could not satisfy explicit device specification '/job:localhost/replica:0/task:0/device:GPU:0' because no supported kernel for GPU devices is available.
Colocation Debug Info:
Colocation group had the following types and supported devices: 
Root Member(assigned_device_name_index_=1 requested_device_name_='/job:localhost/replica:0/task:0/device:GPU:0' assigned_device_name_='/job:localhost/replica:0/task:0/device:GPU:0' resource_device_name_='/job:localhost/replica:0/task:0/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]
ResourceStridedSliceAssign: CPU 
_Arg: GPU CPU 

Colocation members, user-requested devices, and framework assigned devices, if any:
  ref (_Arg)  framework assigned device=/job:localhost/replica:0/task:0/device:GPU:0
  ResourceStridedSliceAssign (ResourceStridedSliceAssign) /job:localhost/replica:0/task:0/device:GPU:0

Op: ResourceStridedSliceAssign
Node attrs: new_axis_mask=0, Index=DT_INT32, shrink_axis_mask=1, end_mask=2, ellipsis_mask=0, begin_mask=2, T=DT_DOUBLE
Registered kernels:
  device='XLA_CPU_JIT'; Index in [DT_INT32, DT_INT64]; T in [DT_FLOAT, DT_DOUBLE, DT_INT32, DT_UINT8, DT_INT16, DT_INT8, DT_COMPLEX64, DT_INT64, DT_BOOL, DT_QINT8, DT_QUINT8, DT_QINT32, DT_BFLOAT16, DT_UINT16, DT_COMPLEX128, DT_HALF, DT_UINT32, DT_UINT64, DT_FLOAT8_E5M2, DT_FLOAT8_E4M3FN]
  device='DEFAULT'; T in [DT_INT32]
  device='CPU'; T in [DT_UINT64]
  device='CPU'; T in [DT_INT64]
  device='CPU'; T in [DT_UINT32]
  device='CPU'; T in [DT_UINT16]
  device='CPU'; T in [DT_INT16]
  device='CPU'; T in [DT_UINT8]
  device='CPU'; T in [DT_INT8]
  device='CPU'; T in [DT_INT32]
  device='CPU'; T in [DT_HALF]
  device='CPU'; T in [DT_BFLOAT16]
  device='CPU'; T in [DT_FLOAT]
  device='CPU'; T in [DT_DOUBLE]
  device='CPU'; T in [DT_COMPLEX64]
  device='CPU'; T in [DT_COMPLEX128]
  device='CPU'; T in [DT_BOOL]
  device='CPU'; T in [DT_STRING]
  device='CPU'; T in [DT_RESOURCE]
  device='CPU'; T in [DT_VARIANT]
  device='CPU'; T in [DT_QINT8]
  device='CPU'; T in [DT_QUINT8]
  device='CPU'; T in [DT_QINT32]

	 [[{{node ResourceStridedSliceAssign}}]] [Op:ResourceStridedSliceAssign] name: strided_slice/_assign

jackaraz avatar Jan 26 '24 16:01 jackaraz