DenseVariational on Apple Silicon with GPU
Issue:
The DenseVariational layer does not appear to return random outputs when run on the Apple M1 GPU, but it works fine on CPU only.
Expected Behaviour:
When the model is called repeatedly on the same input data, the outputs should differ, because the weights are sampled from probability distributions on each forward pass.
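To make the expected behaviour concrete, here is a toy NumPy stand-in for a variational dense layer (my own sketch, not TFP code): weights are re-drawn from a diagonal-Gaussian posterior on every forward pass, so two calls on the same input should almost surely differ.

```python
import numpy as np

rng = np.random.default_rng()

def variational_dense(x, mu, sigma):
    """Toy stand-in for a variational dense layer: draw fresh weights
    from a diagonal-Gaussian posterior on every forward pass."""
    w = rng.normal(mu, sigma)  # weights re-sampled on each call
    return x @ w

x = np.ones((1, 3))
mu = np.zeros((3, 1))
sigma = np.ones((3, 1))

out1 = variational_dense(x, mu, sigma)
out2 = variational_dense(x, mu, sigma)
print(np.allclose(out1, out2))  # almost surely False
```

The bug report amounts to this check returning True on the M1 GPU.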
To reproduce:
The following code is taken from this Stack Overflow post. This first block generates synthetic data:
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
import matplotlib.pyplot as plt
tfd = tfp.distributions
tfpl = tfp.layers
physical_devices = tf.config.list_physical_devices('CPU')
print(physical_devices)
# uncomment the next line to disable the GPU and run on CPU only
# tf.config.set_visible_devices([], 'GPU')
x_train = np.linspace(-1, 2, 5000)[:, np.newaxis]
y_train = np.power(x_train, 3) + 0.1*(2+x_train)*np.random.randn(5000)[:, np.newaxis]
plt.scatter(x_train, y_train, alpha=0.1)
plt.show()
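For context, the synthetic target is y = x³ with heteroscedastic Gaussian noise whose scale is 0.1 * (2 + x). A quick NumPy sanity check of that noise schedule (no TF/TFP needed):

```python
import numpy as np

# Noise scale grows linearly with x: smallest at x = -1, largest at x = 2.
x = np.linspace(-1, 2, 5000)[:, np.newaxis]
noise_scale = 0.1 * (2 + x)
print(noise_scale.min(), noise_scale.max())  # ~0.1 and ~0.4
```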
The following section defines the model.
def prior(kernel_size, bias_size, dtype=None):
    n = kernel_size + bias_size
    prior_model = tf.keras.Sequential([
        tfpl.DistributionLambda(
            lambda t: tfd.MultivariateNormalDiag(loc=tf.zeros(n), scale_diag=tf.ones(n))
        )
    ])
    return prior_model
def posterior(kernel_size, bias_size, dtype=None):
    n = kernel_size + bias_size
    posterior_model = tf.keras.Sequential([
        tfpl.VariableLayer(tfpl.MultivariateNormalTriL.params_size(n), dtype=dtype),
        tfpl.MultivariateNormalTriL(n)
    ])
    return posterior_model
x_in = tf.keras.layers.Input(shape=(1,))
x = tfpl.DenseVariational(units=tfpl.IndependentNormal.params_size(1),
                          make_prior_fn=prior,
                          make_posterior_fn=posterior,
                          kl_weight=1/x_train.shape[0])(x_in)
y_out = tfpl.DenseVariational(units=1,
                              make_prior_fn=prior,
                              make_posterior_fn=posterior,
                              kl_weight=1/x_train.shape[0])(x)
model = tf.keras.Model(inputs=x_in, outputs=y_out)
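As an aside, `tfpl.MultivariateNormalTriL.params_size(n)` used in the posterior counts n loc parameters plus the n(n+1)/2 entries of the lower-triangular scale factor. A pure-Python mirror of that count (my own helper name, not part of TFP):

```python
def mvn_tril_params_size(n):
    # n means plus n*(n+1)/2 lower-triangular scale entries
    return n + n * (n + 1) // 2

# e.g. a DenseVariational layer with 1 input and 1 unit has
# kernel_size + bias_size = 2, so the posterior needs 5 parameters
print(mvn_tril_params_size(2))  # 5
```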
def nll(y_true, y_pred):
    dist = tfp.distributions.Normal(loc=y_pred, scale=1.0)
    return tf.reduce_sum(-dist.log_prob(y_true))

model.compile(loss=nll, optimizer='Adam')
model.summary()
history = model.fit(x_train, y_train, epochs=1, verbose=False)
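The `nll` loss above is the summed negative log-likelihood of `y_true` under a unit-variance Gaussian centred at `y_pred`. A NumPy sketch of the same quantity, assuming `scale=1.0` as in the code:

```python
import numpy as np

def gaussian_nll(y_true, y_pred, scale=1.0):
    # -log N(y_true | y_pred, scale^2), summed over all elements
    var = scale ** 2
    return np.sum(0.5 * np.log(2 * np.pi * var)
                  + 0.5 * (y_true - y_pred) ** 2 / var)
```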
The subsequent code block plots the trajectories of 100 separate predictions, but it behaves unexpectedly on the GPU: the trajectories are all identical instead of varying.
predicted = [model(x_train) for _ in range(100)]
for i, res in enumerate(predicted):
    plt.plot(x_train, res, alpha=0.1)
plt.scatter(x_train, y_train, alpha=0.1)
plt.show()
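A quick way to confirm the report without plotting is to check whether the 100 forward passes are elementwise identical. A small helper (hypothetical name, plain NumPy) that would return True under the reported M1-GPU behaviour and False on CPU:

```python
import numpy as np

def all_identical(predictions, atol=1e-12):
    # True when every prediction matches the first one elementwise,
    # i.e. the variational layer is NOT re-sampling its weights.
    first = np.asarray(predictions[0])
    return all(np.allclose(np.asarray(p), first, atol=atol)
               for p in predictions[1:])

# usage: all_identical(predicted) on the list built above
```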