finetune-transformer-lm
CPU support
Hi
Does the code run on CPU?
Yes.
FYI, it takes around 15 minutes on ~80 CPU cores (2x Intel(R) Xeon(R) CPU E5-2699 v4 @ 2.20GHz) to run

    python train.py --dataset rocstories --desc rocstories --submit --analysis --data_dir ./roc

vs. around 8 minutes on 1 GPU (GeForce GTX 1080) and ~5 minutes on 2 GPUs (GeForce GTX 1080).
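If TensorFlow still tries to place ops on a GPU, hiding the devices before the session is created is one way to force CPU execution. This is a minimal sketch using standard TF 1.x options, not code from this repo; the actual session setup in train.py may differ:

    import os
    os.environ["CUDA_VISIBLE_DEVICES"] = ""  # hide all GPUs from TensorFlow

    import tensorflow as tf

    # TF 1.x: refuse to allocate GPU devices for this session; let ops
    # fall back to CPU kernels where needed.
    config = tf.ConfigProto(device_count={"GPU": 0}, allow_soft_placement=True)
    sess = tf.Session(config=config)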
@FrankWork Do you mind sharing your CPU version of train.py that works? I tried running on CPU but ran into some issues. Thanks!
I didn't make any changes to the code.
Change the following functions in train.py (it will be slow):
    def mgpu_train(*xs):
        # Multi-GPU input splitting and device placement removed: build
        # the graph once on the default device (the CPU when no GPU is
        # visible).
        do_reuse = None  # single copy of the graph, so no variable reuse
        clf_logits, clf_losses, lm_losses = model(*xs, train=True, reuse=do_reuse)
        if lm_coef > 0:
            # Joint objective: classifier loss plus weighted language-model loss.
            train_loss = tf.reduce_mean(clf_losses) + lm_coef*tf.reduce_mean(lm_losses)
        else:
            train_loss = tf.reduce_mean(clf_losses)
        params = find_trainable_variables("model")
        grads = tf.gradients(train_loss, params)
        train = opt_fns[opt](params, grads, lr, partial(lr_schedules[lr_schedule], warmup=lr_warmup), n_updates_total, l2=l2, max_grad_norm=max_grad_norm, vector_l2=vector_l2, b1=b1, b2=b2, e=e)
        return [train, clf_logits, clf_losses, lm_losses]
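To double-check that nothing is silently running on a GPU, TF 1.x can log where each op is placed when the session is created (a standard TensorFlow option, independent of this repo):

    # Prints the device assigned to every op at session-creation time.
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))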
    def mgpu_predict(*xs):
        # Same simplification as mgpu_train: no input splitting or GPU
        # placement; reuse the variables already created by mgpu_train.
        clf_logits, clf_losses, lm_losses = model(*xs, train=False, reuse=True)
        return [clf_logits, clf_losses, lm_losses]
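For context, here is roughly how these functions get wired up; the placeholder names and shapes below are assumptions based on the rocstories setup in train.py, shown only as an illustrative sketch:

    # Hypothetical call sites (names/shapes assumed, not verbatim from the repo):
    X_train = tf.placeholder(tf.int32, [n_batch_train, 2, n_ctx, 2])  # token + position ids for 2 endings
    M_train = tf.placeholder(tf.float32, [n_batch_train, 2, n_ctx])   # loss mask over real tokens
    Y_train = tf.placeholder(tf.int32, [n_batch_train])               # index of the correct ending

    train, clf_logits, clf_losses, lm_losses = mgpu_train(X_train, M_train, Y_train)
    eval_clf_logits, eval_clf_losses, eval_lm_losses = mgpu_predict(X_train, M_train, Y_train)

Since the per-device loop is gone, any batch-size arithmetic elsewhere in the script that multiplies by the GPU count should be run with that count set to 1 (assuming your copy exposes it as a flag).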
Thanks