swift-apis
swift-apis copied to clipboard
adamax/AMSGrad not working with differentiableReduce
all credit to @s1ddok for reporting this in https://github.com/tensorflow/swift-models/pull/394!
Something is failing (I think in the block-based reduces) when using the AdaMax/AMSGrad optimizers:
[edit: updated 2020-05-17 to match latest swift-models api changes, still failing against master]
import Datasets
import ImageClassificationModels
import TensorFlow
// Reproduction script: trains VGG16 on Imagewoof with the AdaMax optimizer and
// prints test accuracy and mean test loss after every epoch.
let batchSize = 32
let dataset = Imagewoof(batchSize: batchSize, inputSize: .full, outputSize: 224)
var model = VGG16(classCount: 10)
let optimizer = AdaMax(for: model)

print("Starting training...")

for epoch in 1...90 {
  // Step the learning rate down once past epoch 30, and again past epoch 60.
  if epoch > 60 {
    optimizer.learningRate = 0.0002
  } else if epoch > 30 {
    optimizer.learningRate = 0.002
  }

  // MARK: Training pass
  Context.local.learningPhase = .training
  var trainLossTotal: Float = 0
  var trainBatches = 0
  for batch in dataset.training.sequenced() {
    let images = batch.first
    let labels = batch.second
    let (loss, gradients) = valueWithGradient(at: model) { classifier -> Tensor<Float> in
      softmaxCrossEntropy(logits: classifier(images), labels: labels)
    }
    trainLossTotal += loss.scalarized()
    trainBatches += 1
    // The reported precondition failure is raised from inside this call.
    optimizer.update(&model, along: gradients)
  }

  // MARK: Evaluation pass
  Context.local.learningPhase = .inference
  var evalLossTotal: Float = 0
  var evalBatches = 0
  var hitCount = 0
  var sampleCount = 0
  for batch in dataset.test.sequenced() {
    let images = batch.first
    let labels = batch.second
    let logits = model(images)
    evalLossTotal += softmaxCrossEntropy(logits: logits, labels: labels).scalarized()
    evalBatches += 1
    // Element-wise comparison of the predicted class index against the label.
    let hits = logits.argmax(squeezingAxis: 1) .== labels
    hitCount += Int(Tensor<Int32>(hits).sum().scalarized())
    sampleCount += images.shape[0]
  }

  let accuracy = Float(hitCount) / Float(sampleCount)
  print(
    """
    [Epoch \(epoch)] \
    Accuracy: \(hitCount)/\(sampleCount) (\(accuracy)) \
    Loss: \(evalLossTotal / Float(evalBatches))
    """
  )
}
full trace:
Precondition failed: Count mismatch: 2 and 0: file /home/skoonce/swift/swift-source/tensorflow-swift-apis/Sources/TensorFlow/StdlibExtensions.swift, line 269
Current stack trace:
0 libswiftCore.so 0x00007f91aeca6910 swift_reportError + $
0
1 libswiftCore.so 0x00007f91aed17b90 _swift_stdlib_reportF
atalErrorInFile + 115
2 libswiftCore.so 0x00007f91ae9f7e31 <unavailable> + 14618
09
3 libswiftCore.so 0x00007f91ae9f7a77 <unavailable> + 14608
55
4 libswiftCore.so 0x00007f91ae9f8012 <unavailable> + 14622
90
5 libswiftCore.so 0x00007f91ae9f6460 _assertionFailure(_:_
:file:line:flags:) + 517
6 libswiftTensorFlow.so 0x00007f91af00e720 static Array.Differen
tiableView<A>..* infix(_:_:) + 977
7 MobileNet-Imagenette 0x00005560555c06b4 <unavailable> + 4044468
8 MobileNet-Imagenette 0x00005560555c8365 <unavailable> + 4076389
9 MobileNet-Imagenette 0x00005560555d16d2 <unavailable> + 4114130
10 libswiftCore.so 0x00007f91aec80980 dispatch thunk of static SignedInteger._maskingSubtract(_:_:) + 7
11 libswiftCore.so 0x00007f91aec9a2e0 dispatch thunk of static PointwiseMultiplicative..* infix(_:_:) + 9
12 libswiftTensorFlow.so 0x00007f91af460e00 static PointwiseMultiplicative../ infix(_:_:) + 177
13 libswiftTensorFlow.so 0x00007f91af4f5a50 AdaMax.update(_:along:) + 3626
14 MobileNet-Imagenette 0x000055605564f987 <unavailable> + 4630919
15 libc.so.6 0x00007f919666bab0 __libc_start_main + 231
16 MobileNet-Imagenette 0x000055605525292a <unavailable> + 448810
Illegal instruction (core dumped)
Thanks for opening the issue along with a repro case! I just want to add my 2 cents: I initially discovered this problem with AMSGrad, which is not working either.
ugh you're right i copy pasted the wrong call, fixed!