mgpusim
mgpusim copied to clipboard
Can FullyConnectedLayer become a benchmark on its own?
Hi, I tried to simulate the fully connected layer on my own, but when I test it, whatever inputs I give it, it runs forever and never stops. I do not know what is wrong with it. Can a fully connected layer be used as a benchmark?
import (
"github.com/sarchlab/mgpusim/v4/amd/benchmarks/dnn/gputensor"
"github.com/sarchlab/mgpusim/v4/amd/benchmarks/dnn/layers"
"github.com/sarchlab/mgpusim/v4/amd/benchmarks/dnn/tensor"
"github.com/sarchlab/mgpusim/v4/amd/driver"
)
// A Benchmark is a benchmark for the FFN (fully connected) layer.
//
// The exported fields (N, InputSize, OutputSize, EnableBackward) are read by
// initMem and exec when Run is called; the visible code never assigns them,
// so the caller is expected to set them before calling Run.
type Benchmark struct {
// driver is the GPU driver passed to NewBenchmark.
driver *driver.Driver
// context is the driver context returned by driver.Init in NewBenchmark.
context *driver.Context
// gpus holds the GPU IDs recorded by SelectGPU; Run uses only gpus[0].
gpus []int
// useUnifiedMemory is set by SetUnifiedMemory.
// NOTE(review): no visible code reads this flag — confirm whether it is
// consumed elsewhere.
useUnifiedMemory bool
// N is the first dimension of both the forward and backward input tensors.
N int // Batch size
// InputSize is the layer's input width; the forward input is (N, InputSize).
InputSize int
// OutputSize is the layer's output width; the backward input is
// (N, OutputSize).
OutputSize int
// EnableBackward, when true, makes initMem allocate the backward input and
// makes exec run the backward pass after the forward pass.
EnableBackward bool
// layer is the fully connected layer under test, created in initMem.
layer *layers.FullyConnectedLayer
// operator is the GPU tensor operator created in NewBenchmark; it is used
// to build the layer and to allocate the input tensors.
operator *gputensor.GPUOperator
// forwardIn is the zero-filled tensor fed to layer.Forward.
forwardIn tensor.Tensor
// backwardIn is the zero-filled tensor fed to layer.Backward; it is only
// allocated when EnableBackward is true.
backwardIn tensor.Tensor
}
// NewBenchmark creates a fully connected layer benchmark bound to the given
// driver. It initializes a driver context, builds a GPU tensor operator on
// top of that context, and turns on the operator's time reporting.
func NewBenchmark(driver *driver.Driver) *Benchmark {
	b := new(Benchmark)
	b.driver = driver

	// The context must exist before the operator, which is constructed from it.
	b.context = b.driver.Init()

	op := gputensor.NewGPUOperator(b.driver, b.context)
	b.operator = op
	b.operator.ReportTime()

	return b
}
// EnableVerification configures the benchmark to verify the result. It only
// forwards the request to the underlying GPU tensor operator.
func (b *Benchmark) EnableVerification() {
b.operator.EnableVerification()
}
// SelectGPU records the GPUs the benchmark should run on. At most one GPU ID
// is accepted; passing more than one panics because the fully connected layer
// benchmark does not support multi-GPU execution.
func (b *Benchmark) SelectGPU(gpus []int) {
	if len(gpus) <= 1 {
		b.gpus = gpus
		return
	}

	panic("fully connected layer benchmark can only run on a single GPU for now.")
}
// SetUnifiedMemory configures the benchmark to use unified memory by setting
// the useUnifiedMemory flag.
// NOTE(review): the flag is not read by any code visible here — confirm that
// it actually affects how memory is allocated.
func (b *Benchmark) SetUnifiedMemory() {
b.useUnifiedMemory = true
}
// Run runs the benchmark: it selects the first configured GPU on the driver
// context, builds the layer and its input tensors, and then executes the
// forward (and, if enabled, backward) pass.
//
// SelectGPU must have been called with one GPU ID beforehand; otherwise Run
// panics with a descriptive message.
func (b *Benchmark) Run() {
	// Guard the b.gpus[0] access below so that a missing SelectGPU call is
	// reported clearly instead of as a bare index-out-of-range runtime panic.
	if len(b.gpus) == 0 {
		panic("fully connected layer benchmark requires a GPU to be selected before Run")
	}

	b.driver.SelectGPU(b.context, b.gpus[0])
	b.initMem()
	b.exec()
}
// initMem builds the fully connected layer with randomized parameters and
// allocates the zero-filled tensors that exec feeds into it.
func (b *Benchmark) initMem() {
	// NOTE(review): the leading 0 is presumably a layer index/ID — confirm
	// against layers.NewFullyConnectedLayer.
	fc := layers.NewFullyConnectedLayer(0, b.operator, b.InputSize, b.OutputSize)
	b.layer = fc
	fc.Randomize()

	// The forward pass consumes a (N, InputSize) tensor.
	forwardShape := []int{b.N, b.InputSize}
	b.forwardIn = b.operator.Zeros(forwardShape)

	if !b.EnableBackward {
		return
	}

	// The backward pass consumes a (N, OutputSize) tensor.
	backwardShape := []int{b.N, b.OutputSize}
	b.backwardIn = b.operator.Zeros(backwardShape)
}
// exec runs the forward pass on forwardIn and, when EnableBackward is set,
// follows it with the backward pass on backwardIn. The results of both calls
// are discarded.
func (b *Benchmark) exec() {
	b.layer.Forward(b.forwardIn)

	if !b.EnableBackward {
		return
	}

	b.layer.Backward(b.backwardIn)
}
// Verify is currently a no-op: this benchmark performs no result checking of
// its own in this method.
func (b *Benchmark) Verify() {
// Optional: verify correctness
}
A fully connected layer is basically a GEMM. So, you can use the GEMM benchmark instead. That is why we do not have a fully connected layer benchmark.