mgpusim icon indicating copy to clipboard operation
mgpusim copied to clipboard

Can FullyConnectedLayer become a benchmark on its own?

Open chz05 opened this issue 7 months ago • 1 comments

Hi, I tried to simulate the fully connected layer on its own, but when I test it, it runs forever and never stops, no matter what inputs I give it. I do not know what is going wrong inside it. Can a fully connected layer be used as a benchmark?


import (
	"github.com/sarchlab/mgpusim/v4/amd/benchmarks/dnn/gputensor"
	"github.com/sarchlab/mgpusim/v4/amd/benchmarks/dnn/layers"
	"github.com/sarchlab/mgpusim/v4/amd/benchmarks/dnn/tensor"
	"github.com/sarchlab/mgpusim/v4/amd/driver"
)

// Benchmark drives a single fully connected layer through the GPU tensor
// operator: one forward pass and, when EnableBackward is set, one backward
// pass.
type Benchmark struct {
	// driver is the GPU driver the benchmark issues its work through.
	driver *driver.Driver
	// context is the driver context used when selecting the GPU and when
	// constructing the operator.
	context *driver.Context
	// gpus holds the GPU IDs given to SelectGPU; Run uses the first entry.
	gpus []int
	// useUnifiedMemory is set by SetUnifiedMemory.
	useUnifiedMemory bool

	// N is the batch size.
	N int
	// InputSize is the second dimension of the forward input tensor, whose
	// shape is (N, InputSize).
	InputSize int
	// OutputSize is the second dimension of the backward input tensor, whose
	// shape is (N, OutputSize).
	OutputSize int
	// EnableBackward makes the benchmark also run the backward pass.
	EnableBackward bool

	// layer is the fully connected layer under test, created in initMem.
	layer *layers.FullyConnectedLayer
	// operator is the GPU tensor operator used to create the layer and tensors.
	operator *gputensor.GPUOperator

	// forwardIn is the tensor passed to the layer's Forward.
	forwardIn tensor.Tensor
	// backwardIn is the tensor passed to the layer's Backward; it is only
	// allocated when EnableBackward is set.
	backwardIn tensor.Tensor
}

// NewBenchmark creates a fully connected layer benchmark bound to the given
// driver. It initializes a driver context, builds a GPU tensor operator on top
// of that driver and context, and calls ReportTime on the operator.
func NewBenchmark(driver *driver.Driver) *Benchmark {
	// Within this function, "driver" refers to the parameter, not the package.
	ctx := driver.Init()

	op := gputensor.NewGPUOperator(driver, ctx)
	op.ReportTime()

	return &Benchmark{
		driver:   driver,
		context:  ctx,
		operator: op,
	}
}

// EnableVerification turns on result verification by forwarding the request to
// the underlying GPU tensor operator.
func (b *Benchmark) EnableVerification() {
	op := b.operator
	op.EnableVerification()
}

// SelectGPU records the GPU IDs the benchmark should run on. The benchmark
// supports at most one GPU; passing more than one ID panics.
func (b *Benchmark) SelectGPU(gpus []int) {
	if len(gpus) <= 1 {
		b.gpus = gpus
		return
	}

	panic("fully connected layer benchmark can only run on a single GPU for now.")
}

// SetUnifiedMemory marks the benchmark as configured to use unified memory.
//
// NOTE(review): nothing in the visible code reads this flag, so it may
// currently have no effect on how tensors are allocated — confirm.
func (b *Benchmark) SetUnifiedMemory() {
	b.useUnifiedMemory = true
}

// Run runs the benchmark: it selects the configured GPU on the driver,
// creates the layer and its input tensors, and then executes the layer.
//
// SelectGPU must be called with one GPU ID before Run. Run panics with a
// descriptive message if no GPU has been selected, rather than failing with an
// opaque index-out-of-range runtime panic.
func (b *Benchmark) Run() {
	if len(b.gpus) == 0 {
		panic("fully connected layer benchmark requires a GPU; call SelectGPU before Run")
	}

	b.driver.SelectGPU(b.context, b.gpus[0])
	b.initMem()
	b.exec()
}


// initMem creates the fully connected layer, randomizes it, and allocates the
// input tensors through the operator's Zeros call. The backward input is only
// allocated when EnableBackward is set.
func (b *Benchmark) initMem() {
	// The leading 0 is presumably the layer index — confirm against
	// layers.NewFullyConnectedLayer.
	fc := layers.NewFullyConnectedLayer(0, b.operator, b.InputSize, b.OutputSize)
	fc.Randomize()
	b.layer = fc

	// The forward input has shape (N, InputSize).
	forwardShape := []int{b.N, b.InputSize}
	b.forwardIn = b.operator.Zeros(forwardShape)

	if !b.EnableBackward {
		return
	}

	// The backward input has shape (N, OutputSize).
	backwardShape := []int{b.N, b.OutputSize}
	b.backwardIn = b.operator.Zeros(backwardShape)
}

// exec runs the layer's forward pass on forwardIn and, when EnableBackward is
// set, follows it with the backward pass on backwardIn.
func (b *Benchmark) exec() {
	fc := b.layer
	fc.Forward(b.forwardIn)

	if !b.EnableBackward {
		return
	}

	fc.Backward(b.backwardIn)
}

// Verify is a placeholder for checking the benchmark's results. It currently
// performs no checks.
func (b *Benchmark) Verify() {
	// Intentionally empty: result verification is not implemented here.
}

chz05 avatar May 20 '25 21:05 chz05

A fully connected layer is basically a GEMM. So, you can use the GEMM benchmark instead. That is why we do not have a fully connected layer benchmark.

syifan avatar Jun 10 '25 23:06 syifan