VexRiscv icon indicating copy to clipboard operation
VexRiscv copied to clipboard

Instructions to save/restore registers to the stack are taking 2 clocks each

Open ztachip opened this issue 5 months ago • 12 comments

I wrote a simple task switcher that saves and restores registers:

// Save registers
sw s0,4(sp)
sw s1,8(sp)
sw s2,12(sp)
sw s3,16(sp)
:
:
// Restore registers
lw s0,4(sp)
lw s1,8(sp)
lw s2,12(sp)
:
:

My stack fits entirely in data cache

But it seems to take 2 clocks for each instruction above. I have enabled all bypasses in the HazardSimplePlugin. Shouldn't the stack save/restore instructions above take just 1 clock each?

Thanks

ztachip avatar Jan 21 '24 18:01 ztachip

Hi, What memory bus / Soc are you using ?

Dolu1990 avatar Jan 21 '24 18:01 Dolu1990

I am using AXI for bus.

package vexriscv.VexRiscvForSim

import spinal.core._ import spinal.lib._ import vexriscv.ip.{DataCacheConfig, InstructionCacheConfig} import spinal.lib.bus.amba3.apb._ import spinal.lib.bus.amba4.axi._ import spinal.lib.bus.misc.SizeMapping import spinal.lib.io.{InOutWrapper, TriStateArray} import spinal.lib.misc.{InterruptCtrl, Prescaler, Timer} import spinal.lib.soc.pinsec.{PinsecTimerCtrl, PinsecTimerCtrlExternal} import vexriscv.plugin._ import vexriscv.{VexRiscv, VexRiscvConfig, plugin} import spinal.lib.com.spi.ddr._ import spinal.lib.bus.simple._ import scala.collection.mutable.ArrayBuffer

/**
 * Build-time settings for the simulation SoC.
 *
 * @param coreFrequency frequency declared on the system clock domain
 * @param cpuPlugins    plugin list handed to the VexRiscv core
 */
case class RiscvConfig(
  coreFrequency : HertzNumber,
  cpuPlugins    : ArrayBuffer[Plugin[VexRiscv]]
)

/**
 * Presets for [[RiscvConfig]].
 */
object RiscvConfig{
  /** Default preset; forwards to `default(false)`. */
  def default : RiscvConfig = default(false)

  /**
   * Builds the default preset: a 166 MHz core with cached instruction and data
   * buses, CSR, decoder, static memory translator, register file, ALU, barrel
   * shifter, hazard bypasses, mul/div and branch plugins.
   *
   * @param bigEndian kept for interface compatibility; this body does not read it
   * @return a new configuration holding a freshly built plugin list
   */
  def default(bigEndian : Boolean = false) : RiscvConfig = RiscvConfig(
    coreFrequency = 166 MHz,
    cpuPlugins = ArrayBuffer(
      new IBusCachedPlugin(
        resetVector = 0x00004000L,
        prediction = STATIC,
        relaxedPcCalculation = true,
        config = InstructionCacheConfig(
          cacheSize = 4096*2, // 8 KiB in total, shared by the 2 ways
          bytePerLine = 32,
          wayCount = 2,
          addressWidth = 32,
          cpuDataWidth = 32,
          memDataWidth = 32,
          catchIllegalAccess = true,
          catchAccessFault = true,
          asyncTagMemory = false,
          twoCycleRam = true,
          twoCycleCache = true
        )
      ),
      new DBusCachedPlugin(
        config = new DataCacheConfig(
          cacheSize = 4096*2, // 8 KiB in total, shared by the 2 ways
          bytePerLine = 32,
          wayCount = 2,
          addressWidth = 32,
          cpuDataWidth = 32,
          memDataWidth = 32,
          catchAccessError = true,
          catchIllegal = true,
          catchUnaligned = true,
          withLrSc = true,
          withAmo = true
        ),
        memoryTranslatorPortConfig = null
      ),
      new CsrPlugin(CsrPluginConfig.smallest(mtvecInit = 0x80000020L)),
      new DecoderSimplePlugin(
        catchIllegalInstruction = true
      ),
      new StaticMemoryTranslatorPlugin(
        // Addresses with bit 31 set are treated as IO
        ioRange      = _(31 downto 31) === 0x1
      ),
      new RegFilePlugin(
        regFileReadyKind = plugin.ASYNC,
        zeroBoot = false
      ),
      new IntAluPlugin,
      new SrcPlugin(
        separatedAddSub = false,
        executeInsertion = true
      ),
      new FullBarrelShifterPlugin,
      new HazardSimplePlugin(
        bypassExecute           = true,
        bypassMemory            = true,
        bypassWriteBack         = true,
        bypassWriteBackBuffer   = true,
        pessimisticUseSrc       = false,
        pessimisticWriteRegFile = false,
        pessimisticAddressMatch = false
      ),
      new MulPlugin,
      new DivPlugin,
      new BranchPlugin(
        earlyBranch = true,
        catchAddressMisaligned = true
      ),
      new YamlPlugin("cpu0.yaml")
    )
  )

  /**
   * Variant of the default preset whose HazardSimplePlugin is swapped for one
   * with every bypass enabled and all other arguments left at their defaults.
   */
  def fast : RiscvConfig = {
    val config = default

    //Replace HazardSimplePlugin to get datapath bypass
    config.cpuPlugins(config.cpuPlugins.indexWhere(_.isInstanceOf[HazardSimplePlugin])) = new HazardSimplePlugin(
      bypassExecute = true,
      bypassMemory = true,
      bypassWriteBack = true,
      bypassWriteBackBuffer = true
    )
    config
  }
}

/**
 * Simulation top level: one VexRiscv core clocked by `io.mainClk`, with its
 * instruction and data buses exported as AXI4 masters and a locally generated
 * reset.
 *
 * @param config core frequency and the plugin list used to build the CPU
 */
case class VexRiscvForSim(config : RiscvConfig) extends Component{
  import config._

  val io = new Bundle {
    //Clocks / reset
    val asyncReset = in Bool()
    val mainClk = in Bool()

    //CPU buses, exported as full-config AXI4 masters
    val iBus = master(Axi4ReadOnly(Axi4Config(addressWidth=32,dataWidth=32,idWidth=1).toFullConfig()))
    val dBus = master(Axi4(Axi4Config(addressWidth=32,dataWidth=32,idWidth=1).toFullConfig()))
  }

  //Clock domain of the reset controller; BOOT reset kind, so no reset signal is wired in
  val resetCtrlClockDomain = ClockDomain(
    clock = io.mainClk,
    config = ClockDomainConfig(
      resetKind = BOOT
    )
  )

  val resetCtrl = new ClockingArea(resetCtrlClockDomain) {
    val mainClkResetUnbuffered = False

    //Counter that keeps the reset asserted until it saturates (6 bits, all ones).
    //It starts at 0, so a reset is also performed automatically when the system boots.
    val systemClkResetCounter = Reg(UInt(6 bits)) init(0)
    when(systemClkResetCounter =/= U(systemClkResetCounter.range -> true)){
      systemClkResetCounter := systemClkResetCounter + 1
      mainClkResetUnbuffered := True
    }
    //Declared after the increment so that it takes priority: restart the count
    //while the synchronized asyncReset is high
    when(BufferCC(io.asyncReset)){
      systemClkResetCounter := 0
    }

    //Create all reset used later in the design
    val mainClkReset = RegNext(mainClkResetUnbuffered)
    val systemReset  = RegNext(mainClkResetUnbuffered)
  }

  val systemClockDomain = ClockDomain(
    clock = io.mainClk,
    reset = resetCtrl.systemReset,
    frequency = FixedFrequency(coreFrequency)
  )

  val system = new ClockingArea(systemClockDomain) {

    //True when a DBusSimplePlugin in the plugin list is configured as big endian
    val bigEndianDBus = config.cpuPlugins.exists {
      case plugin : DBusSimplePlugin => plugin.bigEndian
      case _ => false
    }

    //Instantiate the CPU
    val cpu = new VexRiscv(
      config = VexRiscvConfig(
        plugins = cpuPlugins
      )
    )

    //Walk the plugins used to instantiate the CPU to connect them to the SoC
    val timerInterrupt = False      //tied low
    val externalInterrupt = False   //tied low
    var iBus : Axi4ReadOnly = null
    var dBus : Axi4 = null
    for(plugin <- cpu.plugins) plugin match{
      case plugin : IBusCachedPlugin =>
        iBus = plugin.iBus.toAxi4ReadOnly().toFullConfig()
      case plugin : DBusCachedPlugin =>
        dBus = plugin.dBus.toAxi4Shared().toAxi4().toFullConfig()
      case plugin : CsrPlugin        => {
        plugin.externalInterrupt := externalInterrupt
        plugin.timerInterrupt := timerInterrupt
      }
      case _ =>
    }
    io.iBus <> iBus
    io.dBus <> dBus
  }
}

/** Entry point: elaborates [[VexRiscvForSim]] with a copy of the default configuration via `SpinalVhdl`. */
object VexRiscvForSim{
  /**
   * @param args command-line arguments; not read by this body
   */
  def main(args: Array[String]): Unit = {
    SpinalVhdl(VexRiscvForSim(RiscvConfig.default.copy()))
  }
}

ztachip avatar Jan 21 '24 19:01 ztachip

Were you accessing the cached memory region, or the uncached one? Otherwise, at this stage the best approach is to check the simulation to see what is happening. In VexRiscv there are a few "lastStage" signals that help figure out what is committing.

Dolu1990 avatar Jan 22 '24 10:01 Dolu1990

Outside of VexRiscv, I instantiated some logic to split iBus/dBus between an internal RAM block (16K) and external memory (256M) based on memory address region. During the test of doing continuously stack save/restore, I see VexRiscv flushing out the stack content but I see no read. I just noticed that the total number clocks of the operation is twice of what it should be. Do you have any internal signals I can tap to see if VexRiscv is stalled on some condition? Thanks Vuong


From: Dolu1990 @.> Sent: January 22, 2024 5:05 AM To: SpinalHDL/VexRiscv @.> Cc: ztachip @.>; Author @.> Subject: Re: [SpinalHDL/VexRiscv] Instructions to save/restore register to stack is taking 2 clock each (Issue #387)

where you accessing the cached memory region ? or the uncached one ? Else at that stage the best is to check the simulation to see what is happening. In VexRiscv there is a few "lastStage" signals to help figuring out what is commiting.

— Reply to this email directly, view it on GitHubhttps://github.com/SpinalHDL/VexRiscv/issues/387#issuecomment-1903648972, or unsubscribehttps://github.com/notifications/unsubscribe-auth/ACSDUFXK3VEDZONRCACYZGLYPY2V7AVCNFSM6AAAAABCEFCC26VHI2DSMVQWIX3LMV43OSLTON2WKQ3PNVWWK3TUHMYTSMBTGY2DQOJXGI. You are receiving this because you authored the thread.

ztachip avatar Jan 22 '24 13:01 ztachip

To make sure all data access fit the DataCache, My stack is only 2K. And my DataCache is 8K/2way

ztachip avatar Jan 22 '24 13:01 ztachip

I see VexRiscv flushing out the stack content but I see no read.

Its cache is write-through, so it is normal. Maybe the SoC memory system can't follow that bandwidth ?

Do you have any internal signals I can tap to see if VexRiscv is stalled on some condition?

Yes, on every stage, there is a xxx_arbitration_xxx which contains signals.

But i would say, first check if the memory buses are stuck some cycles.

Dolu1990 avatar Jan 24 '24 10:01 Dolu1990

There are no external memory cycles. The program simply pushes and pops register values to/from a stack that fits entirely in the cache.

But can we expect code below to take just one clock per instruction?

sw s0,4(sp) sw s1,8(sp) sw s2,12(sp) sw s3,16(sp) : : // Restore registers lw s0,4(sp) lw s1,8(sp) lw s2,12(sp) : :

ztachip avatar Feb 07 '24 05:02 ztachip

But can we expect code below to take just one clock per instruction?

Yes, it should as far as i know.

Can you share a wave file?

Dolu1990 avatar Feb 07 '24 15:02 Dolu1990

Sure, are there any VexRiscv internal signals you like me to show on wave file?


From: Dolu1990 @.> Sent: February 7, 2024 11:00 AM To: SpinalHDL/VexRiscv @.> Cc: ztachip @.>; Author @.> Subject: Re: [SpinalHDL/VexRiscv] Instructions to save/restore register to stack is taking 2 clock each (Issue #387)

But can we expect code below to take just one clock per instruction?

Yes, it should as far as i know.

Can you share a wave file?

— Reply to this email directly, view it on GitHubhttps://github.com/SpinalHDL/VexRiscv/issues/387#issuecomment-1932359214, or unsubscribehttps://github.com/notifications/unsubscribe-auth/ACSDUFUQ4OUGMNCFFZBXKZLYSOQIRAVCNFSM6AAAAABCEFCC26VHI2DSMVQWIX3LMV43OSLTON2WKQ3PNVWWK3TUHMYTSMZSGM2TSMRRGQ. You are receiving this because you authored the thread.Message ID: @.***>

ztachip avatar Feb 08 '24 04:02 ztachip

All of them ^^

Dolu1990 avatar Feb 12 '24 08:02 Dolu1990

I assume you want wavefile from Verilator? My code has VHDL so Verilator does not work for it unfortunately, but I can try to create a test program with Verilog+Verilator Does this work for you? Thanks


From: Dolu1990 @.> Sent: February 12, 2024 3:52 AM To: SpinalHDL/VexRiscv @.> Cc: ztachip @.>; Author @.> Subject: Re: [SpinalHDL/VexRiscv] Instructions to save/restore register to stack is taking 2 clock each (Issue #387)

All of them ^^

— Reply to this email directly, view it on GitHubhttps://github.com/SpinalHDL/VexRiscv/issues/387#issuecomment-1938255826, or unsubscribehttps://github.com/notifications/unsubscribe-auth/ACSDUFXRL4PTC6LY3QNRJILYTHJ3RAVCNFSM6AAAAABCEFCC26VHI2DSMVQWIX3LMV43OSLTON2WKQ3PNVWWK3TUHMYTSMZYGI2TKOBSGY. You are receiving this because you authored the thread.Message ID: @.***>

ztachip avatar Feb 12 '24 13:02 ztachip

doesn't need to be a wave from verilator, either a VCD or a FST is fine.

Dolu1990 avatar Feb 12 '24 13:02 Dolu1990