circt icon indicating copy to clipboard operation
circt copied to clipboard

[FIRRTL] Dedup memory wrapper modules in LowerMemory

Open tymcauley opened this issue 1 year ago • 6 comments

Instead of just dedup-ing the external memory module, we include the memory wrapper module in the dedup calculation.

Resolves #6445.

Diff in circt-opt -firrtl-lower-memory test/Dialect/FIRRTL/lower-memory.mlir after this change:

--- main
+++ dedup-lower-memory-modules
@@ -75,7 +75,7 @@
   firrtl.circuit "Dedup" {
     firrtl.module @Dedup() {
       %mem0_W0_addr, %mem0_W0_en, %mem0_W0_clk, %mem0_W0_data = firrtl.instance mem0 @mem0(in W0_addr: !firrtl.uint<4>, in W0_en: !firrtl.uint<1>, in W0_clk: !firrtl.clock, in W0_data: !firrtl.uint<42>)
-      %mem1_W0_addr, %mem1_W0_en, %mem1_W0_clk, %mem1_W0_data = firrtl.instance mem1 @mem1(in W0_addr: !firrtl.uint<4>, in W0_en: !firrtl.uint<1>, in W0_clk: !firrtl.clock, in W0_data: !firrtl.uint<42>)
+      %mem1_W0_addr, %mem1_W0_en, %mem1_W0_clk, %mem1_W0_data = firrtl.instance mem1 @mem0(in W0_addr: !firrtl.uint<4>, in W0_en: !firrtl.uint<1>, in W0_clk: !firrtl.clock, in W0_data: !firrtl.uint<42>)
     }
     firrtl.module private @mem0(in %W0_addr: !firrtl.uint<4>, in %W0_en: !firrtl.uint<1>, in %W0_clk: !firrtl.clock, in %W0_data: !firrtl.uint<42>) {
       %mem0_ext_W0_addr, %mem0_ext_W0_en, %mem0_ext_W0_clk, %mem0_ext_W0_data = firrtl.instance mem0_ext @mem0_ext(in W0_addr: !firrtl.uint<4>, in W0_en: !firrtl.uint<1>, in W0_clk: !firrtl.clock, in W0_data: !firrtl.uint<42>)
@@ -85,13 +85,6 @@
       firrtl.strictconnect %mem0_ext_W0_data, %W0_data : !firrtl.uint<42>
     }
     firrtl.memmodule private @mem0_ext(in W0_addr: !firrtl.uint<4>, in W0_en: !firrtl.uint<1>, in W0_clk: !firrtl.clock, in W0_data: !firrtl.uint<42>) attributes {dataWidth = 42 : ui32, depth = 12 : ui64, extraPorts = [], maskBits = 1 : ui32, numReadPorts = 0 : ui32, numReadWritePorts = 0 : ui32, numWritePorts = 1 : ui32, readLatency = 1 : ui32, writeLatency = 1 : ui32}
-    firrtl.module private @mem1(in %W0_addr: !firrtl.uint<4>, in %W0_en: !firrtl.uint<1>, in %W0_clk: !firrtl.clock, in %W0_data: !firrtl.uint<42>) {
-      %mem0_ext_W0_addr, %mem0_ext_W0_en, %mem0_ext_W0_clk, %mem0_ext_W0_data = firrtl.instance mem0_ext @mem0_ext(in W0_addr: !firrtl.uint<4>, in W0_en: !firrtl.uint<1>, in W0_clk: !firrtl.clock, in W0_data: !firrtl.uint<42>)
-      firrtl.strictconnect %mem0_ext_W0_addr, %W0_addr : !firrtl.uint<4>
-      firrtl.strictconnect %mem0_ext_W0_en, %W0_en : !firrtl.uint<1>
-      firrtl.strictconnect %mem0_ext_W0_clk, %W0_clk : !firrtl.clock
-      firrtl.strictconnect %mem0_ext_W0_data, %W0_data : !firrtl.uint<42>
-    }
   }
   firrtl.circuit "NoTestharnessDedup0" {
     firrtl.module @NoTestharnessDedup0() {
@@ -257,30 +250,23 @@
     hw.hierpath private @nla0 [@NonLocalAnnotation::@dut, @DUT::@mem0]
     hw.hierpath private @nla0_0 [@NonLocalAnnotation::@dut, @DUT::@mem0, @mem0]
     hw.hierpath private @nla1 [@NonLocalAnnotation::@dut, @DUT]
-    hw.hierpath private @nla1_0 [@NonLocalAnnotation::@dut, @DUT::@sym, @mem1]
+    hw.hierpath private @nla1_0 [@NonLocalAnnotation::@dut, @DUT::@sym, @mem0]
     firrtl.module @NonLocalAnnotation() {
       firrtl.instance dut sym @dut @DUT()
     }
     firrtl.module @DUT() {
       %mem0_W0_addr, %mem0_W0_en, %mem0_W0_clk, %mem0_W0_data = firrtl.instance mem0 sym @mem0 @mem0(in W0_addr: !firrtl.uint<4>, in W0_en: !firrtl.uint<1>, in W0_clk: !firrtl.clock, in W0_data: !firrtl.uint<42>)
-      %mem1_W0_addr, %mem1_W0_en, %mem1_W0_clk, %mem1_W0_data = firrtl.instance mem1 sym @sym @mem1(in W0_addr: !firrtl.uint<4>, in W0_en: !firrtl.uint<1>, in W0_clk: !firrtl.clock, in W0_data: !firrtl.uint<42>)
+      %mem1_W0_addr, %mem1_W0_en, %mem1_W0_clk, %mem1_W0_data = firrtl.instance mem1 sym @sym @mem0(in W0_addr: !firrtl.uint<4>, in W0_en: !firrtl.uint<1>, in W0_clk: !firrtl.clock, in W0_data: !firrtl.uint<42>)
       %MRead_read = firrtl.mem Undefined {depth = 12 : i64, name = "MRead", portNames = ["read"], readLatency = 0 : i32, writeLatency = 1 : i32} : !firrtl.bundle<addr: uint<4>, en: uint<1>, clk: clock, data flip: uint<42>>
     }
     firrtl.module private @mem0(in %W0_addr: !firrtl.uint<4>, in %W0_en: !firrtl.uint<1>, in %W0_clk: !firrtl.clock, in %W0_data: !firrtl.uint<42>) {
-      %mem0_ext_W0_addr, %mem0_ext_W0_en, %mem0_ext_W0_clk, %mem0_ext_W0_data = firrtl.instance mem0_ext sym @mem0_ext {annotations = [{circt.nonlocal = @nla0_0, class = "test0"}]} @mem0_ext(in W0_addr: !firrtl.uint<4>, in W0_en: !firrtl.uint<1>, in W0_clk: !firrtl.clock, in W0_data: !firrtl.uint<42>)
+      %mem0_ext_W0_addr, %mem0_ext_W0_en, %mem0_ext_W0_clk, %mem0_ext_W0_data = firrtl.instance mem0_ext sym @mem0_ext {annotations = [{circt.nonlocal = @nla0_0, class = "test0"}, {circt.nonlocal = @nla1_0, class = "test1"}]} @mem0_ext(in W0_addr: !firrtl.uint<4>, in W0_en: !firrtl.uint<1>, in W0_clk: !firrtl.clock, in W0_data: !firrtl.uint<42>)
       firrtl.strictconnect %mem0_ext_W0_addr, %W0_addr : !firrtl.uint<4>
       firrtl.strictconnect %mem0_ext_W0_en, %W0_en : !firrtl.uint<1>
       firrtl.strictconnect %mem0_ext_W0_clk, %W0_clk : !firrtl.clock
       firrtl.strictconnect %mem0_ext_W0_data, %W0_data : !firrtl.uint<42>
     }
     firrtl.memmodule private @mem0_ext(in W0_addr: !firrtl.uint<4>, in W0_en: !firrtl.uint<1>, in W0_clk: !firrtl.clock, in W0_data: !firrtl.uint<42>) attributes {dataWidth = 42 : ui32, depth = 12 : ui64, extraPorts = [], maskBits = 1 : ui32, numReadPorts = 0 : ui32, numReadWritePorts = 0 : ui32, numWritePorts = 1 : ui32, readLatency = 1 : ui32, writeLatency = 1 : ui32}
-    firrtl.module private @mem1(in %W0_addr: !firrtl.uint<4>, in %W0_en: !firrtl.uint<1>, in %W0_clk: !firrtl.clock, in %W0_data: !firrtl.uint<42>) {
-      %mem0_ext_W0_addr, %mem0_ext_W0_en, %mem0_ext_W0_clk, %mem0_ext_W0_data = firrtl.instance mem0_ext sym @mem0_ext {annotations = [{circt.nonlocal = @nla1_0, class = "test1"}]} @mem0_ext(in W0_addr: !firrtl.uint<4>, in W0_en: !firrtl.uint<1>, in W0_clk: !firrtl.clock, in W0_data: !firrtl.uint<42>)
-      firrtl.strictconnect %mem0_ext_W0_addr, %W0_addr : !firrtl.uint<4>
-      firrtl.strictconnect %mem0_ext_W0_en, %W0_en : !firrtl.uint<1>
-      firrtl.strictconnect %mem0_ext_W0_clk, %W0_clk : !firrtl.clock
-      firrtl.strictconnect %mem0_ext_W0_data, %W0_data : !firrtl.uint<42>
-    }
   }
 }

tymcauley avatar Feb 19 '24 18:02 tymcauley

In the PR comment, I don't understand @mem0 instantiating mem0_ext_0 and mem0_ext. Shouldn't @Dedup simply be instantiating mem0 twice? The change to the test in the PR looks more like what I'd expect.

Yup, you're right. There's a bug in my change right now, I need to move the memInst creation to be at the point where we define the wrapper module. I think this should also explain the issue I was having with the duplicate inner symbols.

Thanks for pointing that out!

tymcauley avatar Feb 21 '24 17:02 tymcauley

Alright, I've made a few updates:

  • MemOps is a struct instead of a pair
  • We create the external memory instance when we create the wrapper module definition, to ensure that we don't create duplicate instances inside that wrapper module.
  • We strip all NLAs from that external memory instance when we create it, and then port over the NLAs of every MemOp that now refers to that new instance.

I've updated the PR comment with the new output of LowerMemory on lower-memory.mlir.

tymcauley avatar Feb 21 '24 19:02 tymcauley

Side question: in the NonLocalAnnotation circuit in lower-memory.mlir, should the @nla0 and @nla1 non-local annotations be deleted during this pass (since they refer to the original firrtl.mem instances), or are they cleaned up elsewhere, or should we just leave them be?

I'm referring to the fact that all four of these hw.hierpath operations are in the output from this pass:

firrtl.circuit "NonLocalAnnotation" {
  hw.hierpath private @nla0 [@NonLocalAnnotation::@dut, @DUT::@mem0]
  hw.hierpath private @nla0_0 [@NonLocalAnnotation::@dut, @DUT::@mem0, @mem0]
  hw.hierpath private @nla1 [@NonLocalAnnotation::@dut, @DUT]
  hw.hierpath private @nla1_0 [@NonLocalAnnotation::@dut, @DUT::@sym, @mem0]
  ...

tymcauley avatar Feb 25 '24 22:02 tymcauley

Side question: in the NonLocalAnnotation circuit in lower-memory.mlir, should the @nla0 and @nla1 non-local annotations be deleted during this pass (since they refer to the original firrtl.mem instances), or are they cleaned up elsewhere, or should we just leave them be?

If you change then hierarchy, you need to update the hierpaths. Dedup is somewhat unique in that it has to interpret the NLAs to decide if they matter or not (e.g. block dedup). (this is part of what makes dedup fragile)

darthscsi avatar Feb 26 '24 18:02 darthscsi

Pinging @youngar and @prithayan as they are more familiar with the memory op related constraints.

darthscsi avatar Feb 26 '24 18:02 darthscsi

Sorry for the delay, I will take a look at this today.

prithayan avatar Feb 26 '24 20:02 prithayan

The changes make sense. Dead HierPathOp can be left for cleanup by later passes, but if symbol names are updated the HierPathOp needs to be updated and cannot be left in an inconsistent state. The pass is already handling them correctly.

Thanks for clearing that up @prithayan! Is there anything I should take care of before this can be merged? Let me know if you'd like me to rebase on the latest main.

tymcauley avatar Mar 01 '24 18:03 tymcauley

The changes make sense. Dead HierPathOp can be left for cleanup by later passes, but if symbol names are updated the HierPathOp needs to be updated and cannot be left in an inconsistent state. The pass is already handling them correctly.

Thanks for clearing that up @prithayan! Is there anything I should take care of before this can be merged? Let me know if you'd like me to rebase on the latest main.

Please rebase and merge it.

prithayan avatar Mar 05 '24 00:03 prithayan

The changes make sense. Dead HierPathOp can be left for cleanup by later passes, but if symbol names are updated the HierPathOp needs to be updated and cannot be left in an inconsistent state. The pass is already handling them correctly.

Thanks for clearing that up @prithayan! Is there anything I should take care of before this can be merged? Let me know if you'd like me to rebase on the latest main.

Please rebase and merge it.

I've rebased, but I don't have write permissions, so someone else will have to merge it.

tymcauley avatar Mar 05 '24 00:03 tymcauley

@prithayan just rebased again, can you merge this please? Thanks!

tymcauley avatar Mar 10 '24 22:03 tymcauley

Sorry, missed to merge it last week. Thanks for fixing the issue :) Looking forward to more future contribution @tymcauley .

prithayan avatar Mar 11 '24 02:03 prithayan

Sorry, missed to merge it last week. Thanks for fixing the issue :) Looking forward to more future contribution @tymcauley .

No problem @prithayan, thanks so much for taking care of this!

tymcauley avatar Mar 11 '24 15:03 tymcauley