circt
circt copied to clipboard
[FIRRTL] Dedup memory wrapper modules in LowerMemory
Instead of just dedup-ing the external memory module, we include the memory wrapper module in the dedup calculation.
Resolves #6445.
Diff in circt-opt -firrtl-lower-memory test/Dialect/FIRRTL/lower-memory.mlir after this change:
--- main
+++ dedup-lower-memory-modules
@@ -75,7 +75,7 @@
firrtl.circuit "Dedup" {
firrtl.module @Dedup() {
%mem0_W0_addr, %mem0_W0_en, %mem0_W0_clk, %mem0_W0_data = firrtl.instance mem0 @mem0(in W0_addr: !firrtl.uint<4>, in W0_en: !firrtl.uint<1>, in W0_clk: !firrtl.clock, in W0_data: !firrtl.uint<42>)
- %mem1_W0_addr, %mem1_W0_en, %mem1_W0_clk, %mem1_W0_data = firrtl.instance mem1 @mem1(in W0_addr: !firrtl.uint<4>, in W0_en: !firrtl.uint<1>, in W0_clk: !firrtl.clock, in W0_data: !firrtl.uint<42>)
+ %mem1_W0_addr, %mem1_W0_en, %mem1_W0_clk, %mem1_W0_data = firrtl.instance mem1 @mem0(in W0_addr: !firrtl.uint<4>, in W0_en: !firrtl.uint<1>, in W0_clk: !firrtl.clock, in W0_data: !firrtl.uint<42>)
}
firrtl.module private @mem0(in %W0_addr: !firrtl.uint<4>, in %W0_en: !firrtl.uint<1>, in %W0_clk: !firrtl.clock, in %W0_data: !firrtl.uint<42>) {
%mem0_ext_W0_addr, %mem0_ext_W0_en, %mem0_ext_W0_clk, %mem0_ext_W0_data = firrtl.instance mem0_ext @mem0_ext(in W0_addr: !firrtl.uint<4>, in W0_en: !firrtl.uint<1>, in W0_clk: !firrtl.clock, in W0_data: !firrtl.uint<42>)
@@ -85,13 +85,6 @@
firrtl.strictconnect %mem0_ext_W0_data, %W0_data : !firrtl.uint<42>
}
firrtl.memmodule private @mem0_ext(in W0_addr: !firrtl.uint<4>, in W0_en: !firrtl.uint<1>, in W0_clk: !firrtl.clock, in W0_data: !firrtl.uint<42>) attributes {dataWidth = 42 : ui32, depth = 12 : ui64, extraPorts = [], maskBits = 1 : ui32, numReadPorts = 0 : ui32, numReadWritePorts = 0 : ui32, numWritePorts = 1 : ui32, readLatency = 1 : ui32, writeLatency = 1 : ui32}
- firrtl.module private @mem1(in %W0_addr: !firrtl.uint<4>, in %W0_en: !firrtl.uint<1>, in %W0_clk: !firrtl.clock, in %W0_data: !firrtl.uint<42>) {
- %mem0_ext_W0_addr, %mem0_ext_W0_en, %mem0_ext_W0_clk, %mem0_ext_W0_data = firrtl.instance mem0_ext @mem0_ext(in W0_addr: !firrtl.uint<4>, in W0_en: !firrtl.uint<1>, in W0_clk: !firrtl.clock, in W0_data: !firrtl.uint<42>)
- firrtl.strictconnect %mem0_ext_W0_addr, %W0_addr : !firrtl.uint<4>
- firrtl.strictconnect %mem0_ext_W0_en, %W0_en : !firrtl.uint<1>
- firrtl.strictconnect %mem0_ext_W0_clk, %W0_clk : !firrtl.clock
- firrtl.strictconnect %mem0_ext_W0_data, %W0_data : !firrtl.uint<42>
- }
}
firrtl.circuit "NoTestharnessDedup0" {
firrtl.module @NoTestharnessDedup0() {
@@ -257,30 +250,23 @@
hw.hierpath private @nla0 [@NonLocalAnnotation::@dut, @DUT::@mem0]
hw.hierpath private @nla0_0 [@NonLocalAnnotation::@dut, @DUT::@mem0, @mem0]
hw.hierpath private @nla1 [@NonLocalAnnotation::@dut, @DUT]
- hw.hierpath private @nla1_0 [@NonLocalAnnotation::@dut, @DUT::@sym, @mem1]
+ hw.hierpath private @nla1_0 [@NonLocalAnnotation::@dut, @DUT::@sym, @mem0]
firrtl.module @NonLocalAnnotation() {
firrtl.instance dut sym @dut @DUT()
}
firrtl.module @DUT() {
%mem0_W0_addr, %mem0_W0_en, %mem0_W0_clk, %mem0_W0_data = firrtl.instance mem0 sym @mem0 @mem0(in W0_addr: !firrtl.uint<4>, in W0_en: !firrtl.uint<1>, in W0_clk: !firrtl.clock, in W0_data: !firrtl.uint<42>)
- %mem1_W0_addr, %mem1_W0_en, %mem1_W0_clk, %mem1_W0_data = firrtl.instance mem1 sym @sym @mem1(in W0_addr: !firrtl.uint<4>, in W0_en: !firrtl.uint<1>, in W0_clk: !firrtl.clock, in W0_data: !firrtl.uint<42>)
+ %mem1_W0_addr, %mem1_W0_en, %mem1_W0_clk, %mem1_W0_data = firrtl.instance mem1 sym @sym @mem0(in W0_addr: !firrtl.uint<4>, in W0_en: !firrtl.uint<1>, in W0_clk: !firrtl.clock, in W0_data: !firrtl.uint<42>)
%MRead_read = firrtl.mem Undefined {depth = 12 : i64, name = "MRead", portNames = ["read"], readLatency = 0 : i32, writeLatency = 1 : i32} : !firrtl.bundle<addr: uint<4>, en: uint<1>, clk: clock, data flip: uint<42>>
}
firrtl.module private @mem0(in %W0_addr: !firrtl.uint<4>, in %W0_en: !firrtl.uint<1>, in %W0_clk: !firrtl.clock, in %W0_data: !firrtl.uint<42>) {
- %mem0_ext_W0_addr, %mem0_ext_W0_en, %mem0_ext_W0_clk, %mem0_ext_W0_data = firrtl.instance mem0_ext sym @mem0_ext {annotations = [{circt.nonlocal = @nla0_0, class = "test0"}]} @mem0_ext(in W0_addr: !firrtl.uint<4>, in W0_en: !firrtl.uint<1>, in W0_clk: !firrtl.clock, in W0_data: !firrtl.uint<42>)
+ %mem0_ext_W0_addr, %mem0_ext_W0_en, %mem0_ext_W0_clk, %mem0_ext_W0_data = firrtl.instance mem0_ext sym @mem0_ext {annotations = [{circt.nonlocal = @nla0_0, class = "test0"}, {circt.nonlocal = @nla1_0, class = "test1"}]} @mem0_ext(in W0_addr: !firrtl.uint<4>, in W0_en: !firrtl.uint<1>, in W0_clk: !firrtl.clock, in W0_data: !firrtl.uint<42>)
firrtl.strictconnect %mem0_ext_W0_addr, %W0_addr : !firrtl.uint<4>
firrtl.strictconnect %mem0_ext_W0_en, %W0_en : !firrtl.uint<1>
firrtl.strictconnect %mem0_ext_W0_clk, %W0_clk : !firrtl.clock
firrtl.strictconnect %mem0_ext_W0_data, %W0_data : !firrtl.uint<42>
}
firrtl.memmodule private @mem0_ext(in W0_addr: !firrtl.uint<4>, in W0_en: !firrtl.uint<1>, in W0_clk: !firrtl.clock, in W0_data: !firrtl.uint<42>) attributes {dataWidth = 42 : ui32, depth = 12 : ui64, extraPorts = [], maskBits = 1 : ui32, numReadPorts = 0 : ui32, numReadWritePorts = 0 : ui32, numWritePorts = 1 : ui32, readLatency = 1 : ui32, writeLatency = 1 : ui32}
- firrtl.module private @mem1(in %W0_addr: !firrtl.uint<4>, in %W0_en: !firrtl.uint<1>, in %W0_clk: !firrtl.clock, in %W0_data: !firrtl.uint<42>) {
- %mem0_ext_W0_addr, %mem0_ext_W0_en, %mem0_ext_W0_clk, %mem0_ext_W0_data = firrtl.instance mem0_ext sym @mem0_ext {annotations = [{circt.nonlocal = @nla1_0, class = "test1"}]} @mem0_ext(in W0_addr: !firrtl.uint<4>, in W0_en: !firrtl.uint<1>, in W0_clk: !firrtl.clock, in W0_data: !firrtl.uint<42>)
- firrtl.strictconnect %mem0_ext_W0_addr, %W0_addr : !firrtl.uint<4>
- firrtl.strictconnect %mem0_ext_W0_en, %W0_en : !firrtl.uint<1>
- firrtl.strictconnect %mem0_ext_W0_clk, %W0_clk : !firrtl.clock
- firrtl.strictconnect %mem0_ext_W0_data, %W0_data : !firrtl.uint<42>
- }
}
}
In the PR comment, I don't understand
@mem0instantiating mem0_ext_0 and mem0_ext. Shouldn't@Dedupsimply be instantiating mem0 twice? The change to the test in the PR looks more like what I'd expect.
Yup, you're right. There's a bug in my change right now, I need to move the memInst creation to be at the point where we define the wrapper module. I think this should also explain the issue I was having with the duplicate inner symbols.
Thanks for pointing that out!
Alright, I've made a few updates:
MemOpsis astructinstead of apair- We create the external memory instance when we create the wrapper module definition, to ensure that we don't create duplicate instances inside that wrapper module.
- We strip all NLAs from that external memory instance when we create it, and then port over the NLAs of every
MemOpthat now refers to that new instance.
I've updated the PR comment with the new output of LowerMemory on lower-memory.mlir.
Side question: in the NonLocalAnnotation circuit in lower-memory.mlir, should the @nla0 and @nla1 non-local annotations be deleted during this pass (since they refer to the original firrtl.mem instances), or are they cleaned up elsewhere, or should we just leave them be?
I'm referring to the fact that all four of these hw.hierpath operations are in the output from this pass:
firrtl.circuit "NonLocalAnnotation" {
hw.hierpath private @nla0 [@NonLocalAnnotation::@dut, @DUT::@mem0]
hw.hierpath private @nla0_0 [@NonLocalAnnotation::@dut, @DUT::@mem0, @mem0]
hw.hierpath private @nla1 [@NonLocalAnnotation::@dut, @DUT]
hw.hierpath private @nla1_0 [@NonLocalAnnotation::@dut, @DUT::@sym, @mem0]
...
Side question: in the
NonLocalAnnotationcircuit inlower-memory.mlir, should the@nla0and@nla1non-local annotations be deleted during this pass (since they refer to the originalfirrtl.meminstances), or are they cleaned up elsewhere, or should we just leave them be?
If you change then hierarchy, you need to update the hierpaths. Dedup is somewhat unique in that it has to interpret the NLAs to decide if they matter or not (e.g. block dedup). (this is part of what makes dedup fragile)
Pinging @youngar and @prithayan as they are more familiar with the memory op related constraints.
Sorry for the delay, I will take a look at this today.
The changes make sense. Dead
HierPathOpcan be left for cleanup by later passes, but if symbol names are updated theHierPathOpneeds to be updated and cannot be left in an inconsistent state. The pass is already handling them correctly.
Thanks for clearing that up @prithayan! Is there anything I should take care of before this can be merged? Let me know if you'd like me to rebase on the latest main.
The changes make sense. Dead
HierPathOpcan be left for cleanup by later passes, but if symbol names are updated theHierPathOpneeds to be updated and cannot be left in an inconsistent state. The pass is already handling them correctly.Thanks for clearing that up @prithayan! Is there anything I should take care of before this can be merged? Let me know if you'd like me to rebase on the latest
main.
Please rebase and merge it.
The changes make sense. Dead
HierPathOpcan be left for cleanup by later passes, but if symbol names are updated theHierPathOpneeds to be updated and cannot be left in an inconsistent state. The pass is already handling them correctly.Thanks for clearing that up @prithayan! Is there anything I should take care of before this can be merged? Let me know if you'd like me to rebase on the latest
main.Please rebase and merge it.
I've rebased, but I don't have write permissions, so someone else will have to merge it.
@prithayan just rebased again, can you merge this please? Thanks!
Sorry, missed to merge it last week. Thanks for fixing the issue :) Looking forward to more future contribution @tymcauley .
Sorry, missed to merge it last week. Thanks for fixing the issue :) Looking forward to more future contribution @tymcauley .
No problem @prithayan, thanks so much for taking care of this!