gfx icon indicating copy to clipboard operation
gfx copied to clipboard

Re-expose a smart DOWNLOAD memory type on Metal

Open kvark opened this issue 4 years ago • 1 comments

The main problem with the old DOWNLOAD type, which got removed in #3514, was that invalidating it resulted in a pipeline stall, since we need to submit that to a queue as a command. Perhaps, instead we can do the following:

  • re-enable the type in the same way it existed
  • invalidate() would do nothing
  • the transition from anything to HOST_READ access would be implemented as synchronize_resource

I'm not 100% sure that this is a valid implementation, but it sounds like it could be one, and it wouldn't include any stalls.

kvark avatar Dec 06 '20 16:12 kvark

I played around it, and it appears to be more painful than it seems. Our CPU-visible memory has an associated buffer to it, but Metal's synchronize_resource doesn't take a range in arguments. That means, we could have a giant download memory allocation, and it will be synchronized for each little change in each of its buffers.

I also tried to refactor it to have dedicated allocation for these non-coherent buffers, but then mapping memory becomes complicated, just like in DX11. At this point, I'm not convinced it's worth it.

My WIP patch for the records:

diff --git a/src/backend/gl/src/command.rs b/src/backend/gl/src/command.rs
index 3a7b47e0a..de213928b 100644
--- a/src/backend/gl/src/command.rs
+++ b/src/backend/gl/src/command.rs
@@ -688,7 +688,7 @@ impl command::CommandBuffer<Backend> for CommandBuffer {
         T: IntoIterator,
         T::Item: Borrow<memory::Barrier<'a, Backend>>,
     {
-        // TODO
+        //TODO
     }
 
     unsafe fn fill_buffer(&mut self, _buffer: &n::Buffer, _range: buffer::SubRange, _data: u32) {
diff --git a/src/backend/metal/src/command.rs b/src/backend/metal/src/command.rs
index 4db9a098a..05fcb8532 100644
--- a/src/backend/metal/src/command.rs
+++ b/src/backend/metal/src/command.rs
@@ -2035,6 +2035,9 @@ where
                 );
             }
         }
+        Cmd::InvalidateBuffer(buffer) => {
+            encoder.synchronize_resource(buffer.as_native());
+        }
     }
 }
 
@@ -2675,11 +2678,27 @@ impl com::CommandBuffer<Backend> for CommandBuffer {
         &mut self,
         _stages: Range<pso::PipelineStage>,
         _dependencies: memory::Dependencies,
-        _barriers: T,
+        barriers: T,
     ) where
         T: IntoIterator,
         T::Item: Borrow<memory::Barrier<'a, Backend>>,
     {
+        for barrier in barriers {
+            match *barrier.borrow() {
+                // take care of memory invalidations
+                memory::Barrier::Buffer {
+                    states, target, range: _, families: _,
+                } => {
+                    if let native::Buffer::Bound { ref raw, non_coherent: true, .. } = *target {
+                        if states.end.contains(buffer::Access::HOST_READ) {
+                            let command = soft::BlitCommand::InvalidateBuffer(AsNative::from(&**raw));
+                            self.inner.borrow_mut().sink().blit_commands(iter::once(command));
+                        }
+                    }
+                }
+                _ => {}
+            }
+        }
     }
 
     unsafe fn fill_buffer(&mut self, buffer: &native::Buffer, sub: buffer::SubRange, data: u32) {
diff --git a/src/backend/metal/src/device.rs b/src/backend/metal/src/device.rs
index 160076ced..895608346 100644
--- a/src/backend/metal/src/device.rs
+++ b/src/backend/metal/src/device.rs
@@ -25,10 +25,7 @@ use metal::{
     MTLPrimitiveType, MTLResourceOptions, MTLSamplerMipFilter, MTLStorageMode, MTLTextureType,
     MTLVertexStepFunction,
 };
-use objc::{
-    rc::autoreleasepool,
-    runtime::{Object, BOOL, NO},
-};
+use objc::runtime::{Object, BOOL, NO};
 use parking_lot::Mutex;
 use spirv_cross::{msl, spirv, ErrorCode as SpirvErrorCode};
 
@@ -170,10 +167,10 @@ bitflags! {
         const SHARED = 1<<1;
         // = `DEVICE_LOCAL | CPU_VISIBLE`
         const MANAGED_UPLOAD = 1<<2;
-        // = `DEVICE_LOCAL | CPU_VISIBLE | CACHED`
+        // = `DEVICE_LOCAL | CPU_VISIBLE | CPU_CACHED`
         // Memory range invalidation is implemented to stall the whole pipeline.
         // It's inefficient, therefore we aren't going to expose this type.
-        //const MANAGED_DOWNLOAD = 1<<3;
+        const MANAGED_DOWNLOAD = 1<<3;
     }
 }
 
@@ -183,7 +180,7 @@ impl MemoryTypes {
             Self::PRIVATE => (MTLStorageMode::Private, MTLCPUCacheMode::DefaultCache),
             Self::SHARED => (MTLStorageMode::Shared, MTLCPUCacheMode::DefaultCache),
             Self::MANAGED_UPLOAD => (MTLStorageMode::Managed, MTLCPUCacheMode::WriteCombined),
-            //Self::MANAGED_DOWNLOAD => (MTLStorageMode::Managed, MTLCPUCacheMode::DefaultCache),
+            Self::MANAGED_DOWNLOAD => (MTLStorageMode::Managed, MTLCPUCacheMode::DefaultCache),
             _ => unreachable!(),
         }
     }
@@ -216,7 +213,11 @@ impl PhysicalDevice {
                     properties: Properties::DEVICE_LOCAL | Properties::CPU_VISIBLE,
                     heap_index: 1,
                 },
-                // MANAGED_DOWNLOAD (removed)
+                adapter::MemoryType {
+                    // MANAGED_DOWNLOAD
+                    properties: Properties::DEVICE_LOCAL | Properties::CPU_VISIBLE | Properties::CPU_CACHED,
+                    heap_index: 1,
+                },
             ]
         } else {
             vec![
@@ -562,6 +563,7 @@ impl Device {
     fn _is_heap_coherent(&self, heap: &n::MemoryHeap) -> bool {
         match *heap {
             n::MemoryHeap::Private => false,
+            n::MemoryHeap::PublicNonCoherent => false,
             n::MemoryHeap::Public(memory_type, _) => self.memory_types[memory_type.0]
                 .properties
                 .contains(Properties::COHERENT),
@@ -1917,48 +1919,11 @@ impl hal::device::Device<Backend> for Device {
         Ok(())
     }
 
-    unsafe fn invalidate_mapped_memory_ranges<'a, I>(&self, iter: I) -> Result<(), d::OutOfMemory>
+    unsafe fn invalidate_mapped_memory_ranges<'a, I>(&self, _iter: I) -> Result<(), d::OutOfMemory>
     where
         I: IntoIterator,
         I::Item: Borrow<(&'a n::Memory, memory::Segment)>,
     {
-        let mut num_syncs = 0;
-        debug!("invalidate_mapped_memory_ranges");
-
-        // temporary command buffer to copy the contents from
-        // the given buffers into the allocated CPU-visible buffers
-        // Note: using a separate internal queue in order to avoid a stall
-        let cmd_buffer = self.invalidation_queue.spawn_temp();
-        autoreleasepool(|| {
-            let encoder = cmd_buffer.new_blit_command_encoder();
-
-            for item in iter {
-                let (memory, ref segment) = *item.borrow();
-                let range = memory.resolve(segment);
-                debug!("\trange {:?}", range);
-
-                match memory.heap {
-                    n::MemoryHeap::Native(_) => unimplemented!(),
-                    n::MemoryHeap::Public(mt, ref cpu_buffer)
-                        if 1 << mt.0 != MemoryTypes::SHARED.bits() as usize =>
-                    {
-                        num_syncs += 1;
-                        encoder.synchronize_resource(cpu_buffer);
-                    }
-                    n::MemoryHeap::Public(..) => continue,
-                    n::MemoryHeap::Private => panic!("Can't map private memory!"),
-                };
-            }
-            encoder.end_encoding();
-        });
-
-        if num_syncs != 0 {
-            debug!("\twaiting...");
-            cmd_buffer.set_label("invalidate_mapped_memory_ranges");
-            cmd_buffer.commit();
-            cmd_buffer.wait_until_completed();
-        }
-
         Ok(())
     }
 
@@ -2397,6 +2362,8 @@ impl hal::device::Device<Backend> for Device {
             n::MemoryHeap::Native(heap_raw)
         } else if storage == MTLStorageMode::Private {
             n::MemoryHeap::Private
+        } else if storage == MTLStorageMode::Managed && cache == MTLCPUCacheMode::Default {
+            n::MemoryHeap::PublicNonCoherent
         } else {
             let options = conv::resource_options_from_storage_and_cache(storage, cache);
             let cpu_buffer = device.new_buffer(size, options);
@@ -2480,20 +2447,39 @@ impl hal::device::Device<Backend> for Device {
         };
         debug!("bind_buffer_memory of size {} at offset {}", size, offset);
         *buffer = match memory.heap {
-            n::MemoryHeap::Native(ref heap) => {
-                let options = conv::resource_options_from_storage_and_cache(
-                    heap.storage_mode(),
-                    heap.cpu_cache_mode(),
-                );
-                let raw = heap.new_buffer(size, options).unwrap_or_else(|| {
-                    // TODO: disable hazard tracking?
-                    self.shared.device.lock().new_buffer(size, options)
-                });
+            n::MemoryHeap::Private => {
+                //TODO: check for aliasing
+                let options = MTLResourceOptions::StorageModePrivate
+                    | MTLResourceOptions::CPUCacheModeDefaultCache;
+                let raw = self.shared.device.lock().new_buffer(size, options);
                 raw.set_label(name);
                 n::Buffer::Bound {
                     raw,
                     options,
-                    range: 0..size, //TODO?
+                    range: 0..size,
+                    non_coherent: false,
+                }
+            }
+            n::MemoryHeap::PublicNonCoherent => {
+                debug!("\tmapped to public non-coherent heap");
+                let (storage, cache) = MemoryTypes::describe(mt.0);
+                let options = conv::resource_options_from_storage_and_cache(storage, cache);
+                if offset == 0x0 && size == cpu_buffer.length() {
+                    cpu_buffer.set_label(name);
+                } else if self.shared.private_caps.supports_debug_markers {
+                    cpu_buffer.add_debug_marker(
+                        name,
+                        NSRange {
+                            location: offset,
+                            length: size,
+                        },
+                    );
+                }
+                n::Buffer::Bound {
+                    raw: cpu_buffer.clone(),
+                    options,
+                    range: offset..offset + size,
+                    non_coherent: false,
                 }
             }
             n::MemoryHeap::Public(mt, ref cpu_buffer) => {
@@ -2518,18 +2504,24 @@ impl hal::device::Device<Backend> for Device {
                     raw: cpu_buffer.clone(),
                     options,
                     range: offset..offset + size,
+                    non_coherent: false,
                 }
             }
-            n::MemoryHeap::Private => {
-                //TODO: check for aliasing
-                let options = MTLResourceOptions::StorageModePrivate
-                    | MTLResourceOptions::CPUCacheModeDefaultCache;
-                let raw = self.shared.device.lock().new_buffer(size, options);
+            n::MemoryHeap::Native(ref heap) => {
+                let options = conv::resource_options_from_storage_and_cache(
+                    heap.storage_mode(),
+                    heap.cpu_cache_mode(),
+                );
+                let raw = heap.new_buffer(size, options).unwrap_or_else(|| {
+                    // TODO: disable hazard tracking?
+                    self.shared.device.lock().new_buffer(size, options)
+                });
                 raw.set_label(name);
                 n::Buffer::Bound {
                     raw,
                     options,
-                    range: 0..size,
+                    range: 0..size, //TODO?
+                    non_coherent: false,
                 }
             }
         };
@@ -2558,6 +2550,7 @@ impl hal::device::Device<Backend> for Device {
                 ref raw,
                 ref range,
                 options,
+                non_coherent: _,
             } => (raw, range, options),
             n::Buffer::Unbound { .. } => panic!("Unexpected Buffer::Unbound"),
         };
diff --git a/src/backend/metal/src/native.rs b/src/backend/metal/src/native.rs
index c25e8cabe..948da57e2 100644
--- a/src/backend/metal/src/native.rs
+++ b/src/backend/metal/src/native.rs
@@ -412,6 +412,7 @@ pub enum Buffer {
         raw: metal::Buffer,
         range: Range<u64>,
         options: metal::MTLResourceOptions,
+        non_coherent: bool,
     },
 }
 
@@ -918,6 +919,7 @@ unsafe impl Sync for Memory {}
 #[derive(Debug)]
 pub(crate) enum MemoryHeap {
     Private,
+    PublicNonCoherent,
     Public(MemoryTypeId, metal::Buffer),
     Native(metal::Heap),
 }
diff --git a/src/backend/metal/src/soft.rs b/src/backend/metal/src/soft.rs
index 278df1f27..0b1438f88 100644
--- a/src/backend/metal/src/soft.rs
+++ b/src/backend/metal/src/soft.rs
@@ -148,6 +148,7 @@ pub enum BlitCommand {
         dst: BufferPtr,
         region: hal::command::BufferImageCopy,
     },
+    InvalidateBuffer(BufferPtr),
 }
 
 #[derive(Clone, Debug)]

kvark avatar Dec 07 '20 04:12 kvark