gfx
gfx copied to clipboard
Re-expose a smart DOWNLOAD memory type on Metal
The main problem with the old DOWNLOAD type, which got removed in #3514, was that invalidating it resulted in a pipeline stall, since we need to submit that to a queue as a command. Perhaps, instead we can do the following:
- re-enable the type in the same way it existed
- invalidate() would do nothing
- the transition from anything to
HOST_READaccess would be implemented assynchronize_resource
I'm not 100% sure that this is a valid implementation, but it sounds like it could be one, and it wouldn't include any stalls.
I played around it, and it appears to be more painful than it seems. Our CPU-visible memory has an associated buffer to it, but Metal's synchronize_resource doesn't take a range in arguments. That means, we could have a giant download memory allocation, and it will be synchronized for each little change in each of its buffers.
I also tried to refactor it to have dedicated allocation for these non-coherent buffers, but then mapping memory becomes complicated, just like in DX11. At this point, I'm not convinced it's worth it.
My WIP patch for the records:
diff --git a/src/backend/gl/src/command.rs b/src/backend/gl/src/command.rs
index 3a7b47e0a..de213928b 100644
--- a/src/backend/gl/src/command.rs
+++ b/src/backend/gl/src/command.rs
@@ -688,7 +688,7 @@ impl command::CommandBuffer<Backend> for CommandBuffer {
T: IntoIterator,
T::Item: Borrow<memory::Barrier<'a, Backend>>,
{
- // TODO
+ //TODO
}
unsafe fn fill_buffer(&mut self, _buffer: &n::Buffer, _range: buffer::SubRange, _data: u32) {
diff --git a/src/backend/metal/src/command.rs b/src/backend/metal/src/command.rs
index 4db9a098a..05fcb8532 100644
--- a/src/backend/metal/src/command.rs
+++ b/src/backend/metal/src/command.rs
@@ -2035,6 +2035,9 @@ where
);
}
}
+ Cmd::InvalidateBuffer(buffer) => {
+ encoder.synchronize_resource(buffer.as_native());
+ }
}
}
@@ -2675,11 +2678,27 @@ impl com::CommandBuffer<Backend> for CommandBuffer {
&mut self,
_stages: Range<pso::PipelineStage>,
_dependencies: memory::Dependencies,
- _barriers: T,
+ barriers: T,
) where
T: IntoIterator,
T::Item: Borrow<memory::Barrier<'a, Backend>>,
{
+ for barrier in barriers {
+ match *barrier.borrow() {
+ // take care of memory invalidations
+ memory::Barrier::Buffer {
+ states, target, range: _, families: _,
+ } => {
+ if let native::Buffer::Bound { ref raw, non_coherent: true, .. } = *target {
+ if states.end.contains(buffer::Access::HOST_READ) {
+ let command = soft::BlitCommand::InvalidateBuffer(AsNative::from(&**raw));
+ self.inner.borrow_mut().sink().blit_commands(iter::once(command));
+ }
+ }
+ }
+ _ => {}
+ }
+ }
}
unsafe fn fill_buffer(&mut self, buffer: &native::Buffer, sub: buffer::SubRange, data: u32) {
diff --git a/src/backend/metal/src/device.rs b/src/backend/metal/src/device.rs
index 160076ced..895608346 100644
--- a/src/backend/metal/src/device.rs
+++ b/src/backend/metal/src/device.rs
@@ -25,10 +25,7 @@ use metal::{
MTLPrimitiveType, MTLResourceOptions, MTLSamplerMipFilter, MTLStorageMode, MTLTextureType,
MTLVertexStepFunction,
};
-use objc::{
- rc::autoreleasepool,
- runtime::{Object, BOOL, NO},
-};
+use objc::runtime::{Object, BOOL, NO};
use parking_lot::Mutex;
use spirv_cross::{msl, spirv, ErrorCode as SpirvErrorCode};
@@ -170,10 +167,10 @@ bitflags! {
const SHARED = 1<<1;
// = `DEVICE_LOCAL | CPU_VISIBLE`
const MANAGED_UPLOAD = 1<<2;
- // = `DEVICE_LOCAL | CPU_VISIBLE | CACHED`
+ // = `DEVICE_LOCAL | CPU_VISIBLE | CPU_CACHED`
// Memory range invalidation is implemented to stall the whole pipeline.
// It's inefficient, therefore we aren't going to expose this type.
- //const MANAGED_DOWNLOAD = 1<<3;
+ const MANAGED_DOWNLOAD = 1<<3;
}
}
@@ -183,7 +180,7 @@ impl MemoryTypes {
Self::PRIVATE => (MTLStorageMode::Private, MTLCPUCacheMode::DefaultCache),
Self::SHARED => (MTLStorageMode::Shared, MTLCPUCacheMode::DefaultCache),
Self::MANAGED_UPLOAD => (MTLStorageMode::Managed, MTLCPUCacheMode::WriteCombined),
- //Self::MANAGED_DOWNLOAD => (MTLStorageMode::Managed, MTLCPUCacheMode::DefaultCache),
+ Self::MANAGED_DOWNLOAD => (MTLStorageMode::Managed, MTLCPUCacheMode::DefaultCache),
_ => unreachable!(),
}
}
@@ -216,7 +213,11 @@ impl PhysicalDevice {
properties: Properties::DEVICE_LOCAL | Properties::CPU_VISIBLE,
heap_index: 1,
},
- // MANAGED_DOWNLOAD (removed)
+ adapter::MemoryType {
+ // MANAGED_DOWNLOAD
+ properties: Properties::DEVICE_LOCAL | Properties::CPU_VISIBLE | Properties::CPU_CACHED,
+ heap_index: 1,
+ },
]
} else {
vec![
@@ -562,6 +563,7 @@ impl Device {
fn _is_heap_coherent(&self, heap: &n::MemoryHeap) -> bool {
match *heap {
n::MemoryHeap::Private => false,
+ n::MemoryHeap::PublicNonCoherent => false,
n::MemoryHeap::Public(memory_type, _) => self.memory_types[memory_type.0]
.properties
.contains(Properties::COHERENT),
@@ -1917,48 +1919,11 @@ impl hal::device::Device<Backend> for Device {
Ok(())
}
- unsafe fn invalidate_mapped_memory_ranges<'a, I>(&self, iter: I) -> Result<(), d::OutOfMemory>
+ unsafe fn invalidate_mapped_memory_ranges<'a, I>(&self, _iter: I) -> Result<(), d::OutOfMemory>
where
I: IntoIterator,
I::Item: Borrow<(&'a n::Memory, memory::Segment)>,
{
- let mut num_syncs = 0;
- debug!("invalidate_mapped_memory_ranges");
-
- // temporary command buffer to copy the contents from
- // the given buffers into the allocated CPU-visible buffers
- // Note: using a separate internal queue in order to avoid a stall
- let cmd_buffer = self.invalidation_queue.spawn_temp();
- autoreleasepool(|| {
- let encoder = cmd_buffer.new_blit_command_encoder();
-
- for item in iter {
- let (memory, ref segment) = *item.borrow();
- let range = memory.resolve(segment);
- debug!("\trange {:?}", range);
-
- match memory.heap {
- n::MemoryHeap::Native(_) => unimplemented!(),
- n::MemoryHeap::Public(mt, ref cpu_buffer)
- if 1 << mt.0 != MemoryTypes::SHARED.bits() as usize =>
- {
- num_syncs += 1;
- encoder.synchronize_resource(cpu_buffer);
- }
- n::MemoryHeap::Public(..) => continue,
- n::MemoryHeap::Private => panic!("Can't map private memory!"),
- };
- }
- encoder.end_encoding();
- });
-
- if num_syncs != 0 {
- debug!("\twaiting...");
- cmd_buffer.set_label("invalidate_mapped_memory_ranges");
- cmd_buffer.commit();
- cmd_buffer.wait_until_completed();
- }
-
Ok(())
}
@@ -2397,6 +2362,8 @@ impl hal::device::Device<Backend> for Device {
n::MemoryHeap::Native(heap_raw)
} else if storage == MTLStorageMode::Private {
n::MemoryHeap::Private
+ } else if storage == MTLStorageMode::Managed && cache == MTLCPUCacheMode::Default {
+ n::MemoryHeap::PublicNonCoherent
} else {
let options = conv::resource_options_from_storage_and_cache(storage, cache);
let cpu_buffer = device.new_buffer(size, options);
@@ -2480,20 +2447,39 @@ impl hal::device::Device<Backend> for Device {
};
debug!("bind_buffer_memory of size {} at offset {}", size, offset);
*buffer = match memory.heap {
- n::MemoryHeap::Native(ref heap) => {
- let options = conv::resource_options_from_storage_and_cache(
- heap.storage_mode(),
- heap.cpu_cache_mode(),
- );
- let raw = heap.new_buffer(size, options).unwrap_or_else(|| {
- // TODO: disable hazard tracking?
- self.shared.device.lock().new_buffer(size, options)
- });
+ n::MemoryHeap::Private => {
+ //TODO: check for aliasing
+ let options = MTLResourceOptions::StorageModePrivate
+ | MTLResourceOptions::CPUCacheModeDefaultCache;
+ let raw = self.shared.device.lock().new_buffer(size, options);
raw.set_label(name);
n::Buffer::Bound {
raw,
options,
- range: 0..size, //TODO?
+ range: 0..size,
+ non_coherent: false,
+ }
+ }
+ n::MemoryHeap::PublicNonCoherent => {
+ debug!("\tmapped to public non-coherent heap");
+ let (storage, cache) = MemoryTypes::describe(mt.0);
+ let options = conv::resource_options_from_storage_and_cache(storage, cache);
+ if offset == 0x0 && size == cpu_buffer.length() {
+ cpu_buffer.set_label(name);
+ } else if self.shared.private_caps.supports_debug_markers {
+ cpu_buffer.add_debug_marker(
+ name,
+ NSRange {
+ location: offset,
+ length: size,
+ },
+ );
+ }
+ n::Buffer::Bound {
+ raw: cpu_buffer.clone(),
+ options,
+ range: offset..offset + size,
+ non_coherent: false,
}
}
n::MemoryHeap::Public(mt, ref cpu_buffer) => {
@@ -2518,18 +2504,24 @@ impl hal::device::Device<Backend> for Device {
raw: cpu_buffer.clone(),
options,
range: offset..offset + size,
+ non_coherent: false,
}
}
- n::MemoryHeap::Private => {
- //TODO: check for aliasing
- let options = MTLResourceOptions::StorageModePrivate
- | MTLResourceOptions::CPUCacheModeDefaultCache;
- let raw = self.shared.device.lock().new_buffer(size, options);
+ n::MemoryHeap::Native(ref heap) => {
+ let options = conv::resource_options_from_storage_and_cache(
+ heap.storage_mode(),
+ heap.cpu_cache_mode(),
+ );
+ let raw = heap.new_buffer(size, options).unwrap_or_else(|| {
+ // TODO: disable hazard tracking?
+ self.shared.device.lock().new_buffer(size, options)
+ });
raw.set_label(name);
n::Buffer::Bound {
raw,
options,
- range: 0..size,
+ range: 0..size, //TODO?
+ non_coherent: false,
}
}
};
@@ -2558,6 +2550,7 @@ impl hal::device::Device<Backend> for Device {
ref raw,
ref range,
options,
+ non_coherent: _,
} => (raw, range, options),
n::Buffer::Unbound { .. } => panic!("Unexpected Buffer::Unbound"),
};
diff --git a/src/backend/metal/src/native.rs b/src/backend/metal/src/native.rs
index c25e8cabe..948da57e2 100644
--- a/src/backend/metal/src/native.rs
+++ b/src/backend/metal/src/native.rs
@@ -412,6 +412,7 @@ pub enum Buffer {
raw: metal::Buffer,
range: Range<u64>,
options: metal::MTLResourceOptions,
+ non_coherent: bool,
},
}
@@ -918,6 +919,7 @@ unsafe impl Sync for Memory {}
#[derive(Debug)]
pub(crate) enum MemoryHeap {
Private,
+ PublicNonCoherent,
Public(MemoryTypeId, metal::Buffer),
Native(metal::Heap),
}
diff --git a/src/backend/metal/src/soft.rs b/src/backend/metal/src/soft.rs
index 278df1f27..0b1438f88 100644
--- a/src/backend/metal/src/soft.rs
+++ b/src/backend/metal/src/soft.rs
@@ -148,6 +148,7 @@ pub enum BlitCommand {
dst: BufferPtr,
region: hal::command::BufferImageCopy,
},
+ InvalidateBuffer(BufferPtr),
}
#[derive(Clone, Debug)]