hcc
hcc copied to clipboard
How to convert LDS memory address to the address can be passed into DS_READ_* and DS_WRITE_* instructions?
Have followed here to write the following code: The hip file:
#include "hip/hip_runtime.h"
#include "hip/hcc_detail/device_library_decls.h"
__global__ void halfVec_v_pk_sts_then_lds( uint16_t * dst , uint32_t * ptr , uint16_t * val , int n )
{
__shared__ uint16_t temp_local_share_memory_allocated[5];
int id = blockIdx.x * blockDim.x + threadIdx.x;
if ( id < n )
{
asm volatile("DS_WRITE_B16 %0, %1;\nS_WAITCNT lgkmcnt(0);\n" : : "v"( __to_local( &temp_local_share_memory_allocated[ptr[id]] ) ), "v"(val[id]) );
printf("Store value: %d into local shared memory address: %d.\n", val , ptr[id] );
asm volatile("DS_READ_U16 %0, %1;\nS_WAITCNT lgkmcnt(0);\n" : "=v"(dst[id]) : "v"( __to_local( &temp_local_share_memory_allocated[ptr[id]] ) ));
printf("Load value: %d from local shared memory address: %d.\n", dst[id] , ptr[id] );
}
}
The cpp file:
#pragma once
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include "test_simple_kernel.hip"
int main(int argc,char**vargs)
{
uint16_t* to_be_stored = new uint16_t[1];
uint16_t* to_be_loaded = new uint16_t[1];
uint32_t* local_share_memory_address = new uint32_t[1];
uint16_t* to_be_stored_d;
uint16_t* to_be_loaded_d;
uint32_t* local_share_memory_address_d;
to_be_stored[ 0 ] = 1;
local_share_memory_address[ 0 ] = 0;
hipSetDevice(0);
hipMalloc(&to_be_stored_d, 2);
hipMalloc(&to_be_loaded_d, 2);
hipMalloc(&local_share_memory_address_d, 4);
hipMemcpy(to_be_stored_d, to_be_stored, 2, hipMemcpyHostToDevice);
hipMemcpy(local_share_memory_address_d, local_share_memory_address, 4, hipMemcpyHostToDevice);
printf("Stored value: %d at local share memory address: %d.\n" , to_be_stored[ 0 ] , local_share_memory_address[ 0 ] );
hipLaunchKernelGGL( halfVec_v_pk_sts_then_lds , dim3( 1 ) , dim3( 1 ) , sizeof(uint16_t), 0, (uint16_t*)(to_be_loaded_d), (uint32_t*)(local_share_memory_address_d) , (uint16_t*)(to_be_stored_d) , 1 );
hipMemcpy(to_be_loaded, to_be_loaded_d, 2, hipMemcpyDeviceToHost);
hipDeviceSynchronize();
printf("And then load value: %d from local share memory address: %d.\n", to_be_loaded[ 0 ] , local_share_memory_address[ 0 ] );
}
The code just doesn't compile! How to correct it? Thanks in advance!
Accessing __shared__
memory via inline ASM is not recommended and will probably cause more harm than good. What problem are you trying to solve? Why not simply code temp_local...[i]
?
@b-sumner I'm trying to test and let the inline assembly working. How to make this program(inline assembly) workable? Or could you please provide a program architecture to let these inline assembly workable?