ROCmValidationSuite icon indicating copy to clipboard operation
ROCmValidationSuite copied to clipboard

[Issue]: rvs attempts to build 8-length ops for gfx803 (rvs_blas.cpp)

Open chboishabba opened this issue 10 months ago • 3 comments

Problem Description

echo "OS:" && cat /etc/os-release | grep -E "^(NAME=|VERSION=)";
  echo "CPU: " && cat /proc/cpuinfo | grep "model name" | sort --unique;
  echo "GPU:" && /opt/**rocm_sdk_612**/bin/rocminfo | grep -E "^\s*(Name|Marketing Name)";
OS:
NAME="Arch Linux"
CPU: 
model name	: Intel(R) Core(TM) i7-7700K CPU @ 4.20GHz
GPU:
  Name:                    Intel(R) Core(TM) i7-7700K CPU @ 4.20GHz
  Marketing Name:          Intel(R) Core(TM) i7-7700K CPU @ 4.20GHz
  Name:                    gfx803                             
  Marketing Name:          AMD Radeon RX 580 Series           
      Name:                    amdgcn-amd-amdhsa--gfx803        

Several instances

#if !defined(__HIP_ARCH_GFX803__) // Add this conditional compilation
  if(data_type == "fp8_r") {
    if (blas_source == "rocblas") {
      return copy_data_to_gpu<rocblas_f8, rocblas_f8>();
    }
    else if (blas_source == "hipblaslt") {
      return copy_data_to_gpu<hipblaslt_f8, float>();
    }
  }

  if(data_type == "fp8_e4m3_r") {
    return copy_data_to_gpu<hipblaslt_f8, float>();
  }

  if(data_type == "fp8_e5m2_r") {
    return copy_data_to_gpu<hipblaslt_bf8, float>();
  }
#endif // !defined(__HIP_ARCH_GFX803__) // Add this closing condition
  if(data_type == "fp16_r") {
    if (blas_source == "rocblas") {
      return copy_data_to_gpu<rocblas_half, rocblas_half>();
    }
    else if (blas_source == "hipblaslt") {
      return copy_data_to_gpu<hipblasLtHalf, hipblasLtHalf>();
    }
  }

  if(data_type == "bf16_r") {
    if (blas_source == "rocblas") {
      return copy_data_to_gpu<rocblas_bfloat16, rocblas_bfloat16>();
    }
    else if (blas_source == "hipblaslt") {
      return copy_data_to_gpu<hipblasLtBfloat16, hipblasLtBfloat16>();
    }
  }

  if(data_type == "i8_r") {
    return copy_data_to_gpu<int8_t, int8_t>();
  }

  if(data_type == "fp32_r") {
    return copy_data_to_gpu<float, float>();
  }

  is_error = false;
  return true;
}

Operating System

uname -a Linux archb 6.13.4-arch1-1 #1 SMP PREEMPT_DYNAMIC Sat, 22 Feb 2025 00:37:05 +0000 x86_64 GNU/Linux

CPU

i7 7700k

GPU

RX 580

ROCm Version

6.2.1

ROCm Component

ROCmValidationSuite

Steps to Reproduce

/********************************************************************************
 *
 * Copyright (c) 2018-2025 Advanced Micro Devices, Inc. All rights reserved.
 *
 * MIT LICENSE:
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 *******************************************************************************/
#include "include/rvs_blas.h"

#include <time.h>
#include <iostream>
#include <cmath>
#include <random>
#include <thread>

/* ============================================================================================ */
// Random number generator
using rvsblas_rng_t = std::mt19937;

// Random number generator
rvsblas_rng_t rvsblas_seed(69069); // A fixed seed to start at

// This records the main thread ID at startup
std::thread::id rvsblas_main_thread_id = std::this_thread::get_id();

// For the main thread, we use g_rocblas_seed; for other threads, we start with a different seed but
// deterministically based on the thread id's hash function.
inline rvsblas_rng_t get_seed()
{
  auto tid = std::this_thread::get_id();
  return tid == rvsblas_main_thread_id ? rvsblas_seed
    : rvsblas_rng_t(std::hash<std::thread::id>{}(tid));
}

// For the main thread, we use g_rvsblas_seed; for other threads, we start with a different seed but
// deterministically based on the thread id's hash function.
thread_local rvsblas_rng_t rvsblas_t_rng = get_seed();

thread_local int rvsblas_t_rand_idx;

// length to allow use as bitmask to wraparound
#define RANDLEN 1024
#define RANDWIN 256
#define RANDBUF RANDLEN + RANDWIN
static thread_local int    rvsblas_t_rand_init = 0;
static thread_local float  rvsblas_t_rand_f_array[RANDBUF];
static thread_local double rvsblas_t_rand_d_array[RANDBUF];

/* ============================================================================================ */

#define RANDOM_CT               320000
#define RANDOM_DIV_CT           0.1234

/**
 * @brief class constructor
 * @param _gpu_device_index the gpu that will run the GEMM
 * @param _m matrix A rows
 * @param _n matrix B cols
 * @param _k matrix A/B cols/rows respectively
 * @param transA matrix A transpose operation type
 * @param transB matrix B transpose operation type
 * @param alpha scalar for matrix A*B
 * @param beta scalar for matrix C
 * @param lda leading dimension for matrix A
 * @param ldb leading dimension for matrix B
 * @param ldc leading dimension for matrix C
 * @param _ops_type type of BLAS operation to test with
 */
rvs_blas::rvs_blas(int _gpu_device_index, int _m, int _n, int _k, std::string _matrix_init, int transA, int transB,
    float alpha , float beta, int lda, int ldb, int ldc, int ldd,
    std::string _ops_type, std::string _data_type, std::string _gemm_mode, int _batch_size,
    uint64_t _stride_a, uint64_t _stride_b, uint64_t _stride_c, uint64_t _stride_d,
    std::string _blas_source, std::string _compute_type)
  : gpu_device_index(_gpu_device_index)
  , ops_type(_ops_type)
  , data_type(_data_type)
  , m(_m), n(_n), k(_k)
  , matrix_init (_matrix_init)
  , size_a(0), size_b(0), size_c(0), size_d(0)
  , da(nullptr), db(nullptr), dc(nullptr), dd(nullptr)
  , ha(nullptr), hb(nullptr), hc(nullptr)
  , hpo(nullptr), hco(nullptr)
  , hout(nullptr), hdout(nullptr)
  , hip_stream(nullptr)
  , hiprand_generator(nullptr)
  , blas_handle(nullptr)
  , is_handle_init(false)
  , is_error(false)
  , check_count(1)
  , gemm_mode(_gemm_mode)
  , batch_size(_batch_size)
  , stride_a(_stride_a), stride_b(_stride_b), stride_c(_stride_c), stride_d(_stride_d)
  , blas_source(_blas_source)
  , compute_type(_compute_type)
  , hbl_handle(nullptr), hbl_workspace(nullptr)
  , hbl_layout_a(nullptr), hbl_layout_b(nullptr)
  , hbl_layout_c(nullptr), hbl_layout_d(nullptr)
  , hbl_matmul(nullptr)
{

  if (blas_source == "rocblas") {

    // Matrix a & b transpose
    transa = (transA == 0) ? rocblas_operation_none : rocblas_operation_transpose;
    transb = (transB == 0) ? rocblas_operation_none : rocblas_operation_transpose;

    // minimum leading dimensions
    rocblas_int min_lda = transA == rocblas_operation_none ? m : k;
    rocblas_int min_ldb = transB == rocblas_operation_none ? k : n;
    rocblas_int min_ldc = m;
    rocblas_int min_ldd = m;

    // setting actual leading dimensions
    blas_lda_offset = (lda < min_lda) ? min_lda : lda;
    blas_ldb_offset = (ldb < min_ldb) ? min_ldb : ldb;
    blas_ldc_offset = (ldc < min_ldc) ? min_ldc : ldc;
    blas_ldd_offset = (ldd < min_ldd) ? min_ldd : ldd;

    // Setting matrix a, b & c sizes
    size_a = (transa == rocblas_operation_none) ? size_t(k) * blas_lda_offset : size_t(m) * blas_lda_offset;
    size_b = (transb == rocblas_operation_none) ? size_t(n) * blas_ldb_offset : size_t(k) * blas_ldb_offset;
    size_c = size_t(n) * blas_ldc_offset;

    // gemm based on data type, size of output matrix d.
    if (!data_type.empty()) {
      size_d = size_t(n) * blas_ldd_offset;
    }

    if(gemm_mode == "strided_batched") {

      if(stride_a == 0)
        stride_a = (transA == rocblas_operation_none) ? blas_lda_offset * k : blas_lda_offset * m;

      if(stride_b == 0)
        stride_b = (transB == rocblas_operation_none) ? blas_ldb_offset * n : blas_ldb_offset * k;

      if(stride_c == 0)
        stride_c = blas_ldc_offset * n;

      if(stride_d == 0)
        stride_d = blas_ldd_offset * n;

      size_a = (batch_size == 0) ? size_a : size_a + stride_a * (batch_size - 1);
      size_b = (batch_size == 0) ? size_b : size_b + stride_b * (batch_size - 1);
      size_c = (batch_size == 0) ? size_c : size_c + stride_c * (batch_size - 1);

      if (!data_type.empty()) {
        size_d = (batch_size == 0) ? size_d : size_d + stride_d * (batch_size - 1);
      }
    }
  }
  else if (blas_source == "hipblaslt") {
  
#if !defined(__HIP_ARCH_GFX803__)  // Add this conditional compilation

    // Matrix a & b transpose
    hbl_trans_a = (transA == 0) ? HIPBLAS_OP_N  : HIPBLAS_OP_T;
    hbl_trans_b = (transB == 0) ? HIPBLAS_OP_N : HIPBLAS_OP_T;

    // minimum leading dimensions
    int64_t min_lda = (hbl_trans_a == HIPBLAS_OP_N) ? m : k;
    int64_t min_ldb = (hbl_trans_b == HIPBLAS_OP_N) ? k : n;
    int64_t min_ldc = m;
    int64_t min_ldd = m;

    hbl_row_a = (hbl_trans_a == HIPBLAS_OP_N) ? m : k;
    hbl_col_a = (hbl_trans_a == HIPBLAS_OP_N) ? k : m;

    hbl_row_b = (hbl_trans_b == HIPBLAS_OP_N) ? k : n;
    hbl_col_b = (hbl_trans_b == HIPBLAS_OP_N) ? n : k;

    // setting actual leading dimensions
    hbl_lda_offset = ((int64_t)lda < min_lda) ? min_lda : (int64_t)lda;
    hbl_ldb_offset = ((int64_t)ldb < min_ldb) ? min_ldb : (int64_t)ldb;
    hbl_ldc_offset = ((int64_t)ldc < min_ldc) ? min_ldc : (int64_t)ldc;
    hbl_ldd_offset = ((int64_t)ldd < min_ldd) ? min_ldd : (int64_t)ldd;

    // Setting matrix a, b & c sizes
    size_a = (hbl_trans_a == HIPBLAS_OP_N) ? size_t(k) * hbl_lda_offset : size_t(m) * hbl_lda_offset;
    size_b = (hbl_trans_b == HIPBLAS_OP_N) ? size_t(n) * hbl_ldb_offset : size_t(k) * hbl_ldb_offset;
    size_c = size_t(n) * hbl_ldc_offset;

    // gemm based on data type, size of output matrix d.
    if (!data_type.empty()) {
      size_d = size_t(n) * hbl_ldd_offset;
    }

    // Get hip data type
    hbl_datatype = datatype_to_hip_datatype(data_type);
    if(RVS_BLAS_HIP_DATATYPE_INVALID == hbl_datatype) {
      is_error = true;
      std::cout << "\n Invalid data-type !!!" << "\n";
      return;
    }

    // output hip data type
    if((HIP_R_8F_E4M3 == hbl_datatype) || (HIP_R_8F_E5M2 == hbl_datatype)) {
     hbl_out_datatype = HIP_R_32F;
    }
    else {
     hbl_out_datatype = hbl_datatype;
    }

    // Get hipblas compute type
    hbl_computetype = computetype_to_hipblas_computetype(compute_type);
    if(RVS_BLAS_HIPBLAS_COMPUTETYPE_INVALID == hbl_computetype) {
      is_error = true;
      std::cout << "\n Invalid compute-type !!!" << "\n";
      return;
    }
#endif // !defined(__HIP_ARCH_GFX803__)  // Add this closing condition
  }
  else {
    is_error = true;
    std::cout << "\n Invalid blas source !!!" << "\n";
    return;
  }

  //setting alpha and beta val
  blas_alpha_val = alpha;
  blas_beta_val = beta;

  if (allocate_host_matrix_mem()) {
    if (!init_gpu_device())
      is_error = true;
  } else {
    is_error = true;
  }
}

/**
 * @brief class destructor
 */
rvs_blas::~rvs_blas() {
    release_host_matrix_mem();
    release_gpu_matrix_mem();
}

/**
 * @brief selects GPU device, allocates GPU memory, creates a rocBlas
 * handle and get a reference to the rocBlas's stream
 * @return true if everything went fine, otherwise false
 */
bool rvs_blas::init_gpu_device(void) {

  // select GPU device & allocate memory
  if (hipSetDevice(gpu_device_index) != hipSuccess) {
    // cannot select the given GPU device
    return false;
  }

  if (hipStreamCreate(&hip_stream) != hipSuccess) {
    std::cout << "\n hipStreamCreate() failed !!!" << "\n";
    return false;
  }

  if (!allocate_gpu_matrix_mem()) {
    std::cout << "\n allocate_gpu_matrix_mem() failed !!!" << "\n";
    return false;
  }

  if (blas_source == "rocblas") {

    // rocblas initialize
    rocblas_initialize();

    if (rocblas_create_handle(&blas_handle) != rocblas_status_success) {
      std::cout << "\n rocblas_create_handle() failed !!!" << "\n";
      return false;
    }

    if (rocblas_set_stream(blas_handle, hip_stream) != rocblas_status_success) {
      std::cout << "\n rocblas_set_stream() failed !!!" << "\n";
      return false;
    }
  }
  else if (blas_source == "hipblaslt") {

    // Create hipblaslt handle
    if(hipblasLtCreate(&hbl_handle) != HIPBLAS_STATUS_SUCCESS) {
      std::cout << "\n hipblasLtCreate() failed !!!" << "\n";
      return false;
    }

    // Create Matrix Layouts
    if(hipblasLtMatrixLayoutCreate(&hbl_layout_a, hbl_datatype, hbl_row_a, hbl_col_a, hbl_lda_offset) != HIPBLAS_STATUS_SUCCESS) {
      std::cout << "\nLayout_A hipblasLtMatrixLayoutCreate() failed !!!" << "\n";
      return false;
    }

    if(hipblasLtMatrixLayoutCreate(&hbl_layout_b, hbl_datatype, hbl_row_b, hbl_col_b, hbl_ldb_offset) != HIPBLAS_STATUS_SUCCESS) {
      std::cout << "\nLayout_B hipblasLtMatrixLayoutCreate() failed !!!" << "\n";
      return false;
    }

    if(hipblasLtMatrixLayoutCreate(&hbl_layout_c, hbl_out_datatype, m, n, hbl_ldc_offset) != HIPBLAS_STATUS_SUCCESS) {
      std::cout << "\nLayout_C hipblasLtMatrixLayoutCreate() failed !!!" << "\n";
      return false;
    }

    if(hipblasLtMatrixLayoutCreate(&hbl_layout_d, hbl_out_datatype, m, n, hbl_ldd_offset) != HIPBLAS_STATUS_SUCCESS) {
      std::cout << "\nLayout_D hipblasLtMatrixLayoutCreate() failed !!!" << "\n";
      return false;
    }

    // Create Matrix Multiplication descriptor & set attributes
    if(hipblasLtMatmulDescCreate(&hbl_matmul, hbl_computetype, HIP_R_32F) != HIPBLAS_STATUS_SUCCESS) {
      std::cout << "\nhipblasLtMatmulDescCreate() failed !!!" << "\n";
      return false;
    }

    if(hipblasLtMatmulDescSetAttribute(hbl_matmul, HIPBLASLT_MATMUL_DESC_COMPUTE_INPUT_TYPE_A_EXT, &hbl_datatype, sizeof(void*)) != HIPBLAS_STATUS_SUCCESS) {
      std::cout << "\nhipblasLtMatmulDescSetAttribute() failed !!!" << "\n";
      return false;
    }

    if(hipblasLtMatmulDescSetAttribute(hbl_matmul, HIPBLASLT_MATMUL_DESC_COMPUTE_INPUT_TYPE_B_EXT, &hbl_datatype, sizeof(void*)) != HIPBLAS_STATUS_SUCCESS) {
      std::cout << "\nhipblasLtMatmulDescSetAttribute() failed !!!" << "\n";
      return false;
    }

    if(hipblasLtMatmulDescSetAttribute(hbl_matmul, HIPBLASLT_MATMUL_DESC_TRANSA, &hbl_trans_a, sizeof(int32_t)) != HIPBLAS_STATUS_SUCCESS) {
      std::cout << "\nhipblasLtMatmulDescSetAttribute() failed !!!" << "\n";
      return false;
    }

    if(hipblasLtMatmulDescSetAttribute(hbl_matmul, HIPBLASLT_MATMUL_DESC_TRANSB, &hbl_trans_b, sizeof(int32_t)) != HIPBLAS_STATUS_SUCCESS) {
      std::cout << "\nhipblasLtMatmulDescSetAttribute() failed !!!" << "\n";
      return false;
    }

    // Request only 1 algorithm
    constexpr int request_algo_count = 1;
    int returned_algo_count = 0;

    // Set max. workspace size to 32MB
    constexpr size_t max_workspace_size = 32 * 1024 * 1024;

    hipblasLtMatmulPreference_t pref;

    if(hipblasLtMatmulPreferenceCreate(&pref) != HIPBLAS_STATUS_SUCCESS) {
      std::cout << "\nhipblasLtMatmulPreferenceCreate() failed !!!" << "\n";
      return false;
    }

    if(hipblasLtMatmulPreferenceSetAttribute(pref, HIPBLASLT_MATMUL_PREF_MAX_WORKSPACE_BYTES,
          &max_workspace_size, sizeof(max_workspace_size)) != HIPBLAS_STATUS_SUCCESS) {
      std::cout << "\nhipblasLtMatmulPreferenceSetAttribute() failed !!!" << "\n";
      return false;
    }

    if(hipblasLtMatmulAlgoGetHeuristic(hbl_handle,
          hbl_matmul,
          hbl_layout_a,
          hbl_layout_b,
          hbl_layout_c,
          hbl_layout_d,
          pref,
          request_algo_count,
          &hbl_heuristic_result,
          &returned_algo_count) != HIPBLAS_STATUS_SUCCESS) {

      std::cout << "\nError in hipblasLtMatmulAlgoGetHeuristic() !!!" << "\n";
      hipblasLtMatmulPreferenceDestroy(pref);
      return false;
    }

    hipblasLtMatmulPreferenceDestroy(pref);

    if(returned_algo_count != request_algo_count) {
      std::cout << "\nIncorrect Heuristic algo. count !!!" << "\n";
      return false;
    }

    if(hbl_heuristic_result.workspaceSize) {
      // Allocate workspace for matrix multiplication
      hipMalloc(&hbl_workspace, hbl_heuristic_result.workspaceSize);
    }
  }
  else {
    std::cout << "\n Invalid blas source type !!!" << "\n";
    return false;
  }

  if("hiprand" == matrix_init) {

    // Create hipRAND generator, assign stream.
    if(hiprandCreateGenerator(&hiprand_generator, HIPRAND_RNG_PSEUDO_DEFAULT) != HIPRAND_STATUS_SUCCESS) {
      std::cout << "\n hiprandCreateGenerator() failed !!!" << "\n";
      return false;
    }

    if(hiprandSetStream(hiprand_generator, hip_stream) != HIPRAND_STATUS_SUCCESS) {
      std::cout << "\n hiprandSetStream() failed !!!" << "\n";
      return false;
    }
  }

  is_handle_init = true;
  return true;
}

template <typename Ti, typename To>
bool rvs_blas::copy_data_to_gpu(void) {

  if (da) {
    if (hipMemcpy(da, ha, sizeof(Ti) * size_a, hipMemcpyHostToDevice)
        != hipSuccess) {
      is_error = true;
      return false;
    }
  }

  if (db) {
    if (hipMemcpy(db, hb, sizeof(Ti) * size_b, hipMemcpyHostToDevice)
        != hipSuccess) {
      is_error = true;
      return false;
    }
  }

  if (dc) {
    if (hipMemcpy(dc, hc, sizeof(To) * size_c, hipMemcpyHostToDevice)
        != hipSuccess) {
      is_error = true;
      return false;
    }
  }

  is_error = false;
  return true;
}

/**
 * @brief copy data matrix from host to gpu
 * @return true if everything went fine, otherwise false
 */
bool rvs_blas::copy_data_to_gpu(void) {

  if("hiprand" == matrix_init) {

    // hipRAND no need for allocation in host memory, so no host to device copy !
    return true;
  }

  if(ops_type == "sgemm") {
    return copy_data_to_gpu<float, float>();
  }

  if(ops_type == "dgemm") {
    return copy_data_to_gpu<double, double>();
  }

  if(ops_type == "hgemm") {
    return copy_data_to_gpu<rocblas_half, rocblas_half>();
  }
#if !defined(__HIP_ARCH_GFX803__) // Add this conditional compilation
  if(data_type == "fp8_r") {
    if (blas_source == "rocblas") {
      return copy_data_to_gpu<rocblas_f8, rocblas_f8>();
    }
    else if (blas_source == "hipblaslt") {
      return copy_data_to_gpu<hipblaslt_f8, float>();
    }
  }

  if(data_type == "fp8_e4m3_r") {
    return copy_data_to_gpu<hipblaslt_f8, float>();
  }

  if(data_type == "fp8_e5m2_r") {
    return copy_data_to_gpu<hipblaslt_bf8, float>();
  }
#endif // !defined(__HIP_ARCH_GFX803__) // Add this closing condition
  if(data_type == "fp16_r") {
    if (blas_source == "rocblas") {
      return copy_data_to_gpu<rocblas_half, rocblas_half>();
    }
    else if (blas_source == "hipblaslt") {
      return copy_data_to_gpu<hipblasLtHalf, hipblasLtHalf>();
    }
  }

  if(data_type == "bf16_r") {
    if (blas_source == "rocblas") {
      return copy_data_to_gpu<rocblas_bfloat16, rocblas_bfloat16>();
    }
    else if (blas_source == "hipblaslt") {
      return copy_data_to_gpu<hipblasLtBfloat16, hipblasLtBfloat16>();
    }
  }

  if(data_type == "i8_r") {
    return copy_data_to_gpu<int8_t, int8_t>();
  }

  if(data_type == "fp32_r") {
    return copy_data_to_gpu<float, float>();
  }

  is_error = false;
  return true;
}

template <typename Ti, typename To>
bool rvs_blas::allocate_gpu_matrix_mem(void) {

  if (hipMalloc(&da, size_a * sizeof(Ti)) != hipSuccess)
    return false;
  if (hipMalloc(&db, size_b * sizeof(Ti)) != hipSuccess)
    return false;
  if (hipMalloc(&dc, size_c * sizeof(To)) != hipSuccess)
    return false;

  if(size_d)
    if (hipMalloc(&dd, size_d * sizeof(To)) != hipSuccess)
      return false;

  return true;
}

/**
 * @brief allocates memory (for matrix multiplication) on the selected GPU
 * @return true if everything went fine, otherwise false
 */
bool rvs_blas::allocate_gpu_matrix_mem(void) {

  if(ops_type == "sgemm") {
    return allocate_gpu_matrix_mem<float, float>();
  }

  if(ops_type == "dgemm") {
    return allocate_gpu_matrix_mem<double, double>();
  }

  if(ops_type == "hgemm") {
    return allocate_gpu_matrix_mem<rocblas_half, rocblas_half>();
  }
#if !defined(__HIP_ARCH_GFX803__) // Add this conditional compilation
  if(data_type == "fp8_r") {
    if (blas_source == "rocblas") {
      return allocate_gpu_matrix_mem<rocblas_f8, rocblas_f8>();
    }
    else if (blas_source == "hipblaslt") {
      return allocate_gpu_matrix_mem<hipblaslt_f8, float>();
    }
  }

  if(data_type == "fp8_e4m3_r") {
    return allocate_gpu_matrix_mem<hipblaslt_f8, float>();
  }

  if(data_type == "fp8_e5m2_r") {
    return allocate_gpu_matrix_mem<hipblaslt_bf8, float>();
  }
#endif // !defined(__HIP_ARCH_GFX803__) // Add this closing condition
  if(data_type == "fp16_r") {
    if (blas_source == "rocblas") {
      return allocate_gpu_matrix_mem<rocblas_half, rocblas_half>();
    }
    else if (blas_source == "hipblaslt") {
      return allocate_gpu_matrix_mem<hipblasLtHalf, hipblasLtHalf>();
    }
  }

  if(data_type == "bf16_r") {
    if (blas_source == "rocblas") {
      return allocate_gpu_matrix_mem<rocblas_bfloat16, rocblas_bfloat16>();
    }
    else if (blas_source == "hipblaslt") {
      return allocate_gpu_matrix_mem<hipblasLtBfloat16, hipblasLtBfloat16>();
    }
  }

  if(data_type == "i8_r") {
    return allocate_gpu_matrix_mem<int8_t, int8_t>();
  }

  if(data_type == "fp32_r") {
    return allocate_gpu_matrix_mem<float, float>();
  }

  return true;
}

/**
 * @brief gets steady clock time since epoch in microseconds
 */
double rvs_blas::get_time_us(void) {

  // Get steady clock now
  auto now = std::chrono::steady_clock::now();

  // Get duration since epoch in microseconds
  auto duration
    = std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count();

  return (static_cast<double>(duration));
}

/**
 * @brief releases GPU mem & destroys the rocBlas handle
 */
void rvs_blas::release_gpu_matrix_mem(void) {

  if (da)
    hipFree(da);
  if (db)
    hipFree(db);
  if (dc)
    hipFree(dc);
  if (dd)
    hipFree(dd);

  if (is_handle_init) {

    if(blas_handle)
      rocblas_destroy_handle(blas_handle);

    if(hiprand_generator)
      hiprandDestroyGenerator(hiprand_generator);

    hipStreamDestroy(hip_stream);

    if(hbl_layout_a)
      hipblasLtMatrixLayoutDestroy(hbl_layout_a);
    if(hbl_layout_b)
      hipblasLtMatrixLayoutDestroy(hbl_layout_b);
    if(hbl_layout_c)
      hipblasLtMatrixLayoutDestroy(hbl_layout_c);
    if(hbl_layout_d)
      hipblasLtMatrixLayoutDestroy(hbl_layout_d);

    if(hbl_matmul)
      hipblasLtMatmulDescDestroy(hbl_matmul);

    if(hbl_workspace)
      hipFree(hbl_workspace);

    if(hbl_handle)
      hipblasLtDestroy(hbl_handle);
  }
}

/**
 * @brief allocate host matrix memory
 * @return true if everything went fine, otherwise false
 */
bool rvs_blas::allocate_host_matrix_mem(void) {

  if("hiprand" == matrix_init) {

    // hipRAND no need for allocation in host memory
    return true;
  }

  try {

    if(ops_type == "sgemm") {

      ha = new float[size_a];
      hb = new float[size_b];
      hc = new float[size_c];
    }

    if(ops_type == "dgemm") {

      ha = new double[size_a];
      hb = new double[size_b];
      hc = new double[size_c];
    }

    if(ops_type == "hgemm") {

      ha = new rocblas_half[size_a];
      hb = new rocblas_half[size_b];
      hc = new rocblas_half[size_c];
    }
#if !defined(__HIP_ARCH_GFX803__) // Add this conditional compilation
    if(data_type == "fp8_r") {

      if (blas_source == "rocblas") {
        ha = new rocblas_f8[size_a];
        hb = new rocblas_f8[size_b];
        hc = new rocblas_f8[size_c];
      }
      else if (blas_source == "hipblaslt") {
        ha = new hipblaslt_f8[size_a];
        hb = new hipblaslt_f8[size_b];
        hc = new float[size_c];
      }
    }

    if(data_type == "fp8_e4m3_r") {

      ha = new hipblaslt_f8[size_a];
      hb = new hipblaslt_f8[size_b];
      hc = new float[size_c];
    }

    if(data_type == "fp8_e5m2_r") {

      ha = new hipblaslt_bf8[size_a];
      hb = new hipblaslt_bf8[size_b];
      hc = new float[size_c];
    }
#endif // !defined(__HIP_ARCH_GFX803__) // Add this closing condition
    if(data_type == "fp16_r") {

      if (blas_source == "rocblas") {
        ha = new rocblas_half[size_a];
        hb = new rocblas_half[size_b];
        hc = new rocblas_half[size_c];
      }
      else if (blas_source == "hipblaslt") {
        ha = new hipblasLtHalf[size_a];
        hb = new hipblasLtHalf[size_b];
        hc = new hipblasLtHalf[size_c];
      }
    }

    if(data_type == "bf16_r") {

      if (blas_source == "rocblas") {
        ha = new rocblas_bfloat16[size_a];
        hb = new rocblas_bfloat16[size_b];
        hc = new rocblas_bfloat16[size_c];
      }
      else if (blas_source == "hipblaslt") {
        ha = new hipblasLtBfloat16[size_a];
        hb = new hipblasLtBfloat16[size_b];
        hc = new hipblasLtBfloat16[size_c];
      }
    }

    if(data_type == "i8_r") {

      ha = new int8_t[size_a];
      hb = new int8_t[size_b];
      hc = new int8_t[size_c];
    }

    if(data_type == "fp32_r") {

      ha = new float[size_a];
      hb = new float[size_b];
      hc = new float[size_c];
    }

    return true;
  } catch (std::bad_alloc&) {
    return false;
  }
}

/**
 * @brief releases the host matrix memory
 */
void rvs_blas::release_host_matrix_mem(void) {

  if (ha)
    delete []ha;
  if (hb)
    delete []hb;
  if (hc)
    delete []hc;

  if (hpo)
    hipHostFree(hpo);
  if (hco)
    hipHostFree(hco);
  if(hout)
    hipHostFree(hout);
  if(hdout)
    hipHostFree(hdout);
}

/**
 * @brief checks whether all the gemm operations enqueued in the stream is completed
 * @return true if GPU finished with matrix multiplication, otherwise false
 */
bool rvs_blas::is_gemm_op_complete(void) {

  if (is_error)
    return false;

  if(hipStreamSynchronize(hip_stream) != hipSuccess) {
    std::cout << "hipStreamSynchronize() failed !!! for stream " << hip_stream << std::endl;
    return false;
  }

  return true;
}

/**
 * @brief performs the GEMM matrix multiplication operations
 * @return true if GPU was able to enqueue the GEMM operation, otherwise false
 */
bool rvs_blas::run_blas_gemm(void) {

  if (is_error)
    return false;

  if(blas_source == "rocblas") {

    if(ops_type == "sgemm") {

      float alpha = blas_alpha_val, beta = blas_beta_val;

      if(gemm_mode == "strided_batched") {

        if (rocblas_sgemm_strided_batched(blas_handle, transa, transb,
              rvs_blas::m, rvs_blas::n, rvs_blas::k,
              &alpha, (float *)da, blas_lda_offset, stride_a,
              (float *)db, blas_ldb_offset, stride_b, &beta,
              (float *)dc, blas_ldc_offset, stride_c, batch_size) != rocblas_status_success) {
          is_error = true;  // GPU cannot enqueue the gemm
          std::cout << "\nError in rocblas_sgemm_strided_batched() !!!" << "\n";
          return false;
        } else {
          return true;
        }
      }
      else {

        if (rocblas_sgemm(blas_handle, transa, transb,
              rvs_blas::m, rvs_blas::n, rvs_blas::k,
              &alpha, (float *)da, blas_lda_offset,
              (float *)db, blas_ldb_offset, &beta,
              (float *)dc, blas_ldc_offset) != rocblas_status_success) {
          is_error = true;  // GPU cannot enqueue the gemm
          std::cout << "\nError in rocblas_sgemm() !!!" << "\n";
          return false;
        } else {
          return true;
        }
      }
    }

    if(ops_type == "dgemm") {

      double alpha = blas_alpha_val, beta = blas_beta_val;

      if(gemm_mode == "strided_batched") {

        if (rocblas_dgemm_strided_batched(blas_handle, transa, transb,
              rvs_blas::m, rvs_blas::n, rvs_blas::k,
              &alpha, (double *)da, blas_lda_offset, stride_a,
              (double *)db, blas_ldb_offset, stride_b, &beta,
              (double *)dc, blas_ldc_offset, stride_c, batch_size) != rocblas_status_success) {
          is_error = true;  // GPU cannot enqueue the gemm
          std::cout << "\nError in rocblas_dgemm_strided_batched() !!!" << "\n";
          return false;
        } else {
          return true;
        }
      }
      else {
        if (rocblas_dgemm(blas_handle, transa, transb,
              rvs_blas::m, rvs_blas::n, rvs_blas::k,
              &alpha, (double *)da, blas_lda_offset,
              (double *)db, blas_ldb_offset, &beta,
              (double *)dc, blas_ldc_offset) != rocblas_status_success) {
          is_error = true;  // GPU cannot enqueue the gemm
          std::cout << "\nError in rocblas_dgemm() !!!" << "\n";
          return false;
        } else {
          return true;
        }
      }
    }

    if(ops_type == "hgemm") {

      _Float16 alpha = (float)blas_alpha_val;
      _Float16 beta = (float)blas_beta_val;

      if(gemm_mode == "strided_batched") {

        if (rocblas_hgemm_strided_batched(blas_handle, transa, transb,
              rvs_blas::m, rvs_blas::n, rvs_blas::k,
              &alpha, (rocblas_half *)da, blas_lda_offset, stride_a,
              (rocblas_half *)db, blas_ldb_offset, stride_b, &beta,
              (rocblas_half *)dc, blas_ldc_offset, stride_c, batch_size) != rocblas_status_success) {
          is_error = true;  // GPU cannot enqueue the gemm
          std::cout << "\nError in rocblas_hgemm_strided_batched() !!!" << "\n";
          return false;
        } else {
          return true;
        }
      }
      else {

        if (rocblas_hgemm(blas_handle, transa, transb,
              rvs_blas::m, rvs_blas::n, rvs_blas::k,
              &alpha, (rocblas_half *)da, blas_lda_offset,
              (rocblas_half *)db, blas_ldb_offset, &beta,
              (rocblas_half *)dc, blas_ldc_offset) != rocblas_status_success) {
          is_error = true;  // GPU cannot enqueue the gemm
          std::cout << "\nError in rocblas_hgemm() !!!" << "\n";
          return false;
        } else {
          return true;
        }
      }

    }
#if !defined(__HIP_ARCH_GFX803__) // Add this conditional compilation
    if(data_type == "fp8_r") {

      rocblas_datatype a_type = rocblas_datatype_f8_r;
      rocblas_datatype b_type = rocblas_datatype_f8_r;
      rocblas_datatype c_type = rocblas_datatype_f8_r;
      rocblas_datatype d_type = rocblas_datatype_f8_r;

      rocblas_computetype compute_type = rocblas_compute_type_f32;
      rocblas_gemm_algo algo = rocblas_gemm_algo_standard;
      int32_t sol_index = 0;
      uint32_t flags = 0;

      rocblas_float alpha = (rocblas_float) blas_alpha_val;
      rocblas_float beta = (rocblas_float) blas_beta_val;

      if(gemm_mode == "strided_batched") {

        if (rocblas_gemm_strided_batched_ex3(blas_handle, transa, transb,
              rvs_blas::m, rvs_blas::n, rvs_blas::k, &alpha,
              da, a_type, blas_lda_offset, stride_a,
              db, b_type, blas_ldb_offset, stride_b, &beta,
              dc, c_type, blas_ldc_offset, stride_c,
              dd, d_type, blas_ldd_offset, stride_d, batch_size,
              compute_type, algo, sol_index, flags) != rocblas_status_success) {

          is_error = true;  // GPU cannot enqueue the gemm
          std::cout << "\nError in rocblas_gemm_strided_batched_ex3() !!! " << "\n";
          return false;
        } else {
          return true;
        }
      }
      else {

        if (rocblas_gemm_ex3(blas_handle, transa, transb,
              rvs_blas::m, rvs_blas::n, rvs_blas::k, &alpha,
              da, a_type, blas_lda_offset,
              db, b_type, blas_ldb_offset, &beta,
              dc, c_type, blas_ldc_offset,
              dd, d_type, blas_ldd_offset,
              compute_type, algo, sol_index, flags) != rocblas_status_success) {

          is_error = true;  // GPU cannot enqueue the gemm
          std::cout << "\nError in rocblas_gemm_ex3() !!! " << "\n";
          return false;
        } else {
          return true;
        }
      }
    }
#endif // !defined(__HIP_ARCH_GFX803__) // Add this closing condition
    if(data_type == "fp16_r") {

      rocblas_datatype a_type = rocblas_datatype_f16_r;
      rocblas_datatype b_type = rocblas_datatype_f16_r;
      rocblas_datatype c_type = rocblas_datatype_f16_r;
      rocblas_datatype d_type = rocblas_datatype_f16_r;

      rocblas_datatype compute_type = rocblas_datatype_f32_r;
      rocblas_gemm_algo algo = rocblas_gemm_algo_standard;
      int32_t sol_index = 0;
      uint32_t flags = 0;

      rocblas_float alpha = (rocblas_float) blas_alpha_val;
      rocblas_float beta = (rocblas_float) blas_beta_val;

      if(gemm_mode == "strided_batched") {

        if (rocblas_gemm_strided_batched_ex(blas_handle, transa, transb,
              rvs_blas::m, rvs_blas::n, rvs_blas::k, &alpha,
              da, a_type, blas_lda_offset, stride_a,
              db, b_type, blas_ldb_offset, stride_b, &beta,
              dc, c_type, blas_ldc_offset, stride_c,
              dd, d_type, blas_ldd_offset, stride_d, batch_size,
              compute_type, algo, sol_index, flags) != rocblas_status_success) {

          is_error = true;  // GPU cannot enqueue the gemm
          std::cout << "\nError in rocblas_gemm_strided_batched_ex() !!!" << "\n";
          return false;
        } else {
          return true;
        }
      }
      else {

        if (rocblas_gemm_ex(blas_handle, transa, transb,
              rvs_blas::m, rvs_blas::n, rvs_blas::k, &alpha,
              da, a_type, blas_lda_offset,
              db, b_type, blas_ldb_offset, &beta,
              dc, c_type, blas_ldc_offset,
              dd, d_type, blas_ldd_offset,
              compute_type, algo, sol_index, flags) != rocblas_status_success) {

          is_error = true;  // GPU cannot enqueue the gemm
          std::cout << "\nError in rocblas_gemm_ex() !!!" << "\n";
          return false;
        } else {
          return true;
        }
      }
    }

    if(data_type == "bf16_r") {

      rocblas_datatype a_type = rocblas_datatype_bf16_r;
      rocblas_datatype b_type = rocblas_datatype_bf16_r;
      rocblas_datatype c_type = rocblas_datatype_bf16_r;
      rocblas_datatype d_type = rocblas_datatype_bf16_r;

      rocblas_datatype compute_type = rocblas_datatype_f32_r;
      rocblas_gemm_algo algo = rocblas_gemm_algo_standard;
      int32_t sol_index = 0;
      uint32_t flags = 0;

      rocblas_float alpha = (rocblas_float) blas_alpha_val;
      rocblas_float beta = (rocblas_float) blas_beta_val;

      if(gemm_mode == "strided_batched") {

        if (rocblas_gemm_strided_batched_ex(blas_handle, transa, transb,
              rvs_blas::m, rvs_blas::n, rvs_blas::k, &alpha,
              da, a_type, blas_lda_offset, stride_a,
              db, b_type, blas_ldb_offset, stride_b, &beta,
              dc, c_type, blas_ldc_offset, stride_c,
              dd, d_type, blas_ldd_offset, stride_d, batch_size,
              compute_type, algo, sol_index, flags) != rocblas_status_success) {

          is_error = true;  // GPU cannot enqueue the gemm
          std::cout << "\nError in rocblas_gemm_strided_batched_ex() !!!" << "\n";
          return false;
        } else {
          return true;
        }
      }
      else {

        if (rocblas_gemm_ex(blas_handle, transa, transb,
              rvs_blas::m, rvs_blas::n, rvs_blas::k, &alpha,
              da, a_type, blas_lda_offset,
              db, b_type, blas_ldb_offset, &beta,
              dc, c_type, blas_ldc_offset,
              dd, d_type, blas_ldd_offset,
              compute_type, algo, sol_index, flags) != rocblas_status_success) {

          is_error = true;  // GPU cannot enqueue the gemm
          std::cout << "\nError in rocblas_gemm_ex() !!!" << "\n";
          return false;
        } else {
          return true;
        }
      }
    }
  }
  else if(blas_source == "hipblaslt") {

    if (hipblasLtMatmul(hbl_handle, hbl_matmul,
          &blas_alpha_val, da, hbl_layout_a,
          db, hbl_layout_b, &blas_beta_val,
          dc, hbl_layout_c,
          dd, hbl_layout_d,
          &hbl_heuristic_result.algo, hbl_workspace, hbl_heuristic_result.workspaceSize,
          hip_stream) != HIPBLAS_STATUS_SUCCESS) {

      is_error = true;  // GPU cannot enqueue the gemm
      std::cout << "\nError in hipblasLtMatmul() !!!" << "\n";
      return false;
    }
  }
  else {
    return false;
  }

  return true;
}

/**
 * @brief generate matrix random data
 * it should be called before rocBlas GEMM
 */
void rvs_blas::generate_random_matrix_data(void) {

  if (!is_error) {

    if("hiprand" == matrix_init) {

      if(ops_type == "dgemm") {

        if(hiprandGenerateUniformDouble(hiprand_generator, (double *)da, size_a) != HIPRAND_STATUS_SUCCESS) {
          std::cout << "\n hiprandGenerateUniformDouble() failed !!!" << "\n";
          is_error = true;
          return;
        }

        if(hiprandGenerateUniformDouble(hiprand_generator, (double *)db, size_b) != HIPRAND_STATUS_SUCCESS) {
          std::cout << "\n hiprandGenerateUniformDouble() failed !!!" << "\n";
          is_error = true;
          return;
        }

        if(hiprandGenerateUniformDouble(hiprand_generator, (double *)dc, size_c) != HIPRAND_STATUS_SUCCESS) {
          std::cout << "\n hiprandGenerateUniformDouble() failed !!!" << "\n";
          is_error = true;
          return;
        }

        if(hipStreamSynchronize(hip_stream) != hipSuccess) {
          std::cout << "hipStreamSynchronize() failed !!! for stream " << hip_stream << std::endl;
          is_error = true;
          return;
        }
      }
    }
    else {

      size_t i;
      uint64_t nextr = (uint64_t) time(NULL);

      //SGEMM (float fp32_r)
      if(ops_type == "sgemm") {

        for (i = 0; i < size_a; ++i)
         ((float *) ha)[i] = fast_pseudo_rand(&nextr, i);

        for (i = 0; i < size_b; ++i)
          ((float *) hb)[i] = fast_pseudo_rand(&nextr, i);

        for (int i = 0; i < size_c; ++i)
          ((float *) hc)[i] = fast_pseudo_rand(&nextr, i);
      }

      //DGEMM (double fp64_r)
      if(ops_type == "dgemm") {

        for (i = 0; i < size_a; ++i)
          ((double *) ha)[i] = (double)fast_pseudo_rand(&nextr, i);

        for (i = 0; i < size_b; ++i)
          ((double *) hb)[i] = (double)fast_pseudo_rand(&nextr, i);

        for (int i = 0; i < size_c; ++i)
          ((double *) hc)[i] = (double)fast_pseudo_rand(&nextr, i);
      }

      //HGEMM (half-float fp16_r)
      if(ops_type == "hgemm") {

        for (i = 0; i < size_a; ++i)
          ((rocblas_half* )ha)[i] = fast_pseudo_rand(&nextr, i);

        for (i = 0; i < size_b; ++i)
          ((rocblas_half* )hb)[i] = fast_pseudo_rand(&nextr, i);

        for (int i = 0; i < size_c; ++i)
          ((rocblas_half* )hc)[i] = fast_pseudo_rand(&nextr, i);
      }

      // 8-bit floating point real (fp8_r) format
      if(data_type == "fp8_r") {

        if (blas_source == "rocblas") {
          for (i = 0; i < size_a; ++i)
            ((rocblas_f8* )ha)[i] = rocblas_f8(fast_pseudo_rand(&nextr, i));

          for (i = 0; i < size_b; ++i)
            ((rocblas_f8* )hb)[i] = rocblas_f8(fast_pseudo_rand(&nextr, i));

          for (i = 0; i < size_c; ++i)
            ((rocblas_f8* )hc)[i] = rocblas_f8(fast_pseudo_rand(&nextr, i));
        }
        else if (blas_source == "hipblaslt") {

          for (i = 0; i < size_a; ++i)
            ((hipblaslt_f8* )ha)[i] = hipblaslt_f8(fast_pseudo_rand(&nextr, i));

          for (i = 0; i < size_b; ++i)
            ((hipblaslt_f8* )hb)[i] = hipblaslt_f8(fast_pseudo_rand(&nextr, i));

          for (i = 0; i < size_c; ++i)
            ((float* )hc)[i] = float(fast_pseudo_rand(&nextr, i));
        }
      }

      // 8-bit floating point real OCP E4M3 (fp8_e4m3_r) format
      if(data_type == "fp8_e4m3_r") {

        for (i = 0; i < size_a; ++i)
          ((hipblaslt_f8* )ha)[i] = hipblaslt_f8(fast_pseudo_rand(&nextr, i));

        for (i = 0; i < size_b; ++i)
          ((hipblaslt_f8* )hb)[i] = hipblaslt_f8(fast_pseudo_rand(&nextr, i));

        for (i = 0; i < size_c; ++i)
          ((float* )hc)[i] = float(fast_pseudo_rand(&nextr, i));
      }

      // 8-bit floating point real OCP E5M2 (fp8_e5m2_r) format
      if(data_type == "fp8_e5m2_r") {

        for (i = 0; i < size_a; ++i)
          ((hipblaslt_bf8* )ha)[i] = hipblaslt_bf8(fast_pseudo_rand(&nextr, i));

        for (i = 0; i < size_b; ++i)
          ((hipblaslt_bf8* )hb)[i] = hipblaslt_bf8(fast_pseudo_rand(&nextr, i));

        for (i = 0; i < size_c; ++i)
          ((float* )hc)[i] = float(fast_pseudo_rand(&nextr, i));
      }

      // 16-bit floating point real (fp16_r) format
      if(data_type == "fp16_r") {

        if (blas_source == "rocblas") {
          for (i = 0; i < size_a; ++i)
            ((rocblas_half* )ha)[i] = rocblas_half(fast_pseudo_rand(&nextr, i));

          for (i = 0; i < size_b; ++i)
            ((rocblas_half* )hb)[i] = rocblas_half(fast_pseudo_rand(&nextr, i));

          for (i = 0; i < size_c; ++i)
            ((rocblas_half* )hc)[i] = rocblas_half(fast_pseudo_rand(&nextr, i));
        }
        else if (blas_source == "hipblaslt") {
          for (i = 0; i < size_a; ++i)
            ((hipblasLtHalf* )ha)[i] = hipblasLtHalf(fast_pseudo_rand(&nextr, i));

          for (i = 0; i < size_b; ++i)
            ((hipblasLtHalf* )hb)[i] = hipblasLtHalf(fast_pseudo_rand(&nextr, i));

          for (i = 0; i < size_c; ++i)
            ((hipblasLtHalf* )hc)[i] = hipblasLtHalf(fast_pseudo_rand(&nextr, i));
        }
      }

      // 16-bit brain floating point real (bf16_r) format
      if(data_type == "bf16_r") {

        if (blas_source == "rocblas") {
          for (i = 0; i < size_a; ++i)
            ((rocblas_bfloat16* )ha)[i] = rocblas_bfloat16(fast_pseudo_rand(&nextr, i));

          for (i = 0; i < size_b; ++i)
            ((rocblas_bfloat16* )hb)[i] = rocblas_bfloat16(fast_pseudo_rand(&nextr, i));

          for (i = 0; i < size_c; ++i)
            ((rocblas_bfloat16* )hc)[i] = rocblas_bfloat16(fast_pseudo_rand(&nextr, i));
        }
        else if (blas_source == "hipblaslt") {

          for (i = 0; i < size_a; ++i)
            ((hipblasLtBfloat16* )ha)[i] = hipblasLtBfloat16(fast_pseudo_rand(&nextr, i));

          for (i = 0; i < size_b; ++i)
            ((hipblasLtBfloat16* )hb)[i] = hipblasLtBfloat16(fast_pseudo_rand(&nextr, i));

          for (i = 0; i < size_c; ++i)
            ((hipblasLtBfloat16* )hc)[i] = hipblasLtBfloat16(fast_pseudo_rand(&nextr, i));
        }
      }

      // 8-bit integer real (i8_r) format
      if(data_type == "i8_r") {

        for (i = 0; i < size_a; ++i)
          ((int8_t* )ha)[i] = int8_t(fast_pseudo_rand(&nextr, i));

        for (i = 0; i < size_b; ++i)
          ((int8_t* )hb)[i] = int8_t(fast_pseudo_rand(&nextr, i));

        for (i = 0; i < size_c; ++i)
          ((int8_t* )hc)[i] = int8_t(fast_pseudo_rand(&nextr, i));
      }
    }
  }
}

float rvsblas_uniform_int_1_10()
{
  if(!rvsblas_t_rand_init)
  {
    for(int i = 0; i < RANDBUF; i++)
    {
      rvsblas_t_rand_f_array[i]
        = (float)std::uniform_int_distribution<unsigned>(1, 10)(rvsblas_t_rng);
      rvsblas_t_rand_d_array[i] = (double)rvsblas_t_rand_f_array[i];
    }
    rvsblas_t_rand_init = 1;
  }
  rvsblas_t_rand_idx = (rvsblas_t_rand_idx + 1) & (RANDLEN - 1);
  return rvsblas_t_rand_f_array[rvsblas_t_rand_idx];
}

/**
 * @brief fast pseudo random generator 
 * @return floating point random number
 */
float rvs_blas::fast_pseudo_rand(uint64_t *nextr, size_t i) {

  if ("rand" == matrix_init) {

    if(("fp8_r" == data_type) && (blas_source == "rocblas")) {
      return (float)std::uniform_int_distribution<int>(1, 2)(rvsblas_t_rng);
    }
    else if (("fp16_r" == data_type) || ("hgemm" == ops_type))
    {
      return (float)std::uniform_int_distribution<int>(-2, 2)(rvsblas_t_rng);
    }
    else if("bf16_r" == data_type)
    {
      return (float)std::uniform_int_distribution<int>(-2, 2)(rvsblas_t_rng);
    }
    else if("i8_r" == data_type)
    {
      return (float)std::uniform_int_distribution<unsigned short>(1, 3)(rvsblas_t_rng);
    }
    else { /* sgemm, dgemm, fp8_e4m3_r, fp8_e5m2_r */
      return rvsblas_uniform_int_1_10();
    }
  }
  else if ("trig" == matrix_init) {
    return sin(static_cast<float>(i));
  }
  else {
    *nextr = *nextr * 1103515245 + 12345;
    return static_cast<float>(static_cast<uint32_t>
        ((*nextr / 65536) % RANDOM_CT)) / RANDOM_DIV_CT;
  }
}

/**
 * @brief HIP callback function
 * @param stream stream identifier
 * @param status status of stream operations
 * @param user_data user specified data
 * @return true if everything went fine, otherwise false
 */
void rvs_blas::hip_stream_callback (hipStream_t stream, hipError_t status, void *user_data) {

  bool error = false;

  if(nullptr == user_data)
  {
    return;
  }

  /* Call the registered callback function */
  rvs_blas *rvsblas = (rvs_blas *)user_data;

  if (hipSuccess == status) {
    error = true;
  }
  rvsblas->callback(error, rvsblas->user_data);
}

/**
 * @brief Set rvs blas callback
 * @param callback registered callback function
 * @param user_data user data
 * @return true if everything went fine, otherwise false
 */
bool rvs_blas::set_callback(rvsBlasCallback_t callback, void *user_data) {

  if(nullptr == callback) {
    return false;
  }

  this->callback = callback;
  this->user_data = user_data;

  /* Add callback to be called items in stream is completed */
  if(hipSuccess != hipStreamAddCallback (hip_stream, this->hip_stream_callback , (void *)this, 0)) {
    return false;
  }

  return true;
}

/**
 * Host(CPU) based Matrix multiplication -> C = alpha (A*B) + beta (C).
 * @param[in] alpha scalar for matrix A*B
 * @param[in] beta scalar for matrix C
 * @param[in] M matrix A rows
 * @param[in] N matrix B cols
 * @param[in] K matrix A/B cols/rows respectively
 * @param[in] A matrix A
 * @param[in] B matrix B
 * @param[in,out] C matrix C
 */
template <typename T>
void host_matrix_mul(T alpha,
    T        beta,
    int      M,
    int      N,
    int      K,
    const T* A,
    int      As1,
    int      As2,
    const T* B,
    int      Bs1,
    int      Bs2,
    T*       C,
    int      Cs1,
    int      Cs2)
{
  for(int i1 = 0; i1 < M; i1++)
  {
    for(int i2 = 0; i2 < N; i2++)
    {
      T t = 0.0;
      for(int i3 = 0; i3 < K; i3++)
      {
        t += A[i1 * As1 + i3 * As2] * B[i3 * Bs1 + i2 * Bs2];
      }
      C[i1 * Cs1 + i2 * Cs2] = beta * C[i1 * Cs1 + i2 * Cs2] + alpha * t;
    }
  }
}

/*! \brief F-norm utility function that computes the scale
  (largest absolute element in the column) and sum sqrt of the matric column */
  template <typename T>
void lapack_xlassq(int64_t n, T* X, int64_t incx, double& scale, double& sumsq) {

  if(n > 0)
  {
    double abs_X = 0.0;
    for(int64_t i = 0; i < n; i++)
    {
      abs_X = std::abs(X[i * incx]);

      if(abs_X > 0 || std::isnan(abs_X))
      {
        if(scale < abs_X)
        {
          sumsq = 1 + sumsq * std::sqrt(scale / abs_X);
          scale = abs_X;
        }
        else
        {
          sumsq = sumsq + std::sqrt(abs_X / scale);
        }
      }
    }
  }
}

/*! \brief F-norm utility function that acculatively computes
  the sum sqrt of the matrix from sum sqrt of the matric column */
template <typename T>
void lapack_xcombssq(T* ssq, T* colssq) {

  if(ssq[0] >= colssq[0])
  {
    if(ssq[0] != 0)
    {
      ssq[1] = ssq[1] + std::sqrt(colssq[0] / ssq[0]) * colssq[1];
    }
    else
    {
      ssq[1] = ssq[1] + colssq[1];
    }
  }
  else
  {
    ssq[1] = colssq[1] + std::sqrt(ssq[0] / colssq[0]) * ssq[1];
    ssq[0] = colssq[0];
  }
  return;
}

/*! \brief matrix norm function calculates the one norm,
  infinity norm or the frobenius norm of the matrix A */
template <typename T>
double calculate_norm(char norm_type, int64_t m, int64_t n, T* A, int64_t lda, double* work) {

  double value = 0.0;
  double sum   = 0.0;

  if(std::min(m, n) == 0)
    return value;

  int64_t a_offset = lda >= 0 ? 0 : lda * (1 - n); // e.g. vectors with negative inc
  if(norm_type == 'O' || norm_type == 'o' || norm_type == '1')
  {
    //Find the one norm of Matrix A.
    for(int64_t j = 0; j < n; j++)
    {
      sum = 0.0;
      for(int64_t i = 0; i < m; i++)
        sum = sum + std::abs(A[a_offset + i + j * lda]);

      if(value < sum || std::isnan(sum))
        value = sum;
    }
  }
  else if(norm_type == 'I' || norm_type == 'i')
  {
    //Find the infinity norm of Matrix A.
    for(int64_t j = 0; j < n; j++)
      for(int64_t i = 0; i < m; i++)
      {
        work[i] = work[i] + std::abs(A[a_offset + i + j * lda]);
      }
    for(int64_t i = 0; i < m; i++)
      if(value < work[i] || std::isnan(work[i]))
        value = work[i];
  }
  else if(norm_type == 'F' || norm_type == 'f')
  {
    //Find the Frobenius norm of Matrix A.
    //SSQ(1) is scale
    //SSQ(2) is sum-of-squares
    //For better accuracy, sum each column separately.
    std::vector<double> ssq(2);
    std::vector<double> colssq(2);

    ssq[0] = 0.0;
    ssq[1] = 1.0;
    for(int64_t j = 0; j < n; j++)
    {
      colssq[0] = 0.0;
      colssq[1] = 1.0;
      lapack_xlassq(m, A + a_offset + j * lda, 1, colssq[0], colssq[1]);
      lapack_xcombssq(ssq.data(), colssq.data());
    }
    value = ssq[0] * std::sqrt(ssq[1]);
  }

  return value;
}

/*! \brief Matrix utility function to create difference matrix from two matrices */
template <typename T>
void m_axpy_64(int64_t N, T* alpha, T* x, int64_t incx, T* y, int64_t incy) {

  int64_t x_offset = incx >= 0 ? 0 : incx * (1 - N);
  int64_t y_offset = incy >= 0 ? 0 : incy * (1 - N);
  for(int64_t i = 0; i < N; i++)
  {
    y[y_offset + i * incy] = (*alpha) * x[x_offset + i * incx] + y[y_offset + i * incy];
  }
}

/**
 * Get relative norm error for float/double data type matrices.
 * @param[in] norm_type matrix norm type to execute.
 * @param[in] M matrix rows
 * @param[in] N matrix columns
 * @param[in] Ida matrix leading dimension
 * @param[in] hA host memory matrix A
 * @param[in] hB host memory matrix B
 */
template <
    typename T,
    std::enable_if<(std::is_same<T, float>{} || std::is_same<T, double>{}),int>::type = 0>
double check_norm_error(char norm_type, int64_t M, int64_t N, int64_t lda, T* hA, T* hB) {

  // norm type can be 'O', 'I', 'F', 'o', 'i', 'f' for one, infinity or Frobenius norm
  // one norm is max column sum
  // infinity norm is max row sum
  // Frobenius is l2 norm of matrix entries

  std::vector<double> work(std::max(int64_t(1), M));
  int64_t             incx  = 1;
  double              alpha = -1.0;

  size_t size = M * size_t(N); // copying data so lda is M

  std::vector<double> hA_double(size);
  std::vector<double> hB_double(size);

  for(int64_t i = 0; i < N; i++)
  {
    int64_t src_col = i * int64_t(lda);
    int64_t dst_col = i * int64_t(M);
    for(int64_t j = 0; j < M; j++)
    {
      hA_double[size_t(dst_col + j)] = double(hA[src_col + j]);
      hB_double[size_t(dst_col + j)] = double(hB[src_col + j]);
    }
  }

  double a_norm = calculate_norm(norm_type, M, N, hA_double.data(), M, work.data());
  m_axpy_64(size, &alpha, hA_double.data(), incx, hB_double.data(), incx);
  double error = calculate_norm(norm_type, M, N, hB_double.data(), M, work.data()) / a_norm;

  return error;
}

/**
 * Get relative norm error for fp8 data type matrices.
 * @param[in] norm_type matrix norm type to execute.
 * @param[in] M matrix rows
 * @param[in] N matrix columns
 * @param[in] Ida matrix leading dimension
 * @param[in] hA host memory matrix A
 * @param[in] hB host memory matrix B
 */
template <
    typename T,
    std::enable_if<std::is_same<T, rocblas_f8>{}, int>::type = 0>
double check_norm_error(char norm_type, int64_t M, int64_t N, int64_t lda, T* hA, T* hB) {

  // norm type can be 'O', 'I', 'F', 'o', 'i', 'f' for one, infinity or Frobenius norm
  // one norm is max column sum
  // infinity norm is max row sum
  // Frobenius is l2 norm of matrix entries
  size_t size = M * size_t(N); // copying data so lda is M

  std::vector<double> hA_double(size);
  std::vector<double> hB_double(size);

  for(int64_t i = 0; i < N; i++)
  {
    int64_t src_col = i * int64_t(lda);
    int64_t dst_col = i * int64_t(M);
    for(int64_t j = 0; j < M; j++)
    {
      hA_double[size_t(dst_col + j)] = double(float(hA[src_col + j]));
      hB_double[size_t(dst_col + j)] = double(float(hB[src_col + j]));
    }
  }

  std::vector<double> work(std::max(int64_t(1), M));
  int64_t             incx  = 1;
  double              alpha = -1.0;

  double a_norm = calculate_norm(norm_type, M, N, hA_double.data(), M, work.data());
  m_axpy_64(size, &alpha, hA_double.data(), incx, hB_double.data(), incx);
  double error = calculate_norm(norm_type, M, N, hB_double.data(), M, work.data()) / a_norm;

  return error;
}

/**
 * Get relative norm error for bf16 data type matrices.
 * @param[in] norm_type matrix norm type to execute.
 * @param[in] M matrix rows
 * @param[in] N matrix columns
 * @param[in] Ida matrix leading dimension
 * @param[in] hA host memory matrix A
 * @param[in] hB host memory matrix B
 */
template <typename T,
         std::enable_if<(std::is_same<T, rocblas_bfloat16>{}), int>::type = 0>
double check_norm_error(char norm_type, int64_t M, int64_t N, int64_t lda, T* hA, T* hB) {

  size_t              size = N * (size_t)lda;
  std::vector<double> hA_double(size);
  std::vector<double> hB_double(size);

  for(int64_t i = 0; i < N; i++)
  {
    for(int64_t j = 0; j < M; j++)
    {
      size_t idx = j + i * (size_t)lda;

      // zero extend lower 16 bits of bfloat16 to convert to IEEE float/double
      hA_double[idx] = double(float((uint32_t)hA[idx].data << 16));
      hB_double[idx] = double(float((uint32_t)hB[idx].data << 16));
    }
  }

  return check_norm_error<double>(norm_type, M, N, lda, hA_double.data(), hB_double.data());
}

/**
 * Get relative norm error for fp16 (half) data type matrices.
 * @param[in] norm_type matrix norm type to execute.
 * @param[in] M matrix rows
 * @param[in] N matrix columns
 * @param[in] Ida matrix leading dimension
 * @param[in] hA host memory matrix A
 * @param[in] hB host memory matrix B
 */
template <typename T,
         std::enable_if<(std::is_same<T, rocblas_half>{}), int>::type = 0>
double check_norm_error(char norm_type, int64_t M, int64_t N, int64_t lda, T* hA, T* hB) {

  size_t              size = N * (size_t)lda;
  std::vector<double> hA_double(size);
  std::vector<double> hB_double(size);

  for(int64_t i = 0; i < N; i++)
  {
    for(int64_t j = 0; j < M; j++)
    {
      size_t idx       = j + i * (size_t)lda;
      hA_double[idx] = double(hA[idx]);
      hB_double[idx] = double(hB[idx]);
    }
  }

  return check_norm_error<double>(norm_type, M, N, lda, hA_double.data(), hB_double.data());
}

/**
 * Check gemm output for consistency (current output vs previous output).
 * @param[in] dout Device (GPU) matrix output.
 * @param[in] size No of elements in matrix output.
 * @param[out] error Relative F-norm self error.
 */
template <typename T>
bool rvs_blas::check_result_consistency(void * dout, size_t size, double &error) {

  /* Allocate host memory for current gemm output */
  if (!hco) {
    if (hipHostMalloc(&hco, size * sizeof(T), 0) != hipSuccess)
      return false;

    if (hipMemset(hco, 0, size * sizeof(T)) != hipSuccess)
      return false;
  }

  /* Copy current device gemm output to host memory */
  if (hipMemcpy(hco, dout, sizeof(T) * size, hipMemcpyDeviceToHost) != hipSuccess)
    return false;

  /* Allocate host memory for previous gemm output */
  if (!hpo) {
    if (hipHostMalloc(&hpo, size * sizeof(T), 0) != hipSuccess)
      return false;

    if (hipMemset(hpo, 0, size * sizeof(T)) != hipSuccess)
      return false;

    /* Copy current device gemm output to host memory */
    if (hipMemcpy(hpo, dout, sizeof(T) * size, hipMemcpyDeviceToHost) != hipSuccess)
      return false;

    /* Exit first iteration of self-check as there is no previous result yet ! */
    return true;
  }

  /* If error injection is enabled, insert error in gemm output */
  if(error_freq && error_count && check_count) {

    /* Insert error at set error frequency */
    if(check_count%error_freq == 0) {

      if(error_count <= size) {

        if (hipMemset(hco, 0,  sizeof(T) * error_count) != hipSuccess)
          return false;
      }
    }
  }

  /* Norm checking */

  T * fp = (T *)hpo;
  T * fc = (T *)hco;

  int64_t M = (int64_t)m;
  int64_t N = (int64_t)n;
  int64_t _ldc = (int64_t) blas_ldc_offset;

  /* Set norm error if any by checking current vs previous gemm outputs */
  error = std::abs(check_norm_error('F', M, N, _ldc, fp, fc));

  /* Copy current device gemm output to host previous gemm output memory */
  if (hipMemcpy(hpo, dout, sizeof(T) * size, hipMemcpyDeviceToHost) != hipSuccess)
    return false;

  return true;
}

/**
 * Check gemm output for accuracy (GPU output vs CPU output).
 * @param[in] dout Device (GPU) matrix output.
 * @param[in] size No of elements in matrix output.
 * @param[out] error Relative accuracy error.
 */
template <typename T>
bool rvs_blas::check_result_accuracy(void * dout, size_t size, double &error) {

  int a_stride_1 = 1,
      a_stride_2 = blas_lda_offset,
      b_stride_1 = 1,
      b_stride_2 = blas_ldb_offset;

  if(transa == rocblas_operation_transpose) {
    a_stride_1 = blas_lda_offset;
    a_stride_2 = 1;
  }

  if(transb == rocblas_operation_transpose) {
    b_stride_1 = blas_ldb_offset;
    b_stride_2 = 1;
  }

  /* Allocate host memory for host (CPU) gemm output */
  if(!hout) {
    if(hipHostMalloc(&hout, size * sizeof(T), 0) != hipSuccess)
      return false;

    if (hipMemset(hout, 0, size * sizeof(T)) != hipSuccess)
      return false;
  }

  /* Allocate host memory for device (GPU) gemm output */
  if(!hdout) {
    if(hipHostMalloc(&hdout, size * sizeof(T), 0) != hipSuccess)
      return false;

    if (hipMemset(hdout, 0, size * sizeof(T)) != hipSuccess)
      return false;
  }

  T * _ha;
  T * _hb;
  T * _hc;
  T alpha = (T) blas_alpha_val;
  T beta = (T) blas_beta_val;

  if (std::is_same<T, float>{}) {
    _ha = (T *)ha;
    _hb = (T *)hb;
    _hc = (T *)hc;
  }
  else {
    _ha = (T *)ha;
    _hb = (T *)hb;
    _hc = (T *)hc;
  }

  /* Copy Matrix C to host gemm output memory */
  if(hipMemcpy(hout, _hc, sizeof(T) * size, hipMemcpyHostToHost) != hipSuccess)
    return false;

  /* Host (CPU) based matrix multiplication */
  host_matrix_mul<T>(alpha,
      beta,
      m,
      n,
      k,
      _ha,
      a_stride_1,
      a_stride_2,
      _hb,
      b_stride_1,
      b_stride_2,
      (T *)hout,
      1,
      blas_ldc_offset);

  /* Copy device gemm output to host memory */
  if (hipMemcpy(hdout, dout, sizeof(T) * size, hipMemcpyDeviceToHost) != hipSuccess)
    return false;

  /* If error injection is enabled, insert error in gemm output */
  if(error_freq && error_count && check_count) {

    /* Insert error at set error frequency */
    if(check_count%error_freq == 0) {

      if(error_count <= size) {

        if (hipMemset(hdout, 0,  sizeof(T) * error_count) != hipSuccess)
          return false;
      }
    }
  }

  /* Calculate max. relative error */

  T max_relative_error = 0.0;

  for(size_t i = 0; i < size; i++)
  {
    T relative_error = (((T *)hout)[i] - ((T *)hdout)[i]) / ((T *)hout)[i];

    relative_error = relative_error > 0 ? relative_error : -relative_error;

    max_relative_error
      = relative_error < max_relative_error ? max_relative_error : relative_error;
  }

  T eps = std::numeric_limits<T>::epsilon();
  T tolerance = 10;

  /* Set error if max. relative error greater than tolerance level */
  if(max_relative_error > eps * tolerance)
  {
    error = max_relative_error;
  }

  return true;
}

/**
 * Validate gemm output for consistency and accuracy.
 * @param[in] self_check Enable self checking of gemm outputs (previous vs current).
 * @param[in] accu_check Enable accuracy checking of gemm outputs (GPU vs CPU).
 * @param[out] self_error Relative F-norm self error.
 * @param[out] accu_error Relative accuracy error.
 */
bool rvs_blas::validate_gemm(bool self_check, bool accu_check, double &self_error, double &accu_error) {

  /* Gemm output checked for consistency/repeatability
     by comparing current output with previous output */
  if(self_check) {

    if(ops_type == "sgemm") {
      check_result_consistency<float>(dc, size_c, self_error);
    }
    else if(ops_type == "dgemm") {
      check_result_consistency<double>(dc, size_c, self_error);
    }
    else if(data_type == "fp8_r") {
      check_result_consistency<rocblas_f8>(dd, size_d, self_error);
    }
    else if(data_type == "fp16_r") {
      check_result_consistency<rocblas_half>(dd, size_d, self_error);
    }
    else if(data_type == "bf16_r") {
      check_result_consistency<rocblas_bfloat16>(dd, size_d, self_error);
    }
    else {
      return false;
    }
  }

  /* Gemm output checked for accuracy/correctness by comparing
     host(CPU) output with device(GPU) output */
  if(accu_check) {

    if(ops_type == "sgemm") {
      check_result_accuracy<float>(dc, size_c, accu_error);
    }
    else if(ops_type == "dgemm") {
      check_result_accuracy<double>(dc, size_c, accu_error);
    }
    else {
      return false;
    }
  }

  /* Error injection is enabled */
  if(error_freq && error_count) {
    /* Increment the gemm check counter */
    check_count++;
  }

  return true;
}

/**
 * Set gemm error stimulation parameters.
 * Note: This function is meant only for test purpose !!!
 * @param[in] _error_freq gemm calls per error injection.
 * @param[in] _error_count no. of errors injected in gemm result.
 */
void rvs_blas::set_gemm_error(uint64_t _error_freq, uint64_t _error_count) {

  error_freq = _error_freq;
  error_count = _error_count;
}

(Optional for Linux users) Output of /opt/rocm/bin/rocminfo --support

`

Additional Information

No response

chboishabba avatar Mar 04 '25 03:03 chboishabba

``

rocminfo --support
ROCk module is loaded
=====================    
HSA System Attributes    
=====================    
Runtime Version:         1.1
Runtime Ext Version:     1.4
System Timestamp Freq.:  1000.000000MHz
Sig. Max Wait Duration:  18446744073709551615 (0xFFFFFFFFFFFFFFFF) (timestamp count)
Machine Model:           LARGE                              
System Endianness:       LITTLE                             
Mwaitx:                  DISABLED
DMAbuf Support:          YES

==========               
HSA Agents               
==========               
*******                  
Agent 1                  
*******                  
  Name:                    Intel(R) Core(TM) i7-7700K CPU @ 4.20GHz
  Uuid:                    CPU-XX                             
  Marketing Name:          Intel(R) Core(TM) i7-7700K CPU @ 4.20GHz
  Vendor Name:             CPU                                
  Feature:                 None specified                     
  Profile:                 FULL_PROFILE                       
  Float Round Mode:        NEAR                               
  Max Queue Number:        0(0x0)                             
  Queue Min Size:          0(0x0)                             
  Queue Max Size:          0(0x0)                             
  Queue Type:              MULTI                              
  Node:                    0                                  
  Device Type:             CPU                                
  Cache Info:              
    L1:                      32768(0x8000) KB                   
  Chip ID:                 0(0x0)                             
  ASIC Revision:           0(0x0)                             
  Cacheline Size:          64(0x40)                           
  Max Clock Freq. (MHz):   4500                               
  BDFID:                   0                                  
  Internal Node ID:        0                                  
  Compute Unit:            8                                  
  SIMDs per CU:            0                                  
  Shader Engines:          0                                  
  Shader Arrs. per Eng.:   0                                  
  WatchPts on Addr. Ranges:1                                  
  Features:                None
  Pool Info:               
    Pool 1                   
      Segment:                 GLOBAL; FLAGS: FINE GRAINED        
      Size:                    32826884(0x1f4e604) KB             
      Allocatable:             TRUE                               
      Alloc Granule:           4KB                                
      Alloc Recommended Granule:4KB                                
      Alloc Alignment:         4KB                                
      Accessible by all:       TRUE                               
    Pool 2                   
      Segment:                 GLOBAL; FLAGS: KERNARG, FINE GRAINED
      Size:                    32826884(0x1f4e604) KB             
      Allocatable:             TRUE                               
      Alloc Granule:           4KB                                
      Alloc Recommended Granule:4KB                                
      Alloc Alignment:         4KB                                
      Accessible by all:       TRUE                               
    Pool 3                   
      Segment:                 GLOBAL; FLAGS: COARSE GRAINED      
      Size:                    32826884(0x1f4e604) KB             
      Allocatable:             TRUE                               
      Alloc Granule:           4KB                                
      Alloc Recommended Granule:4KB                                
      Alloc Alignment:         4KB                                
      Accessible by all:       TRUE                               
  ISA Info:                
*******                  
Agent 2                  
*******                  
  Name:                    gfx803                             
  Uuid:                    GPU-XX                             
  Marketing Name:          AMD Radeon RX 580 Series           
  Vendor Name:             AMD                                
  Feature:                 KERNEL_DISPATCH                    
  Profile:                 BASE_PROFILE                       
  Float Round Mode:        NEAR                               
  Max Queue Number:        128(0x80)                          
  Queue Min Size:          64(0x40)                           
  Queue Max Size:          131072(0x20000)                    
  Queue Type:              MULTI                              
  Node:                    1                                  
  Device Type:             GPU                                
  Cache Info:              
    L1:                      16(0x10) KB                        
  Chip ID:                 26591(0x67df)                      
  ASIC Revision:           1(0x1)                             
  Cacheline Size:          64(0x40)                           
  Max Clock Freq. (MHz):   1411                               
  BDFID:                   256                                
  Internal Node ID:        1                                  
  Compute Unit:            36                                 
  SIMDs per CU:            4                                  
  Shader Engines:          4                                  
  Shader Arrs. per Eng.:   1                                  
  WatchPts on Addr. Ranges:4                                  
  Coherent Host Access:    FALSE                              
  Features:                KERNEL_DISPATCH 
  Fast F16 Operation:      TRUE                               
  Wavefront Size:          64(0x40)                           
  Workgroup Max Size:      1024(0x400)                        
  Workgroup Max Size per Dimension:
    x                        1024(0x400)                        
    y                        1024(0x400)                        
    z                        1024(0x400)                        
  Max Waves Per CU:        40(0x28)                           
  Max Work-item Per CU:    2560(0xa00)                        
  Grid Max Size:           4294967295(0xffffffff)             
  Grid Max Size per Dimension:
    x                        4294967295(0xffffffff)             
    y                        4294967295(0xffffffff)             
    z                        4294967295(0xffffffff)             
  Max fbarriers/Workgrp:   32                                 
  Packet Processor uCode:: 730                                
  SDMA engine uCode::      58                                 
  IOMMU Support::          None                               
  Pool Info:               
    Pool 1                   
      Segment:                 GLOBAL; FLAGS: COARSE GRAINED      
      Size:                    8388608(0x800000) KB               
      Allocatable:             TRUE                               
      Alloc Granule:           4KB                                
      Alloc Recommended Granule:2048KB                             
      Alloc Alignment:         4KB                                
      Accessible by all:       FALSE                              
    Pool 2                   
      Segment:                 GLOBAL; FLAGS: EXTENDED FINE GRAINED
      Size:                    8388608(0x800000) KB               
      Allocatable:             TRUE                               
      Alloc Granule:           4KB                                
      Alloc Recommended Granule:2048KB                             
      Alloc Alignment:         4KB                                
      Accessible by all:       FALSE                              
    Pool 3                   
      Segment:                 GROUP                              
      Size:                    64(0x40) KB                        
      Allocatable:             FALSE                              
      Alloc Granule:           0KB                                
      Alloc Recommended Granule:0KB                                
      Alloc Alignment:         0KB                                
      Accessible by all:       FALSE                              
  ISA Info:                
    ISA 1                    
      Name:                    amdgcn-amd-amdhsa--gfx803          
      Machine Models:          HSA_MACHINE_MODEL_LARGE            
      Profiles:                HSA_PROFILE_BASE                   
      Default Rounding Mode:   NEAR                               
      Default Rounding Mode:   NEAR                               
      Fast f16:                TRUE                               
      Workgroup Max Size:      1024(0x400)                        
      Workgroup Max Size per Dimension:
        x                        1024(0x400)                        
        y                        1024(0x400)                        
        z                        1024(0x400)                        
      Grid Max Size:           4294967295(0xffffffff)             
      Grid Max Size per Dimension:
        x                        4294967295(0xffffffff)             
        y                        4294967295(0xffffffff)             
        z                        4294967295(0xffffffff)             
      FBarrier Max Size:       32                                 
*** Done ***           

chboishabba avatar Mar 04 '25 03:03 chboishabba

cat rvs_blas_build.log | grep -C 3 803

         To avoid this warning please remove this line from your configuration file or upgrade it using "doxygen -u"

warning: Tag 'FORMULA_TRANSPARENT' at line 1515 of file 'DoxyGuide' has become obsolete.

         To avoid this warning please remove this line from your configuration file or upgrade it using "doxygen -u"

warning: Tag 'LATEX_SOURCE_CODE' at line 1803 of file 'DoxyGuide' has become obsolete.

         To avoid this warning please remove this line from your configuration file or upgrade it using "doxygen -u"

warning: Tag 'LATEX_TIMESTAMP' at line 1819 of file 'DoxyGuide' has become obsolete.

         To avoid this warning please remove this line from your configuration file or upgrade it using "doxygen -u"

-- 

[ 10%] Create the bintest directory

[ 10%] Built target rvs_bintest_folder

[ 11%] Building CXX object rvslib/CMakeFiles/rvslib.dir/__/src/rvslognode.cpp.o

6 warnings generated when compiling for gfx803.

[ 12%] Building CXX object rvslib/CMakeFiles/rvslib.dir/__/src/rvslognodestring.cpp.o

[ 12%] Building CXX object rvslib/CMakeFiles/rvslib.dir/__/src/rvslognodeint.cpp.o

[ 13%] Building CXX object rvslib/CMakeFiles/rvslib.dir/__/src/rvsminnode.cpp.o

-- 

   31 | #define ROCBLAS_DEPRECATED_MSG(MSG) __attribute__((deprecated(#MSG)))

      |                                                    ^

fatal error: too many errors emitted, stopping now [-ferror-limit=]

5 warnings and 20 errors generated when compiling for gfx803.

make[2]: *** [rvslib/CMakeFiles/rvslib.dir/build.make:261: rvslib/CMakeFiles/rvslib.dir/__/src/rvs_blas.cpp.o] Error 1

make[2]: *** Waiting for unfinished jobs....

make[1]: *** [CMakeFiles/Makefile2:821: rvslib/CMakeFiles/rvslib.dir/all] Error 2


 cat rvs_blas_build.log | grep -C 3 error

/home/c/ROCmValidationSuite/gst.so/include/action.h:108: warning: Member gst_data_type (variable) of class gst_action is not documented.

/home/c/ROCmValidationSuite/gst.so/include/action.h:111: warning: Member gst_self_check (variable) of class gst_action is not documented.

/home/c/ROCmValidationSuite/gst.so/include/action.h:114: warning: Member gst_accu_check (variable) of class gst_action is not documented.

/home/c/ROCmValidationSuite/gst.so/include/action.h:118: warning: Member gst_error_inject (variable) of class gst_action is not documented.

/home/c/ROCmValidationSuite/gst.so/include/action.h:120: warning: Member gst_error_freq (variable) of class gst_action is not documented.

/home/c/ROCmValidationSuite/gst.so/include/action.h:122: warning: Member gst_error_count (variable) of class gst_action is not documented.

/home/c/ROCmValidationSuite/gst.so/include/action.h:125: warning: Member gst_gemm_mode (variable) of class gst_action is not documented.

/home/c/ROCmValidationSuite/gst.so/include/action.h:128: warning: Member gst_batch_size (variable) of class gst_action is not documented.

/home/c/ROCmValidationSuite/gst.so/include/action.h:132: warning: Member gst_stride_a (variable) of class gst_action is not documented.

-- 

/home/c/ROCmValidationSuite/iet.so/include/iet_worker.h:297: warning: Member computeThread(void) (function) of class IETWorker is not documented.

/home/c/ROCmValidationSuite/iet.so/include/iet_worker.h:298: warning: Member bandwidthThread(void) (function) of class IETWorker is not documented.

/home/c/ROCmValidationSuite/iet.so/include/iet_worker.h:300: warning: Member gpu_blas (variable) of class IETWorker is not documented.

/home/c/ROCmValidationSuite/iet.so/include/iet_worker.h:313: warning: Member blas_error (variable) of class IETWorker is not documented.

/home/c/ROCmValidationSuite/iet.so/include/iet_worker.h:338: warning: Member sgemm_success (variable) of class IETWorker is not documented.

/home/c/ROCmValidationSuite/iet.so/include/iet_worker.h:356: warning: Member matrix_size_b (variable) of class IETWorker is not documented.

/home/c/ROCmValidationSuite/iet.so/include/iet_worker.h:357: warning: Member matrix_size_c (variable) of class IETWorker is not documented.

-- 

/home/c/ROCmValidationSuite/include/rvs_blas.h:285: warning: Member allocate_gpu_matrix_mem(void) (function) of class rvs_blas is not documented.

/home/c/ROCmValidationSuite/include/rvs_blas.h:298: warning: Member datatype_to_hip_datatype(const std::string &datatype) (function) of class rvs_blas is not documented.

/home/c/ROCmValidationSuite/include/rvs_blas.h:313: warning: Member computetype_to_hipblas_computetype(const std::string &computetype) (function) of class rvs_blas is not documented.

/home/c/ROCmValidationSuite/include/rvs_blas.h:199: warning: Member error_freq (variable) of class rvs_blas is not documented.

/home/c/ROCmValidationSuite/include/rvs_blas.h:201: warning: Member error_count (variable) of class rvs_blas is not documented.

/home/c/ROCmValidationSuite/include/rvs_blas.h:203: warning: Member check_count (variable) of class rvs_blas is not documented.

/home/c/ROCmValidationSuite/src/rvs_blas.cpp:73: warning: The following parameters of rvs_blas::rvs_blas(int _gpu_device_index, int _m, int _n, int _k, std::string _matrix_init, int transA, int transB, float alpha, float beta, int lda, int ldb, int ldc, int ldd, std::string _ops_type, std::string _data_type, std::string _gemm_mode, int _batch_size, uint64_t _stride_a, uint64_t _stride_b, uint64_t _stride_c, uint64_t _stride_d, std::string _blas_source, std::string _compute_type) are not documented:

  parameter '_matrix_init'

-- 

6 warnings generated when compiling for host.

[ 17%] Building CXX object rvslib/CMakeFiles/rvslib.dir/__/rvs/src/rvsif_base.cpp.o

In file included from /home/c/ROCmValidationSuite/src/rvs_blas.cpp:25:

/home/c/ROCmValidationSuite/rvslib/../include/rvs_blas.h:302:38: error: use of undeclared identifier 'HIP_R_8F_E4M3'

  302 |         (datatype == "fp8_r")      ? HIP_R_8F_E4M3  :

      |                                      ^

/home/c/ROCmValidationSuite/rvslib/../include/rvs_blas.h:303:38: error: use of undeclared identifier 'HIP_R_8F_E4M3'

  303 |         (datatype == "fp8_e4m3_r") ? HIP_R_8F_E4M3  : // OCP fp8 E4M3

      |                                      ^

/home/c/ROCmValidationSuite/rvslib/../include/rvs_blas.h:304:38: error: use of undeclared identifier 'HIP_R_8F_E5M2'

  304 |         (datatype == "fp8_e5m2_r") ? HIP_R_8F_E5M2  : // OCP fp8 E5M2

      |                                      ^

[ 18%] Building CXX object rvslib/CMakeFiles/rvslib.dir/__/rvs/src/rvsif0.cpp.o

-- 

/home/c/ROCmValidationSuite/src/rvs_blas.cpp:112:26: note: field 'hbl_workspace' will be initialized after field 'hbl_layout_a'

  112 |   , hbl_handle(nullptr), hbl_workspace(nullptr)

      |                          ^~~~~~~~~~~~~~~~~~~~~~

/home/c/ROCmValidationSuite/src/rvs_blas.cpp:214:9: error: use of undeclared identifier 'HIP_R_8F_E4M3'

  214 |     if((HIP_R_8F_E4M3 == hbl_datatype) || (HIP_R_8F_E5M2 == hbl_datatype)) {

      |         ^

/home/c/ROCmValidationSuite/src/rvs_blas.cpp:214:44: error: use of undeclared identifier 'HIP_R_8F_E5M2'

  214 |     if((HIP_R_8F_E4M3 == hbl_datatype) || (HIP_R_8F_E5M2 == hbl_datatype)) {

      |                                            ^

/home/c/ROCmValidationSuite/src/rvs_blas.cpp:329:52: error: use of undeclared identifier 'HIPBLASLT_MATMUL_DESC_COMPUTE_INPUT_TYPE_A_EXT'

  329 |     if(hipblasLtMatmulDescSetAttribute(hbl_matmul, HIPBLASLT_MATMUL_DESC_COMPUTE_INPUT_TYPE_A_EXT, &hbl_datatype, sizeof(void*)) != HIPBLAS_STATUS_SUCCESS) {

      |                                                    ^

/home/c/ROCmValidationSuite/src/rvs_blas.cpp:334:52: error: use of undeclared identifier 'HIPBLASLT_MATMUL_DESC_COMPUTE_INPUT_TYPE_B_EXT'

  334 |     if(hipblasLtMatmulDescSetAttribute(hbl_matmul, HIPBLASLT_MATMUL_DESC_COMPUTE_INPUT_TYPE_B_EXT, &hbl_datatype, sizeof(void*)) != HIPBLAS_STATUS_SUCCESS) {

      |                                                    ^

/home/c/ROCmValidationSuite/src/rvs_blas.cpp:480:31: error: use of undeclared identifier 'hipblaslt_f8'

  480 |       return copy_data_to_gpu<hipblaslt_f8, float>();

      |                               ^

/home/c/ROCmValidationSuite/src/rvs_blas.cpp:485:29: error: use of undeclared identifier 'hipblaslt_f8'

  485 |     return copy_data_to_gpu<hipblaslt_f8, float>();

      |                             ^

/home/c/ROCmValidationSuite/src/rvs_blas.cpp:489:29: error: use of undeclared identifier 'hipblaslt_bf8'

  489 |     return copy_data_to_gpu<hipblaslt_bf8, float>();

      |                             ^

/home/c/ROCmValidationSuite/src/rvs_blas.cpp:562:38: error: use of undeclared identifier 'hipblaslt_f8'

  562 |       return allocate_gpu_matrix_mem<hipblaslt_f8, float>();

      |                                      ^

/home/c/ROCmValidationSuite/src/rvs_blas.cpp:567:36: error: use of undeclared identifier 'hipblaslt_f8'

  567 |     return allocate_gpu_matrix_mem<hipblaslt_f8, float>();

      |                                    ^

[ 19%] Building CXX object rvslib/CMakeFiles/rvslib.dir/__/rvs/src/rvsif1.cpp.o

[ 19%] Building CXX object rvslib/CMakeFiles/rvslib.dir/__/rvs/src/rvsaction.cpp.o

/home/c/ROCmValidationSuite/src/rvs_blas.cpp:571:36: error: use of undeclared identifier 'hipblaslt_bf8'

  571 |     return allocate_gpu_matrix_mem<hipblaslt_bf8, float>();

      |                                    ^

/home/c/ROCmValidationSuite/src/rvs_blas.cpp:705:18: error: unknown type name 'hipblaslt_f8'

  705 |         ha = new hipblaslt_f8[size_a];

      |                  ^

/home/c/ROCmValidationSuite/src/rvs_blas.cpp:706:18: error: unknown type name 'hipblaslt_f8'

  706 |         hb = new hipblaslt_f8[size_b];

      |                  ^

/home/c/ROCmValidationSuite/src/rvs_blas.cpp:713:16: error: unknown type name 'hipblaslt_f8'

  713 |       ha = new hipblaslt_f8[size_a];

      |                ^

/home/c/ROCmValidationSuite/src/rvs_blas.cpp:714:16: error: unknown type name 'hipblaslt_f8'

  714 |       hb = new hipblaslt_f8[size_b];

      |                ^

/home/c/ROCmValidationSuite/src/rvs_blas.cpp:720:16: error: unknown type name 'hipblaslt_bf8'; did you mean 'hipblasLtInt8'?

  720 |       ha = new hipblaslt_bf8[size_a];

      |                ^~~~~~~~~~~~~

      |                hipblasLtInt8

/opt/rocm_sdk_612/include/hipblaslt/hipblaslt-types.h:59:17: note: 'hipblasLtInt8' declared here

   59 | typedef int8_t  hipblasLtInt8;

      |                 ^

/home/c/ROCmValidationSuite/src/rvs_blas.cpp:721:16: error: unknown type name 'hipblaslt_bf8'; did you mean 'hipblasLtInt8'?

  721 |       hb = new hipblaslt_bf8[size_b];

      |                ^~~~~~~~~~~~~

      |                hipblasLtInt8

-- 

/opt/rocm_sdk_612/include/rocblas/internal/rocblas-auxiliary.h:31:52: note: expanded from macro 'ROCBLAS_DEPRECATED_MSG'

   31 | #define ROCBLAS_DEPRECATED_MSG(MSG) __attribute__((deprecated(#MSG)))

      |                                                    ^

fatal error: too many errors emitted, stopping now [-ferror-limit=]

5 warnings and 20 errors generated when compiling for gfx803.

make[2]: *** [rvslib/CMakeFiles/rvslib.dir/build.make:261: rvslib/CMakeFiles/rvslib.dir/__/src/rvs_blas.cpp.o] Error 1

make[2]: *** Waiting for unfinished jobs....

make[1]: *** [CMakeFiles/Makefile2:821: rvslib/CMakeFiles/rvslib.dir/all] Error 2 

chboishabba avatar Mar 04 '25 04:03 chboishabba

RX 580 i7 7700k MSI B250M Mortar:

~ lspci -vvv -xxx
00:00.0 Host bridge: Intel Corporation Xeon E3-1200 v6/7th Gen Core Processor Host Bridge/DRAM Registers (rev 05)
	Subsystem: Micro-Star International Co., Ltd. [MSI] Device 7a69
	Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx-
	Status: Cap+ 66MHz- UDF- FastB2B+ ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort+ >SERR- <PERR- INTx-
	Latency: 0
	Capabilities: [e0] Vendor Specific Information: Len=10 <?>
	Kernel driver in use: skl_uncore
00: 86 80 1f 59 06 00 90 20 05 00 00 06 00 00 00 00
10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
20: 00 00 00 00 00 00 00 00 00 00 00 00 62 14 69 7a
30: 00 00 00 00 e0 00 00 00 00 00 00 00 00 00 00 00
40: 01 90 d1 fe 00 00 00 00 01 00 d1 fe 00 00 00 00
50: 03 00 00 00 29 80 00 00 04 00 00 00 01 00 c0 8f
60: 01 00 00 e0 00 00 00 00 01 80 d1 fe 00 00 00 00
70: 00 00 00 ff 07 00 00 00 00 0c 00 ff 7f 00 00 00
80: 11 11 11 11 11 11 11 00 1a 00 00 00 00 00 00 00
90: 01 00 00 ff 07 00 00 00 01 00 f0 6e 08 00 00 00
a0: 01 00 00 00 08 00 00 00 01 00 00 6f 08 00 00 00
b0: 01 00 00 90 01 00 00 90 01 00 c0 8f 01 00 00 90
c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
d0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
e0: 09 00 10 01 5d 20 01 62 c8 00 e4 b6 00 00 04 00
f0: 00 00 00 00 c8 0f 09 00 00 00 00 00 00 00 00 00

00:01.0 PCI bridge: Intel Corporation 6th-10th Gen Core Processor PCIe Controller (x16) (rev 05) (prog-if 00 [Normal decode])
	Subsystem: Micro-Star International Co., Ltd. [MSI] Device 7a69
	Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx+
	Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
	Latency: 0, Cache Line Size: 64 bytes
	Interrupt: pin A routed to IRQ 120
	Bus: primary=00, secondary=01, subordinate=01, sec-latency=0
	I/O behind bridge: e000-efff [size=4K] [16-bit]
	Memory behind bridge: dfa00000-dfafffff [size=1M] [32-bit]
	Prefetchable memory behind bridge: c0000000-d01fffff [size=258M] [32-bit]
	Secondary status: 66MHz- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- <SERR- <PERR-
	BridgeCtl: Parity- SERR+ NoISA- VGA+ VGA16+ MAbort- >Reset- FastB2B-
		PriDiscTmr- SecDiscTmr- DiscTmrStat- DiscTmrSERREn-
	Capabilities: [88] Subsystem: Micro-Star International Co., Ltd. [MSI] Device 7a69
	Capabilities: [80] Power Management version 3
		Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA PME(D0+,D1-,D2-,D3hot+,D3cold+)
		Status: D0 NoSoftRst+ PME-Enable- DSel=0 DScale=0 PME-
	Capabilities: [90] MSI: Enable+ Count=1/1 Maskable- 64bit-
		Address: fee04000  Data: 0020
	Capabilities: [a0] Express (v2) Root Port (Slot+), IntMsgNum 0
		DevCap:	MaxPayload 256 bytes, PhantFunc 0
			ExtTag- RBE+ TEE-IO-
		DevCtl:	CorrErr- NonFatalErr- FatalErr- UnsupReq-
			RlxdOrd- ExtTag- PhantFunc- AuxPwr- NoSnoop-
			MaxPayload 256 bytes, MaxReadReq 128 bytes
		DevSta:	CorrErr- NonFatalErr- FatalErr- UnsupReq- AuxPwr- TransPend-
		LnkCap:	Port #2, Speed 8GT/s, Width x16, ASPM L0s L1, Exit Latency L0s <256ns, L1 <8us
			ClockPM- Surprise- LLActRep- BwNot+ ASPMOptComp+
		LnkCtl:	ASPM Disabled; RCB 64 bytes, LnkDisable- CommClk+
			ExtSynch- ClockPM- AutWidDis- BWInt+ AutBWInt+
		LnkSta:	Speed 8GT/s, Width x16
			TrErr- Train- SlotClk+ DLActive- BWMgmt- ABWMgmt-
		SltCap:	AttnBtn- PwrCtrl- MRL- AttnInd- PwrInd- HotPlug- Surprise-
			Slot #1, PowerLimit 75W; Interlock- NoCompl+
		SltCtl:	Enable: AttnBtn- PwrFlt- MRL- PresDet- CmdCplt- HPIrq- LinkChg-
			Control: AttnInd Unknown, PwrInd Unknown, Power- Interlock-
		SltSta:	Status: AttnBtn- PowerFlt- MRL- CmdCplt- PresDet+ Interlock-
			Changed: MRL- PresDet+ LinkState-
		RootCap: CRSVisible-
		RootCtl: ErrCorrectable- ErrNon-Fatal- ErrFatal- PMEIntEna- CRSVisible-
		RootSta: PME ReqID 0000, PMEStatus- PMEPending-
		DevCap2: Completion Timeout: Not Supported, TimeoutDis- NROPrPrP- LTR+
			10BitTagComp- 10BitTagReq- OBFF Via WAKE#, ExtFmt- EETLPPrefix-
			EmergencyPowerReduction Not Supported, EmergencyPowerReductionInit-
			FRS- LN System CLS Not Supported, TPHComp- ExtTPHComp- ARIFwd-
			AtomicOpsCap: Routing- 32bit+ 64bit+ 128bitCAS+
		DevCtl2: Completion Timeout: 50us to 50ms, TimeoutDis- ARIFwd-
			AtomicOpsCtl: ReqEn- EgressBlck-
			IDOReq- IDOCompl- LTR+ EmergencyPowerReductionReq-
			10BitTagReq- OBFF Via WAKE#, EETLPPrefixBlk-
		LnkCap2: Supported Link Speeds: 2.5-8GT/s, Crosslink- Retimer- 2Retimers- DRS-
		LnkCtl2: Target Link Speed: 8GT/s, EnterCompliance- SpeedDis-
			Transmit Margin: Normal Operating Range, EnterModifiedCompliance- ComplianceSOS-
			Compliance Preset/De-emphasis: -6dB de-emphasis, 0dB preshoot
		LnkSta2: Current De-emphasis Level: -6dB, EqualizationComplete+ EqualizationPhase1+
			EqualizationPhase2+ EqualizationPhase3+ LinkEqualizationRequest-
			Retimer- 2Retimers- CrosslinkRes: unsupported
	Capabilities: [100 v1] Virtual Channel
		Caps:	LPEVC=0 RefClk=100ns PATEntryBits=1
		Arb:	Fixed- WRR32- WRR64- WRR128-
		Ctrl:	ArbSelect=Fixed
		Status:	InProgress-
		VC0:	Caps:	PATOffset=00 MaxTimeSlots=1 RejSnoopTrans-
			Arb:	Fixed+ WRR32- WRR64- WRR128- TWRR128- WRR256-
			Ctrl:	Enable+ ID=0 ArbSelect=Fixed TC/VC=ff
			Status:	NegoPending- InProgress-
	Capabilities: [140 v1] Root Complex Link
		Desc:	PortNumber=02 ComponentID=01 EltType=Config
		Link0:	Desc:	TargetPort=00 TargetComponent=01 AssocRCRB- LinkType=MemMapped LinkValid+
			Addr:	00000000fed19000
	Capabilities: [d94 v1] Secondary PCI Express
		LnkCtl3: LnkEquIntrruptEn- PerformEqu-
		LaneErrStat: 0
	Kernel driver in use: pcieport
00: 86 80 01 19 07 04 10 00 05 00 04 06 10 00 81 00
10: 00 00 00 00 00 00 00 00 00 01 01 00 e0 e0 00 00
20: a0 df a0 df 01 c0 11 d0 00 00 00 00 00 00 00 00
30: 00 00 00 00 88 00 00 00 00 00 00 00 0b 01 1a 00
40: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
50: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
60: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
70: 00 00 00 00 00 00 00 00 41 c0 1f 00 00 00 00 0a
80: 01 90 03 c8 08 00 00 00 0d 80 00 00 62 14 69 7a
90: 05 a0 01 00 00 40 e0 fe 20 00 00 00 00 00 00 00
a0: 10 00 42 01 01 80 00 00 20 00 00 00 03 ad 61 02
b0: 40 0c 03 11 80 25 0c 00 00 00 48 00 00 00 00 00
c0: 00 00 00 00 80 0b 08 00 00 64 00 00 0e 00 00 00
d0: 43 00 1e 00 00 00 00 00 00 00 00 00 00 00 00 00
e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
f0: 00 c0 00 bc 4e 01 01 20 00 00 00 00 e0 00 10 00

00:08.0 System peripheral: Intel Corporation Xeon E3-1200 v5/v6 / E3-1500 v5 / 6th/7th/8th Gen Core Processor Gaussian Mixture Model
	Subsystem: Micro-Star International Co., Ltd. [MSI] Device 7a69
	Control: I/O- Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx-
	Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
	Interrupt: pin A routed to IRQ 11
	Region 0: Memory at dfb4f000 (64-bit, non-prefetchable) [disabled] [size=4K]
	Capabilities: [90] MSI: Enable- Count=1/1 Maskable- 64bit-
		Address: 00000000  Data: 0000
	Capabilities: [dc] Power Management version 2
		Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA PME(D0-,D1-,D2-,D3hot-,D3cold-)
		Status: D0 NoSoftRst- PME-Enable- DSel=0 DScale=0 PME-
	Capabilities: [f0] PCI Advanced Features
		AFCap: TP+ FLR+
		AFCtrl: FLR-
		AFStatus: TP-
00: 86 80 11 19 00 00 10 00 00 00 80 08 10 00 00 00
10: 04 f0 b4 df 00 00 00 00 00 00 00 00 00 00 00 00
20: 00 00 00 00 00 00 00 00 00 00 00 00 62 14 69 7a
30: 00 00 00 00 90 00 00 00 00 00 00 00 0b 01 00 00
40: ff 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00
50: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
60: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
70: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
90: 05 dc 00 00 00 00 00 00 00 00 00 00 00 00 00 00
a0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
d0: 00 00 00 00 00 00 00 00 00 00 00 00 01 f0 02 00
e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
f0: 13 00 06 03 00 00 00 00 00 00 00 00 00 00 00 00

00:14.0 USB controller: Intel Corporation 200 Series/Z370 Chipset Family USB 3.0 xHCI Controller (prog-if 30 [XHCI])
	Subsystem: Micro-Star International Co., Ltd. [MSI] Device 7a69
	Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx+
	Status: Cap+ 66MHz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
	Latency: 0
	Interrupt: pin A routed to IRQ 125
	Region 0: Memory at dfb30000 (64-bit, non-prefetchable) [size=64K]
	Capabilities: [70] Power Management version 2
		Flags: PMEClk- DSI- D1- D2- AuxCurrent=375mA PME(D0-,D1-,D2-,D3hot+,D3cold+)
		Status: D0 NoSoftRst+ PME-Enable- DSel=0 DScale=0 PME-
	Capabilities: [80] MSI: Enable+ Count=1/8 Maskable- 64bit+
		Address: 00000000fee07000  Data: 0020
	Kernel driver in use: xhci_hcd
00: 86 80 af a2 06 04 90 02 00 30 03 0c 00 00 80 00
10: 04 00 b3 df 00 00 00 00 00 00 00 00 00 00 00 00
20: 00 00 00 00 00 00 00 00 00 00 00 00 62 14 69 7a
30: 00 00 00 00 70 00 00 00 00 00 00 00 0b 01 00 00
40: fd 01 34 80 88 c6 0f 80 00 00 00 00 00 00 00 00
50: 5b 6e ce 0f 00 00 00 00 00 00 00 00 00 00 00 00
60: 30 60 00 00 00 00 00 00 00 00 00 00 00 00 00 00
70: 01 80 c2 c1 08 00 00 00 00 00 00 00 00 00 00 00
80: 05 00 87 00 00 70 e0 fe 00 00 00 00 20 00 00 00
90: 09 00 14 f0 10 00 40 01 00 00 00 00 c1 0a 08 00
a0: 00 08 00 00 00 18 00 00 8f 40 02 00 00 01 04 00
b0: 03 c0 00 00 0c 00 00 00 30 00 00 00 c0 00 00 00
c0: 00 03 00 00 00 0c 00 00 00 10 00 00 00 00 00 00
d0: 03 00 00 00 0c 00 00 00 30 00 00 00 00 00 00 00
e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
f0: 00 00 00 00 00 00 00 00 b3 0f 00 08 00 00 00 00

00:14.2 Signal processing controller: Intel Corporation 200 Series PCH Thermal Subsystem
	Subsystem: Micro-Star International Co., Ltd. [MSI] Device 7a69
	Control: I/O- Mem+ BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx-
	Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
	Interrupt: pin C routed to IRQ 11
	Region 0: Memory at dfb4e000 (64-bit, non-prefetchable) [size=4K]
	Capabilities: [50] Power Management version 3
		Flags: PMEClk- DSI+ D1- D2- AuxCurrent=0mA PME(D0-,D1-,D2-,D3hot-,D3cold-)
		Status: D0 NoSoftRst+ PME-Enable- DSel=0 DScale=0 PME-
	Capabilities: [80] MSI: Enable- Count=1/1 Maskable- 64bit-
		Address: 00000000  Data: 0000
00: 86 80 b1 a2 02 00 10 00 00 00 80 11 00 00 00 00
10: 04 e0 b4 df 00 00 00 00 00 00 00 00 00 00 00 00
20: 00 00 00 00 00 00 00 00 00 00 00 00 62 14 69 7a
30: 00 00 00 00 50 00 00 00 00 00 00 00 0b 03 00 00
40: 04 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
50: 01 80 23 00 08 00 00 00 00 00 00 00 00 00 00 00
60: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
70: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
80: 05 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
90: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
a0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
d0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
f0: 00 00 00 00 00 00 00 00 b3 0f 00 08 00 00 00 00

00:16.0 Communication controller: Intel Corporation 200 Series PCH CSME HECI #1
	Subsystem: Micro-Star International Co., Ltd. [MSI] Device 7a69
	Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx+
	Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
	Latency: 0
	Interrupt: pin A routed to IRQ 136
	Region 0: Memory at dfb4d000 (64-bit, non-prefetchable) [size=4K]
	Capabilities: [50] Power Management version 3
		Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA PME(D0-,D1-,D2-,D3hot+,D3cold-)
		Status: D0 NoSoftRst+ PME-Enable- DSel=0 DScale=0 PME-
	Capabilities: [8c] MSI: Enable+ Count=1/1 Maskable- 64bit+
		Address: 00000000fee06000  Data: 0022
	Kernel driver in use: mei_me
	Kernel modules: mei_me
00: 86 80 ba a2 06 04 10 00 00 00 80 07 00 00 80 00
10: 04 d0 b4 df 00 00 00 00 00 00 00 00 00 00 00 00
20: 00 00 00 00 00 00 00 00 00 00 00 00 62 14 69 7a
30: 00 00 00 00 50 00 00 00 00 00 00 00 0b 01 00 00
40: 55 02 00 90 10 00 01 80 06 03 11 86 00 00 00 00
50: 01 8c 03 40 08 00 00 00 00 00 00 00 00 00 00 00
60: 20 00 00 00 04 40 08 00 00 00 00 00 02 00 00 00
70: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
80: 00 00 00 00 00 00 00 00 00 00 00 00 05 00 81 00
90: 00 60 e0 fe 00 00 00 00 22 00 00 00 00 00 00 00
a0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 40
c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
d0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
f0: 00 00 00 00 00 00 00 00 b3 0f 00 08 00 00 00 00

00:17.0 SATA controller: Intel Corporation 200 Series PCH SATA controller [AHCI mode] (prog-if 01 [AHCI 1.0])
	Subsystem: Micro-Star International Co., Ltd. [MSI] Device 7a69
	Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx+
	Status: Cap+ 66MHz+ UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
	Latency: 0
	Interrupt: pin A routed to IRQ 124
	Region 0: Memory at dfb48000 (32-bit, non-prefetchable) [size=8K]
	Region 1: Memory at dfb4c000 (32-bit, non-prefetchable) [size=256]
	Region 2: I/O ports at f050 [size=8]
	Region 3: I/O ports at f040 [size=4]
	Region 4: I/O ports at f020 [size=32]
	Region 5: Memory at dfb4b000 (32-bit, non-prefetchable) [size=2K]
	Capabilities: [80] MSI: Enable+ Count=1/1 Maskable- 64bit-
		Address: fee05000  Data: 0020
	Capabilities: [70] Power Management version 3
		Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA PME(D0-,D1-,D2-,D3hot+,D3cold-)
		Status: D0 NoSoftRst+ PME-Enable- DSel=0 DScale=0 PME-
	Capabilities: [a8] SATA HBA v1.0 BAR4 Offset=00000004
	Kernel driver in use: ahci
00: 86 80 82 a2 07 04 b0 02 00 01 06 01 00 00 00 00
10: 00 80 b4 df 00 c0 b4 df 51 f0 00 00 41 f0 00 00
20: 21 f0 00 00 00 b0 b4 df 00 00 00 00 62 14 69 7a
30: 00 00 00 00 80 00 00 00 00 00 00 00 0b 01 00 00
40: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
50: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
60: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
70: 01 a8 03 40 08 00 00 00 00 00 00 00 00 00 00 00
80: 05 70 01 00 00 50 e0 fe 20 00 00 00 00 00 00 00
90: 00 00 ff 00 00 00 00 00 00 00 00 00 30 00 00 80
a0: 3c 00 00 00 00 00 00 00 12 00 10 00 48 00 00 00
b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
d0: 11 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00
e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
f0: 00 00 00 00 00 00 00 00 b3 0f 00 08 00 00 00 00

00:1b.0 PCI bridge: Intel Corporation 200 Series PCH PCI Express Root Port #17 (rev f0) (prog-if 00 [Normal decode])
	Subsystem: Micro-Star International Co., Ltd. [MSI] Device 7a69
	Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx+
	Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
	Latency: 0, Cache Line Size: 64 bytes
	Interrupt: pin A routed to IRQ 121
	Bus: primary=00, secondary=02, subordinate=02, sec-latency=0
	I/O behind bridge: f000-0fff [disabled] [16-bit]
	Memory behind bridge: fff00000-000fffff [disabled] [32-bit]
	Prefetchable memory behind bridge: 00000000fff00000-00000000000fffff [disabled] [64-bit]
	Secondary status: 66MHz- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- <SERR- <PERR-
	BridgeCtl: Parity- SERR+ NoISA- VGA- VGA16+ MAbort- >Reset- FastB2B-
		PriDiscTmr- SecDiscTmr- DiscTmrStat- DiscTmrSERREn-
	Capabilities: [40] Express (v2) Root Port (Slot-), IntMsgNum 0
		DevCap:	MaxPayload 256 bytes, PhantFunc 0
			ExtTag- RBE+ TEE-IO-
		DevCtl:	CorrErr- NonFatalErr- FatalErr- UnsupReq-
			RlxdOrd- ExtTag- PhantFunc- AuxPwr- NoSnoop-
			MaxPayload 128 bytes, MaxReadReq 128 bytes
		DevSta:	CorrErr- NonFatalErr- FatalErr- UnsupReq- AuxPwr+ TransPend-
		LnkCap:	Port #17, Speed 8GT/s, Width x1, ASPM L0s L1, Exit Latency L0s unlimited, L1 <4us
			ClockPM- Surprise- LLActRep+ BwNot+ ASPMOptComp+
		LnkCtl:	ASPM Disabled; RCB 64 bytes, LnkDisable- CommClk-
			ExtSynch- ClockPM- AutWidDis- BWInt+ AutBWInt+
		LnkSta:	Speed 2.5GT/s, Width x0
			TrErr- Train- SlotClk+ DLActive- BWMgmt- ABWMgmt-
		RootCap: CRSVisible-
		RootCtl: ErrCorrectable- ErrNon-Fatal- ErrFatal- PMEIntEna- CRSVisible-
		RootSta: PME ReqID 0000, PMEStatus- PMEPending-
		DevCap2: Completion Timeout: Range ABC, TimeoutDis+ NROPrPrP- LTR+
			10BitTagComp- 10BitTagReq- OBFF Via WAKE#, ExtFmt- EETLPPrefix-
			EmergencyPowerReduction Not Supported, EmergencyPowerReductionInit-
			FRS- LN System CLS Not Supported, TPHComp- ExtTPHComp- ARIFwd+
			AtomicOpsCap: Routing- 32bit- 64bit- 128bitCAS-
		DevCtl2: Completion Timeout: 50us to 50ms, TimeoutDis- ARIFwd-
			AtomicOpsCtl: ReqEn- EgressBlck-
			IDOReq- IDOCompl- LTR- EmergencyPowerReductionReq-
			10BitTagReq- OBFF Disabled, EETLPPrefixBlk-
		LnkCap2: Supported Link Speeds: 2.5-8GT/s, Crosslink- Retimer- 2Retimers- DRS-
		LnkCtl2: Target Link Speed: 2.5GT/s, EnterCompliance- SpeedDis-
			Transmit Margin: Normal Operating Range, EnterModifiedCompliance- ComplianceSOS-
			Compliance Preset/De-emphasis: -6dB de-emphasis, 0dB preshoot
		LnkSta2: Current De-emphasis Level: -3.5dB, EqualizationComplete- EqualizationPhase1-
			EqualizationPhase2- EqualizationPhase3- LinkEqualizationRequest-
			Retimer- 2Retimers- CrosslinkRes: unsupported
	Capabilities: [80] MSI: Enable+ Count=1/1 Maskable- 64bit-
		Address: fee06000  Data: 0020
	Capabilities: [90] Subsystem: Micro-Star International Co., Ltd. [MSI] Device 7a69
	Capabilities: [a0] Power Management version 3
		Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA PME(D0+,D1-,D2-,D3hot+,D3cold+)
		Status: D3 NoSoftRst- PME-Enable+ DSel=0 DScale=0 PME-
	Kernel driver in use: pcieport
00: 86 80 e7 a2 07 04 10 00 f0 00 04 06 10 00 81 00
10: 00 00 00 00 00 00 00 00 00 02 02 00 f0 00 00 00
20: f0 ff 00 00 f1 ff 01 00 00 00 00 00 00 00 00 00
30: 00 00 00 00 40 00 00 00 00 00 00 00 0b 01 12 00
40: 10 80 42 00 01 80 00 00 00 00 10 00 13 7c 71 11
50: 00 0c 01 10 60 00 04 00 00 00 40 00 00 00 00 00
60: 00 00 00 00 37 08 08 00 00 00 00 00 0e 00 00 00
70: 01 00 01 00 00 00 00 00 00 00 00 00 00 00 00 00
80: 05 90 01 00 00 60 e0 fe 20 00 00 00 00 00 00 00
90: 0d a0 00 00 62 14 69 7a 00 00 00 00 00 00 00 00
a0: 01 00 03 c8 03 01 00 00 00 00 00 00 00 00 00 00
b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
d0: 11 00 00 07 00 08 00 00 02 00 9e 09 00 00 00 00
e0: 00 f7 91 00 00 00 00 00 04 80 02 00 00 00 00 00
f0: 50 01 00 00 00 f3 00 40 b3 0f 00 08 00 00 00 01

00:1b.4 PCI bridge: Intel Corporation 200 Series PCH PCI Express Root Port #21 (rev f0) (prog-if 00 [Normal decode])
	Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx+
	Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
	Latency: 0, Cache Line Size: 64 bytes
	Interrupt: pin A routed to IRQ 122
	Bus: primary=00, secondary=03, subordinate=03, sec-latency=0
	I/O behind bridge: f000-0fff [disabled] [16-bit]
	Memory behind bridge: df900000-df9fffff [size=1M] [32-bit]
	Prefetchable memory behind bridge: 00000000fff00000-00000000000fffff [disabled] [64-bit]
	Secondary status: 66MHz- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- <SERR- <PERR-
	BridgeCtl: Parity- SERR+ NoISA- VGA- VGA16+ MAbort- >Reset- FastB2B-
		PriDiscTmr- SecDiscTmr- DiscTmrStat- DiscTmrSERREn-
	Capabilities: [40] Express (v2) Root Port (Slot+), IntMsgNum 0
		DevCap:	MaxPayload 256 bytes, PhantFunc 0
			ExtTag- RBE+ TEE-IO-
		DevCtl:	CorrErr- NonFatalErr- FatalErr- UnsupReq-
			RlxdOrd- ExtTag- PhantFunc- AuxPwr- NoSnoop-
			MaxPayload 128 bytes, MaxReadReq 128 bytes
		DevSta:	CorrErr- NonFatalErr- FatalErr- UnsupReq- AuxPwr+ TransPend-
		LnkCap:	Port #21, Speed 8GT/s, Width x4, ASPM not supported
			ClockPM- Surprise- LLActRep+ BwNot+ ASPMOptComp+
		LnkCtl:	ASPM Disabled; RCB 64 bytes, LnkDisable- CommClk+
			ExtSynch- ClockPM- AutWidDis- BWInt+ AutBWInt+
		LnkSta:	Speed 8GT/s, Width x4
			TrErr- Train- SlotClk+ DLActive+ BWMgmt- ABWMgmt-
		SltCap:	AttnBtn- PwrCtrl- MRL- AttnInd- PwrInd- HotPlug- Surprise-
			Slot #24, PowerLimit 25W; Interlock- NoCompl+
		SltCtl:	Enable: AttnBtn- PwrFlt- MRL- PresDet- CmdCplt- HPIrq- LinkChg-
			Control: AttnInd Unknown, PwrInd Unknown, Power- Interlock-
		SltSta:	Status: AttnBtn- PowerFlt- MRL- CmdCplt- PresDet+ Interlock-
			Changed: MRL- PresDet- LinkState+
		RootCap: CRSVisible-
		RootCtl: ErrCorrectable- ErrNon-Fatal- ErrFatal- PMEIntEna- CRSVisible-
		RootSta: PME ReqID 0000, PMEStatus- PMEPending-
		DevCap2: Completion Timeout: Range ABC, TimeoutDis+ NROPrPrP- LTR+
			10BitTagComp- 10BitTagReq- OBFF Not Supported, ExtFmt- EETLPPrefix-
			EmergencyPowerReduction Not Supported, EmergencyPowerReductionInit-
			FRS- LN System CLS Not Supported, TPHComp- ExtTPHComp- ARIFwd+
			AtomicOpsCap: Routing- 32bit- 64bit- 128bitCAS-
		DevCtl2: Completion Timeout: 50us to 50ms, TimeoutDis- ARIFwd-
			AtomicOpsCtl: ReqEn- EgressBlck-
			IDOReq- IDOCompl- LTR+ EmergencyPowerReductionReq-
			10BitTagReq- OBFF Disabled, EETLPPrefixBlk-
		LnkCap2: Supported Link Speeds: 2.5-8GT/s, Crosslink- Retimer- 2Retimers- DRS-
		LnkCtl2: Target Link Speed: 8GT/s, EnterCompliance- SpeedDis-
			Transmit Margin: Normal Operating Range, EnterModifiedCompliance- ComplianceSOS-
			Compliance Preset/De-emphasis: -6dB de-emphasis, 0dB preshoot
		LnkSta2: Current De-emphasis Level: -3.5dB, EqualizationComplete+ EqualizationPhase1+
			EqualizationPhase2+ EqualizationPhase3+ LinkEqualizationRequest-
			Retimer- 2Retimers- CrosslinkRes: unsupported
	Capabilities: [80] MSI: Enable+ Count=1/1 Maskable- 64bit-
		Address: fee01000  Data: 0020
	Capabilities: [90] Subsystem: Device 0000:0000
	Capabilities: [a0] Power Management version 3
		Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA PME(D0+,D1-,D2-,D3hot+,D3cold+)
		Status: D0 NoSoftRst- PME-Enable- DSel=0 DScale=0 PME-
	Capabilities: [100 v1] Advanced Error Reporting
		UESta:	DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF- MalfTLP-
			ECRC- UnsupReq- ACSViol- UncorrIntErr- BlockedTLP- AtomicOpBlocked- TLPBlockedErr-
			PoisonTLPBlocked- DMWrReqBlocked- IDECheck- MisIDETLP- PCRC_CHECK- TLPXlatBlocked-
		UEMsk:	DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt+ RxOF- MalfTLP-
			ECRC- UnsupReq- ACSViol- UncorrIntErr- BlockedTLP- AtomicOpBlocked- TLPBlockedErr-
			PoisonTLPBlocked- DMWrReqBlocked- IDECheck- MisIDETLP- PCRC_CHECK- TLPXlatBlocked-
		UESvrt:	DLP+ SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF+ MalfTLP+
			ECRC- UnsupReq- ACSViol- UncorrIntErr- BlockedTLP- AtomicOpBlocked- TLPBlockedErr-
			PoisonTLPBlocked- DMWrReqBlocked- IDECheck- MisIDETLP- PCRC_CHECK- TLPXlatBlocked-
		CESta:	RxErr- BadTLP- BadDLLP- Rollover- Timeout- AdvNonFatalErr- CorrIntErr- HeaderOF-
		CEMsk:	RxErr- BadTLP- BadDLLP- Rollover- Timeout- AdvNonFatalErr+ CorrIntErr- HeaderOF-
		AERCap:	First Error Pointer: 00, ECRCGenCap- ECRCGenEn- ECRCChkCap- ECRCChkEn-
			MultHdrRecCap- MultHdrRecEn- TLPPfxPres- HdrLogCap-
		HeaderLog: 00000000 00000000 00000000 00000000
		RootCmd: CERptEn- NFERptEn- FERptEn-
		RootSta: CERcvd- MultCERcvd- UERcvd- MultUERcvd-
			FirstFatal- NonFatalMsg- FatalMsg- IntMsgNum 0
		ErrorSrc: ERR_COR: 0000 ERR_FATAL/NONFATAL: 0000
	Capabilities: [140 v1] Access Control Services
		ACSCap:	SrcValid+ TransBlk+ ReqRedir+ CmpltRedir+ UpstreamFwd- EgressCtrl- DirectTrans-
		ACSCtl:	SrcValid- TransBlk- ReqRedir- CmpltRedir- UpstreamFwd- EgressCtrl- DirectTrans-
	Capabilities: [220 v1] Secondary PCI Express
		LnkCtl3: LnkEquIntrruptEn- PerformEqu-
		LaneErrStat: 0
	Kernel driver in use: pcieport
00: 86 80 eb a2 07 04 10 00 f0 00 04 06 10 00 81 00
10: 00 00 00 00 00 00 00 00 00 03 03 00 f0 00 00 00
20: 90 df 90 df f1 ff 01 00 00 00 00 00 00 00 00 00
30: 00 00 00 00 40 00 00 00 00 00 00 00 0b 01 12 00
40: 10 80 42 01 01 80 00 00 00 00 10 00 43 40 72 15
50: 40 0c 43 30 00 fd c4 00 00 00 40 01 00 00 00 00
60: 00 00 00 00 37 08 00 00 00 04 00 00 0e 00 00 00
70: 03 00 1f 00 00 00 00 00 00 00 00 00 00 00 00 00
80: 05 90 01 00 00 10 e0 fe 20 00 00 00 00 00 00 00
90: 0d a0 00 00 00 00 00 00 00 00 00 00 00 00 00 00
a0: 01 00 03 c8 00 00 00 00 00 00 00 00 00 00 00 00
b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
d0: 01 10 00 07 42 18 00 00 08 00 9e 8b 00 00 00 00
e0: 00 b7 f3 00 00 80 00 80 16 80 12 00 00 00 00 00
f0: 50 01 00 00 00 03 00 40 b3 0f 00 08 00 c0 00 01

00:1d.0 PCI bridge: Intel Corporation 200 Series PCH PCI Express Root Port #9 (rev f0) (prog-if 00 [Normal decode])
	Subsystem: Micro-Star International Co., Ltd. [MSI] Device 7a69
	Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx+
	Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
	Latency: 0, Cache Line Size: 64 bytes
	Interrupt: pin A routed to IRQ 123
	Bus: primary=00, secondary=04, subordinate=04, sec-latency=0
	I/O behind bridge: f000-0fff [disabled] [16-bit]
	Memory behind bridge: df000000-df8fffff [size=9M] [32-bit]
	Prefetchable memory behind bridge: d0400000-d07fffff [size=4M] [32-bit]
	Secondary status: 66MHz- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- <SERR- <PERR-
	BridgeCtl: Parity- SERR+ NoISA- VGA- VGA16+ MAbort- >Reset- FastB2B-
		PriDiscTmr- SecDiscTmr- DiscTmrStat- DiscTmrSERREn-
	Capabilities: [40] Express (v2) Root Port (Slot+), IntMsgNum 0
		DevCap:	MaxPayload 256 bytes, PhantFunc 0
			ExtTag- RBE+ TEE-IO-
		DevCtl:	CorrErr- NonFatalErr- FatalErr- UnsupReq-
			RlxdOrd- ExtTag- PhantFunc- AuxPwr- NoSnoop-
			MaxPayload 256 bytes, MaxReadReq 128 bytes
		DevSta:	CorrErr- NonFatalErr- FatalErr- UnsupReq- AuxPwr+ TransPend-
		LnkCap:	Port #9, Speed 8GT/s, Width x4, ASPM not supported
			ClockPM- Surprise- LLActRep+ BwNot+ ASPMOptComp+
		LnkCtl:	ASPM Disabled; RCB 64 bytes, LnkDisable- CommClk+
			ExtSynch- ClockPM- AutWidDis- BWInt+ AutBWInt+
		LnkSta:	Speed 5GT/s, Width x1
			TrErr- Train- SlotClk+ DLActive+ BWMgmt- ABWMgmt-
		SltCap:	AttnBtn- PwrCtrl- MRL- AttnInd- PwrInd- HotPlug- Surprise-
			Slot #12, PowerLimit 25W; Interlock- NoCompl+
		SltCtl:	Enable: AttnBtn- PwrFlt- MRL- PresDet- CmdCplt- HPIrq- LinkChg-
			Control: AttnInd Unknown, PwrInd Unknown, Power- Interlock-
		SltSta:	Status: AttnBtn- PowerFlt- MRL- CmdCplt- PresDet+ Interlock-
			Changed: MRL- PresDet- LinkState+
		RootCap: CRSVisible-
		RootCtl: ErrCorrectable- ErrNon-Fatal- ErrFatal- PMEIntEna- CRSVisible-
		RootSta: PME ReqID 0000, PMEStatus- PMEPending-
		DevCap2: Completion Timeout: Range ABC, TimeoutDis+ NROPrPrP- LTR+
			10BitTagComp- 10BitTagReq- OBFF Not Supported, ExtFmt- EETLPPrefix-
			EmergencyPowerReduction Not Supported, EmergencyPowerReductionInit-
			FRS- LN System CLS Not Supported, TPHComp- ExtTPHComp- ARIFwd+
			AtomicOpsCap: Routing- 32bit- 64bit- 128bitCAS-
		DevCtl2: Completion Timeout: 50us to 50ms, TimeoutDis- ARIFwd-
			AtomicOpsCtl: ReqEn- EgressBlck-
			IDOReq- IDOCompl- LTR+ EmergencyPowerReductionReq-
			10BitTagReq- OBFF Disabled, EETLPPrefixBlk-
		LnkCap2: Supported Link Speeds: 2.5-8GT/s, Crosslink- Retimer- 2Retimers- DRS-
		LnkCtl2: Target Link Speed: 8GT/s, EnterCompliance- SpeedDis-
			Transmit Margin: Normal Operating Range, EnterModifiedCompliance- ComplianceSOS-
			Compliance Preset/De-emphasis: -6dB de-emphasis, 0dB preshoot
		LnkSta2: Current De-emphasis Level: -6dB, EqualizationComplete- EqualizationPhase1-
			EqualizationPhase2- EqualizationPhase3- LinkEqualizationRequest-
			Retimer- 2Retimers- CrosslinkRes: unsupported
	Capabilities: [80] MSI: Enable+ Count=1/1 Maskable- 64bit-
		Address: fee03000  Data: 0020
	Capabilities: [90] Subsystem: Micro-Star International Co., Ltd. [MSI] Device 7a69
	Capabilities: [a0] Power Management version 3
		Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA PME(D0+,D1-,D2-,D3hot+,D3cold+)
		Status: D0 NoSoftRst- PME-Enable- DSel=0 DScale=0 PME-
	Capabilities: [100 v1] Advanced Error Reporting
		UESta:	DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF- MalfTLP-
			ECRC- UnsupReq- ACSViol- UncorrIntErr- BlockedTLP- AtomicOpBlocked- TLPBlockedErr-
			PoisonTLPBlocked- DMWrReqBlocked- IDECheck- MisIDETLP- PCRC_CHECK- TLPXlatBlocked-
		UEMsk:	DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt+ RxOF- MalfTLP-
			ECRC- UnsupReq- ACSViol- UncorrIntErr- BlockedTLP- AtomicOpBlocked- TLPBlockedErr-
			PoisonTLPBlocked- DMWrReqBlocked- IDECheck- MisIDETLP- PCRC_CHECK- TLPXlatBlocked-
		UESvrt:	DLP+ SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF+ MalfTLP+
			ECRC- UnsupReq- ACSViol- UncorrIntErr- BlockedTLP- AtomicOpBlocked- TLPBlockedErr-
			PoisonTLPBlocked- DMWrReqBlocked- IDECheck- MisIDETLP- PCRC_CHECK- TLPXlatBlocked-
		CESta:	RxErr- BadTLP- BadDLLP- Rollover- Timeout- AdvNonFatalErr- CorrIntErr- HeaderOF-
		CEMsk:	RxErr- BadTLP- BadDLLP- Rollover- Timeout- AdvNonFatalErr+ CorrIntErr- HeaderOF-
		AERCap:	First Error Pointer: 00, ECRCGenCap- ECRCGenEn- ECRCChkCap- ECRCChkEn-
			MultHdrRecCap- MultHdrRecEn- TLPPfxPres- HdrLogCap-
		HeaderLog: 00000000 00000000 00000000 00000000
		RootCmd: CERptEn- NFERptEn- FERptEn-
		RootSta: CERcvd- MultCERcvd- UERcvd- MultUERcvd-
			FirstFatal- NonFatalMsg- FatalMsg- IntMsgNum 0
		ErrorSrc: ERR_COR: 0000 ERR_FATAL/NONFATAL: 0000
	Capabilities: [140 v1] Access Control Services
		ACSCap:	SrcValid+ TransBlk+ ReqRedir+ CmpltRedir+ UpstreamFwd- EgressCtrl- DirectTrans-
		ACSCtl:	SrcValid- TransBlk- ReqRedir- CmpltRedir- UpstreamFwd- EgressCtrl- DirectTrans-
	Capabilities: [220 v1] Secondary PCI Express
		LnkCtl3: LnkEquIntrruptEn- PerformEqu-
		LaneErrStat: 0
	Kernel driver in use: pcieport
00: 86 80 98 a2 07 04 10 00 f0 00 04 06 10 00 81 00
10: 00 00 00 00 00 00 00 00 00 04 04 00 f0 00 00 00
20: 00 df 80 df 41 d0 71 d0 00 00 00 00 00 00 00 00
30: 00 00 00 00 40 00 00 00 00 00 00 00 0b 01 12 00
40: 10 80 42 01 01 80 00 00 20 00 10 00 43 40 72 09
50: 40 0c 12 30 00 fd 64 00 00 00 40 01 00 00 00 00
60: 00 00 00 00 37 08 00 00 00 04 00 00 0e 00 00 00
70: 03 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
80: 05 90 01 00 00 30 e0 fe 20 00 00 00 00 00 00 00
90: 0d a0 00 00 62 14 69 7a 00 00 00 00 00 00 00 00
a0: 01 00 03 c8 00 00 00 00 00 00 00 00 00 00 00 00
b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
d0: 11 10 00 07 42 18 00 00 08 00 9e 8b 00 00 00 00
e0: 00 b7 f3 00 3c 88 3c 88 16 80 12 00 00 00 00 00
f0: 50 01 00 00 00 03 00 40 b3 0f 00 08 00 c0 00 01

00:1f.0 ISA bridge: Intel Corporation 200 Series PCH LPC Controller (Z270)
	Subsystem: Micro-Star International Co., Ltd. [MSI] Device 7a69
	Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx-
	Status: Cap- 66MHz- UDF- FastB2B- ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
	Latency: 0
00: 86 80 c5 a2 07 00 00 02 00 00 01 06 00 00 80 00
10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
20: 00 00 00 00 00 00 00 00 00 00 00 00 62 14 69 7a
30: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
40: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
50: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
60: 00 00 00 00 d0 00 00 00 00 00 00 00 00 00 00 00
70: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
80: 70 00 0f 3f 01 02 fc 00 01 0a 3c 00 e1 03 0c 00
90: e1 02 1c 00 00 00 00 00 00 00 00 00 00 00 00 00
a0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
d0: 33 22 11 00 67 45 00 00 cf ff 00 00 80 00 00 00
e0: c1 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
f0: 00 00 00 00 00 00 00 00 b3 0f 00 08 00 00 00 00

00:1f.2 Memory controller: Intel Corporation 200 Series/Z370 Chipset Family Power Management Controller
	Subsystem: Micro-Star International Co., Ltd. [MSI] Device 7a69
	Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx-
	Status: Cap- 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
	Latency: 0
	Region 0: Memory at dfb44000 (32-bit, non-prefetchable) [size=16K]
00: 86 80 a1 a2 06 00 00 00 00 00 80 05 00 00 80 00
10: 00 40 b4 df 00 00 00 00 00 00 00 00 00 00 00 00
20: 00 00 00 00 00 00 00 00 00 00 00 00 62 14 69 7a
30: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
40: 01 18 00 00 80 01 00 00 00 00 00 fe 00 00 00 00
50: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
60: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
70: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
90: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
a0: f8 36 a0 d8 08 3a 06 00 00 46 00 00 00 00 00 01
b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
d0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
f0: 00 00 00 00 00 00 00 00 b3 0f 00 08 00 00 00 00

00:1f.3 Audio device: Intel Corporation 200 Series PCH HD Audio
	Subsystem: Micro-Star International Co., Ltd. [MSI] Device fa69
	Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx+
	Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
	Latency: 32, Cache Line Size: 64 bytes
	Interrupt: pin A routed to IRQ 139
	Region 0: Memory at dfb40000 (64-bit, non-prefetchable) [size=16K]
	Region 4: Memory at dfb20000 (64-bit, non-prefetchable) [size=64K]
	Capabilities: [50] Power Management version 3
		Flags: PMEClk- DSI- D1- D2- AuxCurrent=55mA PME(D0-,D1-,D2-,D3hot+,D3cold+)
		Status: D3 NoSoftRst+ PME-Enable+ DSel=0 DScale=0 PME-
	Capabilities: [60] MSI: Enable+ Count=1/1 Maskable- 64bit+
		Address: 00000000fee07000  Data: 0022
	Kernel driver in use: snd_hda_intel
	Kernel modules: snd_hda_intel, snd_soc_avs
00: 86 80 f0 a2 06 04 10 00 00 00 03 04 10 20 00 00
10: 04 00 b4 df 00 00 00 00 00 00 00 00 00 00 00 00
20: 04 00 b2 df 00 00 00 00 00 00 00 00 62 14 69 fa
30: 00 00 00 00 50 00 00 00 00 00 00 00 0b 01 00 00
40: 00 00 00 00 00 00 00 00 ff 0d 3b 80 00 00 00 00
50: 01 60 43 c0 0b 01 00 00 00 00 00 00 00 00 00 00
60: 05 00 81 00 00 70 e0 fe 00 00 00 00 22 00 00 00
70: 10 00 91 00 00 00 00 10 00 20 10 00 00 00 00 00
80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
90: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
a0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
c0: 08 06 02 00 00 70 80 04 00 0c a5 82 10 00 03 00
d0: 00 0c b5 02 10 00 03 00 00 00 00 00 00 00 00 00
e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
f0: 00 00 00 00 00 00 00 00 b3 0f 00 08 00 00 00 00

00:1f.4 SMBus: Intel Corporation 200 Series/Z370 Chipset Family SMBus Controller
	Subsystem: Micro-Star International Co., Ltd. [MSI] Device 7a69
	Control: I/O+ Mem+ BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx-
	Status: Cap- 66MHz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
	Interrupt: pin A routed to IRQ 16
	Region 0: Memory at dfb4a000 (64-bit, non-prefetchable) [size=256]
	Region 4: I/O ports at f000 [size=32]
	Kernel driver in use: i801_smbus
	Kernel modules: i2c_i801
00: 86 80 a3 a2 03 00 80 02 00 00 05 0c 00 00 00 00
10: 04 a0 b4 df 00 00 00 00 00 00 00 00 00 00 00 00
20: 01 f0 00 00 00 00 00 00 00 00 00 00 62 14 69 7a
30: 00 00 00 00 00 00 00 00 00 00 00 00 0b 01 00 00
40: 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
50: 01 04 00 00 00 01 00 00 00 00 00 00 00 00 00 00
60: 04 05 05 00 00 00 0a 0a 00 00 00 00 00 00 00 00
70: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
80: 24 00 04 00 00 00 00 00 00 00 00 00 00 00 00 00
90: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
a0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
d0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
f0: 00 00 00 00 00 00 00 00 b3 0f 00 08 00 00 00 00

00:1f.6 Ethernet controller: Intel Corporation Ethernet Connection (2) I219-V
	Subsystem: Micro-Star International Co., Ltd. [MSI] Device 7a69
	Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx+
	Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
	Latency: 0
	Interrupt: pin A routed to IRQ 137
	Region 0: Memory at dfb00000 (32-bit, non-prefetchable) [size=128K]
	Capabilities: [c8] Power Management version 3
		Flags: PMEClk- DSI+ D1- D2- AuxCurrent=0mA PME(D0+,D1-,D2-,D3hot+,D3cold+)
		Status: D0 NoSoftRst+ PME-Enable- DSel=0 DScale=1 PME-
	Capabilities: [d0] MSI: Enable+ Count=1/1 Maskable- 64bit+
		Address: 00000000fee03000  Data: 0022
	Capabilities: [e0] PCI Advanced Features
		AFCap: TP+ FLR+
		AFCtrl: FLR-
		AFStatus: TP-
	Kernel driver in use: e1000e
	Kernel modules: e1000e
00: 86 80 b8 15 06 04 10 00 00 00 00 02 00 00 00 00
10: 00 00 b0 df 00 00 00 00 00 00 00 00 00 00 00 00
20: 00 00 00 00 00 00 00 00 00 00 00 00 62 14 69 7a
30: 00 00 00 00 c8 00 00 00 00 00 00 00 0b 01 00 00
40: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
50: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
60: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
70: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
80: 28 00 00 00 08 00 00 00 00 00 00 00 00 00 00 00
90: 00 00 00 00 1f 00 00 00 00 00 00 00 40 02 18 40
a0: 00 00 00 00 01 00 00 00 03 10 03 10 00 00 00 00
b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
c0: 00 00 00 00 00 00 00 00 01 d0 23 c8 08 20 00 00
d0: 05 e0 81 00 00 30 e0 fe 00 00 00 00 22 00 00 00
e0: 13 00 06 03 00 00 00 00 00 00 00 00 00 00 00 00
f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00

01:00.0 VGA compatible controller: Advanced Micro Devices, Inc. [AMD/ATI] Ellesmere [Radeon RX 470/480/570/570X/580/580X/590] (rev e7) (prog-if 00 [VGA controller])
	Subsystem: ASUSTeK Computer Inc. Device 0519
	Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx+
	Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
	Latency: 0, Cache Line Size: 64 bytes
	Interrupt: pin A routed to IRQ 135
	Region 0: Memory at c0000000 (64-bit, prefetchable) [size=256M]
	Region 2: Memory at d0000000 (64-bit, prefetchable) [size=2M]
	Region 4: I/O ports at e000 [size=256]
	Region 5: Memory at dfa00000 (32-bit, non-prefetchable) [size=256K]
	Expansion ROM at 000c0000 [disabled] [size=128K]
	Capabilities: [48] Vendor Specific Information: Len=08 <?>
	Capabilities: [50] Power Management version 3
		Flags: PMEClk- DSI- D1+ D2+ AuxCurrent=0mA PME(D0-,D1+,D2+,D3hot+,D3cold+)
		Status: D0 NoSoftRst+ PME-Enable- DSel=0 DScale=0 PME-
	Capabilities: [58] Express (v2) Legacy Endpoint, IntMsgNum 0
		DevCap:	MaxPayload 256 bytes, PhantFunc 0, Latency L0s <4us, L1 unlimited
			ExtTag+ AttnBtn- AttnInd- PwrInd- RBE+ FLReset- TEE-IO-
		DevCtl:	CorrErr- NonFatalErr- FatalErr- UnsupReq-
			RlxdOrd+ ExtTag+ PhantFunc- AuxPwr- NoSnoop+
			MaxPayload 256 bytes, MaxReadReq 512 bytes
		DevSta:	CorrErr+ NonFatalErr- FatalErr- UnsupReq+ AuxPwr- TransPend-
		LnkCap:	Port #0, Speed 8GT/s, Width x16, ASPM L1, Exit Latency L1 <1us
			ClockPM+ Surprise- LLActRep- BwNot- ASPMOptComp+
		LnkCtl:	ASPM Disabled; RCB 64 bytes, LnkDisable- CommClk+
			ExtSynch- ClockPM- AutWidDis- BWInt- AutBWInt-
		LnkSta:	Speed 8GT/s, Width x16
			TrErr- Train- SlotClk+ DLActive- BWMgmt- ABWMgmt-
		DevCap2: Completion Timeout: Not Supported, TimeoutDis- NROPrPrP- LTR+
			10BitTagComp- 10BitTagReq- OBFF Not Supported, ExtFmt+ EETLPPrefix+, MaxEETLPPrefixes 1
			EmergencyPowerReduction Not Supported, EmergencyPowerReductionInit-
			FRS-
			AtomicOpsCap: 32bit+ 64bit+ 128bitCAS-
		DevCtl2: Completion Timeout: 50us to 50ms, TimeoutDis-
			AtomicOpsCtl: ReqEn+
			IDOReq- IDOCompl- LTR+ EmergencyPowerReductionReq-
			10BitTagReq- OBFF Disabled, EETLPPrefixBlk-
		LnkCap2: Supported Link Speeds: 2.5-8GT/s, Crosslink- Retimer- 2Retimers- DRS-
		LnkCtl2: Target Link Speed: 8GT/s, EnterCompliance- SpeedDis-
			Transmit Margin: Normal Operating Range, EnterModifiedCompliance- ComplianceSOS-
			Compliance Preset/De-emphasis: -6dB de-emphasis, 0dB preshoot
		LnkSta2: Current De-emphasis Level: -3.5dB, EqualizationComplete+ EqualizationPhase1+
			EqualizationPhase2+ EqualizationPhase3+ LinkEqualizationRequest-
			Retimer- 2Retimers- CrosslinkRes: unsupported
	Capabilities: [a0] MSI: Enable+ Count=1/1 Maskable- 64bit+
		Address: 00000000fee04000  Data: 0022
	Capabilities: [100 v1] Vendor Specific Information: ID=0001 Rev=1 Len=010 <?>
	Capabilities: [150 v2] Advanced Error Reporting
		UESta:	DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF- MalfTLP-
			ECRC- UnsupReq- ACSViol- UncorrIntErr- BlockedTLP- AtomicOpBlocked- TLPBlockedErr-
			PoisonTLPBlocked- DMWrReqBlocked- IDECheck- MisIDETLP- PCRC_CHECK- TLPXlatBlocked-
		UEMsk:	DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF- MalfTLP-
			ECRC- UnsupReq- ACSViol- UncorrIntErr- BlockedTLP- AtomicOpBlocked- TLPBlockedErr-
			PoisonTLPBlocked- DMWrReqBlocked- IDECheck- MisIDETLP- PCRC_CHECK- TLPXlatBlocked-
		UESvrt:	DLP+ SDES+ TLP- FCP+ CmpltTO- CmpltAbrt- UnxCmplt- RxOF+ MalfTLP+
			ECRC- UnsupReq- ACSViol- UncorrIntErr+ BlockedTLP- AtomicOpBlocked- TLPBlockedErr-
			PoisonTLPBlocked- DMWrReqBlocked- IDECheck- MisIDETLP- PCRC_CHECK- TLPXlatBlocked-
		CESta:	RxErr- BadTLP- BadDLLP- Rollover- Timeout- AdvNonFatalErr+ CorrIntErr- HeaderOF-
		CEMsk:	RxErr- BadTLP- BadDLLP- Rollover- Timeout- AdvNonFatalErr+ CorrIntErr- HeaderOF-
		AERCap:	First Error Pointer: 00, ECRCGenCap+ ECRCGenEn- ECRCChkCap+ ECRCChkEn-
			MultHdrRecCap- MultHdrRecEn- TLPPfxPres- HdrLogCap-
		HeaderLog: 00000000 00000000 00000000 00000000
	Capabilities: [200 v1] Physical Resizable BAR
		BAR 0: current size: 256MB, supported: 256MB 512MB 1GB 2GB 4GB 8GB
	Capabilities: [270 v1] Secondary PCI Express
		LnkCtl3: LnkEquIntrruptEn- PerformEqu-
		LaneErrStat: 0
	Capabilities: [2b0 v1] Address Translation Service (ATS)
		ATSCap:	Invalidate Queue Depth: 00
		ATSCtl:	Enable-, Smallest Translation Unit: 00
	Capabilities: [2c0 v1] Page Request Interface (PRI)
		PRICtl: Enable- Reset-
		PRISta: RF- UPRGI- Stopped+ PASID-
		Page Request Capacity: 00000020, Page Request Allocation: 00000000
	Capabilities: [2d0 v1] Process Address Space ID (PASID)
		PASIDCap: Exec+ Priv+, Max PASID Width: 10
		PASIDCtl: Enable- Exec- Priv-
	Capabilities: [320 v1] Latency Tolerance Reporting
		Max snoop latency: 71680ns
		Max no snoop latency: 71680ns
	Capabilities: [328 v1] Alternative Routing-ID Interpretation (ARI)
		ARICap:	MFVC- ACS-, Next Function: 1
		ARICtl:	MFVC- ACS-, Function Group: 0
	Capabilities: [370 v1] L1 PM Substates
		L1SubCap: PCI-PM_L1.2+ PCI-PM_L1.1+ ASPM_L1.2+ ASPM_L1.1+ L1_PM_Substates+
			 PortCommonModeRestoreTime=0us PortTPowerOnTime=170us
		L1SubCtl1: PCI-PM_L1.2- PCI-PM_L1.1- ASPM_L1.2- ASPM_L1.1-
			  T_CommonMode=0us LTR1.2_Threshold=0ns
		L1SubCtl2: T_PwrOn=10us
	Kernel driver in use: amdgpu
	Kernel modules: amdgpu
00: 02 10 df 67 07 04 10 00 e7 00 00 03 10 00 80 00
10: 0c 00 00 c0 00 00 00 00 0c 00 00 d0 00 00 00 00
20: 01 e0 00 00 00 00 a0 df 00 00 00 00 43 10 19 05
30: 00 00 a4 df 48 00 00 00 00 00 00 00 0b 01 00 00
40: 00 00 00 00 00 00 00 00 09 50 08 00 43 10 19 05
50: 01 58 03 f6 08 00 00 00 10 a0 12 00 a1 8f 2c 01
60: 30 29 09 00 03 09 44 00 40 00 03 11 00 00 00 00
70: 00 00 00 00 00 00 00 00 00 00 00 00 80 09 70 00
80: 40 04 00 00 0e 00 00 00 03 00 1f 00 00 00 00 00
90: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
a0: 05 00 81 00 00 40 e0 fe 00 00 00 00 22 00 00 00
b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
d0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00

01:00.1 Audio device: Advanced Micro Devices, Inc. [AMD/ATI] Ellesmere HDMI Audio [Radeon RX 470/480 / 570/580/590]
	Subsystem: ASUSTeK Computer Inc. Device aaf0
	Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx+
	Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
	Latency: 0, Cache Line Size: 64 bytes
	Interrupt: pin B routed to IRQ 138
	Region 0: Memory at dfa60000 (64-bit, non-prefetchable) [size=16K]
	Capabilities: [48] Vendor Specific Information: Len=08 <?>
	Capabilities: [50] Power Management version 3
		Flags: PMEClk- DSI- D1+ D2+ AuxCurrent=0mA PME(D0-,D1-,D2-,D3hot-,D3cold-)
		Status: D3 NoSoftRst+ PME-Enable- DSel=0 DScale=0 PME-
	Capabilities: [58] Express (v2) Legacy Endpoint, IntMsgNum 0
		DevCap:	MaxPayload 256 bytes, PhantFunc 0, Latency L0s <4us, L1 unlimited
			ExtTag+ AttnBtn- AttnInd- PwrInd- RBE+ FLReset- TEE-IO-
		DevCtl:	CorrErr- NonFatalErr- FatalErr- UnsupReq-
			RlxdOrd+ ExtTag+ PhantFunc- AuxPwr- NoSnoop+
			MaxPayload 256 bytes, MaxReadReq 512 bytes
		DevSta:	CorrErr+ NonFatalErr- FatalErr- UnsupReq+ AuxPwr- TransPend-
		LnkCap:	Port #0, Speed 8GT/s, Width x16, ASPM L1, Exit Latency L1 <1us
			ClockPM+ Surprise- LLActRep- BwNot- ASPMOptComp+
		LnkCtl:	ASPM Disabled; RCB 64 bytes, LnkDisable- CommClk+
			ExtSynch- ClockPM- AutWidDis- BWInt- AutBWInt-
		LnkSta:	Speed 8GT/s, Width x16
			TrErr- Train- SlotClk+ DLActive- BWMgmt- ABWMgmt-
		DevCap2: Completion Timeout: Not Supported, TimeoutDis- NROPrPrP- LTR+
			10BitTagComp- 10BitTagReq- OBFF Not Supported, ExtFmt+ EETLPPrefix+, MaxEETLPPrefixes 1
			EmergencyPowerReduction Not Supported, EmergencyPowerReductionInit-
			FRS-
			AtomicOpsCap: 32bit+ 64bit+ 128bitCAS-
		DevCtl2: Completion Timeout: 50us to 50ms, TimeoutDis-
			AtomicOpsCtl: ReqEn-
			IDOReq- IDOCompl- LTR- EmergencyPowerReductionReq-
			10BitTagReq- OBFF Disabled, EETLPPrefixBlk-
		LnkSta2: Current De-emphasis Level: -3.5dB, EqualizationComplete- EqualizationPhase1-
			EqualizationPhase2- EqualizationPhase3- LinkEqualizationRequest-
			Retimer- 2Retimers- CrosslinkRes: unsupported
	Capabilities: [a0] MSI: Enable+ Count=1/1 Maskable- 64bit+
		Address: 00000000fee05000  Data: 0022
	Capabilities: [100 v1] Vendor Specific Information: ID=0001 Rev=1 Len=010 <?>
	Capabilities: [150 v2] Advanced Error Reporting
		UESta:	DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF- MalfTLP-
			ECRC- UnsupReq- ACSViol- UncorrIntErr- BlockedTLP- AtomicOpBlocked- TLPBlockedErr-
			PoisonTLPBlocked- DMWrReqBlocked- IDECheck- MisIDETLP- PCRC_CHECK- TLPXlatBlocked-
		UEMsk:	DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF- MalfTLP-
			ECRC- UnsupReq- ACSViol- UncorrIntErr- BlockedTLP- AtomicOpBlocked- TLPBlockedErr-
			PoisonTLPBlocked- DMWrReqBlocked- IDECheck- MisIDETLP- PCRC_CHECK- TLPXlatBlocked-
		UESvrt:	DLP+ SDES+ TLP- FCP+ CmpltTO- CmpltAbrt- UnxCmplt- RxOF+ MalfTLP+
			ECRC- UnsupReq- ACSViol- UncorrIntErr+ BlockedTLP- AtomicOpBlocked- TLPBlockedErr-
			PoisonTLPBlocked- DMWrReqBlocked- IDECheck- MisIDETLP- PCRC_CHECK- TLPXlatBlocked-
		CESta:	RxErr- BadTLP- BadDLLP- Rollover- Timeout- AdvNonFatalErr+ CorrIntErr- HeaderOF-
		CEMsk:	RxErr- BadTLP- BadDLLP- Rollover- Timeout- AdvNonFatalErr+ CorrIntErr- HeaderOF-
		AERCap:	First Error Pointer: 00, ECRCGenCap+ ECRCGenEn- ECRCChkCap+ ECRCChkEn-
			MultHdrRecCap- MultHdrRecEn- TLPPfxPres- HdrLogCap-
		HeaderLog: 00000000 00000000 00000000 00000000
	Capabilities: [328 v1] Alternative Routing-ID Interpretation (ARI)
		ARICap:	MFVC- ACS-, Next Function: 0
		ARICtl:	MFVC- ACS-, Function Group: 0
	Kernel driver in use: snd_hda_intel
	Kernel modules: snd_hda_intel
00: 02 10 f0 aa 07 04 10 00 00 00 03 04 10 00 80 00
10: 04 00 a6 df 00 00 00 00 00 00 00 00 00 00 00 00
20: 00 00 00 00 00 00 00 00 00 00 00 00 43 10 f0 aa
30: 00 00 00 00 48 00 00 00 00 00 00 00 0a 02 00 00
40: 00 00 00 00 00 00 00 00 09 50 08 00 43 10 f0 aa
50: 01 58 03 06 0b 00 00 00 10 a0 12 00 a1 8f 2c 01
60: 30 29 09 00 03 09 44 00 40 00 03 11 00 00 00 00
70: 00 00 00 00 00 00 00 00 00 00 00 00 80 09 70 00
80: 00 00 00 00 0e 00 00 00 00 00 01 00 00 00 00 00
90: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
a0: 05 00 81 00 00 50 e0 fe 00 00 00 00 22 00 00 00
b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
d0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00

03:00.0 Non-Volatile memory controller: Intel Corporation SSD 660P Series (rev 03) (prog-if 02 [NVM Express])
	Subsystem: Intel Corporation Device 390d
	Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx+
	Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
	Latency: 0, Cache Line Size: 64 bytes
	Interrupt: pin A routed to IRQ 16
	Region 0: Memory at df900000 (64-bit, non-prefetchable) [size=16K]
	Capabilities: [40] Power Management version 3
		Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA PME(D0-,D1-,D2-,D3hot-,D3cold-)
		Status: D0 NoSoftRst- PME-Enable- DSel=0 DScale=0 PME-
	Capabilities: [50] MSI: Enable- Count=1/8 Maskable+ 64bit+
		Address: 0000000000000000  Data: 0000
		Masking: 00000000  Pending: 00000000
	Capabilities: [70] Express (v2) Endpoint, IntMsgNum 0
		DevCap:	MaxPayload 128 bytes, PhantFunc 0, Latency L0s unlimited, L1 unlimited
			ExtTag- AttnBtn- AttnInd- PwrInd- RBE+ FLReset+ SlotPowerLimit 25W TEE-IO-
		DevCtl:	CorrErr- NonFatalErr- FatalErr- UnsupReq-
			RlxdOrd+ ExtTag- PhantFunc- AuxPwr- NoSnoop- FLReset-
			MaxPayload 128 bytes, MaxReadReq 512 bytes
		DevSta:	CorrErr+ NonFatalErr- FatalErr- UnsupReq+ AuxPwr+ TransPend-
		LnkCap:	Port #0, Speed 8GT/s, Width x4, ASPM L1, Exit Latency L1 <8us
			ClockPM+ Surprise- LLActRep- BwNot- ASPMOptComp+
		LnkCtl:	ASPM Disabled; RCB 64 bytes, LnkDisable- CommClk+
			ExtSynch- ClockPM+ AutWidDis- BWInt- AutBWInt-
		LnkSta:	Speed 8GT/s, Width x4
			TrErr- Train- SlotClk+ DLActive- BWMgmt- ABWMgmt-
		DevCap2: Completion Timeout: Range ABCD, TimeoutDis+ NROPrPrP- LTR+
			10BitTagComp- 10BitTagReq- OBFF Not Supported, ExtFmt- EETLPPrefix-
			EmergencyPowerReduction Not Supported, EmergencyPowerReductionInit-
			FRS- TPHComp- ExtTPHComp-
			AtomicOpsCap: 32bit- 64bit- 128bitCAS-
		DevCtl2: Completion Timeout: 50us to 50ms, TimeoutDis-
			AtomicOpsCtl: ReqEn-
			IDOReq- IDOCompl- LTR+ EmergencyPowerReductionReq-
			10BitTagReq- OBFF Disabled, EETLPPrefixBlk-
		LnkCap2: Supported Link Speeds: 2.5-8GT/s, Crosslink- Retimer- 2Retimers- DRS-
		LnkCtl2: Target Link Speed: 8GT/s, EnterCompliance- SpeedDis-
			Transmit Margin: Normal Operating Range, EnterModifiedCompliance- ComplianceSOS-
			Compliance Preset/De-emphasis: -6dB de-emphasis, 0dB preshoot
		LnkSta2: Current De-emphasis Level: -6dB, EqualizationComplete+ EqualizationPhase1+
			EqualizationPhase2+ EqualizationPhase3+ LinkEqualizationRequest-
			Retimer- 2Retimers- CrosslinkRes: unsupported
	Capabilities: [b0] MSI-X: Enable+ Count=16 Masked-
		Vector table: BAR=0 offset=00002000
		PBA: BAR=0 offset=00002100
	Capabilities: [100 v2] Advanced Error Reporting
		UESta:	DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF- MalfTLP-
			ECRC- UnsupReq- ACSViol- UncorrIntErr- BlockedTLP- AtomicOpBlocked- TLPBlockedErr-
			PoisonTLPBlocked- DMWrReqBlocked- IDECheck- MisIDETLP- PCRC_CHECK- TLPXlatBlocked-
		UEMsk:	DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF- MalfTLP-
			ECRC- UnsupReq- ACSViol- UncorrIntErr+ BlockedTLP- AtomicOpBlocked- TLPBlockedErr-
			PoisonTLPBlocked- DMWrReqBlocked- IDECheck- MisIDETLP- PCRC_CHECK- TLPXlatBlocked-
		UESvrt:	DLP+ SDES+ TLP- FCP+ CmpltTO- CmpltAbrt- UnxCmplt- RxOF+ MalfTLP+
			ECRC- UnsupReq- ACSViol- UncorrIntErr+ BlockedTLP- AtomicOpBlocked- TLPBlockedErr-
			PoisonTLPBlocked- DMWrReqBlocked- IDECheck- MisIDETLP- PCRC_CHECK- TLPXlatBlocked-
		CESta:	RxErr- BadTLP- BadDLLP- Rollover- Timeout- AdvNonFatalErr+ CorrIntErr- HeaderOF-
		CEMsk:	RxErr- BadTLP- BadDLLP- Rollover- Timeout- AdvNonFatalErr+ CorrIntErr+ HeaderOF+
		AERCap:	First Error Pointer: 00, ECRCGenCap+ ECRCGenEn- ECRCChkCap+ ECRCChkEn-
			MultHdrRecCap- MultHdrRecEn- TLPPfxPres- HdrLogCap-
		HeaderLog: 00000000 00000000 00000000 00000000
	Capabilities: [158 v1] Secondary PCI Express
		LnkCtl3: LnkEquIntrruptEn- PerformEqu-
		LaneErrStat: 0
	Capabilities: [178 v1] Latency Tolerance Reporting
		Max snoop latency: 3145728ns
		Max no snoop latency: 3145728ns
	Capabilities: [180 v1] L1 PM Substates
		L1SubCap: PCI-PM_L1.2+ PCI-PM_L1.1+ ASPM_L1.2+ ASPM_L1.1+ L1_PM_Substates+
			 PortCommonModeRestoreTime=10us PortTPowerOnTime=10us
		L1SubCtl1: PCI-PM_L1.2- PCI-PM_L1.1- ASPM_L1.2- ASPM_L1.1-
			  T_CommonMode=0us LTR1.2_Threshold=0ns
		L1SubCtl2: T_PwrOn=44us
	Kernel driver in use: nvme
	Kernel modules: nvme
00: 86 80 a8 f1 06 04 10 00 03 02 08 01 10 00 00 00
10: 04 00 90 df 00 00 00 00 00 00 00 00 00 00 00 00
20: 00 00 00 00 00 00 00 00 00 00 00 00 86 80 0d 39
30: 00 00 00 00 40 00 00 00 00 00 00 00 0b 01 00 00
40: 01 50 03 00 00 00 00 00 00 00 00 00 00 00 00 00
50: 05 70 86 01 00 00 00 00 00 00 00 00 00 00 00 00
60: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
70: 10 b0 02 00 c0 8f e8 17 10 20 19 00 43 c8 45 00
80: 40 01 43 10 00 00 00 00 00 00 00 00 00 00 00 00
90: 00 00 00 00 1f 08 00 00 00 04 00 00 0e 00 00 00
a0: 03 00 1e 00 00 00 00 00 00 00 00 00 00 00 00 00
b0: 11 00 0f 80 00 20 00 00 00 21 00 00 00 00 00 00
c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
d0: 03 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00

04:00.0 Network controller: Broadcom Inc. and subsidiaries Device 43c3 (rev 04)
	Subsystem: ASUSTeK Computer Inc. Device 86fb
	Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx+
	Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
	Latency: 0, Cache Line Size: 64 bytes
	Interrupt: pin A routed to IRQ 140
	Region 0: Memory at df800000 (64-bit, non-prefetchable) [size=32K]
	Region 2: Memory at df000000 (64-bit, non-prefetchable) [size=8M]
	Region 4: Memory at d0400000 (64-bit, prefetchable) [size=4M]
	Capabilities: [48] Power Management version 3
		Flags: PMEClk- DSI- D1+ D2+ AuxCurrent=0mA PME(D0+,D1+,D2+,D3hot+,D3cold+)
		Status: D0 NoSoftRst+ PME-Enable- DSel=0 DScale=2 PME-
	Capabilities: [58] MSI: Enable+ Count=1/32 Maskable- 64bit+
		Address: 00000000fee00000  Data: 0022
	Capabilities: [68] Vendor Specific Information: Len=44 <?>
	Capabilities: [ac] Express (v2) Endpoint, IntMsgNum 0
		DevCap:	MaxPayload 256 bytes, PhantFunc 0, Latency L0s <4us, L1 unlimited
			ExtTag- AttnBtn- AttnInd- PwrInd- RBE+ FLReset- SlotPowerLimit 25W TEE-IO-
		DevCtl:	CorrErr- NonFatalErr- FatalErr- UnsupReq-
			RlxdOrd+ ExtTag- PhantFunc- AuxPwr+ NoSnoop+
			MaxPayload 256 bytes, MaxReadReq 512 bytes
		DevSta:	CorrErr+ NonFatalErr- FatalErr- UnsupReq+ AuxPwr+ TransPend-
		LnkCap:	Port #0, Speed 5GT/s, Width x1, ASPM L0s L1, Exit Latency L0s <2us, L1 <32us
			ClockPM+ Surprise- LLActRep- BwNot- ASPMOptComp+
		LnkCtl:	ASPM Disabled; RCB 64 bytes, LnkDisable- CommClk+
			ExtSynch- ClockPM+ AutWidDis- BWInt- AutBWInt-
		LnkSta:	Speed 5GT/s, Width x1
			TrErr- Train- SlotClk+ DLActive- BWMgmt- ABWMgmt-
		DevCap2: Completion Timeout: Range ABCD, TimeoutDis+ NROPrPrP- LTR+
			10BitTagComp- 10BitTagReq- OBFF Via WAKE#, ExtFmt- EETLPPrefix-
			EmergencyPowerReduction Not Supported, EmergencyPowerReductionInit-
			FRS- TPHComp- ExtTPHComp-
			AtomicOpsCap: 32bit- 64bit- 128bitCAS-
		DevCtl2: Completion Timeout: 50us to 50ms, TimeoutDis-
			AtomicOpsCtl: ReqEn-
			IDOReq- IDOCompl- LTR+ EmergencyPowerReductionReq-
			10BitTagReq- OBFF Disabled, EETLPPrefixBlk-
		LnkCap2: Supported Link Speeds: 2.5-5GT/s, Crosslink- Retimer- 2Retimers- DRS-
		LnkCtl2: Target Link Speed: 2.5GT/s, EnterCompliance- SpeedDis-
			Transmit Margin: Normal Operating Range, EnterModifiedCompliance- ComplianceSOS-
			Compliance Preset/De-emphasis: -6dB de-emphasis, 0dB preshoot
		LnkSta2: Current De-emphasis Level: -6dB, EqualizationComplete- EqualizationPhase1-
			EqualizationPhase2- EqualizationPhase3- LinkEqualizationRequest-
			Retimer- 2Retimers- CrosslinkRes: unsupported
	Capabilities: [100 v1] Advanced Error Reporting
		UESta:	DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF- MalfTLP-
			ECRC- UnsupReq- ACSViol- UncorrIntErr- BlockedTLP- AtomicOpBlocked- TLPBlockedErr-
			PoisonTLPBlocked- DMWrReqBlocked- IDECheck- MisIDETLP- PCRC_CHECK- TLPXlatBlocked-
		UEMsk:	DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF- MalfTLP-
			ECRC- UnsupReq- ACSViol- UncorrIntErr- BlockedTLP- AtomicOpBlocked- TLPBlockedErr-
			PoisonTLPBlocked- DMWrReqBlocked- IDECheck- MisIDETLP- PCRC_CHECK- TLPXlatBlocked-
		UESvrt:	DLP+ SDES+ TLP- FCP+ CmpltTO- CmpltAbrt- UnxCmplt- RxOF+ MalfTLP+
			ECRC- UnsupReq- ACSViol- UncorrIntErr- BlockedTLP- AtomicOpBlocked- TLPBlockedErr-
			PoisonTLPBlocked- DMWrReqBlocked- IDECheck- MisIDETLP- PCRC_CHECK- TLPXlatBlocked-
		CESta:	RxErr- BadTLP- BadDLLP- Rollover- Timeout- AdvNonFatalErr+ CorrIntErr- HeaderOF-
		CEMsk:	RxErr- BadTLP- BadDLLP- Rollover- Timeout- AdvNonFatalErr+ CorrIntErr- HeaderOF-
		AERCap:	First Error Pointer: 00, ECRCGenCap+ ECRCGenEn- ECRCChkCap+ ECRCChkEn-
			MultHdrRecCap- MultHdrRecEn- TLPPfxPres- HdrLogCap-
		HeaderLog: 00000000 00000000 00000000 00000000
	Capabilities: [13c v1] Device Serial Number 00-00-00-ff-ff-00-00-00
	Capabilities: [150 v1] Power Budgeting <?>
	Capabilities: [160 v1] Virtual Channel
		Caps:	LPEVC=0 RefClk=100ns PATEntryBits=1
		Arb:	Fixed- WRR32- WRR64- WRR128-
		Ctrl:	ArbSelect=Fixed
		Status:	InProgress-
		VC0:	Caps:	PATOffset=00 MaxTimeSlots=1 RejSnoopTrans-
			Arb:	Fixed- WRR32- WRR64- WRR128- TWRR128- WRR256-
			Ctrl:	Enable+ ID=0 ArbSelect=Fixed TC/VC=ff
			Status:	NegoPending- InProgress-
	Capabilities: [1b0 v1] Latency Tolerance Reporting
		Max snoop latency: 3145728ns
		Max no snoop latency: 3145728ns
	Capabilities: [220 v1] Physical Resizable BAR
		BAR 2: current size: 8MB, supported: 1MB 2MB 4MB 8MB
	Kernel driver in use: brcmfmac
	Kernel modules: brcmfmac
00: e4 14 c3 43 06 04 10 00 04 00 80 02 10 00 00 00
10: 04 00 80 df 00 00 00 00 04 00 00 df 00 00 00 00
20: 0c 00 40 d0 00 00 00 00 00 00 00 00 43 10 fb 86
30: 00 00 00 00 48 00 00 00 00 00 00 00 0b 01 00 00
40: 00 00 00 00 00 00 00 00 01 58 03 fe 08 40 00 00
50: 00 00 00 00 00 00 00 00 05 68 8b 00 00 00 e0 fe
60: 00 00 00 00 22 00 00 00 09 ac 44 00 16 01 00 00
70: 00 30 10 18 00 00 00 00 00 00 00 00 00 00 00 00
80: 00 30 00 18 00 00 00 00 00 03 00 00 00 00 00 00
90: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
a0: 00 00 00 00 00 00 00 00 40 00 0b 00 10 00 02 00
b0: 81 8f e8 07 30 2c 19 00 12 dc 46 00 40 01 12 10
c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
d0: 1f 08 08 00 00 04 00 00 06 00 00 00 01 00 00 00
e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00

chboishabba avatar Mar 13 '25 05:03 chboishabba