LLVM  13.0.0git
Classes | Macros | Typedefs | Enumerations
AMDKernelCodeT.h File Reference
#include <cstdint>
Include dependency graph for AMDKernelCodeT.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Classes

struct  hsa_dim3_s
 
struct  hsa_ext_control_directives_s
 The hsa_ext_control_directives_t specifies the values for the HSAIL control directives. More...
 
struct  amd_kernel_code_t
 AMD Kernel Code Object (amd_kernel_code_t). More...
 

Macros

#define AMD_HSA_BITS_SET(dst, mask, val)
 
#define AMD_HSA_BITS_GET(src, mask)   ((src & mask) >> mask ## _SHIFT) \
 

Typedefs

typedef uint8_t hsa_powertwo8_t
 
typedef uint32_t hsa_ext_code_kind_t
 
typedef uint8_t hsa_ext_brig_profile8_t
 
typedef uint8_t hsa_ext_brig_machine_model8_t
 
typedef uint64_t hsa_ext_control_directive_present64_t
 
typedef uint16_t hsa_ext_exception_kind16_t
 
typedef uint32_t hsa_ext_code_kind32_t
 
typedef struct hsa_dim3_s hsa_dim3_t
 
typedef uint32_t amd_code_version32_t
 The version of the amd_*_code_t struct. More...
 
typedef uint64_t amd_compute_pgm_resource_register64_t
 Shader program settings for CS. More...
 
typedef uint32_t amd_code_property32_t
 Every amd_*_code_t has the following properties, which are composed of a number of bit fields. More...
 
typedef struct hsa_ext_control_directives_s hsa_ext_control_directives_t
 The hsa_ext_control_directives_t specifies the values for the HSAIL control directives. More...
 

Enumerations

enum  amd_code_version_t { AMD_CODE_VERSION_MAJOR = 0, AMD_CODE_VERSION_MINOR = 1 }
 
enum  amd_element_byte_size_t { AMD_ELEMENT_2_BYTES = 0, AMD_ELEMENT_4_BYTES = 1, AMD_ELEMENT_8_BYTES = 2, AMD_ELEMENT_16_BYTES = 3 }
 The values used to define the number of bytes to use for the swizzle element size. More...
 
enum  amd_code_property_mask_t {
  AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT = 0, AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_WIDTH = 1, AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT, AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT = 1,
  AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_WIDTH = 1, AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT, AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT = 2, AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_WIDTH = 1,
  AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT, AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT = 3, AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_WIDTH = 1, AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT,
  AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT = 4, AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_WIDTH = 1, AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT, AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT = 5,
  AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_WIDTH = 1, AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT, AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT = 6, AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_WIDTH = 1,
  AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT, AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_SHIFT = 7, AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_WIDTH = 1, AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_SHIFT,
  AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_SHIFT = 8, AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_WIDTH = 1, AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_SHIFT, AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_SHIFT = 9,
  AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_WIDTH = 1, AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_SHIFT, AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT = 10, AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_WIDTH = 1,
  AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32 = ((1 << AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT, AMD_CODE_PROPERTY_RESERVED1_SHIFT = 11, AMD_CODE_PROPERTY_RESERVED1_WIDTH = 5, AMD_CODE_PROPERTY_RESERVED1 = ((1 << AMD_CODE_PROPERTY_RESERVED1_WIDTH) - 1) << AMD_CODE_PROPERTY_RESERVED1_SHIFT,
  AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_SHIFT = 16, AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_WIDTH = 1, AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS = ((1 << AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_SHIFT, AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT = 17,
  AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_WIDTH = 2, AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE = ((1 << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_WIDTH) - 1) << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT, AMD_CODE_PROPERTY_IS_PTR64_SHIFT = 19, AMD_CODE_PROPERTY_IS_PTR64_WIDTH = 1,
  AMD_CODE_PROPERTY_IS_PTR64 = ((1 << AMD_CODE_PROPERTY_IS_PTR64_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_PTR64_SHIFT, AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT = 20, AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_WIDTH = 1, AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK = ((1 << AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT,
  AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT = 21, AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH = 1, AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT, AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT = 22,
  AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_WIDTH = 1, AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT, AMD_CODE_PROPERTY_RESERVED2_SHIFT = 23, AMD_CODE_PROPERTY_RESERVED2_WIDTH = 9,
  AMD_CODE_PROPERTY_RESERVED2 = ((1 << AMD_CODE_PROPERTY_RESERVED2_WIDTH) - 1) << AMD_CODE_PROPERTY_RESERVED2_SHIFT
}
 

Macro Definition Documentation

◆ AMD_HSA_BITS_GET

#define AMD_HSA_BITS_GET (   src,
  mask 
)    ((src & mask) >> mask ## _SHIFT) \

Definition at line 48 of file AMDKernelCodeT.h.

◆ AMD_HSA_BITS_SET

#define AMD_HSA_BITS_SET (   dst,
  mask,
  val 
)
Value:
dst &= (~(1 << mask ## _SHIFT) & ~mask); \
dst |= (((val) << mask ## _SHIFT) & mask)

Definition at line 43 of file AMDKernelCodeT.h.

Typedef Documentation

◆ amd_code_property32_t

Every amd_*_code_t has the following properties, which are composed of a number of bit fields.

Every bit field has a mask (AMD_CODE_PROPERTY_*), bit width (AMD_CODE_PROPERTY_*_WIDTH), and bit shift amount (AMD_CODE_PROPERTY_*_SHIFT) for convenient access. Unused bits must be 0.

(Note that C bit fields cannot be used here: their layout is implementation-defined in the C standard, so they cannot be used to specify an ABI.)

Definition at line 72 of file AMDKernelCodeT.h.

◆ amd_code_version32_t

The version of the amd_*_code_t struct.

Minor versions must be backward compatible.

Definition at line 36 of file AMDKernelCodeT.h.

◆ amd_compute_pgm_resource_register64_t

Shader program settings for CS.

Contains COMPUTE_PGM_RSRC1 and COMPUTE_PGM_RSRC2 registers.

Definition at line 62 of file AMDKernelCodeT.h.

◆ hsa_dim3_t

typedef struct hsa_dim3_s hsa_dim3_t

◆ hsa_ext_brig_machine_model8_t

Definition at line 23 of file AMDKernelCodeT.h.

◆ hsa_ext_brig_profile8_t

typedef uint8_t hsa_ext_brig_profile8_t

Definition at line 22 of file AMDKernelCodeT.h.

◆ hsa_ext_code_kind32_t

Definition at line 26 of file AMDKernelCodeT.h.

◆ hsa_ext_code_kind_t

Definition at line 21 of file AMDKernelCodeT.h.

◆ hsa_ext_control_directive_present64_t

Definition at line 24 of file AMDKernelCodeT.h.

◆ hsa_ext_control_directives_t

The hsa_ext_control_directives_t specifies the values for the HSAIL control directives.

These control how the finalizer generates code. This struct is used both as an argument to hsaFinalizeKernel to specify values for the control directives, and is used in HsaKernelCode to record the values of the control directives that the finalizer used when generating the code which either came from the finalizer argument or explicit HSAIL control directives. See the definition of the control directives in the HSA Programmer's Reference Manual, which also defines how the values specified as finalizer arguments have to agree with the control directives in the HSAIL code.

◆ hsa_ext_exception_kind16_t

Definition at line 25 of file AMDKernelCodeT.h.

◆ hsa_powertwo8_t

typedef uint8_t hsa_powertwo8_t

Definition at line 20 of file AMDKernelCodeT.h.

Enumeration Type Documentation

◆ amd_code_property_mask_t

Enumerator
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT 

Enable the setup of the SGPR user data registers (AMD_CODE_PROPERTY_ENABLE_SGPR_*), see documentation of amd_kernel_code_t for initial register state.

The total number of SGPR user data registers requested must not exceed 16. Any requests beyond 16 will be ignored.

Used to set COMPUTE_PGM_RSRC2.USER_SGPR (set to total count of SGPR user data registers enabled up to 16).

AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_WIDTH 
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER 
AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT 
AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_WIDTH 
AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR 
AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT 
AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_WIDTH 
AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR 
AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT 
AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_WIDTH 
AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR 
AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT 
AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_WIDTH 
AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID 
AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT 
AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_WIDTH 
AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT 
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT 
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_WIDTH 
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE 
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_SHIFT 
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_WIDTH 
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X 
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_SHIFT 
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_WIDTH 
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y 
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_SHIFT 
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_WIDTH 
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z 
AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT 
AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_WIDTH 
AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32 
AMD_CODE_PROPERTY_RESERVED1_SHIFT 
AMD_CODE_PROPERTY_RESERVED1_WIDTH 
AMD_CODE_PROPERTY_RESERVED1 
AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_SHIFT 

Control wave ID base counter for GDS ordered-append.

Used to set COMPUTE_DISPATCH_INITIATOR.ORDERED_APPEND_ENBL. (Not sure if ORDERED_APPEND_MODE also needs to be settable)

AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_WIDTH 
AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS 
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT 

The interleave (swizzle) element size in bytes required by the code for private memory.

This must be 2, 4, 8 or 16. This value is provided to the finalizer when it is invoked and is recorded here. The hardware will interleave the memory requests of each lane of a wavefront by this element size to ensure each work-item gets a distinct memory location. Therefore, the finalizer ensures that all load and store operations done to private memory do not exceed this size. For example, if the element size is 4 (32-bits or dword) and a 64-bit value must be loaded, the finalizer will generate two 32-bit loads. This ensures that the interleaving will get the work-item specific dword for both halves of the 64-bit value. If it just did a 64-bit load then it would get one dword which belonged to its own work-item, but the second dword would belong to the adjacent lane work-item since the interleaving is in dwords.

The value used must match the value that the runtime configures the GPU flat scratch (SH_STATIC_MEM_CONFIG.ELEMENT_SIZE). This is generally DWORD.

Use values from the amd_element_byte_size_t enum.

AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_WIDTH 
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE 
AMD_CODE_PROPERTY_IS_PTR64_SHIFT 

Indicates whether global memory addresses are 64 bits.

Must match amd_kernel_code_t.hsail_machine_model == HSA_MACHINE_LARGE. Must also match SH_MEM_CONFIG.PTR32 (GFX6 (SI)/GFX7 (CI)), SH_MEM_CONFIG.ADDRESS_MODE (GFX8 (VI)+).

AMD_CODE_PROPERTY_IS_PTR64_WIDTH 
AMD_CODE_PROPERTY_IS_PTR64 
AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT 

Indicate if the generated ISA is using a dynamically sized call stack.

This can happen if calls are implemented using a call stack and recursion, alloca or calls to indirect functions are present. In these cases the Finalizer cannot compute the total private segment size at compile time. In this case the workitem_private_segment_byte_size only specifies the statically known private segment size, and additional space must be added for the call stack.

AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_WIDTH 
AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK 
AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT 

Indicate if code generated has support for debugging.

AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH 
AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED 
AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT 
AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_WIDTH 
AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED 
AMD_CODE_PROPERTY_RESERVED2_SHIFT 
AMD_CODE_PROPERTY_RESERVED2_WIDTH 
AMD_CODE_PROPERTY_RESERVED2 

Definition at line 73 of file AMDKernelCodeT.h.

◆ amd_code_version_t

Enumerator
AMD_CODE_VERSION_MAJOR 
AMD_CODE_VERSION_MINOR 

Definition at line 37 of file AMDKernelCodeT.h.

◆ amd_element_byte_size_t

The values used to define the number of bytes to use for the swizzle element size.

Enumerator
AMD_ELEMENT_2_BYTES 
AMD_ELEMENT_4_BYTES 
AMD_ELEMENT_8_BYTES 
AMD_ELEMENT_16_BYTES 

Definition at line 53 of file AMDKernelCodeT.h.

val
The initial backend is deliberately restricted to z10 We should add support for later architectures at some point If an asm ties an i32 r result to an i64 the input will be treated as an leaving the upper bits uninitialised For i64 store i32 val
Definition: README.txt:15