Fix error: '__builtin_amdgcn_lerp' needs target feature cube-insts (and the related missing-target-feature errors) by tagging the affected ockl functions with the matching target attributes: cube-insts for the cube image load/sample functions (the CM and CMa variants), and lerp-inst, qsad-insts and sad-insts for the media functions. The changes are copied from the upstream files: https://github.com/ROCm/llvm-project/blame/amd-staging/amd/device-libs/ockl/src/image.cl https://github.com/ROCm/llvm-project/blame/amd-staging/amd/device-libs/ockl/src/media.cl --- a/ockl/src/image.cl +++ b/ockl/src/image.cl @@ -13,6 +13,7 @@ #define EII() __oclc_ISA_version != 9010 #define RATTR __attribute__((pure)) +#define CRATTR __attribute__((pure, target("cube-insts"))) #define ERATTR __attribute__((pure, target("extended-image-insts"))) #define WATTR #define GATTR __attribute__((const)) @@ -510,14 +511,14 @@ OCKL_MANGLE_T(image_load,3D)(TSHARP i, int4 c) return my_image_load_3d_v4f32_i32(c.x, c.y, c.z, LOAD_TSHARP(i)); } -RATTR float4 -OCKL_MANGLE_T(image_load,CM)(TSHARP i, int2 c, int f) +CRATTR float4 +OCKL_MANGLE_T(image_load, CM)(TSHARP i, int2 c, int f) { return my_image_load_cube_v4f32_i32(c.x, c.y, f, LOAD_TSHARP(i)); } -RATTR float4 -OCKL_MANGLE_T(image_load,CMa)(TSHARP i, int4 c, int f) +CRATTR float4 +OCKL_MANGLE_T(image_load, CMa)(TSHARP i, int4 c, int f) { f = LS_ARRAY_FACE(c.z, f); return my_image_load_cube_v4f32_i32(c.x, c.y, f, LOAD_TSHARP(i)); @@ -565,14 +566,14 @@ OCKL_MANGLE_T(image_load_lod,3D)(TSHARP i, int4 c, int l) return my_image_load_mip_3d_v4f32_i32(c.x, c.y, c.z, l, LOAD_TSHARP(i)); } -RATTR float4 -OCKL_MANGLE_T(image_load_lod,CM)(TSHARP i, int2 c, int f, int l) +CRATTR float4 +OCKL_MANGLE_T(image_load_lod, CM)(TSHARP i, int2 c, int f, int l) { return my_image_load_mip_cube_v4f32_i32(c.x, c.y, f, l, LOAD_TSHARP(i)); } -RATTR float4 -OCKL_MANGLE_T(image_load_lod,CMa)(TSHARP i, int4 c, int f, int l) +CRATTR float4 +OCKL_MANGLE_T(image_load_lod, CMa)(TSHARP i, int4 c, int f, int l) { f = LS_ARRAY_FACE(c.z, f); return my_image_load_mip_cube_v4f32_i32(c.x, c.y, f, l, LOAD_TSHARP(i)); @@ -614,14 +615,14 @@ OCKL_MANGLE_T(image_loadh,3D)(TSHARP i, int4 c) return my_image_load_3d_v4f16_i32(c.x, c.y, c.z, LOAD_TSHARP(i)); } -RATTR half4
-OCKL_MANGLE_T(image_loadh,CM)(TSHARP i, int2 c, int f) +CRATTR half4 +OCKL_MANGLE_T(image_loadh, CM)(TSHARP i, int2 c, int f) { return my_image_load_cube_v4f16_i32(c.x, c.y, f, LOAD_TSHARP(i)); } -RATTR half4 -OCKL_MANGLE_T(image_loadh,CMa)(TSHARP i, int4 c, int f) +CRATTR half4 +OCKL_MANGLE_T(image_loadh, CMa)(TSHARP i, int4 c, int f) { f = LS_ARRAY_FACE(c.z, f); return my_image_load_cube_v4f16_i32(c.x, c.y, f, LOAD_TSHARP(i)); @@ -657,14 +658,14 @@ OCKL_MANGLE_T(image_loadh_lod,3D)(TSHARP i, int4 c, int l) return my_image_load_mip_3d_v4f16_i32(c.x, c.y, c.z, l, LOAD_TSHARP(i)); } -RATTR half4 -OCKL_MANGLE_T(image_loadh_lod,CM)(TSHARP i, int2 c, int f, int l) +CRATTR half4 +OCKL_MANGLE_T(image_loadh_lod, CM)(TSHARP i, int2 c, int f, int l) { return my_image_load_mip_cube_v4f16_i32(c.x, c.y, f, l, LOAD_TSHARP(i)); } -RATTR half4 -OCKL_MANGLE_T(image_loadh_lod,CMa)(TSHARP i, int4 c, int f, int l) +CRATTR half4 +OCKL_MANGLE_T(image_loadh_lod, CMa)(TSHARP i, int4 c, int f, int l) { f = LS_ARRAY_FACE(c.z, f); return my_image_load_mip_cube_v4f16_i32(c.x, c.y, f, l, LOAD_TSHARP(i)); @@ -950,8 +951,8 @@ OCKL_MANGLE_T(image_sample,3D)(TSHARP i, SSHARP s, float4 c) return my_image_sample_3d_v4f32_f32(c.x, c.y, c.z, LOAD_TSHARP(i), LOAD_SSHARP(s)); } -RATTR float4 -OCKL_MANGLE_T(image_sample,CM)(TSHARP i, SSHARP s, float4 c) +CRATTR float4 +OCKL_MANGLE_T(image_sample, CM)(TSHARP i, SSHARP s, float4 c) { CUBE_PREP(c); if (EII()) @@ -960,8 +961,8 @@ OCKL_MANGLE_T(image_sample,CM)(TSHARP i, SSHARP s, float4 c) return my_image_sample_cube_v4f32_f32(c.x, c.y, c.z, LOAD_TSHARP(i), LOAD_SSHARP(s)); } -RATTR float4 -OCKL_MANGLE_T(image_sample,CMa)(TSHARP i, SSHARP s, float4 c) +CRATTR float4 +OCKL_MANGLE_T(image_sample, CMa)(TSHARP i, SSHARP s, float4 c) { CUBE_PREP(c); c.z = SAMPLE_ARRAY_FACE(c.w, c.z); @@ -1068,15 +1069,15 @@ OCKL_MANGLE_T(image_sample_lod,3D)(TSHARP i, SSHARP s, float4 c, float l) return my_image_sample_l_3d_v4f32_f32(c.x, c.y, c.z, l, LOAD_TSHARP(i), 
LOAD_SSHARP(s)); } -RATTR float4 -OCKL_MANGLE_T(image_sample_lod,CM)(TSHARP i, SSHARP s, float4 c, float l) +CRATTR float4 +OCKL_MANGLE_T(image_sample_lod, CM)(TSHARP i, SSHARP s, float4 c, float l) { CUBE_PREP(c); return my_image_sample_l_cube_v4f32_f32(c.x, c.y, c.z, l, LOAD_TSHARP(i), LOAD_SSHARP(s)); } -RATTR float4 -OCKL_MANGLE_T(image_sample_lod,CMa)(TSHARP i, SSHARP s, float4 c, float l) +CRATTR float4 +OCKL_MANGLE_T(image_sample_lod, CMa)(TSHARP i, SSHARP s, float4 c, float l) { CUBE_PREP(c); c.z = SAMPLE_ARRAY_FACE(c.w, c.z); @@ -1135,8 +1136,8 @@ OCKL_MANGLE_T(image_sampleh,3D)(TSHARP i, SSHARP s, float4 c) return my_image_sample_3d_v4f16_f32(c.x, c.y, c.z, LOAD_TSHARP(i), LOAD_SSHARP(s)); } -RATTR half4 -OCKL_MANGLE_T(image_sampleh,CM)(TSHARP i, SSHARP s, float4 c) +CRATTR half4 +OCKL_MANGLE_T(image_sampleh, CM)(TSHARP i, SSHARP s, float4 c) { CUBE_PREP(c); if (EII()) @@ -1145,8 +1146,8 @@ OCKL_MANGLE_T(image_sampleh,CM)(TSHARP i, SSHARP s, float4 c) return my_image_sample_cube_v4f16_f32(c.x, c.y, c.z, LOAD_TSHARP(i), LOAD_SSHARP(s)); } -RATTR half4 -OCKL_MANGLE_T(image_sampleh,CMa)(TSHARP i, SSHARP s, float4 c) +CRATTR half4 +OCKL_MANGLE_T(image_sampleh, CMa)(TSHARP i, SSHARP s, float4 c) { CUBE_PREP(c); c.z = SAMPLE_ARRAY_FACE(c.w, c.z); @@ -1225,15 +1226,15 @@ OCKL_MANGLE_T(image_sampleh_lod,3D)(TSHARP i, SSHARP s, float4 c, float l) return my_image_sample_l_3d_v4f16_f32(c.x, c.y, c.z, l, LOAD_TSHARP(i), LOAD_SSHARP(s)); } -RATTR half4 -OCKL_MANGLE_T(image_sampleh_lod,CM)(TSHARP i, SSHARP s, float4 c, float l) +CRATTR half4 +OCKL_MANGLE_T(image_sampleh_lod, CM)(TSHARP i, SSHARP s, float4 c, float l) { CUBE_PREP(c); return my_image_sample_l_cube_v4f16_f32(c.x, c.y, c.z, l, LOAD_TSHARP(i), LOAD_SSHARP(s)); } -RATTR half4 -OCKL_MANGLE_T(image_sampleh_lod,CMa)(TSHARP i, SSHARP s, float4 c, float l) +CRATTR half4 +OCKL_MANGLE_T(image_sampleh_lod, CMa)(TSHARP i, SSHARP s, float4 c, float l) { CUBE_PREP(c); c.z = SAMPLE_ARRAY_FACE(c.w, c.z); --- 
a/ockl/src/media.cl +++ b/ockl/src/media.cl @@ -11,6 +11,9 @@ #pragma OPENCL EXTENSION cl_khr_fp16 : enable #define CATTR __attribute__((const)) +#define LCATTR __attribute__((const, target("lerp-inst"))) +#define QCATTR __attribute__((const, target("qsad-insts"))) +#define SCATTR __attribute__((const, target("sad-insts"))) #define AS_UCHAR4(X) __builtin_astype(X, uchar4) CATTR uint @@ -44,7 +47,7 @@ OCKL_MANGLE_U32(bytealign)(uint a, uint b, uint c) return __builtin_amdgcn_alignbyte(a, b, c); } -CATTR uint +LCATTR uint OCKL_MANGLE_U32(lerp)(uint a, uint b, uint c) { return __builtin_amdgcn_lerp(a, b, c); @@ -155,13 +158,13 @@ OCKL_MANGLE_U32(pack)(float4 a) __builtin_amdgcn_cvt_pk_u8_f32(a.s0, 0, 0)))); } -CATTR ulong +QCATTR ulong OCKL_MANGLE_U64(qsad)(ulong a, uint b, ulong c) { return __builtin_amdgcn_qsad_pk_u16_u8(a, b, c); } -CATTR uint +SCATTR uint OCKL_MANGLE_U32(sad)(uint a, uint b, uint c) { return __builtin_amdgcn_sad_u8(a, b, c); @@ -174,13 +177,13 @@ OCKL_MANGLE_U32(sadd)(uint a, uint b, uint c) return (a > b ? a : b) - (a < b ? a : b) + c; } -CATTR uint +SCATTR uint OCKL_MANGLE_U32(sadhi)(uint a, uint b, uint c) { return __builtin_amdgcn_sad_hi_u8(a, b, c); } -CATTR uint +SCATTR uint OCKL_MANGLE_U32(sadw)(uint a, uint b, uint c) { return __builtin_amdgcn_sad_u16(a, b, c); @@ -213,4 +216,3 @@ OCKL_MANGLE_F32(unpack3)(uint a) uchar4 v = AS_UCHAR4(a); return (float)v.s3; } -