Mesa: User error: GL_INVALID_ENUM in glGetIntegerv(pname=GL_CONTEXT_PROFILE_MASK) SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = add i32 %5, %8 %15 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %13, i32 0, i32 %14) %16 = extractelement <4 x float> %15, i32 0 %17 = extractelement <4 x float> %15, i32 1 %18 = extractelement <4 x float> %15, i32 2 %19 = extractelement <4 x float> %15, i32 3 %20 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20, align 16, !tbaa !0 %22 = add i32 %5, %8 %23 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %21, i32 0, i32 %22) %24 = extractelement <4 x float> %23, i32 0 %25 = extractelement <4 x float> %23, i32 1 %26 = extractelement <4 x float> %23, i32 2 %27 = extractelement <4 x float> %23, i32 3 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %24, float %25, float %26, float %27) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %16, float %17, float %18, float %19) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[0:3], 0 idxen ; E00C2000 80000100 buffer_load_format_xyzw v[5:8], v0, s[4:7], 0 idxen ; E00C2000 80010500 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v5, v6, v7, v8 ; F800020F 08070605 exp 15, 12, 0, 1, 0, v1, v2, v3, v4 ; F80008CF 04030201 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 12 Code Size: 56 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %6) %24 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %6) %25 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %6) %26 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %6) %27 = call i32 @llvm.SI.packf16(float %23, float %24) %28 = bitcast i32 %27 to float %29 = call i32 @llvm.SI.packf16(float %25, float %26) %30 = bitcast i32 %29 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %28, float %30, float %28, float %30) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s10 ; BEFC030A v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002 v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_interp_mov_f32 v1, P0, 2, 0, [m0] ; C8060202 v_interp_mov_f32 v2, P0, 3, 0, [m0] ; C80A0302 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 4 Code Size: 40 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** Using breakpad crash handler Setting breakpad minidump AppID = 730 Forcing breakpad minidump interfaces to load Looking up breakpad interfaces from steamclient Calling BreakpadMiniDumpSystemInit Looking up breakpad interfaces from steamclient Calling BreakpadMiniDumpSystemInit Steam_SetMinidumpSteamID: Caching Steam ID: 76561198008816698 [API loaded yes] Steam_SetMinidumpSteamID: Setting Steam ID: 76561198008816698 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT 0: TEX OUT[0], IN[0], SAMP[0], 2D 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %12) %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %12) %29 = bitcast float %27 to i32 %30 = bitcast float %28 to i32 %31 = insertelement <2 x i32> undef, i32 %29, i32 0 %32 = insertelement <2 x i32> %31, i32 %30, i32 1 %33 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %32, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 %38 = call i32 @llvm.SI.packf16(float %34, float %35) %39 = bitcast i32 %38 to float %40 = call i32 @llvm.SI.packf16(float %36, float %37) %41 = bitcast i32 %40 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %39, float %41, float %39, float %41) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s10 ; BEFC030A v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030002 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 4 Code Size: 68 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..3] DCL TEMP[0], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MOV OUT[1], IMM[0].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 12) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 16) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 20) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 24) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 28) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 32) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 36) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 40) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 44) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 48) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 52) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 56) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 60) %30 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = add i32 %5, %8 %33 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %31, i32 0, i32 %32) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 %38 = fmul float %14, %34 %39 = fmul float %15, %34 %40 = fmul float %16, %34 %41 = fmul float %17, %34 %42 = fmul float %18, %35 %43 = fadd float %42, %38 %44 = fmul float %19, %35 %45 = fadd float %44, %39 %46 = fmul float %20, %35 %47 = fadd float %46, %40 %48 = fmul float %21, %35 %49 = fadd float %48, %41 %50 = fmul float %22, %36 %51 = fadd float %50, %43 %52 = fmul float %23, %36 %53 = fadd float %52, %45 %54 = fmul float %24, %36 %55 = fadd float %54, %47 %56 = fmul float %25, %36 %57 = fadd float %56, %49 %58 = fmul float %26, %37 %59 = fadd float %58, %51 %60 = fmul float %27, %37 %61 = fadd float %60, %53 %62 = fmul float %28, %37 %63 = fadd float %62, %55 %64 = fmul float %29, %37 %65 = fadd float %64, %57 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %59, float %61, float %63, float %65) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v4, s4, v0 ; 10080004 v_mul_f32_e32 v5, s5, v0 ; 100A0005 v_mul_f32_e32 v6, s6, v0 ; 100C0006 v_mul_f32_e32 v0, s7, v0 ; 10000007 v_mac_f32_e32 v4, s8, v1 ; 3E080208 v_mac_f32_e32 v5, s9, v1 ; 3E0A0209 v_mac_f32_e32 v6, s10, v1 ; 3E0C020A v_mac_f32_e32 v0, s11, v1 ; 3E00020B v_mac_f32_e32 v4, s12, v2 ; 3E08040C v_mac_f32_e32 v5, s13, v2 ; 3E0A040D v_mac_f32_e32 v6, s14, v2 ; 3E0C040E v_mac_f32_e32 v0, s15, v2 ; 3E00040F v_mac_f32_e32 v4, s16, v3 ; 3E080610 v_mac_f32_e32 v5, s17, v3 ; 3E0A0611 v_mac_f32_e32 v6, s18, v3 ; 3E0C0612 v_mac_f32_e32 v0, s0, v3 ; 3E000600 v_mov_b32_e32 v1, 0 ; 7E020280 exp 15, 32, 0, 0, 0, v1, v1, v1, v1 ; F800020F 01010101 exp 15, 12, 0, 1, 0, v4, v5, v6, v0 ; F80008CF 00060504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 184 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0], LOCAL 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV OUT[0], TEMP[0] 3: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %29 = bitcast float %27 to i32 %30 = bitcast float %28 to i32 %31 = insertelement <2 x i32> undef, i32 %29, i32 0 %32 = insertelement <2 x i32> %31, i32 %30, i32 1 %33 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %32, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 %38 = call i32 @llvm.SI.packf16(float %34, float %35) %39 = bitcast i32 %38 to float %40 = call i32 @llvm.SI.packf16(float %36, float %37) %41 = bitcast i32 %40 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %39, float %41, float %39, float %41) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s10 ; BEFC030A v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030002 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 4 Code Size: 68 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %6) %24 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %6) %25 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %6) %26 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %6) %27 = call i32 @llvm.SI.packf16(float %23, float %24) %28 = bitcast i32 %27 to float %29 = call i32 @llvm.SI.packf16(float %25, float %26) %30 = bitcast i32 %29 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %28, float %30, float %28, float %30) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s10 ; BEFC030A v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002 v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_interp_mov_f32 v1, P0, 2, 0, [m0] ; C8060202 v_interp_mov_f32 v2, P0, 3, 0, [m0] ; C80A0302 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 4 Code Size: 40 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x0 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, UINT 0: TEX OUT[0], IN[0], SAMP[0], 2D 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %12) %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %12) %29 = bitcast float %27 to i32 %30 = bitcast float %28 to i32 %31 = insertelement <2 x i32> undef, i32 %29, i32 0 %32 = insertelement <2 x i32> %31, i32 %30, i32 1 %33 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %32, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %34, float %35, float %36, float %37) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s10 ; BEFC030A v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030002 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 0, 0, 1, 1, v0, v1, v2, v3 ; F800180F 03020100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 4 Code Size: 60 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], CUBE, FLOAT 0: TEX OUT[0], IN[0], SAMP[0], CUBE 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %12) %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %12) %29 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %12) %30 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %12) %31 = insertelement <4 x float> undef, float %27, i32 0 %32 = insertelement <4 x float> %31, float %28, i32 1 %33 = insertelement <4 x float> %32, float %29, i32 2 %34 = insertelement <4 x float> %33, float %30, i32 3 %35 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %34) %36 = extractelement <4 x float> %35, i32 0 %37 = extractelement <4 x float> %35, i32 1 %38 = extractelement <4 x float> %35, i32 2 %39 = call float @llvm.fabs.f32(float %38) %40 = fdiv float 1.000000e+00, %39 %41 = fmul float %36, %40 %42 = fadd float %41, 1.500000e+00 %43 = fmul float %37, %40 %44 = fadd float %43, 1.500000e+00 %45 = bitcast float %44 to i32 %46 = bitcast float %42 to i32 %bc = bitcast <4 x float> %35 to <4 x i32> %47 = insertelement <4 x i32> undef, i32 %45, i32 0 %48 = insertelement <4 x i32> %47, i32 %46, i32 1 %49 = shufflevector <4 x i32> %48, <4 x i32> %bc, <4 x i32> %50 = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %49, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %51 = extractelement <4 x float> %50, i32 0 %52 = extractelement <4 x float> %50, i32 1 %53 = extractelement <4 x float> %50, i32 2 %54 = extractelement <4 x float> %50, i32 3 %55 = call i32 @llvm.SI.packf16(float %51, float %52) %56 = bitcast i32 %55 to float %57 = call i32 @llvm.SI.packf16(float %53, float %54) %58 = bitcast i32 %57 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %56, float %58, float %56, float %58) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s10 ; BEFC030A s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v0, v0, 3, 0, [m0] ; C8000300 v_interp_p2_f32 v0, [v0], v1, 3, 0, [m0] ; C8010301 v_cubeid_f32 v7, v2, v3, v4 ; D2880007 04120702 v_cubema_f32 v0, v2, v3, v4 ; D28E0000 04120702 v_rcp_f32_e64 v0, |v0| ; D3540100 00000100 v_cubesc_f32 v1, v2, v3, v4 ; D28A0001 04120702 v_cubetc_f32 v2, v2, v3, v4 ; D28C0002 04120702 v_mov_b32_e32 v5, 0x3fc00000 ; 7E0A02FF 3FC00000 v_mad_f32 v6, v0, v2, v5 ; D2820006 04160500 v_mac_f32_e32 v5, v0, v1 ; 3E0A0300 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[5:8], s[8:15], s[0:3] ; F0800F00 00020005 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 12 Code Size: 144 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..3] DCL TEMP[0], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MOV OUT[1], IMM[0].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 12) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 16) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 20) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 24) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 28) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 32) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 36) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 40) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 44) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 48) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 52) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 56) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 60) %30 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = add i32 %5, %8 %33 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %31, i32 0, i32 %32) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 %38 = fmul float %14, %34 %39 = fmul float %15, %34 %40 = fmul float %16, %34 %41 = fmul float %17, %34 %42 = fmul float %18, %35 %43 = fadd float %42, %38 %44 = fmul float %19, %35 %45 = fadd float %44, %39 %46 = fmul float %20, %35 %47 = fadd float %46, %40 %48 = fmul float %21, %35 %49 = fadd float %48, %41 %50 = fmul float %22, %36 %51 = fadd float %50, %43 %52 = fmul float %23, %36 %53 = fadd float %52, %45 %54 = fmul float %24, %36 %55 = fadd float %54, %47 %56 = fmul float %25, %36 %57 = fadd float %56, %49 %58 = fmul float %26, %37 %59 = fadd float %58, %51 %60 = fmul float %27, %37 %61 = fadd float %60, %53 %62 = fmul float %28, %37 %63 = fadd float %62, %55 %64 = fmul float %29, %37 %65 = fadd float %64, %57 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %59, float %61, float %63, float %65) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v4, s4, v0 ; 10080004 v_mul_f32_e32 v5, s5, v0 ; 100A0005 v_mul_f32_e32 v6, s6, v0 ; 100C0006 v_mul_f32_e32 v0, s7, v0 ; 10000007 v_mac_f32_e32 v4, s8, v1 ; 3E080208 v_mac_f32_e32 v5, s9, v1 ; 3E0A0209 v_mac_f32_e32 v6, s10, v1 ; 3E0C020A v_mac_f32_e32 v0, s11, v1 ; 3E00020B v_mac_f32_e32 v4, s12, v2 ; 3E08040C v_mac_f32_e32 v5, s13, v2 ; 3E0A040D v_mac_f32_e32 v6, s14, v2 ; 3E0C040E v_mac_f32_e32 v0, s15, v2 ; 3E00040F v_mac_f32_e32 v4, s16, v3 ; 3E080610 v_mac_f32_e32 v5, s17, v3 ; 3E0A0611 v_mac_f32_e32 v6, s18, v3 ; 3E0C0612 v_mac_f32_e32 v0, s0, v3 ; 3E000600 v_mov_b32_e32 v1, 0 ; 7E020280 exp 15, 32, 0, 0, 0, v1, v1, v1, v1 ; F800020F 01010101 exp 15, 12, 0, 1, 0, v4, v5, v6, v0 ; F80008CF 00060504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 184 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], CUBE, FLOAT DCL TEMP[0], LOCAL 0: MOV TEMP[0].xyz, IN[0].xyzz 1: TEX TEMP[0], TEMP[0], SAMP[0], CUBE 2: MOV OUT[0], TEMP[0] 3: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %29 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %30 = insertelement <4 x float> undef, float %27, i32 0 %31 = insertelement <4 x float> %30, float %28, i32 1 %32 = insertelement <4 x float> %31, float %29, i32 2 %33 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %32) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = call float @llvm.fabs.f32(float %36) %38 = fdiv float 1.000000e+00, %37 %39 = fmul float %34, %38 %40 = fadd float %39, 1.500000e+00 %41 = fmul float %35, %38 %42 = fadd float %41, 1.500000e+00 %43 = bitcast float %42 to i32 %44 = bitcast float %40 to i32 %bc = bitcast <4 x float> %33 to <4 x i32> %45 = insertelement <4 x i32> undef, i32 %43, i32 0 %46 = insertelement <4 x i32> %45, i32 %44, i32 1 %47 = shufflevector <4 x i32> %46, <4 x i32> %bc, <4 x i32> %48 = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %47, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = call i32 @llvm.SI.packf16(float %49, float %50) %54 = bitcast i32 %53 to float %55 = call i32 @llvm.SI.packf16(float %51, float %52) %56 = bitcast i32 %55 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %54, float %56, float %54, float %56) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s10 ; BEFC030A s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v0, v0, 2, 0, [m0] ; C8000200 v_interp_p2_f32 v0, [v0], v1, 2, 0, [m0] ; C8010201 v_cubeid_f32 v6, v2, v3, v0 ; D2880006 04020702 v_cubema_f32 v1, v2, v3, v0 ; D28E0001 04020702 v_rcp_f32_e64 v1, |v1| ; D3540101 00000101 v_cubesc_f32 v7, v2, v3, v0 ; D28A0007 04020702 v_cubetc_f32 v0, v2, v3, v0 ; D28C0000 04020702 v_mov_b32_e32 v4, 0x3fc00000 ; 7E0802FF 3FC00000 v_mad_f32 v5, v1, v0, v4 ; D2820005 04120101 v_mac_f32_e32 v4, v1, v7 ; 3E080F01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[4:7], s[8:15], s[0:3] ; F0800F00 00020004 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 136 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], CLIPVERTEX DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL OUT[4], GENERIC[2] DCL CONST[0..57] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 1.0000, 2.2000, 0.0000, 0.0000} 0: ABS TEMP[0].x, CONST[50].xxxx 1: FSLT TEMP[1].x, -TEMP[0].xxxx, TEMP[0].xxxx 2: AND TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 3: ADD TEMP[0].xyz, IN[1].xyzz, IN[1].xyzz 4: LG2 TEMP[2].x, TEMP[0].xxxx 5: LG2 TEMP[3].x, TEMP[0].yyyy 6: MOV TEMP[2].y, TEMP[3].xxxx 7: LG2 TEMP[3].x, TEMP[0].zzzz 8: MOV TEMP[2].z, TEMP[3].xxxx 9: MUL TEMP[0].xyz, TEMP[2].xyzz, IMM[0].yyyy 10: EX2 TEMP[2].x, TEMP[0].xxxx 11: EX2 TEMP[3].x, TEMP[0].yyyy 12: MOV TEMP[2].y, TEMP[3].xxxx 13: EX2 TEMP[3].x, TEMP[0].zzzz 14: MOV TEMP[2].z, TEMP[3].xxxx 15: MUL TEMP[1].xyz, TEMP[1].xxxx, TEMP[2].xyzz 16: DP4 TEMP[3].x, IN[2], CONST[48] 17: DP4 TEMP[4].x, IN[2], CONST[49] 18: MOV TEMP[3].y, TEMP[4].xxxx 19: MOV TEMP[0].w, CONST[0].yyyy 20: MOV TEMP[2].w, IN[0].wwww 21: MAD TEMP[2].xyz, IN[3].xyzz, CONST[13].xxxx, IN[0].xyzz 22: DP4 TEMP[0].x, TEMP[2], CONST[54] 23: DP4 TEMP[4].x, TEMP[2], CONST[55] 24: MOV TEMP[0].y, TEMP[4].xxxx 25: DP4 TEMP[2].x, TEMP[2], CONST[56] 26: MOV TEMP[0].z, TEMP[2].xxxx 27: DP4 TEMP[2].x, TEMP[0], CONST[8] 28: DP4 TEMP[4].x, TEMP[0], CONST[9] 29: MOV TEMP[2].y, TEMP[4].xxxx 30: DP4 TEMP[5].x, TEMP[0], CONST[11] 31: MOV TEMP[2].w, TEMP[5].xxxx 32: DP4 TEMP[6].x, TEMP[0], CONST[10] 33: MOV TEMP[0].w, TEMP[6].xxxx 34: MOV TEMP[2].z, TEMP[6].xxxx 35: MOV TEMP[3].zw, CONST[0].xxxx 36: MOV TEMP[1].w, CONST[0].xxxx 37: MOV TEMP[7], TEMP[2] 38: MAD TEMP[6].x, TEMP[6].xxxx, CONST[0].zzzz, -TEMP[5].xxxx 39: MOV TEMP[2].z, TEMP[6].xxxx 40: MOV TEMP[2].y, -TEMP[4].xxxx 41: MAD TEMP[2].xy, CONST[57].xyyy, TEMP[5].xxxx, TEMP[2].xyyy 42: MOV OUT[2], TEMP[3] 43: MOV OUT[0], TEMP[2] 44: MOV OUT[1], TEMP[7] 45: MOV OUT[3], TEMP[1] 46: MOV OUT[4], TEMP[0] 47: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 128) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 132) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 136) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 140) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 144) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 148) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 152) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 156) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 160) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 164) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 168) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 172) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 176) %30 = call float @llvm.SI.load.const(<16 x i8> %13, i32 180) %31 = call float @llvm.SI.load.const(<16 x i8> %13, i32 184) %32 = call float @llvm.SI.load.const(<16 x i8> %13, i32 188) %33 = call float @llvm.SI.load.const(<16 x i8> %13, i32 208) %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 768) %35 = call float @llvm.SI.load.const(<16 x i8> %13, i32 772) %36 = call float @llvm.SI.load.const(<16 x i8> %13, i32 776) %37 = call float @llvm.SI.load.const(<16 x i8> %13, i32 780) %38 = call float @llvm.SI.load.const(<16 x i8> %13, i32 784) %39 = call float @llvm.SI.load.const(<16 x i8> %13, i32 788) %40 = call float @llvm.SI.load.const(<16 x i8> %13, i32 792) %41 = call float @llvm.SI.load.const(<16 x i8> %13, i32 796) %42 = call float @llvm.SI.load.const(<16 x i8> %13, i32 800) %43 = call float @llvm.SI.load.const(<16 x i8> %13, i32 864) %44 = call float @llvm.SI.load.const(<16 x i8> %13, i32 868) %45 = call float @llvm.SI.load.const(<16 x i8> %13, i32 872) %46 = call float @llvm.SI.load.const(<16 x i8> %13, i32 876) %47 = call float @llvm.SI.load.const(<16 x i8> %13, i32 880) %48 = call float @llvm.SI.load.const(<16 x i8> %13, i32 884) %49 = call float @llvm.SI.load.const(<16 x i8> %13, i32 888) %50 = call float @llvm.SI.load.const(<16 x i8> %13, i32 892) %51 = call float @llvm.SI.load.const(<16 x i8> %13, i32 896) %52 = call float @llvm.SI.load.const(<16 x i8> %13, i32 900) %53 = call float @llvm.SI.load.const(<16 x i8> %13, i32 904) %54 = call float @llvm.SI.load.const(<16 x i8> %13, i32 908) %55 = call float @llvm.SI.load.const(<16 x i8> %13, i32 912) %56 = call float @llvm.SI.load.const(<16 x i8> %13, i32 916) %57 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !tbaa !0 %59 = add i32 %5, %8 %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %58, i32 0, i32 %59) %61 = extractelement <4 x float> %60, i32 0 %62 = extractelement <4 x float> %60, i32 1 %63 = extractelement <4 x float> %60, i32 2 %64 = extractelement <4 x float> %60, i32 3 %65 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %66 = load <16 x i8>, <16 x i8> addrspace(2)* %65, align 16, !tbaa !0 %67 = add i32 %5, %8 %68 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %66, i32 0, i32 %67) %69 = extractelement <4 x float> %68, i32 0 %70 = extractelement <4 x float> %68, i32 1 %71 = extractelement <4 x float> %68, i32 2 %72 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %73 = load <16 x i8>, <16 x i8> addrspace(2)* %72, align 16, !tbaa !0 %74 = add i32 %5, %8 %75 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %73, i32 0, i32 %74) %76 = extractelement <4 x float> %75, i32 0 %77 = extractelement <4 x float> %75, i32 1 %78 = extractelement <4 x float> %75, i32 2 %79 = extractelement <4 x float> %75, i32 3 %80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0 %82 = add i32 %5, %8 %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %82) %84 = extractelement <4 x float> %83, i32 0 %85 = extractelement <4 x float> %83, i32 1 %86 = extractelement <4 x float> %83, i32 2 %87 = call float @llvm.fabs.f32(float %42) %88 = fsub float -0.000000e+00, %87 %89 = fcmp ogt float %87, %88 %90 = select i1 %89, float 1.000000e+00, float 0.000000e+00 %91 = fadd float %69, %69 %92 = fadd float %70, %70 %93 = fadd float %71, %71 %94 = call float @llvm.log2.f32(float %91) %95 = call float @llvm.log2.f32(float %92) %96 = call float @llvm.log2.f32(float %93) %97 = fmul float %94, 0x40019999A0000000 %98 = fmul float %95, 0x40019999A0000000 %99 = fmul float %96, 0x40019999A0000000 %100 = call float @llvm.AMDIL.exp.(float %97) %101 = call float @llvm.AMDIL.exp.(float %98) %102 = call float @llvm.AMDIL.exp.(float %99) %103 = fmul float %90, %100 %104 = fmul float %90, %101 %105 = fmul float %90, %102 %106 = fmul float %76, %34 %107 = fmul float %77, %35 %108 = fadd float %106, %107 %109 = fmul float %78, %36 %110 = fadd float %108, %109 %111 = fmul float %79, %37 %112 = fadd float %110, %111 %113 = fmul float %76, %38 %114 = fmul float %77, %39 %115 = fadd float %113, %114 %116 = fmul float %78, %40 %117 = fadd float %115, %116 %118 = fmul float %79, %41 %119 = fadd float %117, %118 %120 = fmul float %84, %33 %121 = fadd float %120, %61 %122 = fmul float %85, %33 %123 = fadd float %122, %62 %124 = fmul float %86, %33 %125 = fadd float %124, %63 %126 = fmul float %121, %43 %127 = fmul float %123, %44 %128 = fadd float %126, %127 %129 = fmul float %125, %45 %130 = fadd float %128, %129 %131 = fmul float %64, %46 %132 = fadd float %130, %131 %133 = fmul float %121, %47 %134 = fmul float %123, %48 %135 = fadd float %133, %134 %136 = fmul float %125, %49 %137 = fadd float %135, %136 %138 = fmul float %64, %50 %139 = fadd float %137, %138 %140 = fmul float %121, %51 %141 = fmul float %123, %52 %142 = fadd float %140, %141 %143 = fmul float %125, %53 %144 = fadd float %142, %143 %145 = fmul float %64, %54 %146 = fadd float %144, %145 %147 = fmul float %132, %17 %148 = fmul float %139, %18 %149 = fadd float %147, %148 %150 = fmul float %146, %19 %151 = fadd float %149, %150 %152 = fmul float %15, %20 %153 = fadd float %151, %152 %154 = fmul float %132, %21 %155 = fmul float %139, %22 %156 = fadd float %154, %155 %157 = fmul float %146, %23 %158 = fadd float %156, %157 %159 = fmul float %15, %24 %160 = fadd float %158, %159 %161 = fmul float %132, %29 %162 = fmul float %139, %30 %163 = fadd float %161, %162 %164 = fmul float %146, %31 %165 = fadd float %163, %164 %166 = fmul float %15, %32 %167 = fadd float %165, %166 %168 = fmul float %132, %25 %169 = fmul float %139, %26 %170 = fadd float %168, %169 %171 = fmul float %146, %27 %172 = fadd float %170, %171 %173 = fmul float %15, %28 %174 = fadd float %172, %173 %175 = fmul float %174, %16 %176 = fsub float %175, %167 %177 = fmul float %55, %167 %178 = fadd float %177, %153 %179 = fmul float %56, %167 %180 = fsub float %179, %160 %181 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 16 %182 = load <16 x i8>, <16 x i8> addrspace(2)* %181, align 16, !tbaa !0 %183 = call float @llvm.SI.load.const(<16 x i8> %182, i32 0) %184 = fmul float %183, %153 %185 = call float @llvm.SI.load.const(<16 x i8> %182, i32 4) %186 = fmul float %185, %160 %187 = fadd float %184, %186 %188 = call float @llvm.SI.load.const(<16 x i8> %182, i32 8) %189 = fmul float %188, %174 %190 = fadd float %187, %189 %191 = call float @llvm.SI.load.const(<16 x i8> %182, i32 12) %192 = fmul float %191, %167 %193 = fadd float %190, %192 %194 = call float @llvm.SI.load.const(<16 x i8> %182, i32 16) %195 = fmul float %194, %153 %196 = call float @llvm.SI.load.const(<16 x i8> %182, i32 20) %197 = fmul float %196, %160 %198 = fadd float %195, %197 %199 = call float @llvm.SI.load.const(<16 x i8> %182, i32 24) %200 = fmul float %199, %174 %201 = fadd float %198, %200 %202 = call float @llvm.SI.load.const(<16 x i8> %182, i32 28) %203 = fmul float %202, %167 %204 = fadd float %201, %203 %205 = call float @llvm.SI.load.const(<16 x i8> %182, i32 32) %206 = fmul float %205, %153 %207 = call float @llvm.SI.load.const(<16 x i8> %182, i32 36) %208 = fmul float %207, %160 %209 = fadd float %206, %208 %210 = call float @llvm.SI.load.const(<16 x i8> %182, i32 40) %211 = fmul float %210, %174 %212 = fadd float %209, %211 %213 = call float @llvm.SI.load.const(<16 x i8> %182, i32 44) %214 = fmul float %213, %167 %215 = fadd float %212, %214 %216 = call float @llvm.SI.load.const(<16 x i8> %182, i32 48) %217 = fmul float %216, %153 %218 = call float @llvm.SI.load.const(<16 x i8> %182, i32 52) %219 = fmul float %218, %160 %220 = fadd float %217, %219 %221 = call float @llvm.SI.load.const(<16 x i8> %182, i32 56) %222 = fmul float %221, %174 %223 = fadd float %220, %222 %224 = call float @llvm.SI.load.const(<16 x i8> %182, i32 60) %225 = fmul float %224, %167 %226 = fadd float %223, %225 %227 = call float @llvm.SI.load.const(<16 x i8> %182, i32 64) %228 = fmul float %227, %153 %229 = call float @llvm.SI.load.const(<16 x i8> %182, i32 68) %230 = fmul float %229, %160 %231 = fadd float %228, %230 %232 = call float @llvm.SI.load.const(<16 x i8> %182, i32 72) %233 = fmul float %232, %174 %234 = fadd float %231, %233 %235 = call float @llvm.SI.load.const(<16 x i8> %182, i32 76) %236 = fmul float %235, %167 %237 = fadd float %234, %236 %238 = call float @llvm.SI.load.const(<16 x i8> %182, i32 80) %239 = fmul float %238, %153 %240 = call float @llvm.SI.load.const(<16 x i8> %182, i32 84) %241 = fmul float %240, %160 %242 = fadd float %239, %241 %243 = call float @llvm.SI.load.const(<16 x i8> %182, i32 88) %244 = fmul float %243, %174 %245 = fadd float %242, %244 %246 = call float @llvm.SI.load.const(<16 x i8> %182, i32 92) %247 = fmul float %246, %167 %248 = fadd float %245, %247 %249 = call float @llvm.SI.load.const(<16 x i8> %182, i32 96) %250 = fmul float %249, %153 %251 = call float @llvm.SI.load.const(<16 x i8> %182, i32 100) %252 = fmul float %251, %160 %253 = fadd float %250, %252 %254 = call float @llvm.SI.load.const(<16 x i8> %182, i32 104) %255 = fmul float %254, %174 %256 = fadd float %253, %255 %257 = call float @llvm.SI.load.const(<16 x i8> %182, i32 108) %258 = fmul float %257, %167 %259 = fadd float %256, %258 %260 = call float @llvm.SI.load.const(<16 x i8> %182, i32 112) %261 = fmul float %260, %153 %262 = call float @llvm.SI.load.const(<16 x i8> %182, i32 116) %263 = fmul float %262, %160 %264 = fadd float %261, %263 %265 = call float @llvm.SI.load.const(<16 x i8> %182, i32 120) %266 = fmul float %265, %174 %267 = fadd float %264, %266 %268 = call float @llvm.SI.load.const(<16 x i8> %182, i32 124) %269 = fmul float %268, %167 %270 = fadd float %267, %269 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %112, float %119, float %14, float %14) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %103, float %104, float %105, float %14) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %132, float %139, float %146, float %174) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %178, float %180, float %176, float %167) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 13, i32 0, float %193, float %204, float %215, float %226) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 14, i32 0, float %237, float %248, float %259, float %270) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0x400ccccd ; 7E0202FF 400CCCCD v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C s_load_dwordx4 s[20:23], s[2:3], 0x0 ; C08A0300 s_load_dwordx4 s[40:43], s[2:3], 0x40 ; C0940340 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[9:12], v0, s[16:19], 0 idxen ; E00C2000 80040900 buffer_load_format_xyzw v[13:16], v0, s[8:11], 0 idxen ; E00C2000 80020D00 s_buffer_load_dword s37, s[20:23], 0x2c ; C212952C s_buffer_load_dword s38, s[20:23], 0x2d ; C213152D s_buffer_load_dword s39, s[20:23], 0x2e ; C213952E s_buffer_load_dword s0, s[20:23], 0x2f ; C200152F s_buffer_load_dword s44, s[20:23], 0x34 ; C2161534 s_buffer_load_dword s45, s[20:23], 0xd9 ; C21695D9 s_buffer_load_dword s46, s[20:23], 0xda ; C21715DA s_buffer_load_dword s47, s[20:23], 0xdb ; C21795DB s_buffer_load_dword s48, s[20:23], 0xdc ; C21815DC s_buffer_load_dword s49, s[20:23], 0xdd ; C21895DD s_buffer_load_dword s50, s[20:23], 0xc5 ; C21915C5 s_buffer_load_dword s51, s[20:23], 0xc6 ; C21995C6 s_buffer_load_dword s52, s[20:23], 0xc7 ; C21A15C7 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s0 ; 7E000200 s_buffer_load_dword s0, s[20:23], 0xc8 ; C20015C8 s_buffer_load_dword s53, s[20:23], 0xd8 ; C21A95D8 s_buffer_load_dword s54, s[20:23], 0xde ; C21B15DE s_buffer_load_dword s55, s[20:23], 0xdf ; C21B95DF s_buffer_load_dword s56, s[20:23], 0xe0 ; C21C15E0 s_buffer_load_dword s57, s[20:23], 0xe1 ; C21C95E1 s_buffer_load_dword s58, s[20:23], 0xe2 ; C21D15E2 s_buffer_load_dword s59, s[20:23], 0x22 ; C21D9522 s_buffer_load_dword s1, s[20:23], 0x23 ; C2009523 s_buffer_load_dword s60, s[20:23], 0x24 ; C21E1524 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_gt_f32_e64 s[2:3], |s0|, -|s0| ; D0080302 40000000 s_waitcnt vmcnt(0) ; BF8C0770 v_cndmask_b32_e64 v16, 0, 1.0, s[2:3] ; D2000010 0009E480 s_buffer_load_dword s61, s[20:23], 0x25 ; C21E9525 s_buffer_load_dword s62, s[20:23], 0x26 ; C21F1526 s_buffer_load_dword s63, s[20:23], 0xc0 ; C21F95C0 s_buffer_load_dword s64, s[20:23], 0xc1 ; C22015C1 s_buffer_load_dword s65, s[20:23], 0xc2 ; C22095C2 s_buffer_load_dword s66, s[20:23], 0xc3 ; C22115C3 v_mov_b32_e32 v17, s1 ; 7E220201 s_buffer_load_dword s67, s[20:23], 0xc4 ; C22195C4 s_buffer_load_dword s1, s[20:23], 0x0 ; C2009500 s_buffer_load_dword s6, s[20:23], 0x1 ; C2031501 s_buffer_load_dword s0, s[20:23], 0x2 ; C2001502 s_buffer_load_dword s68, s[20:23], 0x20 ; C2221520 s_buffer_load_dword s69, s[20:23], 0x21 ; C2229521 s_buffer_load_dword s70, s[20:23], 0xe3 ; C22315E3 s_buffer_load_dword s12, s[20:23], 0xe4 ; C20615E4 s_buffer_load_dword s10, s[20:23], 0xe5 ; C20515E5 s_buffer_load_dword s2, s[20:23], 0x27 ; C2011527 s_buffer_load_dword s71, s[20:23], 0x28 ; C2239528 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v18, s1 ; 7E240201 s_buffer_load_dword s72, s[20:23], 0x29 ; C2241529 s_buffer_load_dword s28, s[20:23], 0x2a ; C20E152A s_buffer_load_dword s3, s[20:23], 0x2b ; C201952B s_buffer_load_dword s13, s[40:43], 0x0 ; C206A900 s_buffer_load_dword s23, s[40:43], 0x1 ; C20BA901 s_buffer_load_dword s4, s[40:43], 0x2 ; C2022902 s_buffer_load_dword s1, s[40:43], 0x3 ; C200A903 s_buffer_load_dword s16, s[40:43], 0x4 ; C2082904 v_mov_b32_e32 v19, s2 ; 7E260202 s_buffer_load_dword s26, s[40:43], 0x5 ; C20D2905 s_buffer_load_dword s8, s[40:43], 0x6 ; C2042906 s_buffer_load_dword s2, s[40:43], 0x7 ; C2012907 s_buffer_load_dword s19, s[40:43], 0x8 ; C209A908 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v20, s3 ; 7E280203 s_buffer_load_dword s30, s[40:43], 0x9 ; C20F2909 s_buffer_load_dword s11, s[40:43], 0xa ; C205A90A s_buffer_load_dword s3, s[40:43], 0xb ; C201A90B s_buffer_load_dword s22, s[40:43], 0xc ; C20B290C s_buffer_load_dword s32, s[40:43], 0xd ; C210290D s_buffer_load_dword s15, s[40:43], 0xe ; C207A90E s_buffer_load_dword s5, s[40:43], 0xf ; C202A90F s_buffer_load_dword s25, s[40:43], 0x10 ; C20CA910 s_buffer_load_dword s33, s[40:43], 0x11 ; C210A911 s_buffer_load_dword s18, s[40:43], 0x12 ; C2092912 s_buffer_load_dword s7, s[40:43], 0x13 ; C203A913 s_buffer_load_dword s27, s[40:43], 0x14 ; C20DA914 s_buffer_load_dword s34, s[40:43], 0x15 ; C2112915 s_buffer_load_dword s20, s[40:43], 0x16 ; C20A2916 s_buffer_load_dword s9, s[40:43], 0x17 ; C204A917 s_buffer_load_dword s29, s[40:43], 0x18 ; C20EA918 s_buffer_load_dword s35, s[40:43], 0x19 ; C211A919 s_buffer_load_dword s21, s[40:43], 0x1a ; C20AA91A s_buffer_load_dword s14, s[40:43], 0x1b ; C207291B s_buffer_load_dword s31, s[40:43], 0x1c ; C20FA91C s_buffer_load_dword s36, s[40:43], 0x1d ; C212291D s_buffer_load_dword s24, s[40:43], 0x1e ; C20C291E s_buffer_load_dword s17, s[40:43], 0x1f ; C208A91F v_add_f32_e32 v6, v6, v6 ; 060C0D06 v_mul_f32_e32 v21, s64, v10 ; 102A1440 v_mul_f32_e32 v10, s50, v10 ; 10141432 v_mad_f32 v2, s44, v13, v2 ; D2820002 040A1A2C v_mad_f32 v3, s44, v14, v3 ; D2820003 040E1C2C v_mad_f32 v4, s44, v15, v4 ; D2820004 04121E2C v_mac_f32_e32 v21, s63, v9 ; 3E2A123F v_mac_f32_e32 v10, s67, v9 ; 3E141243 v_mul_f32_e32 v9, s45, v3 ; 1012062D v_mac_f32_e32 v21, s65, v11 ; 3E2A1641 v_mac_f32_e32 v10, s51, v11 ; 3E141633 v_mac_f32_e32 v21, s66, v12 ; 3E2A1842 v_mac_f32_e32 v10, s52, v12 ; 3E141834 v_mul_f32_e32 v11, s49, v3 ; 10160631 v_mul_f32_e32 v3, s57, v3 ; 10060639 v_mac_f32_e32 v9, s53, v2 ; 3E120435 v_mac_f32_e32 v11, s48, v2 ; 3E160430 v_mac_f32_e32 v3, s56, v2 ; 3E060438 v_mac_f32_e32 v9, s46, v4 ; 3E12082E v_mac_f32_e32 v11, s54, v4 ; 3E160836 v_mac_f32_e32 v3, s58, v4 ; 3E06083A v_mac_f32_e32 v9, s47, v5 ; 3E120A2F v_mac_f32_e32 v11, s55, v5 ; 3E160A37 v_mac_f32_e32 v3, s70, v5 ; 3E060A46 v_mul_f32_e32 v2, s69, v11 ; 10041645 v_mac_f32_e32 v2, s68, v9 ; 3E041244 v_mul_f32_e32 v4, s61, v11 ; 1008163D v_mac_f32_e32 v4, s60, v9 ; 3E08123C v_mul_f32_e32 v5, s38, v11 ; 100A1626 v_mul_f32_e32 v12, s72, v11 ; 10181648 v_mac_f32_e32 v5, s37, v9 ; 3E0A1225 v_mac_f32_e32 v12, s71, v9 ; 3E181247 v_mac_f32_e32 v2, s59, v3 ; 3E04063B v_mac_f32_e32 v4, s62, v3 ; 3E08063E v_mac_f32_e32 v5, s39, v3 ; 3E0A0627 v_add_f32_e32 v7, v7, v7 ; 060E0F07 v_add_f32_e32 v8, v8, v8 ; 06101108 v_log_f32_e32 v6, v6 ; 7E0C4F06 v_log_f32_e32 v7, v7 ; 7E0E4F07 v_log_f32_e32 v8, v8 ; 7E104F08 exp 15, 32, 0, 0, 0, v21, v10, v18, v18 ; F800020F 12120A15 v_mul_f32_e32 v6, v1, v6 ; 100C0D01 v_mul_f32_e32 v7, v1, v7 ; 100E0F01 v_mul_f32_e32 v1, v1, v8 ; 10021101 v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_exp_f32_e32 v7, v7 ; 7E0E4B07 v_exp_f32_e32 v1, v1 ; 7E024B01 v_mul_f32_e32 v6, v6, v16 ; 100C2106 v_mul_f32_e32 v7, v7, v16 ; 100E2107 v_mac_f32_e32 v12, s28, v3 ; 3E18061C v_mac_f32_e32 v2, s6, v17 ; 3E042206 v_mac_f32_e32 v4, s6, v19 ; 3E082606 v_mac_f32_e32 v5, s6, v0 ; 3E0A0006 v_mac_f32_e32 v12, s6, v20 ; 3E182806 v_mad_f32 v0, s12, v5, v2 ; D2820000 040A0A0C v_mad_f32 v8, s10, v5, -v4 ; D2820008 84120A0A s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v10, s23, v4 ; 10140817 v_mul_f32_e32 v13, s26, v4 ; 101A081A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v14, s30, v4 ; 101C081E v_mul_f32_e32 v15, s32, v4 ; 101E0820 v_mul_f32_e32 v17, s33, v4 ; 10220821 v_mul_f32_e32 v19, s34, v4 ; 10260822 v_mul_f32_e32 v20, s35, v4 ; 10280823 v_mul_f32_e32 v4, s36, v4 ; 10080824 v_mac_f32_e32 v10, s13, v2 ; 3E14040D v_mac_f32_e32 v13, s16, v2 ; 3E1A0410 v_mac_f32_e32 v14, s19, v2 ; 3E1C0413 v_mac_f32_e32 v15, s22, v2 ; 3E1E0416 v_mac_f32_e32 v17, s25, v2 ; 3E220419 v_mac_f32_e32 v19, s27, v2 ; 3E26041B v_mac_f32_e32 v20, s29, v2 ; 3E28041D v_mac_f32_e32 v4, s31, v2 ; 3E08041F v_mac_f32_e32 v10, s4, v12 ; 3E141804 v_mac_f32_e32 v13, s8, v12 ; 3E1A1808 v_mac_f32_e32 v14, s11, v12 ; 3E1C180B v_mac_f32_e32 v15, s15, v12 ; 3E1E180F v_mac_f32_e32 v17, s18, v12 ; 3E221812 v_mac_f32_e32 v19, s20, v12 ; 3E261814 v_mac_f32_e32 v20, s21, v12 ; 3E281815 v_mac_f32_e32 v4, s24, v12 ; 3E081818 v_mac_f32_e32 v10, s1, v5 ; 3E140A01 v_mac_f32_e32 v13, s2, v5 ; 3E1A0A02 v_mac_f32_e32 v14, s3, v5 ; 3E1C0A03 v_mac_f32_e32 v15, s5, v5 ; 3E1E0A05 v_mac_f32_e32 v17, s7, v5 ; 3E220A07 v_mac_f32_e32 v19, s9, v5 ; 3E260A09 v_mac_f32_e32 v20, s14, v5 ; 3E280A0E v_mac_f32_e32 v4, s17, v5 ; 3E080A11 v_mad_f32 v2, v12, s0, -v5 ; D2820002 8414010C v_mul_f32_e32 v1, v1, v16 ; 10022101 exp 15, 33, 0, 0, 0, v6, v7, v1, v18 ; F800021F 12010706 exp 15, 34, 0, 0, 0, v9, v11, v3, v12 ; F800022F 0C030B09 exp 15, 12, 0, 0, 0, v0, v8, v2, v5 ; F80000CF 05020800 exp 15, 13, 0, 0, 0, v10, v13, v14, v15 ; F80000DF 0F0E0D0A exp 15, 14, 0, 1, 0, v17, v19, v20, v4 ; F80008EF 04141311 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 80 VGPRS: 24 Code Size: 872 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[0..30] DCL TEMP[0..5], LOCAL IMM[0] FLT32 { -1.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: ADD TEMP[1].x, TEMP[0].wwww, IMM[0].xxxx 3: MAD TEMP[1].x, CONST[20].wwww, TEMP[1].xxxx, IMM[0].yyyy 4: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1].wwww 5: MAD TEMP[2].x, TEMP[1].xxxx, IN[1].wwww, -TEMP[1].xxxx 6: MAD TEMP[1].x, CONST[12].wwww, TEMP[2].xxxx, TEMP[1].xxxx 7: ABS TEMP[3].x, CONST[12].yyyy 8: MUL TEMP[4].x, CONST[29].wwww, IN[2].wwww 9: FSGE TEMP[5].x, -TEMP[3].xxxx, IMM[0].zzzz 10: UIF TEMP[5].xxxx :0 11: MOV TEMP[5].x, TEMP[1].xxxx 12: ELSE :0 13: MOV TEMP[5].x, TEMP[4].xxxx 14: ENDIF 15: MOV TEMP[3].w, TEMP[5].xxxx 16: ADD TEMP[4].x, TEMP[0].wwww, CONST[12].xxxx 17: MOV_SAT TEMP[4].x, TEMP[4].xxxx 18: ADD TEMP[1].xyz, IMM[0].xxxx, CONST[1].xyzz 19: MAD TEMP[1].xyz, TEMP[4].xxxx, TEMP[1].xyzz, IMM[0].yyyy 20: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xyzz 21: MUL TEMP[1].xyz, TEMP[0].xyzz, CONST[30].xxxx 22: MAD TEMP[0].xyz, TEMP[0].xyzz, -CONST[30].xxxx, CONST[29].xyzz 23: ADD TEMP[2].xyz, CONST[20].xyzz, -IN[2].xyzz 24: DP3 TEMP[2].x, TEMP[2].xyzz, TEMP[2].xyzz 25: SQRT TEMP[2].x, TEMP[2].xxxx 26: MAD TEMP[2].x, TEMP[2].xxxx, CONST[21].wwww, CONST[21].xxxx 27: MOV_SAT TEMP[2].x, TEMP[2].xxxx 28: MIN TEMP[2].x, TEMP[2].xxxx, CONST[21].zzzz 29: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx 30: MAD TEMP[3].xyz, TEMP[2].xxxx, TEMP[0].xyzz, TEMP[1].xyzz 31: MOV OUT[0], TEMP[3] 32: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 28) %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 192) %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 196) %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 204) %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 320) %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 324) %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 328) %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 332) %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 336) %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 344) %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 348) %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 464) %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 468) %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 472) %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 476) %43 = call float @llvm.SI.load.const(<16 x i8> %24, i32 480) %44 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %45 = load <8 x i32>, <8 x i32> addrspace(2)* %44, align 32, !tbaa !0 %46 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %47 = load <4 x i32>, <4 x i32> addrspace(2)* %46, align 16, !tbaa !0 %48 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %49 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %50 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %51 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) %52 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) %53 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) %54 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) %55 = bitcast float %48 to i32 %56 = bitcast float %49 to i32 %57 = insertelement <2 x i32> undef, i32 %55, i32 0 %58 = insertelement <2 x i32> %57, i32 %56, i32 1 %59 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %58, <8 x i32> %45, <4 x i32> %47, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %60 = extractelement <4 x float> %59, i32 0 %61 = extractelement <4 x float> %59, i32 1 %62 = extractelement <4 x float> %59, i32 2 %63 = extractelement <4 x float> %59, i32 3 %64 = fadd float %63, -1.000000e+00 %65 = fmul float %35, %64 %66 = fadd float %65, 1.000000e+00 %67 = fmul float %66, %28 %68 = fmul float %67, %50 %69 = fsub float %68, %67 %70 = fmul float %31, %69 %71 = fadd float %70, %67 %72 = call float @llvm.fabs.f32(float %30) %73 = fmul float %42, %54 %74 = fcmp ole float %72, -0.000000e+00 %. = select i1 %74, float %71, float %73 %75 = fadd float %63, %29 %76 = call float @llvm.AMDIL.clamp.(float %75, float 0.000000e+00, float 1.000000e+00) %77 = fadd float %25, -1.000000e+00 %78 = fadd float %26, -1.000000e+00 %79 = fadd float %27, -1.000000e+00 %80 = fmul float %76, %77 %81 = fadd float %80, 1.000000e+00 %82 = fmul float %76, %78 %83 = fadd float %82, 1.000000e+00 %84 = fmul float %76, %79 %85 = fadd float %84, 1.000000e+00 %86 = fmul float %60, %81 %87 = fmul float %61, %83 %88 = fmul float %62, %85 %89 = fmul float %86, %43 %90 = fmul float %87, %43 %91 = fmul float %88, %43 %92 = fmul float %43, %86 %93 = fsub float %39, %92 %94 = fmul float %43, %87 %95 = fsub float %40, %94 %96 = fmul float %43, %88 %97 = fsub float %41, %96 %98 = fsub float %32, %51 %99 = fsub float %33, %52 %100 = fsub float %34, %53 %101 = fmul float %98, %98 %102 = fmul float %99, %99 %103 = fadd float %102, %101 %104 = fmul float %100, %100 %105 = fadd float %103, %104 %106 = call float @llvm.sqrt.f32(float %105) %107 = fmul float %106, %38 %108 = fadd float %107, %36 %109 = call float @llvm.AMDIL.clamp.(float %108, float 0.000000e+00, float 1.000000e+00) %110 = call float @llvm.minnum.f32(float %109, float %37) %111 = fmul float %110, %110 %112 = fmul float %111, %93 %113 = fadd float %112, %89 %114 = fmul float %111, %95 %115 = fadd float %114, %90 %116 = fmul float %111, %97 %117 = fadd float %116, %91 %118 = call i32 @llvm.SI.packf16(float %113, float %115) %119 = bitcast i32 %118 to float %120 = call i32 @llvm.SI.packf16(float %117, float %.) %121 = bitcast i32 %120 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %119, float %121, float %119, float %121) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s11, s[0:3], 0x6 ; C2058106 s_buffer_load_dword s12, s[0:3], 0x7 ; C2060107 s_buffer_load_dword s13, s[0:3], 0x30 ; C2068130 s_buffer_load_dword s14, s[0:3], 0x31 ; C2070131 s_buffer_load_dword s15, s[0:3], 0x33 ; C2078133 s_buffer_load_dword s16, s[0:3], 0x50 ; C2080150 s_buffer_load_dword s17, s[0:3], 0x51 ; C2088151 s_buffer_load_dword s18, s[0:3], 0x52 ; C2090152 s_buffer_load_dword s19, s[0:3], 0x53 ; C2098153 s_buffer_load_dword s20, s[0:3], 0x54 ; C20A0154 s_buffer_load_dword s21, s[0:3], 0x56 ; C20A8156 s_buffer_load_dword s22, s[0:3], 0x57 ; C20B0157 s_buffer_load_dword s23, s[0:3], 0x74 ; C20B8174 s_buffer_load_dword s24, s[0:3], 0x75 ; C20C0175 s_buffer_load_dword s25, s[0:3], 0x76 ; C20C8176 s_buffer_load_dword s26, s[0:3], 0x77 ; C20D0177 s_buffer_load_dword s0, s[0:3], 0x78 ; C2000178 s_mov_b32 m0, s10 ; BEFC030A s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 3, 1, [m0] ; C8100700 s_load_dwordx4 s[4:7], s[4:5], 0x0 ; C0820500 v_interp_p2_f32 v4, [v4], v1, 3, 1, [m0] ; C8110701 v_interp_p1_f32 v5, v0, 0, 2, [m0] ; C8140800 v_interp_p2_f32 v5, [v5], v1, 0, 2, [m0] ; C8150801 v_interp_p1_f32 v6, v0, 1, 2, [m0] ; C8180900 v_interp_p2_f32 v6, [v6], v1, 1, 2, [m0] ; C8190901 v_interp_p1_f32 v7, v0, 2, 2, [m0] ; C81C0A00 v_interp_p2_f32 v7, [v7], v1, 2, 2, [m0] ; C81D0A01 v_interp_p1_f32 v0, v0, 3, 2, [m0] ; C8000B00 v_interp_p2_f32 v0, [v0], v1, 3, 2, [m0] ; C8010B01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[28:35], s[4:7] ; F0800F00 00270802 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v11, s19, -s19 ; D2820001 804C270B v_mad_f32 v1, v1, s12, s12 ; D2820001 00301901 v_mad_f32 v2, v1, v4, -v1 ; D2820002 84060901 v_mac_f32_e32 v1, s15, v2 ; 3E02040F v_mul_f32_e32 v0, s26, v0 ; 1000001A v_mov_b32_e32 v2, 0x80000000 ; 7E0402FF 80000000 v_cmp_le_f32_e64 vcc, |s14|, v2 ; D006016A 0002040E v_cndmask_b32_e32 v0, v0, v1 ; 00000300 v_add_f32_e32 v1, s13, v11 ; 0602160D v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_mad_f32 v2, s8, v1, -v1 ; D2820002 84060208 v_mad_f32 v3, s9, v1, -v1 ; D2820003 84060209 v_mad_f32 v1, s11, v1, -v1 ; D2820001 8406020B v_mad_f32 v2, v8, v2, v8 ; D2820002 04220508 v_mad_f32 v3, v9, v3, v9 ; D2820003 04260709 v_mad_f32 v1, v10, v1, v10 ; D2820001 042A030A v_mul_f32_e32 v4, s0, v2 ; 10080400 v_mul_f32_e32 v8, s0, v3 ; 10100600 v_mul_f32_e32 v9, s0, v1 ; 10120200 v_mov_b32_e32 v10, s23 ; 7E140217 v_mad_f32 v2, -v2, s0, v10 ; D2820002 24280102 v_mov_b32_e32 v10, s24 ; 7E140218 v_mad_f32 v3, -v3, s0, v10 ; D2820003 24280103 v_mov_b32_e32 v10, s25 ; 7E140219 v_mad_f32 v1, -v1, s0, v10 ; D2820001 24280101 v_sub_f32_e32 v5, s16, v5 ; 080A0A10 v_sub_f32_e32 v6, s17, v6 ; 080C0C11 v_sub_f32_e32 v7, s18, v7 ; 080E0E12 v_mul_f32_e32 v5, v5, v5 ; 100A0B05 v_mac_f32_e32 v5, v6, v6 ; 3E0A0D06 v_mac_f32_e32 v5, v7, v7 ; 3E0A0F07 v_sqrt_f32_e32 v5, v5 ; 7E0A6705 v_mov_b32_e32 v6, s20 ; 7E0C0214 v_mac_f32_e32 v6, s22, v5 ; 3E0C0A16 v_add_f32_e64 v5, 0, v6 clamp ; D2060805 00020C80 v_min_f32_e32 v5, s21, v5 ; 1E0A0A15 v_mul_f32_e32 v5, v5, v5 ; 100A0B05 v_mac_f32_e32 v4, v2, v5 ; 3E080B02 v_mac_f32_e32 v8, v3, v5 ; 3E100B03 v_mac_f32_e32 v9, v1, v5 ; 3E120B01 v_cvt_pkrtz_f16_f32_e32 v1, v4, v8 ; 5E021104 v_cvt_pkrtz_f16_f32_e32 v0, v9, v0 ; 5E000109 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 12 Code Size: 416 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D_MSAA, FLOAT DCL TEMP[0..2] IMM[0] FLT32 { 0.0000, 0.1250, 0.0000, 0.0000} IMM[1] UINT32 {0, 1, 2, 3} IMM[2] UINT32 {4, 5, 6, 7} 0: MOV TEMP[0], IMM[0].xxxx 1: F2U TEMP[1], IN[0] 2: MOV TEMP[1].w, IMM[1].xxxx 3: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 4: ADD TEMP[0], TEMP[0], TEMP[2] 5: MOV TEMP[1].w, IMM[1].yyyy 6: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 7: ADD TEMP[0], TEMP[0], TEMP[2] 8: MOV TEMP[1].w, IMM[1].zzzz 9: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 10: ADD TEMP[0], TEMP[0], TEMP[2] 11: MOV TEMP[1].w, IMM[1].wwww 12: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 13: ADD TEMP[0], TEMP[0], TEMP[2] 14: MOV TEMP[1].w, IMM[2].xxxx 15: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 16: ADD TEMP[0], TEMP[0], TEMP[2] 17: MOV TEMP[1].w, IMM[2].yyyy 18: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 19: ADD TEMP[0], TEMP[0], TEMP[2] 20: MOV TEMP[1].w, IMM[2].zzzz 21: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 22: ADD TEMP[0], TEMP[0], TEMP[2] 23: MOV TEMP[1].w, IMM[2].wwww 24: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 25: ADD TEMP[0], TEMP[0], TEMP[2] 26: MUL TEMP[0], TEMP[0], IMM[0].yyyy 27: MOV OUT[0], TEMP[0] 28: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 17 %26 = load <8 x i32>, <8 x i32> addrspace(2)* %25, align 32, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %12) %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %12) %29 = fptoui float %27 to i32 %30 = fptoui float %28 to i32 %31 = insertelement <4 x i32> undef, i32 %29, i32 0 %32 = insertelement <4 x i32> %31, i32 %30, i32 1 %33 = insertelement <4 x i32> %32, i32 0, i32 2 %34 = call <4 x i32> @llvm.SI.image.load.v4i32(<4 x i32> %33, <8 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %35 = extractelement <4 x i32> %34, i32 0 %36 = and i32 %35, 15 %37 = extractelement <8 x i32> %26, i32 1 %38 = icmp ne i32 %37, 0 %39 = select i1 %38, i32 %36, i32 0 %40 = insertelement <4 x i32> undef, i32 %29, i32 0 %41 = insertelement <4 x i32> %40, i32 %30, i32 1 %42 = insertelement <4 x i32> %41, i32 %39, i32 2 %43 = call <4 x i32> @llvm.SI.image.load.v4i32(<4 x i32> %42, <8 x i32> %24, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %bc = bitcast <4 x i32> %43 to <4 x float> %44 = extractelement <4 x float> %bc, i32 0 %bc12 = bitcast <4 x i32> %43 to <4 x float> %45 = extractelement <4 x float> %bc12, i32 1 %bc13 = bitcast <4 x i32> %43 to <4 x float> %46 = extractelement <4 x float> %bc13, i32 2 %bc14 = bitcast <4 x i32> %43 to <4 x float> %47 = extractelement <4 x float> %bc14, i32 3 %48 = fadd float %44, 0.000000e+00 %49 = fadd float %45, 0.000000e+00 %50 = fadd float %46, 0.000000e+00 %51 = fadd float %47, 0.000000e+00 %52 = insertelement <4 x i32> undef, i32 %29, i32 0 %53 = insertelement <4 x i32> %52, i32 %30, i32 1 %54 = insertelement <4 x i32> %53, i32 0, i32 2 %55 = call <4 x i32> @llvm.SI.image.load.v4i32(<4 x i32> %54, <8 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %56 = extractelement <4 x i32> %55, i32 0 %57 = lshr i32 %56, 4 %58 = and i32 %57, 15 %59 = extractelement <8 x i32> %26, i32 1 %60 = icmp ne i32 %59, 0 %61 = select i1 %60, i32 %58, i32 1 %62 = insertelement <4 x i32> undef, i32 %29, i32 0 %63 = insertelement <4 x i32> %62, i32 %30, i32 1 %64 = insertelement <4 x i32> %63, i32 %61, i32 2 %65 = call <4 x i32> @llvm.SI.image.load.v4i32(<4 x i32> %64, <8 x i32> %24, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %bc15 = bitcast <4 x i32> %65 to <4 x float> %66 = extractelement <4 x float> %bc15, i32 0 %bc16 = bitcast <4 x i32> %65 to <4 x float> %67 = extractelement <4 x float> %bc16, i32 1 %bc17 = bitcast <4 x i32> %65 to <4 x float> %68 = extractelement <4 x float> %bc17, i32 2 %bc18 = bitcast <4 x i32> %65 to <4 x float> %69 = extractelement <4 x float> %bc18, i32 3 %70 = fadd float %48, %66 %71 = fadd float %49, %67 %72 = fadd float %50, %68 %73 = fadd float %51, %69 %74 = insertelement <4 x i32> undef, i32 %29, i32 0 %75 = insertelement <4 x i32> %74, i32 %30, i32 1 %76 = insertelement <4 x i32> %75, i32 0, i32 2 %77 = call <4 x i32> @llvm.SI.image.load.v4i32(<4 x i32> %76, <8 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %78 = extractelement <4 x i32> %77, i32 0 %79 = lshr i32 %78, 8 %80 = and i32 %79, 15 %81 = extractelement <8 x i32> %26, i32 1 %82 = icmp ne i32 %81, 0 %83 = select i1 %82, i32 %80, i32 2 %84 = insertelement <4 x i32> undef, i32 %29, i32 0 %85 = insertelement <4 x i32> %84, i32 %30, i32 1 %86 = insertelement <4 x i32> %85, i32 %83, i32 2 %87 = call <4 x i32> @llvm.SI.image.load.v4i32(<4 x i32> %86, <8 x i32> %24, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %bc19 = bitcast <4 x i32> %87 to <4 x float> %88 = extractelement <4 x float> %bc19, i32 0 %bc20 = bitcast <4 x i32> %87 to <4 x float> %89 = extractelement <4 x float> %bc20, i32 1 %bc21 = bitcast <4 x i32> %87 to <4 x float> %90 = extractelement <4 x float> %bc21, i32 2 %bc22 = bitcast <4 x i32> %87 to <4 x float> %91 = extractelement <4 x float> %bc22, i32 3 %92 = fadd float %70, %88 %93 = fadd float %71, %89 %94 = fadd float %72, %90 %95 = fadd float %73, %91 %96 = insertelement <4 x i32> undef, i32 %29, i32 0 %97 = insertelement <4 x i32> %96, i32 %30, i32 1 %98 = insertelement <4 x i32> %97, i32 0, i32 2 %99 = call <4 x i32> @llvm.SI.image.load.v4i32(<4 x i32> %98, <8 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %100 = extractelement <4 x i32> %99, i32 0 %101 = lshr i32 %100, 12 %102 = and i32 %101, 15 %103 = extractelement <8 x i32> %26, i32 1 %104 = icmp ne i32 %103, 0 %105 = select i1 %104, i32 %102, i32 3 %106 = insertelement <4 x i32> undef, i32 %29, i32 0 %107 = insertelement <4 x i32> %106, i32 %30, i32 1 %108 = insertelement <4 x i32> %107, i32 %105, i32 2 %109 = call <4 x i32> @llvm.SI.image.load.v4i32(<4 x i32> %108, <8 x i32> %24, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %bc23 = bitcast <4 x i32> %109 to <4 x float> %110 = extractelement <4 x float> %bc23, i32 0 %bc24 = bitcast <4 x i32> %109 to <4 x float> %111 = extractelement <4 x float> %bc24, i32 1 %bc25 = bitcast <4 x i32> %109 to <4 x float> %112 = extractelement <4 x float> %bc25, i32 2 %bc26 = bitcast <4 x i32> %109 to <4 x float> %113 = extractelement <4 x float> %bc26, i32 3 %114 = fadd float %92, %110 %115 = fadd float %93, %111 %116 = fadd float %94, %112 %117 = fadd float %95, %113 %118 = insertelement <4 x i32> undef, i32 %29, i32 0 %119 = insertelement <4 x i32> %118, i32 %30, i32 1 %120 = insertelement <4 x i32> %119, i32 0, i32 2 %121 = call <4 x i32> @llvm.SI.image.load.v4i32(<4 x i32> %120, <8 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %122 = extractelement <4 x i32> %121, i32 0 %123 = lshr i32 %122, 16 %124 = and i32 %123, 15 %125 = extractelement <8 x i32> %26, i32 1 %126 = icmp ne i32 %125, 0 %127 = select i1 %126, i32 %124, i32 4 %128 = insertelement <4 x i32> undef, i32 %29, i32 0 %129 = insertelement <4 x i32> %128, i32 %30, i32 1 %130 = insertelement <4 x i32> %129, i32 %127, i32 2 %131 = call <4 x i32> @llvm.SI.image.load.v4i32(<4 x i32> %130, <8 x i32> %24, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %bc27 = bitcast <4 x i32> %131 to <4 x float> %132 = extractelement <4 x float> %bc27, i32 0 %bc28 = bitcast <4 x i32> %131 to <4 x float> %133 = extractelement <4 x float> %bc28, i32 1 %bc29 = bitcast <4 x i32> %131 to <4 x float> %134 = extractelement <4 x float> %bc29, i32 2 %bc30 = bitcast <4 x i32> %131 to <4 x float> %135 = extractelement <4 x float> %bc30, i32 3 %136 = fadd float %114, %132 %137 = fadd float %115, %133 %138 = fadd float %116, %134 %139 = fadd float %117, %135 %140 = insertelement <4 x i32> undef, i32 %29, i32 0 %141 = insertelement <4 x i32> %140, i32 %30, i32 1 %142 = insertelement <4 x i32> %141, i32 0, i32 2 %143 = call <4 x i32> @llvm.SI.image.load.v4i32(<4 x i32> %142, <8 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %144 = extractelement <4 x i32> %143, i32 0 %145 = lshr i32 %144, 20 %146 = and i32 %145, 15 %147 = extractelement <8 x i32> %26, i32 1 %148 = icmp ne i32 %147, 0 %149 = select i1 %148, i32 %146, i32 5 %150 = insertelement <4 x i32> undef, i32 %29, i32 0 %151 = insertelement <4 x i32> %150, i32 %30, i32 1 %152 = insertelement <4 x i32> %151, i32 %149, i32 2 %153 = call <4 x i32> @llvm.SI.image.load.v4i32(<4 x i32> %152, <8 x i32> %24, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %bc31 = bitcast <4 x i32> %153 to <4 x float> %154 = extractelement <4 x float> %bc31, i32 0 %bc32 = bitcast <4 x i32> %153 to <4 x float> %155 = extractelement <4 x float> %bc32, i32 1 %bc33 = bitcast <4 x i32> %153 to <4 x float> %156 = extractelement <4 x float> %bc33, i32 2 %bc34 = bitcast <4 x i32> %153 to <4 x float> %157 = extractelement <4 x float> %bc34, i32 3 %158 = fadd float %136, %154 %159 = fadd float %137, %155 %160 = fadd float %138, %156 %161 = fadd float %139, %157 %162 = insertelement <4 x i32> undef, i32 %29, i32 0 %163 = insertelement <4 x i32> %162, i32 %30, i32 1 %164 = insertelement <4 x i32> %163, i32 0, i32 2 %165 = call <4 x i32> @llvm.SI.image.load.v4i32(<4 x i32> %164, <8 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %166 = extractelement <4 x i32> %165, i32 0 %167 = lshr i32 %166, 24 %168 = and i32 %167, 15 %169 = extractelement <8 x i32> %26, i32 1 %170 = icmp ne i32 %169, 0 %171 = select i1 %170, i32 %168, i32 6 %172 = insertelement <4 x i32> undef, i32 %29, i32 0 %173 = insertelement <4 x i32> %172, i32 %30, i32 1 %174 = insertelement <4 x i32> %173, i32 %171, i32 2 %175 = call <4 x i32> @llvm.SI.image.load.v4i32(<4 x i32> %174, <8 x i32> %24, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %bc35 = bitcast <4 x i32> %175 to <4 x float> %176 = extractelement <4 x float> %bc35, i32 0 %bc36 = bitcast <4 x i32> %175 to <4 x float> %177 = extractelement <4 x float> %bc36, i32 1 %bc37 = bitcast <4 x i32> %175 to <4 x float> %178 = extractelement <4 x float> %bc37, i32 2 %bc38 = bitcast <4 x i32> %175 to <4 x float> %179 = extractelement <4 x float> %bc38, i32 3 %180 = fadd float %158, %176 %181 = fadd float %159, %177 %182 = fadd float %160, %178 %183 = fadd float %161, %179 %184 = insertelement <4 x i32> undef, i32 %29, i32 0 %185 = insertelement <4 x i32> %184, i32 %30, i32 1 %186 = insertelement <4 x i32> %185, i32 0, i32 2 %187 = call <4 x i32> @llvm.SI.image.load.v4i32(<4 x i32> %186, <8 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %188 = extractelement <4 x i32> %187, i32 0 %189 = lshr i32 %188, 28 %190 = extractelement <8 x i32> %26, i32 1 %191 = icmp ne i32 %190, 0 %192 = select i1 %191, i32 %189, i32 7 %193 = insertelement <4 x i32> undef, i32 %29, i32 0 %194 = insertelement <4 x i32> %193, i32 %30, i32 1 %195 = insertelement <4 x i32> %194, i32 %192, i32 2 %196 = call <4 x i32> @llvm.SI.image.load.v4i32(<4 x i32> %195, <8 x i32> %24, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %bc39 = bitcast <4 x i32> %196 to <4 x float> %197 = extractelement <4 x float> %bc39, i32 0 %bc40 = bitcast <4 x i32> %196 to <4 x float> %198 = extractelement <4 x float> %bc40, i32 1 %bc41 = bitcast <4 x i32> %196 to <4 x float> %199 = extractelement <4 x float> %bc41, i32 2 %bc42 = bitcast <4 x i32> %196 to <4 x float> %200 = extractelement <4 x float> %bc42, i32 3 %201 = fadd float %180, %197 %202 = fadd float %181, %198 %203 = fadd float %182, %199 %204 = fadd float %183, %200 %205 = fmul float %201, 1.250000e-01 %206 = fmul float %202, 1.250000e-01 %207 = fmul float %203, 1.250000e-01 %208 = fmul float %204, 1.250000e-01 %209 = call i32 @llvm.SI.packf16(float %205, float %206) %210 = bitcast i32 %209 to float %211 = call i32 @llvm.SI.packf16(float %207, float %208) %212 = bitcast i32 %211 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %210, float %212, float %210, float %212) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x i32> @llvm.SI.image.load.v4i32(<4 x i32>, <8 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx8 s[12:19], s[6:7], 0x88 ; C0C60788 s_mov_b32 m0, s10 ; BEFC030A s_load_dwordx8 s[0:7], s[6:7], 0x0 ; C0C00700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v0, v0, 1, 0, [m0] ; C8000100 v_interp_p2_f32 v0, [v0], v1, 1, 0, [m0] ; C8010101 v_cvt_u32_f32_e32 v1, v2 ; 7E020F02 v_cvt_u32_f32_e32 v2, v0 ; 7E040F00 v_mov_b32_e32 v3, 0 ; 7E060280 s_waitcnt lgkmcnt(0) ; BF8C007F image_load v0, 1, 0, 0, 0, 0, 0, 0, 0, v[1:4], s[12:19] ; F0000100 00030001 s_waitcnt vmcnt(0) ; BF8C0770 v_and_b32_e32 v3, 15, v0 ; 3606008F v_cmp_ne_i32_e64 vcc, 0, s13 ; D10A006A 00001A80 v_cndmask_b32_e32 v3, 0, v3 ; 00060680 image_load v[3:6], 15, 0, 0, 0, 0, 0, 0, 0, v[1:4], s[0:7] ; F0000F00 00000301 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v7, 0, v3 ; 060E0680 v_add_f32_e32 v4, 0, v4 ; 06080880 v_add_f32_e32 v5, 0, v5 ; 060A0A80 v_add_f32_e32 v6, 0, v6 ; 060C0C80 v_bfe_u32 v3, v0, 4, 4 ; D2900003 02110900 v_cndmask_b32_e32 v3, 1, v3 ; 00060681 image_load v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[1:4], s[0:7] ; F0000F00 00000801 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v7, v8, v7 ; 060E0F08 v_add_f32_e32 v4, v9, v4 ; 06080909 v_add_f32_e32 v5, v10, v5 ; 060A0B0A v_add_f32_e32 v6, v11, v6 ; 060C0D0B v_bfe_u32 v3, v0, 8, 4 ; D2900003 02111100 v_cndmask_b32_e32 v3, 2, v3 ; 00060682 image_load v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[1:4], s[0:7] ; F0000F00 00000801 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v7, v8, v7 ; 060E0F08 v_add_f32_e32 v4, v9, v4 ; 06080909 v_add_f32_e32 v5, v10, v5 ; 060A0B0A v_add_f32_e32 v6, v11, v6 ; 060C0D0B v_bfe_u32 v3, v0, 12, 4 ; D2900003 02111900 v_cndmask_b32_e32 v3, 3, v3 ; 00060683 image_load v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[1:4], s[0:7] ; F0000F00 00000801 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v7, v8, v7 ; 060E0F08 v_add_f32_e32 v4, v9, v4 ; 06080909 v_add_f32_e32 v5, v10, v5 ; 060A0B0A v_add_f32_e32 v6, v11, v6 ; 060C0D0B v_bfe_u32 v3, v0, 16, 4 ; D2900003 02112100 v_cndmask_b32_e32 v3, 4, v3 ; 00060684 image_load v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[1:4], s[0:7] ; F0000F00 00000801 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v7, v8, v7 ; 060E0F08 v_add_f32_e32 v4, v9, v4 ; 06080909 v_add_f32_e32 v5, v10, v5 ; 060A0B0A v_add_f32_e32 v6, v11, v6 ; 060C0D0B v_bfe_u32 v3, v0, 20, 4 ; D2900003 02112900 v_cndmask_b32_e32 v3, 5, v3 ; 00060685 image_load v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[1:4], s[0:7] ; F0000F00 00000801 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v7, v8, v7 ; 060E0F08 v_add_f32_e32 v4, v9, v4 ; 06080909 v_add_f32_e32 v5, v10, v5 ; 060A0B0A v_add_f32_e32 v6, v11, v6 ; 060C0D0B v_bfe_u32 v3, v0, 24, 4 ; D2900003 02113100 v_cndmask_b32_e32 v3, 6, v3 ; 00060686 image_load v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[1:4], s[0:7] ; F0000F00 00000801 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v7, v8, v7 ; 060E0F08 v_add_f32_e32 v4, v9, v4 ; 06080909 v_add_f32_e32 v5, v10, v5 ; 060A0B0A v_add_f32_e32 v6, v11, v6 ; 060C0D0B v_lshrrev_b32_e32 v0, 28, v0 ; 2C00009C v_cndmask_b32_e32 v3, 7, v0 ; 00060087 image_load v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[1:4], s[0:7] ; F0000F00 00000001 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v0, v0, v7 ; 06000F00 v_add_f32_e32 v1, v1, v4 ; 06020901 v_add_f32_e32 v2, v2, v5 ; 06040B02 v_add_f32_e32 v3, v3, v6 ; 06060D03 v_mov_b32_e32 v4, 0x3e000000 ; 7E0802FF 3E000000 v_mul_f32_e32 v0, v4, v0 ; 10000104 v_mul_f32_e32 v1, v4, v1 ; 10020304 v_mul_f32_e32 v2, v4, v2 ; 10040504 v_mul_f32_e32 v3, v4, v3 ; 10060704 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 420 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x0 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %6) %24 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %6) %25 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %6) %26 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %6) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %23, float %24, float %25, float %26) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s10 ; BEFC030A v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002 v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 v_interp_mov_f32 v2, P0, 2, 0, [m0] ; C80A0202 v_interp_mov_f32 v3, P0, 3, 0, [m0] ; C80E0302 exp 15, 0, 0, 1, 1, v0, v1, v2, v3 ; F800180F 03020100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 4 Code Size: 32 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x0 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL OUT[1], POSITION DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV OUT[0], IMM[0].xxxy 1: TEX OUT[1].z, IN[0], SAMP[0], 2D 2: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %12) %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %12) %29 = bitcast float %27 to i32 %30 = bitcast float %28 to i32 %31 = insertelement <2 x i32> undef, i32 %29, i32 0 %32 = insertelement <2 x i32> %31, i32 %30, i32 1 %33 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %32, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %34 = extractelement <4 x float> %33, i32 2 call void @llvm.SI.export(i32 1, i32 0, i32 0, i32 8, i32 0, float %34, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s10 ; BEFC030A v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_mov_b32_e32 v0, 0 ; 7E000280 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v1, 4, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800400 00030102 s_waitcnt vmcnt(0) ; BF8C0770 exp 1, 8, 0, 0, 0, v1, v0, v0, v0 ; F8000081 00000001 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 0, 0, 1, 1, v0, v0, v0, v1 ; F800180F 01000000 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 4 Code Size: 80 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 3D, FLOAT 0: TEX OUT[0], IN[0], SAMP[0], 3D 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %12) %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %12) %29 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %12) %30 = bitcast float %27 to i32 %31 = bitcast float %28 to i32 %32 = bitcast float %29 to i32 %33 = insertelement <4 x i32> undef, i32 %30, i32 0 %34 = insertelement <4 x i32> %33, i32 %31, i32 1 %35 = insertelement <4 x i32> %34, i32 %32, i32 2 %36 = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %35, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %37 = extractelement <4 x float> %36, i32 0 %38 = extractelement <4 x float> %36, i32 1 %39 = extractelement <4 x float> %36, i32 2 %40 = extractelement <4 x float> %36, i32 3 %41 = call i32 @llvm.SI.packf16(float %37, float %38) %42 = bitcast i32 %41 to float %43 = call i32 @llvm.SI.packf16(float %39, float %40) %44 = bitcast i32 %43 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %42, float %44, float %42, float %44) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s10 ; BEFC030A v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:5], s[12:19], s[0:3] ; F0800F00 00030002 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 76 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..3] DCL TEMP[0], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MOV OUT[1], IMM[0].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 12) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 16) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 20) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 24) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 28) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 32) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 36) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 40) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 44) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 48) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 52) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 56) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 60) %30 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = add i32 %5, %8 %33 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %31, i32 0, i32 %32) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 %38 = fmul float %14, %34 %39 = fmul float %15, %34 %40 = fmul float %16, %34 %41 = fmul float %17, %34 %42 = fmul float %18, %35 %43 = fadd float %42, %38 %44 = fmul float %19, %35 %45 = fadd float %44, %39 %46 = fmul float %20, %35 %47 = fadd float %46, %40 %48 = fmul float %21, %35 %49 = fadd float %48, %41 %50 = fmul float %22, %36 %51 = fadd float %50, %43 %52 = fmul float %23, %36 %53 = fadd float %52, %45 %54 = fmul float %24, %36 %55 = fadd float %54, %47 %56 = fmul float %25, %36 %57 = fadd float %56, %49 %58 = fmul float %26, %37 %59 = fadd float %58, %51 %60 = fmul float %27, %37 %61 = fadd float %60, %53 %62 = fmul float %28, %37 %63 = fadd float %62, %55 %64 = fmul float %29, %37 %65 = fadd float %64, %57 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %59, float %61, float %63, float %65) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v4, s4, v0 ; 10080004 v_mul_f32_e32 v5, s5, v0 ; 100A0005 v_mul_f32_e32 v6, s6, v0 ; 100C0006 v_mul_f32_e32 v0, s7, v0 ; 10000007 v_mac_f32_e32 v4, s8, v1 ; 3E080208 v_mac_f32_e32 v5, s9, v1 ; 3E0A0209 v_mac_f32_e32 v6, s10, v1 ; 3E0C020A v_mac_f32_e32 v0, s11, v1 ; 3E00020B v_mac_f32_e32 v4, s12, v2 ; 3E08040C v_mac_f32_e32 v5, s13, v2 ; 3E0A040D v_mac_f32_e32 v6, s14, v2 ; 3E0C040E v_mac_f32_e32 v0, s15, v2 ; 3E00040F v_mac_f32_e32 v4, s16, v3 ; 3E080610 v_mac_f32_e32 v5, s17, v3 ; 3E0A0611 v_mac_f32_e32 v6, s18, v3 ; 3E0C0612 v_mac_f32_e32 v0, s0, v3 ; 3E000600 v_mov_b32_e32 v1, 0 ; 7E020280 exp 15, 32, 0, 0, 0, v1, v1, v1, v1 ; F800020F 01010101 exp 15, 12, 0, 1, 0, v4, v5, v6, v0 ; F80008CF 00060504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 184 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 3D, FLOAT DCL TEMP[0], LOCAL 0: MOV TEMP[0].xyz, IN[0].xyzz 1: TEX TEMP[0], TEMP[0], SAMP[0], 3D 2: MOV OUT[0], TEMP[0] 3: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %29 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %30 = bitcast float %27 to i32 %31 = bitcast float %28 to i32 %32 = bitcast float %29 to i32 %33 = insertelement <4 x i32> undef, i32 %30, i32 0 %34 = insertelement <4 x i32> %33, i32 %31, i32 1 %35 = insertelement <4 x i32> %34, i32 %32, i32 2 %36 = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %35, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %37 = extractelement <4 x float> %36, i32 0 %38 = extractelement <4 x float> %36, i32 1 %39 = extractelement <4 x float> %36, i32 2 %40 = extractelement <4 x float> %36, i32 3 %41 = call i32 @llvm.SI.packf16(float %37, float %38) %42 = bitcast i32 %41 to float %43 = call i32 @llvm.SI.packf16(float %39, float %40) %44 = bitcast i32 %43 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %42, float %44, float %42, float %44) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s10 ; BEFC030A v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:5], s[12:19], s[0:3] ; F0800F00 00030002 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 76 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..3] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[1], CONST[0] 2: DP4 TEMP[1].x, IN[1], CONST[1] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[1], CONST[2] 5: DP4 TEMP[2].x, IN[1], CONST[3] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[1], IN[0] 9: MOV OUT[0], TEMP[0] 10: MOV OUT[2], TEMP[1] 11: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 12) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 16) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 20) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 24) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 28) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 32) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 36) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 40) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 44) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 48) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 52) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 56) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 60) %30 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = add i32 %5, %8 %33 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %31, i32 0, i32 %32) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 %38 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = add i32 %5, %8 %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %40) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = fmul float %42, %14 %47 = fmul float %43, %15 %48 = fadd float %46, %47 %49 = fmul float %44, %16 %50 = fadd float %48, %49 %51 = fmul float %45, %17 %52 = fadd float %50, %51 %53 = fmul float %42, %18 %54 = fmul float %43, %19 %55 = fadd float %53, %54 %56 = fmul float %44, %20 %57 = fadd float %55, %56 %58 = fmul float %45, %21 %59 = fadd float %57, %58 %60 = fmul float %42, %22 %61 = fmul float %43, %23 %62 = fadd float %60, %61 %63 = fmul float %44, %24 %64 = fadd float %62, %63 %65 = fmul float %45, %25 %66 = fadd float %64, %65 %67 = fmul float %42, %26 %68 = fmul float %43, %27 %69 = fadd float %67, %68 %70 = fmul float %44, %28 %71 = fadd float %69, %70 %72 = fmul float %45, %29 %73 = fadd float %71, %72 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %34, float %35, float %36, float %37) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %66, float %73, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %52, float %59, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A v_mov_b32_e32 v1, 1.0 ; 7E0202F2 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105 s_buffer_load_dword s9, s[0:3], 0x6 ; C2048106 s_buffer_load_dword s10, s[0:3], 0x7 ; C2050107 s_buffer_load_dword s11, s[0:3], 0x8 ; C2058108 s_buffer_load_dword s16, s[0:3], 0x9 ; C2080109 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_buffer_load_dword s5, s[0:3], 0xb ; C202810B s_buffer_load_dword s6, s[0:3], 0xc ; C203010C s_buffer_load_dword s7, s[0:3], 0xd ; C203810D s_buffer_load_dword s12, s[0:3], 0xe ; C206010E s_buffer_load_dword s13, s[0:3], 0x0 ; C2068100 s_buffer_load_dword s14, s[0:3], 0x1 ; C2070101 s_buffer_load_dword s15, s[0:3], 0x2 ; C2078102 s_buffer_load_dword s17, s[0:3], 0x3 ; C2088103 s_buffer_load_dword s18, s[0:3], 0x4 ; C2090104 s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(1) ; BF8C0771 exp 15, 32, 0, 0, 0, v2, v3, v4, v5 ; F800020F 05040302 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s14, v7 ; 10000E0E s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v2, s8, v7 ; 10040E08 v_mul_f32_e32 v3, s16, v7 ; 10060E10 v_mul_f32_e32 v4, s7, v7 ; 10080E07 v_mac_f32_e32 v0, s13, v6 ; 3E000C0D v_mac_f32_e32 v2, s18, v6 ; 3E040C12 v_mac_f32_e32 v3, s11, v6 ; 3E060C0B v_mac_f32_e32 v4, s6, v6 ; 3E080C06 v_mac_f32_e32 v0, s15, v8 ; 3E00100F v_mac_f32_e32 v2, s9, v8 ; 3E041009 v_mac_f32_e32 v3, s4, v8 ; 3E061004 v_mac_f32_e32 v4, s12, v8 ; 3E08100C v_mac_f32_e32 v0, s17, v9 ; 3E001211 v_mac_f32_e32 v2, s10, v9 ; 3E04120A v_mac_f32_e32 v3, s5, v9 ; 3E061205 v_mac_f32_e32 v4, s0, v9 ; 3E081200 exp 15, 33, 0, 0, 0, v3, v4, v0, v0 ; F800021F 00000403 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v3, 0 ; 7E060280 exp 15, 12, 0, 1, 0, v0, v2, v3, v1 ; F80008CF 01030200 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 216 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].xyz, TEMP[0].xyzx 3: MUL TEMP[0].x, TEMP[0].wwww, IN[0].wwww 4: MOV TEMP[1].w, TEMP[0].xxxx 5: MOV OUT[0], TEMP[1] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %28 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %29 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %30 = bitcast float %28 to i32 %31 = bitcast float %29 to i32 %32 = insertelement <2 x i32> undef, i32 %30, i32 0 %33 = insertelement <2 x i32> %32, i32 %31, i32 1 %34 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %33, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = extractelement <4 x float> %34, i32 2 %38 = extractelement <4 x float> %34, i32 3 %39 = fmul float %38, %27 %40 = call i32 @llvm.SI.packf16(float %35, float %36) %41 = bitcast i32 %40 to float %42 = call i32 @llvm.SI.packf16(float %37, float %39) %43 = bitcast i32 %42 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %41, float %43, float %41, float %43) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s10 ; BEFC030A v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[3:6], 15, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[12:19], s[0:3] ; F0800F00 00030303 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v2, v6 ; 10000D02 v_cvt_pkrtz_f16_f32_e32 v1, v3, v4 ; 5E020903 v_cvt_pkrtz_f16_f32_e32 v0, v5, v0 ; 5E000105 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 80 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL CONST[0..5] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[1], CONST[2] 2: DP4 TEMP[1].x, IN[1], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[1], CONST[4] 5: DP4 TEMP[2].x, IN[1], CONST[5] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[1], IN[0] 9: MOV OUT[2], CONST[0] 10: MOV OUT[3], CONST[1] 11: MOV OUT[0], TEMP[0] 12: MOV OUT[4], TEMP[1] 13: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 12) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 16) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 20) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 24) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 28) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 32) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 36) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 40) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 44) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 48) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 52) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 56) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 60) %30 = call float @llvm.SI.load.const(<16 x i8> %13, i32 64) %31 = call float @llvm.SI.load.const(<16 x i8> %13, i32 68) %32 = call float @llvm.SI.load.const(<16 x i8> %13, i32 72) %33 = call float @llvm.SI.load.const(<16 x i8> %13, i32 76) %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 80) %35 = call float @llvm.SI.load.const(<16 x i8> %13, i32 84) %36 = call float @llvm.SI.load.const(<16 x i8> %13, i32 88) %37 = call float @llvm.SI.load.const(<16 x i8> %13, i32 92) %38 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = add i32 %5, %8 %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %40) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 %48 = add i32 %5, %8 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = extractelement <4 x float> %49, i32 2 %53 = extractelement <4 x float> %49, i32 3 %54 = fmul float %50, %22 %55 = fmul float %51, %23 %56 = fadd float %54, %55 %57 = fmul float %52, %24 %58 = fadd float %56, %57 %59 = fmul float %53, %25 %60 = fadd float %58, %59 %61 = fmul float %50, %26 %62 = fmul float %51, %27 %63 = fadd float %61, %62 %64 = fmul float %52, %28 %65 = fadd float %63, %64 %66 = fmul float %53, %29 %67 = fadd float %65, %66 %68 = fmul float %50, %30 %69 = fmul float %51, %31 %70 = fadd float %68, %69 %71 = fmul float %52, %32 %72 = fadd float %70, %71 %73 = fmul float %53, %33 %74 = fadd float %72, %73 %75 = fmul float %50, %34 %76 = fmul float %51, %35 %77 = fadd float %75, %76 %78 = fmul float %52, %36 %79 = fadd float %77, %78 %80 = fmul float %53, %37 %81 = fadd float %79, %80 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %42, float %43, float %44, float %45) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %14, float %15, float %16, float %17) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %18, float %19, float %20, float %21) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %74, float %81, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %60, float %67, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A v_mov_b32_e32 v1, 1.0 ; 7E0202F2 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0xf ; C204010F s_buffer_load_dword s9, s[0:3], 0x10 ; C2048110 s_buffer_load_dword s10, s[0:3], 0x11 ; C2050111 s_buffer_load_dword s11, s[0:3], 0x12 ; C2058112 s_buffer_load_dword s16, s[0:3], 0x13 ; C2080113 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_buffer_load_dword s6, s[0:3], 0x7 ; C2030107 s_buffer_load_dword s7, s[0:3], 0x8 ; C2038108 s_buffer_load_dword s12, s[0:3], 0x9 ; C2060109 s_buffer_load_dword s13, s[0:3], 0xa ; C206810A s_buffer_load_dword s14, s[0:3], 0xb ; C207010B s_buffer_load_dword s15, s[0:3], 0x0 ; C2078100 s_buffer_load_dword s17, s[0:3], 0x1 ; C2088101 s_buffer_load_dword s18, s[0:3], 0x2 ; C2090102 s_buffer_load_dword s19, s[0:3], 0x3 ; C2098103 s_buffer_load_dword s20, s[0:3], 0x4 ; C20A0104 s_buffer_load_dword s21, s[0:3], 0xd ; C20A810D s_buffer_load_dword s22, s[0:3], 0xc ; C20B010C s_buffer_load_dword s23, s[0:3], 0x14 ; C20B8114 s_buffer_load_dword s24, s[0:3], 0x15 ; C20C0115 s_buffer_load_dword s25, s[0:3], 0xe ; C20C810E s_buffer_load_dword s26, s[0:3], 0x16 ; C20D0116 s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s15 ; 7E00020F v_mov_b32_e32 v10, s17 ; 7E140211 v_mov_b32_e32 v11, s18 ; 7E160212 s_waitcnt vmcnt(1) ; BF8C0771 exp 15, 32, 0, 0, 0, v2, v3, v4, v5 ; F800020F 05040302 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v2, s19 ; 7E040213 v_mov_b32_e32 v3, s20 ; 7E060214 v_mov_b32_e32 v4, s4 ; 7E080204 v_mov_b32_e32 v5, s5 ; 7E0A0205 exp 15, 33, 0, 0, 0, v0, v10, v11, v2 ; F800021F 020B0A00 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, s6 ; 7E000206 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, s12, v7 ; 10040E0C v_mul_f32_e32 v10, s21, v7 ; 10140E15 v_mul_f32_e32 v11, s10, v7 ; 10160E0A exp 15, 34, 0, 0, 0, v3, v4, v5, v0 ; F800022F 00050403 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s24, v7 ; 10000E18 v_mac_f32_e32 v2, s7, v6 ; 3E040C07 v_mac_f32_e32 v10, s22, v6 ; 3E140C16 v_mac_f32_e32 v11, s9, v6 ; 3E160C09 v_mac_f32_e32 v0, s23, v6 ; 3E000C17 v_mac_f32_e32 v2, s13, v8 ; 3E04100D v_mac_f32_e32 v10, s25, v8 ; 3E141019 v_mac_f32_e32 v11, s11, v8 ; 3E16100B v_mac_f32_e32 v0, s26, v8 ; 3E00101A v_mac_f32_e32 v2, s14, v9 ; 3E04120E v_mac_f32_e32 v10, s8, v9 ; 3E141208 v_mac_f32_e32 v11, s16, v9 ; 3E161210 v_mac_f32_e32 v0, s0, v9 ; 3E001200 exp 15, 35, 0, 0, 0, v11, v0, v0, v0 ; F800023F 0000000B s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 12, 0, 1, 0, v2, v10, v0, v1 ; F80008CF 01000A02 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 12 Code Size: 308 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[3].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MAD TEMP[0], TEMP[0], IN[2], IN[1] 3: MUL TEMP[1].x, TEMP[0].wwww, IN[0].wwww 4: MOV TEMP[0].w, TEMP[1].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %28 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %29 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %30 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) %31 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %32 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) %33 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) %34 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) %35 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) %36 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) %37 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) %38 = bitcast float %36 to i32 %39 = bitcast float %37 to i32 %40 = insertelement <2 x i32> undef, i32 %38, i32 0 %41 = insertelement <2 x i32> %40, i32 %39, i32 1 %42 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %41, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = extractelement <4 x float> %42, i32 2 %46 = extractelement <4 x float> %42, i32 3 %47 = fmul float %43, %32 %48 = fadd float %47, %28 %49 = fmul float %44, %33 %50 = fadd float %49, %29 %51 = fmul float %45, %34 %52 = fadd float %51, %30 %53 = fmul float %46, %35 %54 = fadd float %53, %31 %55 = fmul float %54, %27 %56 = call i32 @llvm.SI.packf16(float %48, float %50) %57 = bitcast i32 %56 to float %58 = call i32 @llvm.SI.packf16(float %52, float %55) %59 = bitcast i32 %58 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %57, float %59, float %57, float %59) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s10 ; BEFC030A s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 v_interp_p1_f32 v5, v0, 2, 1, [m0] ; C8140600 v_interp_p2_f32 v5, [v5], v1, 2, 1, [m0] ; C8150601 v_interp_p1_f32 v6, v0, 3, 1, [m0] ; C8180700 v_interp_p2_f32 v6, [v6], v1, 3, 1, [m0] ; C8190701 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v10, v0, 3, 2, [m0] ; C8280B00 v_interp_p2_f32 v10, [v10], v1, 3, 2, [m0] ; C8290B01 v_interp_p1_f32 v11, v0, 0, 3, [m0] ; C82C0C00 v_interp_p2_f32 v11, [v11], v1, 0, 3, [m0] ; C82D0C01 v_interp_p1_f32 v12, v0, 1, 3, [m0] ; C8300D00 v_interp_p2_f32 v12, [v12], v1, 1, 3, [m0] ; C8310D01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[11:14], 15, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[8:15], s[0:3] ; F0800F00 00020B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v3, v7, v11 ; 3E061707 v_mac_f32_e32 v4, v8, v12 ; 3E081908 v_mac_f32_e32 v5, v9, v13 ; 3E0A1B09 v_mac_f32_e32 v6, v10, v14 ; 3E0C1D0A v_mul_f32_e32 v0, v2, v6 ; 10000D02 v_cvt_pkrtz_f16_f32_e32 v1, v3, v4 ; 5E020903 v_cvt_pkrtz_f16_f32_e32 v0, v5, v0 ; 5E000105 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Code Size: 160 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL CONST[0..3] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[2], CONST[2] 2: DP4 TEMP[1].x, IN[2], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: MOV OUT[2], IN[1] 5: MOV OUT[3], CONST[0] 6: MOV OUT[1], IN[0] 7: MOV OUT[4], CONST[1] 8: MOV OUT[0], TEMP[0] 9: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 12) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 16) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 20) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 24) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 28) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 32) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 36) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 40) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 44) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 48) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 52) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 56) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 60) %30 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = add i32 %5, %8 %33 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %31, i32 0, i32 %32) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 %38 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = add i32 %5, %8 %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %40) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 %48 = add i32 %5, %8 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = extractelement <4 x float> %49, i32 2 %53 = extractelement <4 x float> %49, i32 3 %54 = fmul float %50, %22 %55 = fmul float %51, %23 %56 = fadd float %54, %55 %57 = fmul float %52, %24 %58 = fadd float %56, %57 %59 = fmul float %53, %25 %60 = fadd float %58, %59 %61 = fmul float %50, %26 %62 = fmul float %51, %27 %63 = fadd float %61, %62 %64 = fmul float %52, %28 %65 = fadd float %63, %64 %66 = fmul float %53, %29 %67 = fadd float %65, %66 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %34, float %35, float %36, float %37) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %42, float %43, float %44, float %45) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %14, float %15, float %16, float %17) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %18, float %19, float %20, float %21) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %60, float %67, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A v_mov_b32_e32 v1, 1.0 ; 7E0202F2 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105 s_buffer_load_dword s9, s[0:3], 0x6 ; C2048106 s_buffer_load_dword s10, s[0:3], 0x7 ; C2050107 s_buffer_load_dword s11, s[0:3], 0x8 ; C2058108 s_buffer_load_dword s20, s[0:3], 0x9 ; C20A0109 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 buffer_load_format_xyzw v[10:13], v0, s[16:19], 0 idxen ; E00C2000 80040A00 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_buffer_load_dword s5, s[0:3], 0xb ; C202810B s_buffer_load_dword s6, s[0:3], 0xc ; C203010C s_buffer_load_dword s7, s[0:3], 0xd ; C203810D s_buffer_load_dword s12, s[0:3], 0xe ; C206010E s_buffer_load_dword s13, s[0:3], 0x0 ; C2068100 s_buffer_load_dword s14, s[0:3], 0x1 ; C2070101 s_buffer_load_dword s15, s[0:3], 0x2 ; C2078102 s_buffer_load_dword s16, s[0:3], 0x3 ; C2080103 s_buffer_load_dword s17, s[0:3], 0x4 ; C2088104 s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s8 ; 7E000208 v_mov_b32_e32 v14, s13 ; 7E1C020D v_mov_b32_e32 v15, s14 ; 7E1E020E v_mov_b32_e32 v16, s15 ; 7E20020F v_mov_b32_e32 v17, s16 ; 7E220210 v_mov_b32_e32 v18, s17 ; 7E240211 s_waitcnt vmcnt(2) ; BF8C0772 exp 15, 32, 0, 0, 0, v2, v3, v4, v5 ; F800020F 05040302 s_waitcnt vmcnt(1) ; BF8C0771 exp 15, 33, 0, 0, 0, v6, v7, v8, v9 ; F800021F 09080706 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v2, s9 ; 7E040209 v_mov_b32_e32 v3, s10 ; 7E06020A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v4, s20, v11 ; 10081614 exp 15, 34, 0, 0, 0, v14, v15, v16, v17 ; F800022F 11100F0E v_mul_f32_e32 v5, s7, v11 ; 100A1607 v_mac_f32_e32 v4, s11, v10 ; 3E08140B v_mac_f32_e32 v5, s6, v10 ; 3E0A1406 v_mac_f32_e32 v4, s4, v12 ; 3E081804 v_mac_f32_e32 v5, s12, v12 ; 3E0A180C v_mac_f32_e32 v4, s5, v13 ; 3E081A05 v_mac_f32_e32 v5, s0, v13 ; 3E0A1A00 exp 15, 35, 0, 0, 0, v18, v0, v2, v3 ; F800023F 03020012 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 12, 0, 1, 0, v4, v5, v0, v1 ; F80008CF 01000504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 20 Code Size: 252 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0..1], LOCAL 0: MAD TEMP[0], IN[0], IN[3], IN[2] 1: MUL TEMP[1].x, TEMP[0].wwww, IN[1].wwww 2: MOV TEMP[0].w, TEMP[1].xxxx 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %24 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %25 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %26 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %27 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %28 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) %29 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) %30 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) %31 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) %32 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) %33 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) %34 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) %35 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) %36 = fmul float %23, %32 %37 = fadd float %36, %28 %38 = fmul float %24, %33 %39 = fadd float %38, %29 %40 = fmul float %25, %34 %41 = fadd float %40, %30 %42 = fmul float %26, %35 %43 = fadd float %42, %31 %44 = fmul float %43, %27 %45 = call i32 @llvm.SI.packf16(float %37, float %39) %46 = bitcast i32 %45 to float %47 = call i32 @llvm.SI.packf16(float %41, float %44) %48 = bitcast i32 %47 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %46, float %48, float %46, float %48) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s10 ; BEFC030A v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 3, 1, [m0] ; C8180700 v_interp_p2_f32 v6, [v6], v1, 3, 1, [m0] ; C8190701 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v10, v0, 3, 2, [m0] ; C8280B00 v_interp_p2_f32 v10, [v10], v1, 3, 2, [m0] ; C8290B01 v_interp_p1_f32 v11, v0, 0, 3, [m0] ; C82C0C00 v_interp_p2_f32 v11, [v11], v1, 0, 3, [m0] ; C82D0C01 v_interp_p1_f32 v12, v0, 1, 3, [m0] ; C8300D00 v_interp_p2_f32 v12, [v12], v1, 1, 3, [m0] ; C8310D01 v_interp_p1_f32 v13, v0, 2, 3, [m0] ; C8340E00 v_interp_p2_f32 v13, [v13], v1, 2, 3, [m0] ; C8350E01 v_interp_p1_f32 v0, v0, 3, 3, [m0] ; C8000F00 v_interp_p2_f32 v0, [v0], v1, 3, 3, [m0] ; C8010F01 v_mac_f32_e32 v7, v11, v2 ; 3E0E050B v_mac_f32_e32 v8, v12, v3 ; 3E10070C v_mac_f32_e32 v9, v13, v4 ; 3E12090D v_mac_f32_e32 v10, v0, v5 ; 3E140B00 v_mul_f32_e32 v0, v6, v10 ; 10001506 v_cvt_pkrtz_f16_f32_e32 v1, v7, v8 ; 5E021107 v_cvt_pkrtz_f16_f32_e32 v0, v9, v0 ; 5E000109 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Code Size: 148 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], CLIPVERTEX DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL OUT[4], GENERIC[2] DCL OUT[5], GENERIC[3] DCL CONST[0..57] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 2.2000, 0.0000, 0.0000, 0.0000} 0: LG2 TEMP[0].x, IN[1].xxxx 1: LG2 TEMP[1].x, IN[1].yyyy 2: MOV TEMP[0].y, TEMP[1].xxxx 3: LG2 TEMP[1].x, IN[1].zzzz 4: MOV TEMP[0].z, TEMP[1].xxxx 5: MUL TEMP[0].xyz, TEMP[0].xyzz, IMM[0].xxxx 6: EX2 TEMP[1].x, TEMP[0].xxxx 7: EX2 TEMP[2].x, TEMP[0].yyyy 8: MOV TEMP[1].y, TEMP[2].xxxx 9: EX2 TEMP[2].x, TEMP[0].zzzz 10: MOV TEMP[1].z, TEMP[2].xxxx 11: DP4 TEMP[2].x, IN[2], CONST[48] 12: DP4 TEMP[3].x, IN[2], CONST[49] 13: MOV TEMP[2].y, TEMP[3].xxxx 14: MOV TEMP[0].w, CONST[0].yyyy 15: MOV TEMP[3].w, IN[0].wwww 16: MAD TEMP[3].xyz, IN[3].xyzz, CONST[13].xxxx, IN[0].xyzz 17: DP4 TEMP[0].x, TEMP[3], CONST[54] 18: DP4 TEMP[4].x, TEMP[3], CONST[55] 19: MOV TEMP[0].y, TEMP[4].xxxx 20: DP4 TEMP[3].x, TEMP[3], CONST[56] 21: MOV TEMP[0].z, TEMP[3].xxxx 22: DP4 TEMP[3].x, TEMP[0], CONST[8] 23: DP4 TEMP[4].x, TEMP[0], CONST[9] 24: MOV TEMP[3].y, TEMP[4].xxxx 25: DP4 TEMP[5].x, TEMP[0], CONST[11] 26: MOV TEMP[3].w, TEMP[5].xxxx 27: DP4 TEMP[6].x, TEMP[0], CONST[10] 28: MOV TEMP[0].w, TEMP[6].xxxx 29: MOV TEMP[3].z, TEMP[6].xxxx 30: MOV TEMP[2].zw, CONST[0].xxxx 31: MOV TEMP[1].w, IN[1].wwww 32: MOV TEMP[7], TEMP[3] 33: MAD TEMP[6].x, TEMP[6].xxxx, CONST[0].zzzz, -TEMP[5].xxxx 34: MOV TEMP[3].z, TEMP[6].xxxx 35: MOV TEMP[3].y, -TEMP[4].xxxx 36: MAD TEMP[3].xy, CONST[57].xyyy, TEMP[5].xxxx, TEMP[3].xyyy 37: MOV OUT[2], TEMP[2] 38: MOV OUT[3], CONST[0].xxxx 39: MOV OUT[0], TEMP[3] 40: MOV OUT[1], TEMP[7] 41: MOV OUT[4], TEMP[1] 42: MOV OUT[5], TEMP[0] 43: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 128) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 132) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 136) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 140) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 144) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 148) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 152) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 156) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 160) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 164) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 168) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 172) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 176) %30 = call float @llvm.SI.load.const(<16 x i8> %13, i32 180) %31 = call float @llvm.SI.load.const(<16 x i8> %13, i32 184) %32 = call float @llvm.SI.load.const(<16 x i8> %13, i32 188) %33 = call float @llvm.SI.load.const(<16 x i8> %13, i32 208) %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 768) %35 = call float @llvm.SI.load.const(<16 x i8> %13, i32 772) %36 = call float @llvm.SI.load.const(<16 x i8> %13, i32 776) %37 = call float @llvm.SI.load.const(<16 x i8> %13, i32 780) %38 = call float @llvm.SI.load.const(<16 x i8> %13, i32 784) %39 = call float @llvm.SI.load.const(<16 x i8> %13, i32 788) %40 = call float @llvm.SI.load.const(<16 x i8> %13, i32 792) %41 = call float @llvm.SI.load.const(<16 x i8> %13, i32 796) %42 = call float @llvm.SI.load.const(<16 x i8> %13, i32 864) %43 = call float @llvm.SI.load.const(<16 x i8> %13, i32 868) %44 = call float @llvm.SI.load.const(<16 x i8> %13, i32 872) %45 = call float @llvm.SI.load.const(<16 x i8> %13, i32 876) %46 = call float @llvm.SI.load.const(<16 x i8> %13, i32 880) %47 = call float @llvm.SI.load.const(<16 x i8> %13, i32 884) %48 = call float @llvm.SI.load.const(<16 x i8> %13, i32 888) %49 = call float @llvm.SI.load.const(<16 x i8> %13, i32 892) %50 = call float @llvm.SI.load.const(<16 x i8> %13, i32 896) %51 = call float @llvm.SI.load.const(<16 x i8> %13, i32 900) %52 = call float @llvm.SI.load.const(<16 x i8> %13, i32 904) %53 = call float @llvm.SI.load.const(<16 x i8> %13, i32 908) %54 = call float @llvm.SI.load.const(<16 x i8> %13, i32 912) %55 = call float @llvm.SI.load.const(<16 x i8> %13, i32 916) %56 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %57 = load <16 x i8>, <16 x i8> addrspace(2)* %56, align 16, !tbaa !0 %58 = add i32 %5, %8 %59 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %57, i32 0, i32 %58) %60 = extractelement <4 x float> %59, i32 0 %61 = extractelement <4 x float> %59, i32 1 %62 = extractelement <4 x float> %59, i32 2 %63 = extractelement <4 x float> %59, i32 3 %64 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %65 = load <16 x i8>, <16 x i8> addrspace(2)* %64, align 16, !tbaa !0 %66 = add i32 %5, %8 %67 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %65, i32 0, i32 %66) %68 = extractelement <4 x float> %67, i32 0 %69 = extractelement <4 x float> %67, i32 1 %70 = extractelement <4 x float> %67, i32 2 %71 = extractelement <4 x float> %67, i32 3 %72 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %73 = load <16 x i8>, <16 x i8> addrspace(2)* %72, align 16, !tbaa !0 %74 = add i32 %5, %8 %75 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %73, i32 0, i32 %74) %76 = extractelement <4 x float> %75, i32 0 %77 = extractelement <4 x float> %75, i32 1 %78 = extractelement <4 x float> %75, i32 2 %79 = extractelement <4 x float> %75, i32 3 %80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0 %82 = add i32 %5, %8 %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %82) %84 = extractelement <4 x float> %83, i32 0 %85 = extractelement <4 x float> %83, i32 1 %86 = extractelement <4 x float> %83, i32 2 %87 = call float @llvm.log2.f32(float %68) %88 = call float @llvm.log2.f32(float %69) %89 = call float @llvm.log2.f32(float %70) %90 = fmul float %87, 0x40019999A0000000 %91 = fmul float %88, 0x40019999A0000000 %92 = fmul float %89, 0x40019999A0000000 %93 = call float @llvm.AMDIL.exp.(float %90) %94 = call float @llvm.AMDIL.exp.(float %91) %95 = call float @llvm.AMDIL.exp.(float %92) %96 = fmul float %76, %34 %97 = fmul float %77, %35 %98 = fadd float %96, %97 %99 = fmul float %78, %36 %100 = fadd float %98, %99 %101 = fmul float %79, %37 %102 = fadd float %100, %101 %103 = fmul float %76, %38 %104 = fmul float %77, %39 %105 = fadd float %103, %104 %106 = fmul float %78, %40 %107 = fadd float %105, %106 %108 = fmul float %79, %41 %109 = fadd float %107, %108 %110 = fmul float %84, %33 %111 = fadd float %110, %60 %112 = fmul float %85, %33 %113 = fadd float %112, %61 %114 = fmul float %86, %33 %115 = fadd float %114, %62 %116 = fmul float %111, %42 %117 = fmul float %113, %43 %118 = fadd float %116, %117 %119 = fmul float %115, %44 %120 = fadd float %118, %119 %121 = fmul float %63, %45 %122 = fadd float %120, %121 %123 = fmul float %111, %46 %124 = fmul float %113, %47 %125 = fadd float %123, %124 %126 = fmul float %115, %48 %127 = fadd float %125, %126 %128 = fmul float %63, %49 %129 = fadd float %127, %128 %130 = fmul float %111, %50 %131 = fmul float %113, %51 %132 = fadd float %130, %131 %133 = fmul float %115, %52 %134 = fadd float %132, %133 %135 = fmul float %63, %53 %136 = fadd float %134, %135 %137 = fmul float %122, %17 %138 = fmul float %129, %18 %139 = fadd float %137, %138 %140 = fmul float %136, %19 %141 = fadd float %139, %140 %142 = fmul float %15, %20 %143 = fadd float %141, %142 %144 = fmul float %122, %21 %145 = fmul float %129, %22 %146 = fadd float %144, %145 %147 = fmul float %136, %23 %148 = fadd float %146, %147 %149 = fmul float %15, %24 %150 = fadd float %148, %149 %151 = fmul float %122, %29 %152 = fmul float %129, %30 %153 = fadd float %151, %152 %154 = fmul float %136, %31 %155 = fadd float %153, %154 %156 = fmul float %15, %32 %157 = fadd float %155, %156 %158 = fmul float %122, %25 %159 = fmul float %129, %26 %160 = fadd float %158, %159 %161 = fmul float %136, %27 %162 = fadd float %160, %161 %163 = fmul float %15, %28 %164 = fadd float %162, %163 %165 = fmul float %164, %16 %166 = fsub float %165, %157 %167 = fmul float %54, %157 %168 = fadd float %167, %143 %169 = fmul float %55, %157 %170 = fsub float %169, %150 %171 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 16 %172 = load <16 x i8>, <16 x i8> addrspace(2)* %171, align 16, !tbaa !0 %173 = call float @llvm.SI.load.const(<16 x i8> %172, i32 0) %174 = fmul float %173, %143 %175 = call float @llvm.SI.load.const(<16 x i8> %172, i32 4) %176 = fmul float %175, %150 %177 = fadd float %174, %176 %178 = call float @llvm.SI.load.const(<16 x i8> %172, i32 8) %179 = fmul float %178, %164 %180 = fadd float %177, %179 %181 = call float @llvm.SI.load.const(<16 x i8> %172, i32 12) %182 = fmul float %181, %157 %183 = fadd float %180, %182 %184 = call float @llvm.SI.load.const(<16 x i8> %172, i32 16) %185 = fmul float %184, %143 %186 = call float @llvm.SI.load.const(<16 x i8> %172, i32 20) %187 = fmul float %186, %150 %188 = fadd float %185, %187 %189 = call float @llvm.SI.load.const(<16 x i8> %172, i32 24) %190 = fmul float %189, %164 %191 = fadd float %188, %190 %192 = call float @llvm.SI.load.const(<16 x i8> %172, i32 28) %193 = fmul float %192, %157 %194 = fadd float %191, %193 %195 = call float @llvm.SI.load.const(<16 x i8> %172, i32 32) %196 = fmul float %195, %143 %197 = call float @llvm.SI.load.const(<16 x i8> %172, i32 36) %198 = fmul float %197, %150 %199 = fadd float %196, %198 %200 = call float @llvm.SI.load.const(<16 x i8> %172, i32 40) %201 = fmul float %200, %164 %202 = fadd float %199, %201 %203 = call float @llvm.SI.load.const(<16 x i8> %172, i32 44) %204 = fmul float %203, %157 %205 = fadd float %202, %204 %206 = call float @llvm.SI.load.const(<16 x i8> %172, i32 48) %207 = fmul float %206, %143 %208 = call float @llvm.SI.load.const(<16 x i8> %172, i32 52) %209 = fmul float %208, %150 %210 = fadd float %207, %209 %211 = call float @llvm.SI.load.const(<16 x i8> %172, i32 56) %212 = fmul float %211, %164 %213 = fadd float %210, %212 %214 = call float @llvm.SI.load.const(<16 x i8> %172, i32 60) %215 = fmul float %214, %157 %216 = fadd float %213, %215 %217 = call float @llvm.SI.load.const(<16 x i8> %172, i32 64) %218 = fmul float %217, %143 %219 = call float @llvm.SI.load.const(<16 x i8> %172, i32 68) %220 = fmul float %219, %150 %221 = fadd float %218, %220 %222 = call float @llvm.SI.load.const(<16 x i8> %172, i32 72) %223 = fmul float %222, %164 %224 = fadd float %221, %223 %225 = call float @llvm.SI.load.const(<16 x i8> %172, i32 76) %226 = fmul float %225, %157 %227 = fadd float %224, %226 %228 = call float @llvm.SI.load.const(<16 x i8> %172, i32 80) %229 = fmul float %228, %143 %230 = call float @llvm.SI.load.const(<16 x i8> %172, i32 84) %231 = fmul float %230, %150 %232 = fadd float %229, %231 %233 = call float @llvm.SI.load.const(<16 x i8> %172, i32 88) %234 = fmul float %233, %164 %235 = fadd float %232, %234 %236 = call float @llvm.SI.load.const(<16 x i8> %172, i32 92) %237 = fmul float %236, %157 %238 = fadd float %235, %237 %239 = call float @llvm.SI.load.const(<16 x i8> %172, i32 96) %240 = fmul float %239, %143 %241 = call float @llvm.SI.load.const(<16 x i8> %172, i32 100) %242 = fmul float %241, %150 %243 = fadd float %240, %242 %244 = call float @llvm.SI.load.const(<16 x i8> %172, i32 104) %245 = fmul float %244, %164 %246 = fadd float %243, %245 %247 = call float @llvm.SI.load.const(<16 x i8> %172, i32 108) %248 = fmul float %247, %157 %249 = fadd float %246, %248 %250 = call float @llvm.SI.load.const(<16 x i8> %172, i32 112) %251 = fmul float %250, %143 %252 = call float @llvm.SI.load.const(<16 x i8> %172, i32 116) %253 = fmul float %252, %150 %254 = fadd float %251, %253 %255 = call float @llvm.SI.load.const(<16 x i8> %172, i32 120) %256 = fmul float %255, %164 %257 = fadd float %254, %256 %258 = call float @llvm.SI.load.const(<16 x i8> %172, i32 124) %259 = fmul float %258, %157 %260 = fadd float %257, %259 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %102, float %109, float %14, float %14) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %14, float %14, float %14, float %14) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %93, float %94, float %95, float %71) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %122, float %129, float %136, float %164) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %168, float %170, float %166, float %157) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 13, i32 0, float %183, float %194, float %205, float %216) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 14, i32 0, float %227, float %238, float %249, float %260) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[52:55], s[2:3], 0x0 ; C09A0300 s_load_dwordx4 s[24:27], s[2:3], 0x40 ; C08C0340 s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[32:35], s[8:9], 0xc ; C090090C v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s3, s[52:55], 0x2c ; C201B52C s_buffer_load_dword s5, s[52:55], 0x2d ; C202B52D s_buffer_load_dword s1, s[52:55], 0x2e ; C200B52E s_buffer_load_dword s0, s[52:55], 0x2f ; C200352F s_buffer_load_dword s28, s[52:55], 0x34 ; C20E3534 buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 buffer_load_format_xyzw v[5:8], v0, s[16:19], 0 idxen ; E00C2000 80040500 buffer_load_format_xyzw v[9:12], v0, s[20:23], 0 idxen ; E00C2000 80050900 buffer_load_format_xyzw v[13:16], v0, s[32:35], 0 idxen ; E00C2000 80080D00 s_buffer_load_dword s14, s[52:55], 0xda ; C20735DA s_buffer_load_dword s11, s[52:55], 0xdb ; C205B5DB s_buffer_load_dword s17, s[52:55], 0xdc ; C208B5DC s_buffer_load_dword s33, s[52:55], 0xdd ; C210B5DD s_buffer_load_dword s15, s[52:55], 0xde ; C207B5DE s_buffer_load_dword s45, s[52:55], 0xc5 ; C216B5C5 s_buffer_load_dword s32, s[52:55], 0xc6 ; C21035C6 s_buffer_load_dword s29, s[52:55], 0xc7 ; C20EB5C7 s_buffer_load_dword s22, s[52:55], 0xd8 ; C20B35D8 s_buffer_load_dword s40, s[52:55], 0xd9 ; C21435D9 s_buffer_load_dword s18, s[52:55], 0xdf ; C20935DF s_buffer_load_dword s30, s[52:55], 0xe0 ; C20F35E0 s_buffer_load_dword s41, s[52:55], 0xe1 ; C214B5E1 s_buffer_load_dword s21, s[52:55], 0xe2 ; C20AB5E2 s_buffer_load_dword s20, s[52:55], 0xe3 ; C20A35E3 s_buffer_load_dword s48, s[52:55], 0xc0 ; C21835C0 s_buffer_load_dword s51, s[52:55], 0xc1 ; C219B5C1 s_buffer_load_dword s44, s[52:55], 0xc2 ; C21635C2 s_buffer_load_dword s42, s[52:55], 0xc3 ; C21535C3 s_buffer_load_dword s49, s[52:55], 0xc4 ; C218B5C4 s_buffer_load_dword s13, s[52:55], 0x22 ; C206B522 s_buffer_load_dword s2, s[52:55], 0x23 ; C2013523 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s0 ; 7E000200 s_buffer_load_dword s19, s[52:55], 0x24 ; C209B524 s_buffer_load_dword s31, s[52:55], 0x25 ; C20FB525 s_buffer_load_dword s16, s[52:55], 0x26 ; C2083526 s_buffer_load_dword s0, s[52:55], 0x0 ; C2003500 s_buffer_load_dword s12, s[52:55], 0x1 ; C2063501 s_buffer_load_dword s9, s[52:55], 0x2 ; C204B502 s_buffer_load_dword s34, s[52:55], 0x20 ; C2113520 s_buffer_load_dword s38, s[52:55], 0x21 ; C2133521 s_buffer_load_dword s6, s[52:55], 0x27 ; C2033527 s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v16, s2 ; 7E200202 s_buffer_load_dword s39, s[52:55], 0x28 ; C213B528 s_buffer_load_dword s47, s[52:55], 0x29 ; C217B529 s_buffer_load_dword s37, s[52:55], 0x2a ; C212B52A s_buffer_load_dword s50, s[52:55], 0x2b ; C219352B s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v17, s0 ; 7E220200 s_buffer_load_dword s7, s[24:27], 0x0 ; C2039900 s_buffer_load_dword s23, s[24:27], 0x1 ; C20B9901 s_buffer_load_dword s4, s[24:27], 0x2 ; C2021902 s_buffer_load_dword s0, s[24:27], 0x3 ; C2001903 s_buffer_load_dword s8, s[24:27], 0x4 ; C2041904 v_mov_b32_e32 v18, s6 ; 7E240206 s_buffer_load_dword s35, s[24:27], 0x5 ; C2119905 s_buffer_load_dword s6, s[24:27], 0x6 ; C2031906 s_buffer_load_dword s2, s[24:27], 0x7 ; C2011907 s_buffer_load_dword s10, s[24:27], 0x8 ; C2051908 s_buffer_load_dword s36, s[24:27], 0x9 ; C2121909 s_buffer_load_dword s43, s[52:55], 0xe4 ; C215B5E4 s_buffer_load_dword s46, s[52:55], 0xe5 ; C21735E5 s_buffer_load_dword s52, s[24:27], 0xa ; C21A190A s_buffer_load_dword s53, s[24:27], 0xb ; C21A990B s_buffer_load_dword s54, s[24:27], 0xc ; C21B190C s_buffer_load_dword s55, s[24:27], 0xd ; C21B990D s_buffer_load_dword s56, s[24:27], 0xe ; C21C190E s_buffer_load_dword s57, s[24:27], 0xf ; C21C990F s_buffer_load_dword s58, s[24:27], 0x10 ; C21D1910 s_buffer_load_dword s59, s[24:27], 0x11 ; C21D9911 s_buffer_load_dword s60, s[24:27], 0x12 ; C21E1912 s_buffer_load_dword s61, s[24:27], 0x13 ; C21E9913 s_buffer_load_dword s62, s[24:27], 0x14 ; C21F1914 s_buffer_load_dword s63, s[24:27], 0x15 ; C21F9915 s_buffer_load_dword s64, s[24:27], 0x16 ; C2201916 s_buffer_load_dword s65, s[24:27], 0x17 ; C2209917 s_buffer_load_dword s66, s[24:27], 0x18 ; C2211918 s_buffer_load_dword s67, s[24:27], 0x19 ; C2219919 s_buffer_load_dword s68, s[24:27], 0x1a ; C222191A s_buffer_load_dword s69, s[24:27], 0x1b ; C222991B s_buffer_load_dword s70, s[24:27], 0x1c ; C223191C s_buffer_load_dword s71, s[24:27], 0x1d ; C223991D s_buffer_load_dword s72, s[24:27], 0x1e ; C224191E s_buffer_load_dword s24, s[24:27], 0x1f ; C20C191F v_mul_f32_e32 v19, s51, v10 ; 10261433 v_mul_f32_e32 v10, s45, v10 ; 1014142D v_mad_f32 v1, s28, v13, v1 ; D2820001 04061A1C v_mad_f32 v2, s28, v14, v2 ; D2820002 040A1C1C v_mad_f32 v3, s28, v15, v3 ; D2820003 040E1E1C v_mac_f32_e32 v19, s48, v9 ; 3E261230 v_mac_f32_e32 v10, s49, v9 ; 3E141231 v_mul_f32_e32 v9, s40, v2 ; 10120428 v_mul_f32_e32 v13, s33, v2 ; 101A0421 v_mul_f32_e32 v2, s41, v2 ; 10040429 v_mac_f32_e32 v19, s44, v11 ; 3E26162C v_mac_f32_e32 v10, s32, v11 ; 3E141620 v_mac_f32_e32 v19, s42, v12 ; 3E26182A v_mac_f32_e32 v10, s29, v12 ; 3E14181D v_mac_f32_e32 v9, s22, v1 ; 3E120216 v_mac_f32_e32 v13, s17, v1 ; 3E1A0211 v_mac_f32_e32 v2, s30, v1 ; 3E04021E v_mac_f32_e32 v9, s14, v3 ; 3E12060E v_mac_f32_e32 v13, s15, v3 ; 3E1A060F v_mac_f32_e32 v2, s21, v3 ; 3E040615 v_mac_f32_e32 v9, s11, v4 ; 3E12080B v_mac_f32_e32 v13, s18, v4 ; 3E1A0812 v_mac_f32_e32 v2, s20, v4 ; 3E040814 v_mov_b32_e32 v1, s50 ; 7E020232 v_mul_f32_e32 v3, s38, v13 ; 10061A26 v_mul_f32_e32 v4, s31, v13 ; 10081A1F v_mul_f32_e32 v11, s5, v13 ; 10161A05 v_mul_f32_e32 v12, s47, v13 ; 10181A2F v_mac_f32_e32 v3, s34, v9 ; 3E061222 v_mac_f32_e32 v4, s19, v9 ; 3E081213 v_mac_f32_e32 v11, s3, v9 ; 3E161203 v_mac_f32_e32 v12, s39, v9 ; 3E181227 v_mac_f32_e32 v3, s13, v2 ; 3E06040D v_mac_f32_e32 v4, s16, v2 ; 3E080410 v_mac_f32_e32 v11, s1, v2 ; 3E160401 v_mac_f32_e32 v12, s37, v2 ; 3E180425 v_mac_f32_e32 v3, s12, v16 ; 3E06200C v_mac_f32_e32 v4, s12, v18 ; 3E08240C v_mac_f32_e32 v11, s12, v0 ; 3E16000C v_mac_f32_e32 v12, s12, v1 ; 3E18020C v_mad_f32 v0, v12, s9, -v11 ; D2820000 842C130C s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s23, v4 ; 10020817 v_mul_f32_e32 v14, s35, v4 ; 101C0823 v_mul_f32_e32 v15, s36, v4 ; 101E0824 v_mul_f32_e32 v16, s55, v4 ; 10200837 v_mul_f32_e32 v18, s59, v4 ; 1024083B v_mul_f32_e32 v20, s63, v4 ; 1028083F v_mul_f32_e32 v21, s67, v4 ; 102A0843 v_log_f32_e32 v5, v5 ; 7E0A4F05 v_log_f32_e32 v6, v6 ; 7E0C4F06 exp 15, 32, 0, 0, 0, v19, v10, v17, v17 ; F800020F 11110A13 v_log_f32_e32 v7, v7 ; 7E0E4F07 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v10, 0x400ccccd ; 7E1402FF 400CCCCD v_mul_f32_e32 v5, v10, v5 ; 100A0B0A v_mul_f32_e32 v6, v10, v6 ; 100C0D0A v_mul_f32_e32 v7, v10, v7 ; 100E0F0A v_mul_f32_e32 v10, s71, v4 ; 10140847 v_mac_f32_e32 v1, s7, v3 ; 3E020607 v_mac_f32_e32 v14, s8, v3 ; 3E1C0608 v_mac_f32_e32 v15, s10, v3 ; 3E1E060A v_mac_f32_e32 v16, s54, v3 ; 3E200636 v_mac_f32_e32 v18, s58, v3 ; 3E24063A v_mac_f32_e32 v20, s62, v3 ; 3E28063E v_mac_f32_e32 v21, s66, v3 ; 3E2A0642 v_mac_f32_e32 v10, s70, v3 ; 3E140646 v_mad_f32 v3, s43, v11, v3 ; D2820003 040E162B v_mad_f32 v4, s46, v11, -v4 ; D2820004 8412162E v_mac_f32_e32 v1, s4, v12 ; 3E021804 v_mac_f32_e32 v14, s6, v12 ; 3E1C1806 v_mac_f32_e32 v15, s52, v12 ; 3E1E1834 v_mac_f32_e32 v16, s56, v12 ; 3E201838 v_mac_f32_e32 v18, s60, v12 ; 3E24183C v_mac_f32_e32 v20, s64, v12 ; 3E281840 v_mac_f32_e32 v21, s68, v12 ; 3E2A1844 v_mac_f32_e32 v10, s72, v12 ; 3E141848 v_mac_f32_e32 v1, s0, v11 ; 3E021600 v_mac_f32_e32 v14, s2, v11 ; 3E1C1602 v_mac_f32_e32 v15, s53, v11 ; 3E1E1635 v_mac_f32_e32 v16, s57, v11 ; 3E201639 v_mac_f32_e32 v18, s61, v11 ; 3E24163D v_mac_f32_e32 v20, s65, v11 ; 3E281641 v_mac_f32_e32 v21, s69, v11 ; 3E2A1645 v_mac_f32_e32 v10, s24, v11 ; 3E141618 exp 15, 33, 0, 0, 0, v17, v17, v17, v17 ; F800021F 11111111 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_exp_f32_e32 v7, v7 ; 7E0E4B07 exp 15, 34, 0, 0, 0, v5, v6, v7, v8 ; F800022F 08070605 exp 15, 35, 0, 0, 0, v9, v13, v2, v12 ; F800023F 0C020D09 exp 15, 12, 0, 0, 0, v3, v4, v0, v11 ; F80000CF 0B000403 exp 15, 13, 0, 0, 0, v1, v14, v15, v16 ; F80000DF 100F0E01 exp 15, 14, 0, 1, 0, v18, v20, v21, v10 ; F80008EF 0A151412 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 80 VGPRS: 24 Code Size: 824 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], SHADOW2D, FLOAT DCL CONST[0..90] DCL TEMP[0..15], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 2.0000, -0.5000} IMM[1] FLT32 { -0.0000, -1.0000, -2.0000, 0.0625} IMM[2] FLT32 { 0.0005, 0.0000, -0.0005, 0.1250} IMM[3] FLT32 { 0.2500, 0.0000, -1.0000, -2.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: UIF CONST[90].xxxx :0 3: MAD TEMP[1], IN[3].xyzx, IMM[0].xxxy, IMM[0].yyyx 4: DP4 TEMP[2].x, TEMP[1], CONST[69] 5: DP4 TEMP[3].x, TEMP[1], CONST[70] 6: MOV TEMP[2].y, TEMP[3].xxxx 7: MOV_SAT TEMP[4].xy, TEMP[2].xyyy 8: ADD TEMP[4].xy, -TEMP[2].xyyy, TEMP[4].xyyy 9: DP2 TEMP[5].x, TEMP[4].xyyy, IMM[0].xxxx 10: DP4 TEMP[4].x, TEMP[1], CONST[73] 11: DP4 TEMP[6].x, TEMP[1], CONST[74] 12: MOV TEMP[4].y, TEMP[6].xxxx 13: MOV_SAT TEMP[7].xy, TEMP[4].xyyy 14: ADD TEMP[7].xy, -TEMP[4].xyyy, TEMP[7].xyyy 15: DP2 TEMP[8].x, TEMP[7].xyyy, IMM[0].xxxx 16: MOV TEMP[4].w, TEMP[8].xxxx 17: DP4 TEMP[7].x, TEMP[1], CONST[77] 18: DP4 TEMP[9].x, TEMP[1], CONST[78] 19: MOV TEMP[4].z, IMM[0].xxxx 20: MOV TEMP[10].w, TEMP[4] 21: ABS TEMP[11].x, TEMP[8].xxxx 22: FSGE TEMP[11].x, -TEMP[11].xxxx, IMM[0].yyyy 23: UIF TEMP[11].xxxx :0 24: MOV TEMP[11].x, TEMP[4].xxxx 25: ELSE :0 26: MOV TEMP[11].x, TEMP[7].xxxx 27: ENDIF 28: MOV TEMP[10].x, TEMP[11].xxxx 29: ABS TEMP[11].x, TEMP[8].xxxx 30: FSGE TEMP[11].x, -TEMP[11].xxxx, IMM[0].yyyy 31: UIF TEMP[11].xxxx :0 32: MOV TEMP[6].x, TEMP[6].xxxx 33: ELSE :0 34: MOV TEMP[6].x, TEMP[9].xxxx 35: ENDIF 36: MOV TEMP[10].y, TEMP[6].xxxx 37: ABS TEMP[6].x, TEMP[8].xxxx 38: FSGE TEMP[6].x, -TEMP[6].xxxx, IMM[0].yyyy 39: UIF TEMP[6].xxxx :0 40: MOV TEMP[6].x, IMM[0].xxxx 41: ELSE :0 42: MOV TEMP[6].x, IMM[0].zzzz 43: ENDIF 44: MOV TEMP[10].z, TEMP[6].xxxx 45: MOV TEMP[4].xyz, TEMP[10] 46: ABS TEMP[6].x, TEMP[5].xxxx 47: FSGE TEMP[6].x, -TEMP[6].xxxx, IMM[0].yyyy 48: UIF TEMP[6].xxxx :0 49: MOV TEMP[6].x, TEMP[2].xxxx 50: ELSE :0 51: MOV TEMP[6].x, TEMP[4].xxxx 52: ENDIF 53: MOV TEMP[10].x, TEMP[6].xxxx 54: ABS TEMP[6].x, TEMP[5].xxxx 55: FSGE TEMP[6].x, -TEMP[6].xxxx, IMM[0].yyyy 56: UIF TEMP[6].xxxx :0 57: MOV TEMP[3].x, TEMP[3].xxxx 58: ELSE :0 59: MOV TEMP[3].x, TEMP[4].yyyy 60: ENDIF 61: MOV TEMP[10].y, TEMP[3].xxxx 62: ABS TEMP[3].x, TEMP[5].xxxx 63: FSGE TEMP[3].x, -TEMP[3].xxxx, IMM[0].yyyy 64: UIF TEMP[3].xxxx :0 65: MOV TEMP[3].x, IMM[0].yyyy 66: ELSE :0 67: MOV TEMP[3].x, TEMP[4].zzzz 68: ENDIF 69: MOV TEMP[10].z, TEMP[3].xxxx 70: MOV TEMP[2].z, TEMP[10].xyzx 71: DP4 TEMP[5].x, TEMP[1], CONST[71] 72: MOV TEMP[4].z, TEMP[5].xxxx 73: ADD TEMP[7].xy, TEMP[10].xyyy, IMM[0].wwww 74: ABS TEMP[6].xy, TEMP[7].xyyy 75: ADD TEMP[7].xy, TEMP[6].xyyy, -CONST[67].zzzz 76: MUL TEMP[7].xy, TEMP[7].xyyy, CONST[67].wwww 77: MOV_SAT TEMP[6].xy, TEMP[7].xyyy 78: ADD TEMP[7].xy, -TEMP[6].xyyy, IMM[0].xxxx 79: MUL TEMP[6].x, TEMP[7].yyyy, TEMP[7].xxxx 80: MOV_SAT TEMP[8].xy, TEMP[10].xyyy 81: ADD TEMP[7].xyz, TEMP[3].xxxx, IMM[1].xyzz 82: MOV TEMP[3].y, IMM[0].yyyy 83: ABS TEMP[11].x, TEMP[7].xxxx 84: FSGE TEMP[11].x, -TEMP[11].xxxx, IMM[0].yyyy 85: UIF TEMP[11].xxxx :0 86: MOV TEMP[11].x, CONST[85].zzzz 87: ELSE :0 88: MOV TEMP[11].x, IMM[0].yyyy 89: ENDIF 90: ABS TEMP[12].x, TEMP[7].xxxx 91: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 92: UIF TEMP[12].xxxx :0 93: MOV TEMP[12].x, CONST[85].wwww 94: ELSE :0 95: MOV TEMP[12].x, IMM[0].yyyy 96: ENDIF 97: MOV TEMP[10].y, TEMP[12].xxxx 98: ABS TEMP[12].x, TEMP[7].xxxx 99: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 100: UIF TEMP[12].xxxx :0 101: MOV TEMP[12].x, CONST[85].xxxx 102: ELSE :0 103: MOV TEMP[12].x, IMM[0].yyyy 104: ENDIF 105: MOV TEMP[10].z, TEMP[12].xxxx 106: ABS TEMP[12].x, TEMP[7].xxxx 107: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 108: UIF TEMP[12].xxxx :0 109: MOV TEMP[12].x, CONST[85].yyyy 110: ELSE :0 111: MOV TEMP[12].x, IMM[0].yyyy 112: ENDIF 113: MOV TEMP[10].w, TEMP[12].xxxx 114: ABS TEMP[12].x, TEMP[7].yyyy 115: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 116: UIF TEMP[12].xxxx :0 117: MOV TEMP[12].x, CONST[86].zzzz 118: ELSE :0 119: MOV TEMP[12].x, TEMP[11].xxxx 120: ENDIF 121: MOV TEMP[10].x, TEMP[12].xxxx 122: ABS TEMP[11].x, TEMP[7].yyyy 123: FSGE TEMP[11].x, -TEMP[11].xxxx, IMM[0].yyyy 124: UIF TEMP[11].xxxx :0 125: MOV TEMP[11].x, CONST[86].wwww 126: ELSE :0 127: MOV TEMP[11].x, TEMP[10].yyyy 128: ENDIF 129: MOV TEMP[10].y, TEMP[11].xxxx 130: ABS TEMP[11].x, TEMP[7].yyyy 131: FSGE TEMP[11].x, -TEMP[11].xxxx, IMM[0].yyyy 132: UIF TEMP[11].xxxx :0 133: MOV TEMP[11].x, CONST[86].xxxx 134: ELSE :0 135: MOV TEMP[11].x, TEMP[10].zzzz 136: ENDIF 137: MOV TEMP[10].z, TEMP[11].xxxx 138: ABS TEMP[11].x, TEMP[7].yyyy 139: FSGE TEMP[11].x, -TEMP[11].xxxx, IMM[0].yyyy 140: UIF TEMP[11].xxxx :0 141: MOV TEMP[11].x, CONST[86].yyyy 142: ELSE :0 143: MOV TEMP[11].x, TEMP[10].wwww 144: ENDIF 145: MOV TEMP[10].w, TEMP[11].xxxx 146: MOV TEMP[9], TEMP[10] 147: ABS TEMP[11].x, TEMP[7].zzzz 148: FSGE TEMP[11].x, -TEMP[11].xxxx, IMM[0].yyyy 149: UIF TEMP[11].xxxx :0 150: MOV TEMP[11].x, CONST[87].zzzz 151: ELSE :0 152: MOV TEMP[11].x, TEMP[9].xxxx 153: ENDIF 154: MOV TEMP[10].x, TEMP[11].xxxx 155: ABS TEMP[11].x, TEMP[7].zzzz 156: FSGE TEMP[11].x, -TEMP[11].xxxx, IMM[0].yyyy 157: UIF TEMP[11].xxxx :0 158: MOV TEMP[11].x, CONST[87].wwww 159: ELSE :0 160: MOV TEMP[11].x, TEMP[9].yyyy 161: ENDIF 162: MOV TEMP[10].y, TEMP[11].xxxx 163: ABS TEMP[11].x, TEMP[7].zzzz 164: FSGE TEMP[11].x, -TEMP[11].xxxx, IMM[0].yyyy 165: UIF TEMP[11].xxxx :0 166: MOV TEMP[11].x, CONST[87].xxxx 167: ELSE :0 168: MOV TEMP[11].x, TEMP[9].zzzz 169: ENDIF 170: MOV TEMP[10].z, TEMP[11].xxxx 171: ABS TEMP[11].x, TEMP[7].zzzz 172: FSGE TEMP[11].x, -TEMP[11].xxxx, IMM[0].yyyy 173: UIF TEMP[11].xxxx :0 174: MOV TEMP[11].x, CONST[87].yyyy 175: ELSE :0 176: MOV TEMP[11].x, TEMP[9].wwww 177: ENDIF 178: MOV TEMP[10].w, TEMP[11].xxxx 179: MAD TEMP[4].xy, TEMP[8].xyyy, TEMP[10].xyyy, TEMP[10].zwww 180: MOV TEMP[4].w, IMM[0].yyyy 181: ADD TEMP[7], TEMP[4], IMM[2].xxyy 182: TXL TEMP[8].x, TEMP[7], SAMP[1], SHADOW2D 183: MOV TEMP[7].x, TEMP[8].xxxx 184: ADD TEMP[9], TEMP[4], IMM[2].zxyy 185: ADD TEMP[8], TEMP[4], IMM[2].xzyy 186: ADD TEMP[11], TEMP[4], IMM[2].zzyy 187: TXL TEMP[12].x, TEMP[9], SAMP[1], SHADOW2D 188: MOV TEMP[7].y, TEMP[12].xxxx 189: TXL TEMP[12].x, TEMP[8], SAMP[1], SHADOW2D 190: MOV TEMP[7].z, TEMP[12].xxxx 191: TXL TEMP[12].x, TEMP[11], SAMP[1], SHADOW2D 192: MOV TEMP[7].w, TEMP[12].xxxx 193: DP4 TEMP[12].x, TEMP[7], IMM[1].wwww 194: ADD TEMP[7], TEMP[4], IMM[2].xyyy 195: TXL TEMP[13].x, TEMP[7], SAMP[1], SHADOW2D 196: MOV TEMP[7].x, TEMP[13].xxxx 197: ADD TEMP[9], TEMP[4], IMM[2].zyyy 198: TXL TEMP[13], TEMP[9], SAMP[1], SHADOW2D 199: MOV TEMP[9], TEMP[13] 200: ADD TEMP[8], TEMP[4], IMM[2].yzyy 201: TXL TEMP[14], TEMP[8], SAMP[1], SHADOW2D 202: MOV TEMP[8], TEMP[14] 203: ADD TEMP[11], TEMP[4], IMM[2].yxyy 204: TXL TEMP[15], TEMP[11], SAMP[1], SHADOW2D 205: MOV TEMP[11], TEMP[15] 206: MOV TEMP[7].y, TEMP[13].xxxx 207: MOV TEMP[7].z, TEMP[14].xxxx 208: MOV TEMP[7].w, TEMP[15].xxxx 209: DP4 TEMP[13].x, TEMP[7], IMM[2].wwww 210: MOV TEMP[14].xy, TEMP[4].xyyy 211: MOV TEMP[14].z, TEMP[5].xxxx 212: MOV TEMP[14].w, IMM[0].yyyy 213: TXL TEMP[14], TEMP[14], SAMP[1], SHADOW2D 214: MOV TEMP[7].xyz, TEMP[14] 215: ADD TEMP[2].x, TEMP[13].xxxx, TEMP[12].xxxx 216: MAD TEMP[2].x, TEMP[14].xxxx, IMM[3].xxxx, TEMP[2].xxxx 217: FSLT TEMP[12].x, TEMP[6].xxxx, IMM[0].xxxx 218: UIF TEMP[12].xxxx :0 219: ADD TEMP[7].xyz, TEMP[2].zzzz, IMM[3].yzww 220: ABS TEMP[12].x, TEMP[7].xxxx 221: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 222: UIF TEMP[12].xxxx :0 223: MOV TEMP[12].x, CONST[73].xxxx 224: ELSE :0 225: MOV TEMP[12].x, IMM[0].yyyy 226: ENDIF 227: MOV TEMP[10].x, TEMP[12].xxxx 228: ABS TEMP[12].x, TEMP[7].xxxx 229: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 230: UIF TEMP[12].xxxx :0 231: MOV TEMP[12].x, CONST[73].yyyy 232: ELSE :0 233: MOV TEMP[12].x, IMM[0].yyyy 234: ENDIF 235: MOV TEMP[10].y, TEMP[12].xxxx 236: ABS TEMP[12].x, TEMP[7].xxxx 237: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 238: UIF TEMP[12].xxxx :0 239: MOV TEMP[12].x, CONST[73].zzzz 240: ELSE :0 241: MOV TEMP[12].x, IMM[0].yyyy 242: ENDIF 243: MOV TEMP[10].z, TEMP[12].xxxx 244: ABS TEMP[12].x, TEMP[7].xxxx 245: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 246: UIF TEMP[12].xxxx :0 247: MOV TEMP[12].x, CONST[73].wwww 248: ELSE :0 249: MOV TEMP[12].x, IMM[0].yyyy 250: ENDIF 251: MOV TEMP[10].w, TEMP[12].xxxx 252: MOV TEMP[9], TEMP[10] 253: ABS TEMP[12].x, TEMP[7].xxxx 254: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 255: UIF TEMP[12].xxxx :0 256: MOV TEMP[12].x, CONST[74].xxxx 257: ELSE :0 258: MOV TEMP[12].x, IMM[0].yyyy 259: ENDIF 260: MOV TEMP[10].x, TEMP[12].xxxx 261: ABS TEMP[12].x, TEMP[7].xxxx 262: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 263: UIF TEMP[12].xxxx :0 264: MOV TEMP[12].x, CONST[74].yyyy 265: ELSE :0 266: MOV TEMP[12].x, IMM[0].yyyy 267: ENDIF 268: MOV TEMP[10].y, TEMP[12].xxxx 269: ABS TEMP[12].x, TEMP[7].xxxx 270: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 271: UIF TEMP[12].xxxx :0 272: MOV TEMP[12].x, CONST[74].zzzz 273: ELSE :0 274: MOV TEMP[12].x, IMM[0].yyyy 275: ENDIF 276: MOV TEMP[10].z, TEMP[12].xxxx 277: ABS TEMP[12].x, TEMP[7].xxxx 278: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 279: UIF TEMP[12].xxxx :0 280: MOV TEMP[12].x, CONST[74].wwww 281: ELSE :0 282: MOV TEMP[12].x, IMM[0].yyyy 283: ENDIF 284: MOV TEMP[10].w, TEMP[12].xxxx 285: MOV TEMP[8], TEMP[10] 286: ABS TEMP[12].x, TEMP[7].yyyy 287: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 288: UIF TEMP[12].xxxx :0 289: MOV TEMP[12].x, CONST[77].xxxx 290: ELSE :0 291: MOV TEMP[12].x, TEMP[9].xxxx 292: ENDIF 293: MOV TEMP[10].x, TEMP[12].xxxx 294: ABS TEMP[12].x, TEMP[7].yyyy 295: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 296: UIF TEMP[12].xxxx :0 297: MOV TEMP[12].x, CONST[77].yyyy 298: ELSE :0 299: MOV TEMP[12].x, TEMP[9].yyyy 300: ENDIF 301: MOV TEMP[10].y, TEMP[12].xxxx 302: ABS TEMP[12].x, TEMP[7].yyyy 303: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 304: UIF TEMP[12].xxxx :0 305: MOV TEMP[12].x, CONST[77].zzzz 306: ELSE :0 307: MOV TEMP[12].x, TEMP[9].zzzz 308: ENDIF 309: MOV TEMP[10].z, TEMP[12].xxxx 310: ABS TEMP[12].x, TEMP[7].yyyy 311: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 312: UIF TEMP[12].xxxx :0 313: MOV TEMP[12].x, CONST[77].wwww 314: ELSE :0 315: MOV TEMP[12].x, TEMP[9].wwww 316: ENDIF 317: MOV TEMP[10].w, TEMP[12].xxxx 318: MOV TEMP[9], TEMP[10] 319: ABS TEMP[12].x, TEMP[7].yyyy 320: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 321: UIF TEMP[12].xxxx :0 322: MOV TEMP[12].x, CONST[78].xxxx 323: ELSE :0 324: MOV TEMP[12].x, TEMP[8].xxxx 325: ENDIF 326: MOV TEMP[10].x, TEMP[12].xxxx 327: ABS TEMP[12].x, TEMP[7].yyyy 328: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 329: UIF TEMP[12].xxxx :0 330: MOV TEMP[12].x, CONST[78].yyyy 331: ELSE :0 332: MOV TEMP[12].x, TEMP[8].yyyy 333: ENDIF 334: MOV TEMP[10].y, TEMP[12].xxxx 335: ABS TEMP[12].x, TEMP[7].yyyy 336: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 337: UIF TEMP[12].xxxx :0 338: MOV TEMP[12].x, CONST[78].zzzz 339: ELSE :0 340: MOV TEMP[12].x, TEMP[8].zzzz 341: ENDIF 342: MOV TEMP[10].z, TEMP[12].xxxx 343: ABS TEMP[12].x, TEMP[7].yyyy 344: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 345: UIF TEMP[12].xxxx :0 346: MOV TEMP[12].x, CONST[78].wwww 347: ELSE :0 348: MOV TEMP[12].x, TEMP[8].wwww 349: ENDIF 350: MOV TEMP[10].w, TEMP[12].xxxx 351: MOV TEMP[8], TEMP[10] 352: ABS TEMP[12].x, TEMP[7].zzzz 353: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 354: UIF TEMP[12].xxxx :0 355: MOV TEMP[12].x, CONST[81].xxxx 356: ELSE :0 357: MOV TEMP[12].x, TEMP[9].xxxx 358: ENDIF 359: MOV TEMP[10].x, TEMP[12].xxxx 360: ABS TEMP[12].x, TEMP[7].zzzz 361: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 362: UIF TEMP[12].xxxx :0 363: MOV TEMP[12].x, CONST[81].yyyy 364: ELSE :0 365: MOV TEMP[12].x, TEMP[9].yyyy 366: ENDIF 367: MOV TEMP[10].y, TEMP[12].xxxx 368: ABS TEMP[12].x, TEMP[7].zzzz 369: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 370: UIF TEMP[12].xxxx :0 371: MOV TEMP[12].x, CONST[81].zzzz 372: ELSE :0 373: MOV TEMP[12].x, TEMP[9].zzzz 374: ENDIF 375: MOV TEMP[10].z, TEMP[12].xxxx 376: ABS TEMP[12].x, TEMP[7].zzzz 377: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 378: UIF TEMP[12].xxxx :0 379: MOV TEMP[12].x, CONST[81].wwww 380: ELSE :0 381: MOV TEMP[12].x, TEMP[9].wwww 382: ENDIF 383: MOV TEMP[10].w, TEMP[12].xxxx 384: MOV TEMP[9], TEMP[10] 385: ABS TEMP[12].x, TEMP[7].zzzz 386: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 387: UIF TEMP[12].xxxx :0 388: MOV TEMP[12].x, CONST[82].xxxx 389: ELSE :0 390: MOV TEMP[12].x, TEMP[8].xxxx 391: ENDIF 392: MOV TEMP[10].x, TEMP[12].xxxx 393: ABS TEMP[12].x, TEMP[7].zzzz 394: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 395: UIF TEMP[12].xxxx :0 396: MOV TEMP[12].x, CONST[82].yyyy 397: ELSE :0 398: MOV TEMP[12].x, TEMP[8].yyyy 399: ENDIF 400: MOV TEMP[10].y, TEMP[12].xxxx 401: ABS TEMP[12].x, TEMP[7].zzzz 402: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 403: UIF TEMP[12].xxxx :0 404: MOV TEMP[12].x, CONST[82].zzzz 405: ELSE :0 406: MOV TEMP[12].x, TEMP[8].zzzz 407: ENDIF 408: MOV TEMP[10].z, TEMP[12].xxxx 409: ABS TEMP[12].x, TEMP[7].zzzz 410: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 411: UIF TEMP[12].xxxx :0 412: MOV TEMP[12].x, CONST[82].wwww 413: ELSE :0 414: MOV TEMP[12].x, TEMP[8].wwww 415: ENDIF 416: MOV TEMP[10].w, TEMP[12].xxxx 417: DP4 TEMP[9].x, TEMP[1], TEMP[9] 418: MOV_SAT TEMP[9].x, TEMP[9].xxxx 419: DP4 TEMP[12].x, TEMP[1], TEMP[10] 420: MOV_SAT TEMP[12].x, TEMP[12].xxxx 421: MOV TEMP[9].y, TEMP[12].xxxx 422: ABS TEMP[12].x, TEMP[7].xxxx 423: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 424: UIF TEMP[12].xxxx :0 425: MOV TEMP[12].x, CONST[86].zzzz 426: ELSE :0 427: MOV TEMP[12].x, IMM[0].yyyy 428: ENDIF 429: ABS TEMP[13].x, TEMP[7].xxxx 430: FSGE TEMP[13].x, -TEMP[13].xxxx, IMM[0].yyyy 431: UIF TEMP[13].xxxx :0 432: MOV TEMP[13].x, CONST[86].wwww 433: ELSE :0 434: MOV TEMP[13].x, IMM[0].yyyy 435: ENDIF 436: MOV TEMP[10].y, TEMP[13].xxxx 437: ABS TEMP[13].x, TEMP[7].xxxx 438: FSGE TEMP[13].x, -TEMP[13].xxxx, IMM[0].yyyy 439: UIF TEMP[13].xxxx :0 440: MOV TEMP[13].x, CONST[86].xxxx 441: ELSE :0 442: MOV TEMP[13].x, IMM[0].yyyy 443: ENDIF 444: MOV TEMP[10].z, TEMP[13].xxxx 445: ABS TEMP[13].x, TEMP[7].xxxx 446: FSGE TEMP[13].x, -TEMP[13].xxxx, IMM[0].yyyy 447: UIF TEMP[13].xxxx :0 448: MOV TEMP[13].x, CONST[86].yyyy 449: ELSE :0 450: MOV TEMP[13].x, IMM[0].yyyy 451: ENDIF 452: MOV TEMP[10].w, TEMP[13].xxxx 453: ABS TEMP[13].x, TEMP[7].yyyy 454: FSGE TEMP[13].x, -TEMP[13].xxxx, IMM[0].yyyy 455: UIF TEMP[13].xxxx :0 456: MOV TEMP[13].x, CONST[87].zzzz 457: ELSE :0 458: MOV TEMP[13].x, TEMP[12].xxxx 459: ENDIF 460: ABS TEMP[12].x, TEMP[7].yyyy 461: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 462: UIF TEMP[12].xxxx :0 463: MOV TEMP[12].x, CONST[87].wwww 464: ELSE :0 465: MOV TEMP[12].x, TEMP[10].yyyy 466: ENDIF 467: MOV TEMP[10].y, TEMP[12].xxxx 468: ABS TEMP[12].x, TEMP[7].yyyy 469: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 470: UIF TEMP[12].xxxx :0 471: MOV TEMP[12].x, CONST[87].xxxx 472: ELSE :0 473: MOV TEMP[12].x, TEMP[10].zzzz 474: ENDIF 475: MOV TEMP[10].z, TEMP[12].xxxx 476: ABS TEMP[12].x, TEMP[7].yyyy 477: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 478: UIF TEMP[12].xxxx :0 479: MOV TEMP[12].x, CONST[87].yyyy 480: ELSE :0 481: MOV TEMP[12].x, TEMP[10].wwww 482: ENDIF 483: MOV TEMP[10].w, TEMP[12].xxxx 484: ABS TEMP[12].x, TEMP[7].zzzz 485: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 486: UIF TEMP[12].xxxx :0 487: MOV TEMP[12].x, CONST[88].zzzz 488: ELSE :0 489: MOV TEMP[12].x, TEMP[13].xxxx 490: ENDIF 491: MOV TEMP[10].x, TEMP[12].xxxx 492: ABS TEMP[12].x, TEMP[7].zzzz 493: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 494: UIF TEMP[12].xxxx :0 495: MOV TEMP[12].x, CONST[88].wwww 496: ELSE :0 497: MOV TEMP[12].x, TEMP[10].yyyy 498: ENDIF 499: MOV TEMP[10].y, TEMP[12].xxxx 500: ABS TEMP[12].x, TEMP[7].zzzz 501: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 502: UIF TEMP[12].xxxx :0 503: MOV TEMP[12].x, CONST[88].xxxx 504: ELSE :0 505: MOV TEMP[12].x, TEMP[10].zzzz 506: ENDIF 507: MOV TEMP[10].z, TEMP[12].xxxx 508: ABS TEMP[12].x, TEMP[7].zzzz 509: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 510: UIF TEMP[12].xxxx :0 511: MOV TEMP[12].x, CONST[88].yyyy 512: ELSE :0 513: MOV TEMP[12].x, TEMP[10].wwww 514: ENDIF 515: MOV TEMP[10].w, TEMP[12].xxxx 516: MAD TEMP[4].xy, TEMP[9].xyyy, TEMP[10].xyyy, TEMP[10].zwww 517: ADD TEMP[1], TEMP[4], IMM[2].xxyy 518: TXL TEMP[10].x, TEMP[1], SAMP[1], SHADOW2D 519: MOV TEMP[1].x, TEMP[10].xxxx 520: ADD TEMP[3], TEMP[4], IMM[2].zxyy 521: ADD TEMP[9], TEMP[4], IMM[2].xzyy 522: ADD TEMP[8], TEMP[4], IMM[2].zzyy 523: TXL TEMP[10].x, TEMP[3], SAMP[1], SHADOW2D 524: MOV TEMP[1].y, TEMP[10].xxxx 525: TXL TEMP[10].x, TEMP[9], SAMP[1], SHADOW2D 526: MOV TEMP[1].z, TEMP[10].xxxx 527: TXL TEMP[10].x, TEMP[8], SAMP[1], SHADOW2D 528: MOV TEMP[1].w, TEMP[10].xxxx 529: DP4 TEMP[10].x, TEMP[1], IMM[1].wwww 530: ADD TEMP[3], TEMP[4], IMM[2].xyyy 531: TXL TEMP[12].x, TEMP[3], SAMP[1], SHADOW2D 532: MOV TEMP[3].x, TEMP[12].xxxx 533: ADD TEMP[9], TEMP[4], IMM[2].zyyy 534: TXL TEMP[9].x, TEMP[9], SAMP[1], SHADOW2D 535: ADD TEMP[8], TEMP[4], IMM[2].yzyy 536: TXL TEMP[8].x, TEMP[8], SAMP[1], SHADOW2D 537: ADD TEMP[11], TEMP[4], IMM[2].yxyy 538: TXL TEMP[11].x, TEMP[11], SAMP[1], SHADOW2D 539: MOV TEMP[3].y, TEMP[9].xxxx 540: MOV TEMP[3].z, TEMP[8].xxxx 541: MOV TEMP[3].w, TEMP[11].xxxx 542: DP4 TEMP[3].x, TEMP[3], IMM[2].wwww 543: MOV TEMP[8].xy, TEMP[4].xyyy 544: MOV TEMP[8].z, TEMP[5].xxxx 545: MOV TEMP[8].w, IMM[0].yyyy 546: TXL TEMP[5].x, TEMP[8], SAMP[1], SHADOW2D 547: ADD TEMP[1].x, TEMP[3].xxxx, TEMP[10].xxxx 548: MAD TEMP[1].x, TEMP[5].xxxx, IMM[3].xxxx, TEMP[1].xxxx 549: FSGE TEMP[3].x, TEMP[7].zzzz, IMM[0].yyyy 550: UIF TEMP[3].xxxx :0 551: MOV TEMP[3].x, IMM[0].xxxx 552: ELSE :0 553: MOV TEMP[3].x, TEMP[1].xxxx 554: ENDIF 555: LRP TEMP[4].x, TEMP[6].xxxx, TEMP[2].xxxx, TEMP[3].xxxx 556: MOV TEMP[2].x, TEMP[4].xxxx 557: ENDIF 558: ADD TEMP[1].xyz, -CONST[89].xyzz, IN[3].xyzz 559: DP3 TEMP[3].x, TEMP[1].xyzz, TEMP[1].xyzz 560: MAD TEMP[1].x, TEMP[3].xxxx, CONST[68].yyyy, CONST[68].xxxx 561: MOV_SAT TEMP[3].x, TEMP[1].xxxx 562: LRP TEMP[4].x, TEMP[3].xxxx, IMM[0].xxxx, TEMP[2].xxxx 563: ELSE :0 564: MOV TEMP[4].x, IMM[0].xxxx 565: ENDIF 566: MAD TEMP[1].xyz, IN[1].xyzz, TEMP[4].xxxx, IN[2].xyzz 567: ADD TEMP[3].x, TEMP[0].wwww, IMM[1].yyyy 568: MAD TEMP[3].x, CONST[20].wwww, TEMP[3].xxxx, IMM[0].xxxx 569: ADD TEMP[4].x, TEMP[0].wwww, CONST[12].xxxx 570: ADD TEMP[2].xyz, IMM[1].yyyy, CONST[1].xyzz 571: MOV_SAT TEMP[4].x, TEMP[4].xxxx 572: MAD TEMP[2].xyz, TEMP[4].xxxx, TEMP[2].xyzz, IMM[0].xxxx 573: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xyzz 574: MUL TEMP[2].x, TEMP[3].xxxx, CONST[1].wwww 575: MAD TEMP[3].x, TEMP[2].xxxx, IN[2].wwww, -TEMP[2].xxxx 576: MAD TEMP[2].x, CONST[12].wwww, TEMP[3].xxxx, TEMP[2].xxxx 577: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xyzz 578: ADD TEMP[1].xyz, CONST[20].xyzz, -IN[3].xyzz 579: DP3 TEMP[1].x, TEMP[1].xyzz, TEMP[1].xyzz 580: SQRT TEMP[1].x, TEMP[1].xxxx 581: MAD TEMP[1].x, TEMP[1].xxxx, CONST[21].wwww, CONST[21].xxxx 582: MOV_SAT TEMP[1].x, TEMP[1].xxxx 583: MIN TEMP[1].x, TEMP[1].xxxx, CONST[21].zzzz 584: ABS TEMP[3].x, CONST[12].yyyy 585: MUL TEMP[4].xyz, TEMP[0].xyzz, CONST[30].xxxx 586: MUL TEMP[5].x, CONST[29].wwww, IN[3].wwww 587: FSGE TEMP[3].x, -TEMP[3].xxxx, IMM[0].yyyy 588: UIF TEMP[3].xxxx :0 589: MOV TEMP[3].x, TEMP[2].xxxx 590: ELSE :0 591: MOV TEMP[3].x, TEMP[5].xxxx 592: ENDIF 593: MOV TEMP[2].w, TEMP[3].xxxx 594: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[1].xxxx 595: MAD TEMP[0].xyz, TEMP[0].xyzz, -CONST[30].xxxx, CONST[29].xyzz 596: MAD TEMP[2].xyz, TEMP[1].xxxx, TEMP[0].xyzz, TEMP[4].xyzz 597: MOV OUT[0], TEMP[2] 598: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 28) %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 192) %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 196) %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 204) %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 320) %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 324) %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 328) %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 332) %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 336) %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 344) %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 348) %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 464) %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 468) %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 472) %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 476) %43 = call float @llvm.SI.load.const(<16 x i8> %24, i32 480) %44 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1080) %45 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1084) %46 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1088) %47 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1092) %48 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1168) %49 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1172) %50 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1176) %51 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1180) %52 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1184) %53 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1188) %54 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1192) %55 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1196) %56 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1232) %57 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1236) %58 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1240) %59 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1244) %60 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1248) %61 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1252) %62 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1256) %63 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1260) %64 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1296) %65 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1300) %66 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1304) %67 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1308) %68 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1312) %69 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1316) %70 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1320) %71 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1324) %72 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1376) %73 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1380) %74 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1384) %75 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1388) %76 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1392) %77 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1396) %78 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1400) %79 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1404) %80 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1408) %81 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1412) %82 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1416) %83 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1420) %84 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1424) %85 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1428) %86 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1432) %87 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1440) %88 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %89 = load <8 x i32>, <8 x i32> addrspace(2)* %88, align 32, !tbaa !0 %90 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %91 = load <4 x i32>, <4 x i32> addrspace(2)* %90, align 16, !tbaa !0 %92 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %93 = load <8 x i32>, <8 x i32> addrspace(2)* %92, align 32, !tbaa !0 %94 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %95 = load <4 x i32>, <4 x i32> addrspace(2)* %94, align 16, !tbaa !0 %96 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %97 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %98 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %99 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %100 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) %101 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) %102 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) %103 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) %104 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) %105 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) %106 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) %107 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) %108 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) %109 = bitcast float %96 to i32 %110 = bitcast float %97 to i32 %111 = insertelement <2 x i32> undef, i32 %109, i32 0 %112 = insertelement <2 x i32> %111, i32 %110, i32 1 %113 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %112, <8 x i32> %89, <4 x i32> %91, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %114 = extractelement <4 x float> %113, i32 0 %115 = extractelement <4 x float> %113, i32 1 %116 = extractelement <4 x float> %113, i32 2 %117 = extractelement <4 x float> %113, i32 3 %118 = bitcast float %87 to i32 %119 = icmp eq i32 %118, 0 br i1 %119, label %ENDIF, label %IF IF: ; preds = %main_body %120 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1372) %121 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1368) %122 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1364) %123 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1360) %124 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1148) %125 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1144) %126 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1140) %127 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1136) %128 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1132) %129 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1128) %130 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1124) %131 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1120) %132 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1116) %133 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1112) %134 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1108) %135 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1104) %136 = fadd float %105, 0.000000e+00 %137 = fadd float %106, 0.000000e+00 %138 = fadd float %107, 0.000000e+00 %139 = fmul float %105, 0.000000e+00 %140 = fadd float %139, 1.000000e+00 %141 = fmul float %136, %135 %142 = fmul float %137, %134 %143 = fadd float %141, %142 %144 = fmul float %138, %133 %145 = fadd float %143, %144 %146 = fmul float %140, %132 %147 = fadd float %145, %146 %148 = fmul float %136, %131 %149 = fmul float %137, %130 %150 = fadd float %148, %149 %151 = fmul float %138, %129 %152 = fadd float %150, %151 %153 = fmul float %140, %128 %154 = fadd float %152, %153 %155 = call float @llvm.AMDIL.clamp.(float %147, float 0.000000e+00, float 1.000000e+00) %156 = call float @llvm.AMDIL.clamp.(float %154, float 0.000000e+00, float 1.000000e+00) %157 = fsub float %155, %147 %158 = fsub float %156, %154 %159 = fadd float %157, %158 %160 = fmul float %136, %48 %161 = fmul float %137, %49 %162 = fadd float %160, %161 %163 = fmul float %138, %50 %164 = fadd float %162, %163 %165 = fmul float %140, %51 %166 = fadd float %164, %165 %167 = fmul float %136, %52 %168 = fmul float %137, %53 %169 = fadd float %167, %168 %170 = fmul float %138, %54 %171 = fadd float %169, %170 %172 = fmul float %140, %55 %173 = fadd float %171, %172 %174 = call float @llvm.AMDIL.clamp.(float %166, float 0.000000e+00, float 1.000000e+00) %175 = call float @llvm.AMDIL.clamp.(float %173, float 0.000000e+00, float 1.000000e+00) %176 = fsub float %174, %166 %177 = fsub float %175, %173 %178 = fadd float %176, %177 %179 = fmul float %136, %56 %180 = fmul float %137, %57 %181 = fadd float %179, %180 %182 = fmul float %138, %58 %183 = fadd float %181, %182 %184 = fmul float %140, %59 %185 = fadd float %183, %184 %186 = fmul float %136, %60 %187 = fmul float %137, %61 %188 = fadd float %186, %187 %189 = fmul float %138, %62 %190 = fadd float %188, %189 %191 = fmul float %140, %63 %192 = fadd float %190, %191 %193 = call float @llvm.fabs.f32(float %178) %194 = fcmp ole float %193, -0.000000e+00 %. = select i1 %194, float %166, float %185 %195 = call float @llvm.fabs.f32(float %178) %196 = fcmp ole float %195, -0.000000e+00 %temp24.0 = select i1 %196, float %173, float %192 %197 = call float @llvm.fabs.f32(float %178) %198 = fcmp ole float %197, -0.000000e+00 %.236 = select i1 %198, float 1.000000e+00, float 2.000000e+00 %199 = call float @llvm.fabs.f32(float %159) %200 = fcmp ole float %199, -0.000000e+00 %temp24.2 = select i1 %200, float %147, float %. %201 = call float @llvm.fabs.f32(float %159) %202 = fcmp ole float %201, -0.000000e+00 %.temp24.0 = select i1 %202, float %154, float %temp24.0 %203 = call float @llvm.fabs.f32(float %159) %204 = fcmp ole float %203, -0.000000e+00 %temp12.1 = select i1 %204, float 0.000000e+00, float %.236 %205 = fmul float %136, %127 %206 = fmul float %137, %126 %207 = fadd float %205, %206 %208 = fmul float %138, %125 %209 = fadd float %207, %208 %210 = fmul float %140, %124 %211 = fadd float %209, %210 %212 = fadd float %temp24.2, -5.000000e-01 %213 = fadd float %.temp24.0, -5.000000e-01 %214 = call float @llvm.fabs.f32(float %212) %215 = call float @llvm.fabs.f32(float %213) %216 = fsub float %214, %44 %217 = fsub float %215, %44 %218 = fmul float %216, %45 %219 = fmul float %217, %45 %220 = call float @llvm.AMDIL.clamp.(float %218, float 0.000000e+00, float 1.000000e+00) %221 = call float @llvm.AMDIL.clamp.(float %219, float 0.000000e+00, float 1.000000e+00) %222 = fsub float 1.000000e+00, %220 %223 = fsub float 1.000000e+00, %221 %224 = fmul float %223, %222 %225 = call float @llvm.AMDIL.clamp.(float %temp24.2, float 0.000000e+00, float 1.000000e+00) %226 = call float @llvm.AMDIL.clamp.(float %.temp24.0, float 0.000000e+00, float 1.000000e+00) %227 = fadd float %temp12.1, -1.000000e+00 %228 = fadd float %temp12.1, -2.000000e+00 %229 = call float @llvm.fabs.f32(float %temp12.1) %230 = fcmp ole float %229, -0.000000e+00 %.237 = select i1 %230, float %121, float 0.000000e+00 %231 = call float @llvm.fabs.f32(float %temp12.1) %232 = fcmp ole float %231, -0.000000e+00 %temp48.0 = select i1 %232, float %120, float 0.000000e+00 %233 = call float @llvm.fabs.f32(float %temp12.1) %234 = fcmp ole float %233, -0.000000e+00 %.238 = select i1 %234, float %123, float 0.000000e+00 %235 = call float @llvm.fabs.f32(float %temp12.1) %236 = fcmp ole float %235, -0.000000e+00 %temp48.2 = select i1 %236, float %122, float 0.000000e+00 %237 = call float @llvm.fabs.f32(float %227) %238 = fcmp ole float %237, -0.000000e+00 %..237 = select i1 %238, float %74, float %.237 %239 = call float @llvm.fabs.f32(float %227) %240 = fcmp ole float %239, -0.000000e+00 %temp44.2 = select i1 %240, float %75, float %temp48.0 %241 = call float @llvm.fabs.f32(float %227) %242 = fcmp ole float %241, -0.000000e+00 %..238 = select i1 %242, float %72, float %.238 %243 = call float @llvm.fabs.f32(float %227) %244 = fcmp ole float %243, -0.000000e+00 %temp44.4 = select i1 %244, float %73, float %temp48.2 %245 = call float @llvm.fabs.f32(float %228) %246 = fcmp ole float %245, -0.000000e+00 %...237 = select i1 %246, float %78, float %..237 %247 = call float @llvm.fabs.f32(float %228) %248 = fcmp ole float %247, -0.000000e+00 %temp44.6 = select i1 %248, float %79, float %temp44.2 %249 = call float @llvm.fabs.f32(float %228) %250 = fcmp ole float %249, -0.000000e+00 %...238 = select i1 %250, float %76, float %..238 %251 = call float @llvm.fabs.f32(float %228) %252 = fcmp ole float %251, -0.000000e+00 %temp44.8 = select i1 %252, float %77, float %temp44.4 %253 = fmul float %225, %...237 %254 = fadd float %253, %...238 %255 = fmul float %226, %temp44.6 %256 = fadd float %255, %temp44.8 %257 = fadd float %254, 0x3F40000000000000 %258 = fadd float %256, 0x3F40000000000000 %259 = fadd float %211, 0.000000e+00 %260 = bitcast float %259 to i32 %261 = bitcast float %257 to i32 %262 = bitcast float %258 to i32 %263 = insertelement <4 x i32> undef, i32 %260, i32 0 %264 = insertelement <4 x i32> %263, i32 %261, i32 1 %265 = insertelement <4 x i32> %264, i32 %262, i32 2 %266 = insertelement <4 x i32> %265, i32 0, i32 3 %267 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %266, <8 x i32> %93, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %268 = extractelement <4 x float> %267, i32 0 %269 = fadd float %254, 0xBF40000000000000 %270 = fadd float %256, 0x3F40000000000000 %271 = fadd float %211, 0.000000e+00 %272 = fadd float %254, 0x3F40000000000000 %273 = fadd float %256, 0xBF40000000000000 %274 = fadd float %211, 0.000000e+00 %275 = fadd float %254, 0xBF40000000000000 %276 = fadd float %256, 0xBF40000000000000 %277 = fadd float %211, 0.000000e+00 %278 = bitcast float %271 to i32 %279 = bitcast float %269 to i32 %280 = bitcast float %270 to i32 %281 = insertelement <4 x i32> undef, i32 %278, i32 0 %282 = insertelement <4 x i32> %281, i32 %279, i32 1 %283 = insertelement <4 x i32> %282, i32 %280, i32 2 %284 = insertelement <4 x i32> %283, i32 0, i32 3 %285 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %284, <8 x i32> %93, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %286 = extractelement <4 x float> %285, i32 0 %287 = bitcast float %274 to i32 %288 = bitcast float %272 to i32 %289 = bitcast float %273 to i32 %290 = insertelement <4 x i32> undef, i32 %287, i32 0 %291 = insertelement <4 x i32> %290, i32 %288, i32 1 %292 = insertelement <4 x i32> %291, i32 %289, i32 2 %293 = insertelement <4 x i32> %292, i32 0, i32 3 %294 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %293, <8 x i32> %93, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %295 = extractelement <4 x float> %294, i32 0 %296 = bitcast float %277 to i32 %297 = bitcast float %275 to i32 %298 = bitcast float %276 to i32 %299 = insertelement <4 x i32> undef, i32 %296, i32 0 %300 = insertelement <4 x i32> %299, i32 %297, i32 1 %301 = insertelement <4 x i32> %300, i32 %298, i32 2 %302 = insertelement <4 x i32> %301, i32 0, i32 3 %303 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %302, <8 x i32> %93, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %304 = extractelement <4 x float> %303, i32 0 %305 = fmul float %268, 6.250000e-02 %306 = fmul float %286, 6.250000e-02 %307 = fadd float %305, %306 %308 = fmul float %295, 6.250000e-02 %309 = fadd float %307, %308 %310 = fmul float %304, 6.250000e-02 %311 = fadd float %309, %310 %312 = fadd float %254, 0x3F40000000000000 %313 = fadd float %256, 0.000000e+00 %314 = fadd float %211, 0.000000e+00 %315 = bitcast float %314 to i32 %316 = bitcast float %312 to i32 %317 = bitcast float %313 to i32 %318 = insertelement <4 x i32> undef, i32 %315, i32 0 %319 = insertelement <4 x i32> %318, i32 %316, i32 1 %320 = insertelement <4 x i32> %319, i32 %317, i32 2 %321 = insertelement <4 x i32> %320, i32 0, i32 3 %322 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %321, <8 x i32> %93, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %323 = extractelement <4 x float> %322, i32 0 %324 = fadd float %254, 0xBF40000000000000 %325 = fadd float %256, 0.000000e+00 %326 = fadd float %211, 0.000000e+00 %327 = bitcast float %326 to i32 %328 = bitcast float %324 to i32 %329 = bitcast float %325 to i32 %330 = insertelement <4 x i32> undef, i32 %327, i32 0 %331 = insertelement <4 x i32> %330, i32 %328, i32 1 %332 = insertelement <4 x i32> %331, i32 %329, i32 2 %333 = insertelement <4 x i32> %332, i32 0, i32 3 %334 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %333, <8 x i32> %93, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %335 = extractelement <4 x float> %334, i32 0 %336 = fadd float %254, 0.000000e+00 %337 = fadd float %256, 0xBF40000000000000 %338 = fadd float %211, 0.000000e+00 %339 = bitcast float %338 to i32 %340 = bitcast float %336 to i32 %341 = bitcast float %337 to i32 %342 = insertelement <4 x i32> undef, i32 %339, i32 0 %343 = insertelement <4 x i32> %342, i32 %340, i32 1 %344 = insertelement <4 x i32> %343, i32 %341, i32 2 %345 = insertelement <4 x i32> %344, i32 0, i32 3 %346 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %345, <8 x i32> %93, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %347 = extractelement <4 x float> %346, i32 0 %348 = fadd float %254, 0.000000e+00 %349 = fadd float %256, 0x3F40000000000000 %350 = fadd float %211, 0.000000e+00 %351 = bitcast float %350 to i32 %352 = bitcast float %348 to i32 %353 = bitcast float %349 to i32 %354 = insertelement <4 x i32> undef, i32 %351, i32 0 %355 = insertelement <4 x i32> %354, i32 %352, i32 1 %356 = insertelement <4 x i32> %355, i32 %353, i32 2 %357 = insertelement <4 x i32> %356, i32 0, i32 3 %358 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %357, <8 x i32> %93, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %359 = extractelement <4 x float> %358, i32 0 %360 = fmul float %323, 1.250000e-01 %361 = fmul float %335, 1.250000e-01 %362 = fadd float %360, %361 %363 = fmul float %347, 1.250000e-01 %364 = fadd float %362, %363 %365 = fmul float %359, 1.250000e-01 %366 = fadd float %364, %365 %367 = bitcast float %211 to i32 %368 = bitcast float %254 to i32 %369 = bitcast float %256 to i32 %370 = insertelement <4 x i32> undef, i32 %367, i32 0 %371 = insertelement <4 x i32> %370, i32 %368, i32 1 %372 = insertelement <4 x i32> %371, i32 %369, i32 2 %373 = insertelement <4 x i32> %372, i32 0, i32 3 %374 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %373, <8 x i32> %93, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %375 = extractelement <4 x float> %374, i32 0 %376 = fadd float %366, %311 %377 = fmul float %375, 2.500000e-01 %378 = fadd float %377, %376 %379 = fcmp olt float %224, 1.000000e+00 br i1 %379, label %IF119, label %ENDIF118 ENDIF: ; preds = %main_body, %ENDIF118 %temp16.0 = phi float [ %682, %ENDIF118 ], [ 1.000000e+00, %main_body ] %380 = fmul float %98, %temp16.0 %381 = fadd float %380, %101 %382 = fmul float %99, %temp16.0 %383 = fadd float %382, %102 %384 = fmul float %100, %temp16.0 %385 = fadd float %384, %103 %386 = fadd float %117, -1.000000e+00 %387 = fmul float %35, %386 %388 = fadd float %387, 1.000000e+00 %389 = fadd float %117, %29 %390 = fadd float %25, -1.000000e+00 %391 = fadd float %26, -1.000000e+00 %392 = fadd float %27, -1.000000e+00 %393 = call float @llvm.AMDIL.clamp.(float %389, float 0.000000e+00, float 1.000000e+00) %394 = fmul float %393, %390 %395 = fadd float %394, 1.000000e+00 %396 = fmul float %393, %391 %397 = fadd float %396, 1.000000e+00 %398 = fmul float %393, %392 %399 = fadd float %398, 1.000000e+00 %400 = fmul float %381, %395 %401 = fmul float %383, %397 %402 = fmul float %385, %399 %403 = fmul float %388, %28 %404 = fmul float %403, %104 %405 = fsub float %404, %403 %406 = fmul float %31, %405 %407 = fadd float %406, %403 %408 = fmul float %114, %400 %409 = fmul float %115, %401 %410 = fmul float %116, %402 %411 = fsub float %32, %105 %412 = fsub float %33, %106 %413 = fsub float %34, %107 %414 = fmul float %411, %411 %415 = fmul float %412, %412 %416 = fadd float %415, %414 %417 = fmul float %413, %413 %418 = fadd float %416, %417 %419 = call float @llvm.sqrt.f32(float %418) %420 = fmul float %419, %38 %421 = fadd float %420, %36 %422 = call float @llvm.AMDIL.clamp.(float %421, float 0.000000e+00, float 1.000000e+00) %423 = call float @llvm.minnum.f32(float %422, float %37) %424 = call float @llvm.fabs.f32(float %30) %425 = fmul float %408, %43 %426 = fmul float %409, %43 %427 = fmul float %410, %43 %428 = fmul float %42, %108 %429 = fcmp ole float %424, -0.000000e+00 %.235 = select i1 %429, float %407, float %428 %430 = fmul float %423, %423 %431 = fmul float %43, %408 %432 = fsub float %39, %431 %433 = fmul float %43, %409 %434 = fsub float %40, %433 %435 = fmul float %43, %410 %436 = fsub float %41, %435 %437 = fmul float %430, %432 %438 = fadd float %437, %425 %439 = fmul float %430, %434 %440 = fadd float %439, %426 %441 = fmul float %430, %436 %442 = fadd float %441, %427 %443 = call i32 @llvm.SI.packf16(float %438, float %440) %444 = bitcast i32 %443 to float %445 = call i32 @llvm.SI.packf16(float %442, float %.235) %446 = bitcast i32 %445 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %444, float %446, float %444, float %446) ret void IF119: ; preds = %IF %447 = fadd float %temp12.1, 0.000000e+00 %448 = fadd float %temp12.1, -1.000000e+00 %449 = fadd float %temp12.1, -2.000000e+00 %450 = call float @llvm.fabs.f32(float %447) %451 = fcmp ole float %450, -0.000000e+00 %.239 = select i1 %451, float %48, float 0.000000e+00 %452 = call float @llvm.fabs.f32(float %447) %453 = fcmp ole float %452, -0.000000e+00 %temp48.5 = select i1 %453, float %49, float 0.000000e+00 %454 = call float @llvm.fabs.f32(float %447) %455 = fcmp ole float %454, -0.000000e+00 %.240 = select i1 %455, float %50, float 0.000000e+00 %456 = call float @llvm.fabs.f32(float %447) %457 = fcmp ole float %456, -0.000000e+00 %temp48.7 = select i1 %457, float %51, float 0.000000e+00 %458 = call float @llvm.fabs.f32(float %447) %459 = fcmp ole float %458, -0.000000e+00 %.241 = select i1 %459, float %52, float 0.000000e+00 %460 = call float @llvm.fabs.f32(float %447) %461 = fcmp ole float %460, -0.000000e+00 %temp48.9 = select i1 %461, float %53, float 0.000000e+00 %462 = call float @llvm.fabs.f32(float %447) %463 = fcmp ole float %462, -0.000000e+00 %.242 = select i1 %463, float %54, float 0.000000e+00 %464 = call float @llvm.fabs.f32(float %447) %465 = fcmp ole float %464, -0.000000e+00 %temp48.11 = select i1 %465, float %55, float 0.000000e+00 %466 = call float @llvm.fabs.f32(float %448) %467 = fcmp ole float %466, -0.000000e+00 %..239 = select i1 %467, float %56, float %.239 %468 = call float @llvm.fabs.f32(float %448) %469 = fcmp ole float %468, -0.000000e+00 %temp48.13 = select i1 %469, float %57, float %temp48.5 %470 = call float @llvm.fabs.f32(float %448) %471 = fcmp ole float %470, -0.000000e+00 %..240 = select i1 %471, float %58, float %.240 %472 = call float @llvm.fabs.f32(float %448) %473 = fcmp ole float %472, -0.000000e+00 %temp48.15 = select i1 %473, float %59, float %temp48.7 %474 = call float @llvm.fabs.f32(float %448) %475 = fcmp ole float %474, -0.000000e+00 %..241 = select i1 %475, float %60, float %.241 %476 = call float @llvm.fabs.f32(float %448) %477 = fcmp ole float %476, -0.000000e+00 %temp48.17 = select i1 %477, float %61, float %temp48.9 %478 = call float @llvm.fabs.f32(float %448) %479 = fcmp ole float %478, -0.000000e+00 %..242 = select i1 %479, float %62, float %.242 %480 = call float @llvm.fabs.f32(float %448) %481 = fcmp ole float %480, -0.000000e+00 %temp48.19 = select i1 %481, float %63, float %temp48.11 %482 = call float @llvm.fabs.f32(float %449) %483 = fcmp ole float %482, -0.000000e+00 %...239 = select i1 %483, float %64, float %..239 %484 = call float @llvm.fabs.f32(float %449) %485 = fcmp ole float %484, -0.000000e+00 %temp48.21 = select i1 %485, float %65, float %temp48.13 %486 = call float @llvm.fabs.f32(float %449) %487 = fcmp ole float %486, -0.000000e+00 %...240 = select i1 %487, float %66, float %..240 %488 = call float @llvm.fabs.f32(float %449) %489 = fcmp ole float %488, -0.000000e+00 %temp48.23 = select i1 %489, float %67, float %temp48.15 %490 = call float @llvm.fabs.f32(float %449) %491 = fcmp ole float %490, -0.000000e+00 %...241 = select i1 %491, float %68, float %..241 %492 = call float @llvm.fabs.f32(float %449) %493 = fcmp ole float %492, -0.000000e+00 %temp48.25 = select i1 %493, float %69, float %temp48.17 %494 = call float @llvm.fabs.f32(float %449) %495 = fcmp ole float %494, -0.000000e+00 %...242 = select i1 %495, float %70, float %..242 %496 = call float @llvm.fabs.f32(float %449) %497 = fcmp ole float %496, -0.000000e+00 %temp48.27 = select i1 %497, float %71, float %temp48.19 %498 = fmul float %136, %...239 %499 = fmul float %137, %temp48.21 %500 = fadd float %498, %499 %501 = fmul float %138, %...240 %502 = fadd float %500, %501 %503 = fmul float %140, %temp48.23 %504 = fadd float %502, %503 %505 = call float @llvm.AMDIL.clamp.(float %504, float 0.000000e+00, float 1.000000e+00) %506 = fmul float %136, %...241 %507 = fmul float %137, %temp48.25 %508 = fadd float %506, %507 %509 = fmul float %138, %...242 %510 = fadd float %508, %509 %511 = fmul float %140, %temp48.27 %512 = fadd float %510, %511 %513 = call float @llvm.AMDIL.clamp.(float %512, float 0.000000e+00, float 1.000000e+00) %514 = call float @llvm.fabs.f32(float %447) %515 = fcmp ole float %514, -0.000000e+00 %.243 = select i1 %515, float %74, float 0.000000e+00 %516 = call float @llvm.fabs.f32(float %447) %517 = fcmp ole float %516, -0.000000e+00 %temp52.0 = select i1 %517, float %75, float 0.000000e+00 %518 = call float @llvm.fabs.f32(float %447) %519 = fcmp ole float %518, -0.000000e+00 %.244 = select i1 %519, float %72, float 0.000000e+00 %520 = call float @llvm.fabs.f32(float %447) %521 = fcmp ole float %520, -0.000000e+00 %temp52.2 = select i1 %521, float %73, float 0.000000e+00 %522 = call float @llvm.fabs.f32(float %448) %523 = fcmp ole float %522, -0.000000e+00 %..243 = select i1 %523, float %78, float %.243 %524 = call float @llvm.fabs.f32(float %448) %525 = fcmp ole float %524, -0.000000e+00 %temp48.29 = select i1 %525, float %79, float %temp52.0 %526 = call float @llvm.fabs.f32(float %448) %527 = fcmp ole float %526, -0.000000e+00 %..244 = select i1 %527, float %76, float %.244 %528 = call float @llvm.fabs.f32(float %448) %529 = fcmp ole float %528, -0.000000e+00 %temp48.31 = select i1 %529, float %77, float %temp52.2 %530 = call float @llvm.fabs.f32(float %449) %531 = fcmp ole float %530, -0.000000e+00 %...243 = select i1 %531, float %82, float %..243 %532 = call float @llvm.fabs.f32(float %449) %533 = fcmp ole float %532, -0.000000e+00 %temp48.33 = select i1 %533, float %83, float %temp48.29 %534 = call float @llvm.fabs.f32(float %449) %535 = fcmp ole float %534, -0.000000e+00 %...244 = select i1 %535, float %80, float %..244 %536 = call float @llvm.fabs.f32(float %449) %537 = fcmp ole float %536, -0.000000e+00 %temp48.35 = select i1 %537, float %81, float %temp48.31 %538 = fmul float %505, %...243 %539 = fadd float %538, %...244 %540 = fmul float %513, %temp48.33 %541 = fadd float %540, %temp48.35 %542 = fadd float %539, 0x3F40000000000000 %543 = fadd float %541, 0x3F40000000000000 %544 = fadd float %211, 0.000000e+00 %545 = bitcast float %544 to i32 %546 = bitcast float %542 to i32 %547 = bitcast float %543 to i32 %548 = insertelement <4 x i32> undef, i32 %545, i32 0 %549 = insertelement <4 x i32> %548, i32 %546, i32 1 %550 = insertelement <4 x i32> %549, i32 %547, i32 2 %551 = insertelement <4 x i32> %550, i32 0, i32 3 %552 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %551, <8 x i32> %93, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %553 = extractelement <4 x float> %552, i32 0 %554 = fadd float %539, 0xBF40000000000000 %555 = fadd float %541, 0x3F40000000000000 %556 = fadd float %211, 0.000000e+00 %557 = fadd float %539, 0x3F40000000000000 %558 = fadd float %541, 0xBF40000000000000 %559 = fadd float %211, 0.000000e+00 %560 = fadd float %539, 0xBF40000000000000 %561 = fadd float %541, 0xBF40000000000000 %562 = fadd float %211, 0.000000e+00 %563 = bitcast float %556 to i32 %564 = bitcast float %554 to i32 %565 = bitcast float %555 to i32 %566 = insertelement <4 x i32> undef, i32 %563, i32 0 %567 = insertelement <4 x i32> %566, i32 %564, i32 1 %568 = insertelement <4 x i32> %567, i32 %565, i32 2 %569 = insertelement <4 x i32> %568, i32 0, i32 3 %570 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %569, <8 x i32> %93, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %571 = extractelement <4 x float> %570, i32 0 %572 = bitcast float %559 to i32 %573 = bitcast float %557 to i32 %574 = bitcast float %558 to i32 %575 = insertelement <4 x i32> undef, i32 %572, i32 0 %576 = insertelement <4 x i32> %575, i32 %573, i32 1 %577 = insertelement <4 x i32> %576, i32 %574, i32 2 %578 = insertelement <4 x i32> %577, i32 0, i32 3 %579 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %578, <8 x i32> %93, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %580 = extractelement <4 x float> %579, i32 0 %581 = bitcast float %562 to i32 %582 = bitcast float %560 to i32 %583 = bitcast float %561 to i32 %584 = insertelement <4 x i32> undef, i32 %581, i32 0 %585 = insertelement <4 x i32> %584, i32 %582, i32 1 %586 = insertelement <4 x i32> %585, i32 %583, i32 2 %587 = insertelement <4 x i32> %586, i32 0, i32 3 %588 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %587, <8 x i32> %93, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %589 = extractelement <4 x float> %588, i32 0 %590 = fmul float %553, 6.250000e-02 %591 = fmul float %571, 6.250000e-02 %592 = fadd float %590, %591 %593 = fmul float %580, 6.250000e-02 %594 = fadd float %592, %593 %595 = fmul float %589, 6.250000e-02 %596 = fadd float %594, %595 %597 = fadd float %539, 0x3F40000000000000 %598 = fadd float %541, 0.000000e+00 %599 = fadd float %211, 0.000000e+00 %600 = bitcast float %599 to i32 %601 = bitcast float %597 to i32 %602 = bitcast float %598 to i32 %603 = insertelement <4 x i32> undef, i32 %600, i32 0 %604 = insertelement <4 x i32> %603, i32 %601, i32 1 %605 = insertelement <4 x i32> %604, i32 %602, i32 2 %606 = insertelement <4 x i32> %605, i32 0, i32 3 %607 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %606, <8 x i32> %93, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %608 = extractelement <4 x float> %607, i32 0 %609 = fadd float %539, 0xBF40000000000000 %610 = fadd float %541, 0.000000e+00 %611 = fadd float %211, 0.000000e+00 %612 = bitcast float %611 to i32 %613 = bitcast float %609 to i32 %614 = bitcast float %610 to i32 %615 = insertelement <4 x i32> undef, i32 %612, i32 0 %616 = insertelement <4 x i32> %615, i32 %613, i32 1 %617 = insertelement <4 x i32> %616, i32 %614, i32 2 %618 = insertelement <4 x i32> %617, i32 0, i32 3 %619 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %618, <8 x i32> %93, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %620 = extractelement <4 x float> %619, i32 0 %621 = fadd float %539, 0.000000e+00 %622 = fadd float %541, 0xBF40000000000000 %623 = fadd float %211, 0.000000e+00 %624 = bitcast float %623 to i32 %625 = bitcast float %621 to i32 %626 = bitcast float %622 to i32 %627 = insertelement <4 x i32> undef, i32 %624, i32 0 %628 = insertelement <4 x i32> %627, i32 %625, i32 1 %629 = insertelement <4 x i32> %628, i32 %626, i32 2 %630 = insertelement <4 x i32> %629, i32 0, i32 3 %631 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %630, <8 x i32> %93, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %632 = extractelement <4 x float> %631, i32 0 %633 = fadd float %539, 0.000000e+00 %634 = fadd float %541, 0x3F40000000000000 %635 = fadd float %211, 0.000000e+00 %636 = bitcast float %635 to i32 %637 = bitcast float %633 to i32 %638 = bitcast float %634 to i32 %639 = insertelement <4 x i32> undef, i32 %636, i32 0 %640 = insertelement <4 x i32> %639, i32 %637, i32 1 %641 = insertelement <4 x i32> %640, i32 %638, i32 2 %642 = insertelement <4 x i32> %641, i32 0, i32 3 %643 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %642, <8 x i32> %93, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %644 = extractelement <4 x float> %643, i32 0 %645 = fmul float %608, 1.250000e-01 %646 = fmul float %620, 1.250000e-01 %647 = fadd float %645, %646 %648 = fmul float %632, 1.250000e-01 %649 = fadd float %647, %648 %650 = fmul float %644, 1.250000e-01 %651 = fadd float %649, %650 %652 = bitcast float %211 to i32 %653 = bitcast float %539 to i32 %654 = bitcast float %541 to i32 %655 = insertelement <4 x i32> undef, i32 %652, i32 0 %656 = insertelement <4 x i32> %655, i32 %653, i32 1 %657 = insertelement <4 x i32> %656, i32 %654, i32 2 %658 = insertelement <4 x i32> %657, i32 0, i32 3 %659 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %658, <8 x i32> %93, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %660 = extractelement <4 x float> %659, i32 0 %661 = fadd float %651, %596 %662 = fmul float %660, 2.500000e-01 %663 = fadd float %662, %661 %664 = fcmp oge float %449, 0.000000e+00 %.245 = select i1 %664, float 1.000000e+00, float %663 %665 = fsub float 1.000000e+00, %224 %666 = fmul float %378, %224 %667 = fmul float %.245, %665 %668 = fadd float %666, %667 br label %ENDIF118 ENDIF118: ; preds = %IF, %IF119 %temp8.0 = phi float [ %668, %IF119 ], [ %378, %IF ] %669 = fsub float %105, %84 %670 = fsub float %106, %85 %671 = fsub float %107, %86 %672 = fmul float %669, %669 %673 = fmul float %670, %670 %674 = fadd float %673, %672 %675 = fmul float %671, %671 %676 = fadd float %674, %675 %677 = fmul float %676, %47 %678 = fadd float %677, %46 %679 = call float @llvm.AMDIL.clamp.(float %678, float 0.000000e+00, float 1.000000e+00) %680 = fsub float 1.000000e+00, %679 %681 = fmul float %temp8.0, %680 %682 = fadd float %679, %681 br label %ENDIF } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300 s_movk_i32 s0, 0x5a0 ; B00005A0 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_mov_b32 m0, s10 ; BEFC030A s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 v_interp_p1_f32 v12, v0, 0, 0, [m0] ; C8300000 v_interp_p2_f32 v12, [v12], v1, 0, 0, [m0] ; C8310001 v_interp_p1_f32 v13, v0, 1, 0, [m0] ; C8340100 v_interp_p2_f32 v13, [v13], v1, 1, 0, [m0] ; C8350101 v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[12:15], s0 ; C2000C00 v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401 v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500 v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501 v_interp_p1_f32 v7, v0, 2, 1, [m0] ; C81C0600 v_interp_p2_f32 v7, [v7], v1, 2, 1, [m0] ; C81D0601 v_interp_p1_f32 v2, v0, 0, 2, [m0] ; C8080800 v_interp_p2_f32 v2, [v2], v1, 0, 2, [m0] ; C8090801 v_interp_p1_f32 v3, v0, 1, 2, [m0] ; C80C0900 v_interp_p2_f32 v3, [v3], v1, 1, 2, [m0] ; C80D0901 v_interp_p1_f32 v4, v0, 2, 2, [m0] ; C8100A00 v_interp_p2_f32 v4, [v4], v1, 2, 2, [m0] ; C8110A01 v_interp_p1_f32 v9, v0, 3, 2, [m0] ; C8240B00 v_interp_p2_f32 v9, [v9], v1, 3, 2, [m0] ; C8250B01 v_interp_p1_f32 v8, v0, 0, 3, [m0] ; C8200C00 s_buffer_load_dword s3, s[12:15], 0x57 ; C2018D57 s_buffer_load_dword s2, s[12:15], 0x78 ; C2010D78 v_interp_p2_f32 v8, [v8], v1, 0, 3, [m0] ; C8210C01 v_interp_p1_f32 v10, v0, 1, 3, [m0] ; C8280D00 v_interp_p2_f32 v10, [v10], v1, 1, 3, [m0] ; C8290D01 v_interp_p1_f32 v11, v0, 2, 3, [m0] ; C82C0E00 v_interp_p2_f32 v11, [v11], v1, 2, 3, [m0] ; C82D0E01 v_interp_p1_f32 v0, v0, 3, 3, [m0] ; C8000F00 v_interp_p2_f32 v0, [v0], v1, 3, 3, [m0] ; C8010F01 image_sample v[12:15], 15, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[16:23], s[8:11] ; F0800F00 00440C0C s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_ne_i32_e64 s[0:1], 0, s0 ; D10A0000 00000080 v_mov_b32_e32 v16, 1.0 ; 7E2002F2 s_waitcnt vmcnt(0) ; BF8C0770 s_and_saveexec_b64 s[8:9], s[0:1] ; BE882400 s_xor_b64 s[8:9], exec, s[8:9] ; 8988087E s_cbranch_execz BB0_4 ; BF880000 s_movk_i32 s10, 0x438 ; B00A0438 s_movk_i32 s11, 0x43c ; B00B043C s_movk_i32 s16, 0x440 ; B0100440 s_movk_i32 s17, 0x444 ; B0110444 s_movk_i32 s18, 0x490 ; B0120490 s_movk_i32 s19, 0x494 ; B0130494 s_movk_i32 s20, 0x498 ; B0140498 s_movk_i32 s21, 0x49c ; B015049C s_movk_i32 s22, 0x4a0 ; B01604A0 s_movk_i32 s23, 0x4a4 ; B01704A4 s_movk_i32 s24, 0x4a8 ; B01804A8 s_movk_i32 s25, 0x4ac ; B01904AC s_movk_i32 s26, 0x4d0 ; B01A04D0 s_movk_i32 s27, 0x4d4 ; B01B04D4 s_movk_i32 s28, 0x4d8 ; B01C04D8 s_movk_i32 s29, 0x4dc ; B01D04DC s_movk_i32 s30, 0x4e0 ; B01E04E0 s_movk_i32 s31, 0x4e4 ; B01F04E4 s_movk_i32 s32, 0x4e8 ; B02004E8 s_movk_i32 s33, 0x4ec ; B02104EC s_movk_i32 s34, 0x560 ; B0220560 s_movk_i32 s35, 0x564 ; B0230564 s_movk_i32 s36, 0x568 ; B0240568 s_movk_i32 s37, 0x56c ; B025056C s_movk_i32 s38, 0x570 ; B0260570 s_movk_i32 s39, 0x574 ; B0270574 s_movk_i32 s40, 0x578 ; B0280578 s_movk_i32 s41, 0x57c ; B029057C s_movk_i32 s1, 0x590 ; B0010590 s_movk_i32 s52, 0x594 ; B0340594 s_movk_i32 s0, 0x598 ; B0000598 s_movk_i32 s42, 0x55c ; B02A055C s_movk_i32 s43, 0x558 ; B02B0558 s_movk_i32 s44, 0x554 ; B02C0554 s_movk_i32 s45, 0x550 ; B02D0550 s_movk_i32 s46, 0x47c ; B02E047C s_movk_i32 s47, 0x478 ; B02F0478 v_add_f32_e32 v16, 0, v8 ; 06201080 v_add_f32_e32 v19, 0, v10 ; 06261480 v_add_f32_e32 v18, 0, v11 ; 06241680 v_mad_f32 v17, 0, v8, 1.0 ; D2820011 03CA1080 s_buffer_load_dword s54, s[12:15], s10 ; C21B0C0A s_buffer_load_dword s53, s[12:15], s11 ; C21A8C0B s_buffer_load_dword s10, s[12:15], s16 ; C2050C10 s_buffer_load_dword s48, s[12:15], s17 ; C2180C11 s_buffer_load_dword s11, s[12:15], s18 ; C2058C12 s_buffer_load_dword s16, s[12:15], s19 ; C2080C13 s_buffer_load_dword s17, s[12:15], s20 ; C2088C14 s_buffer_load_dword s18, s[12:15], s21 ; C2090C15 s_buffer_load_dword s19, s[12:15], s22 ; C2098C16 s_buffer_load_dword s20, s[12:15], s23 ; C20A0C17 s_buffer_load_dword s21, s[12:15], s24 ; C20A8C18 s_buffer_load_dword s22, s[12:15], s25 ; C20B0C19 s_buffer_load_dword s23, s[12:15], s26 ; C20B8C1A s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s48 ; 7E020230 s_buffer_load_dword s24, s[12:15], s27 ; C20C0C1B s_buffer_load_dword s25, s[12:15], s28 ; C20C8C1C s_buffer_load_dword s26, s[12:15], s29 ; C20D0C1D s_buffer_load_dword s27, s[12:15], s30 ; C20D8C1E s_buffer_load_dword s28, s[12:15], s31 ; C20E0C1F s_buffer_load_dword s29, s[12:15], s32 ; C20E8C20 s_buffer_load_dword s30, s[12:15], s33 ; C20F0C21 s_buffer_load_dword s31, s[12:15], s34 ; C20F8C22 s_buffer_load_dword s32, s[12:15], s35 ; C2100C23 s_buffer_load_dword s33, s[12:15], s36 ; C2108C24 s_movk_i32 s34, 0x46c ; B022046C s_buffer_load_dword s34, s[12:15], s34 ; C2110C22 s_movk_i32 s35, 0x468 ; B0230468 s_buffer_load_dword s35, s[12:15], s35 ; C2118C23 s_movk_i32 s36, 0x464 ; B0240464 s_buffer_load_dword s36, s[12:15], s36 ; C2120C24 s_movk_i32 s48, 0x460 ; B0300460 s_buffer_load_dword s48, s[12:15], s48 ; C2180C30 s_movk_i32 s49, 0x45c ; B031045C s_buffer_load_dword s49, s[12:15], s49 ; C2188C31 s_movk_i32 s50, 0x458 ; B0320458 s_buffer_load_dword s50, s[12:15], s50 ; C2190C32 s_movk_i32 s51, 0x454 ; B0330454 s_buffer_load_dword s51, s[12:15], s51 ; C2198C33 s_movk_i32 s55, 0x450 ; B0370450 s_buffer_load_dword s55, s[12:15], s55 ; C21B8C37 v_mul_f32_e32 v20, s16, v19 ; 10282610 v_mac_f32_e32 v20, s11, v16 ; 3E28200B v_mac_f32_e32 v20, s17, v18 ; 3E282411 v_mac_f32_e32 v20, s18, v17 ; 3E282212 v_mul_f32_e32 v21, s20, v19 ; 102A2614 v_mac_f32_e32 v21, s19, v16 ; 3E2A2013 v_mac_f32_e32 v21, s21, v18 ; 3E2A2415 v_mac_f32_e32 v21, s22, v17 ; 3E2A2216 v_add_f32_e64 v22, 0, v20 clamp ; D2060816 00022880 v_add_f32_e64 v23, 0, v21 clamp ; D2060817 00022A80 v_subrev_f32_e32 v22, v20, v22 ; 0A2C2D14 v_subrev_f32_e32 v23, v21, v23 ; 0A2E2F15 v_add_f32_e32 v22, v23, v22 ; 062C2D17 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v23, s24, v19 ; 102E2618 v_mac_f32_e32 v23, s23, v16 ; 3E2E2017 v_mac_f32_e32 v23, s25, v18 ; 3E2E2419 v_mac_f32_e32 v23, s26, v17 ; 3E2E221A v_mul_f32_e32 v24, s28, v19 ; 1030261C v_mac_f32_e32 v24, s27, v16 ; 3E30201B v_mac_f32_e32 v24, s29, v18 ; 3E30241D v_mac_f32_e32 v24, s30, v17 ; 3E30221E v_mov_b32_e32 v26, 0x80000000 ; 7E3402FF 80000000 v_cmp_le_f32_e64 vcc, |v22|, v26 ; D006016A 00023516 v_cndmask_b32_e32 v20, v23, v20 ; 00282917 v_cndmask_b32_e32 v21, v24, v21 ; 002A2B18 v_cndmask_b32_e64 v22, 2.0, 1.0, vcc ; D2000016 01A9E4F4 v_mul_f32_e32 v23, s51, v19 ; 102E2633 v_mac_f32_e32 v23, s55, v16 ; 3E2E2037 v_mac_f32_e32 v23, s50, v18 ; 3E2E2432 v_mac_f32_e32 v23, s49, v17 ; 3E2E2231 v_mul_f32_e32 v24, s36, v19 ; 10302624 v_mac_f32_e32 v24, s48, v16 ; 3E302030 v_mac_f32_e32 v24, s35, v18 ; 3E302423 v_mac_f32_e32 v24, s34, v17 ; 3E302222 s_movk_i32 s34, 0x474 ; B0220474 s_buffer_load_dword s35, s[12:15], s34 ; C2118C22 v_add_f32_e64 v25, 0, v23 clamp ; D2060819 00022E80 v_add_f32_e64 v27, 0, v24 clamp ; D206081B 00023080 v_subrev_f32_e32 v25, v23, v25 ; 0A323317 v_subrev_f32_e32 v27, v24, v27 ; 0A363718 v_add_f32_e32 v25, v27, v25 ; 0632331B v_cmp_le_f32_e64 vcc, |v25|, v26 ; D006016A 00023519 v_cndmask_b32_e32 v23, v20, v23 ; 002E2F14 v_cndmask_b32_e32 v24, v21, v24 ; 00303115 v_cndmask_b32_e64 v25, v22, 0, vcc ; D2000019 01A90116 s_buffer_load_dword s34, s[12:15], s37 ; C2110C25 s_buffer_load_dword s43, s[12:15], s43 ; C2158C2B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v20, s35, v19 ; 10282623 s_movk_i32 s35, 0x470 ; B0230470 s_buffer_load_dword s48, s[12:15], s35 ; C2180C23 s_buffer_load_dword s47, s[12:15], s47 ; C2178C2F s_buffer_load_dword s46, s[12:15], s46 ; C2170C2E s_buffer_load_dword s35, s[12:15], s38 ; C2118C26 s_buffer_load_dword s36, s[12:15], s39 ; C2120C27 s_buffer_load_dword s37, s[12:15], s40 ; C2128C28 s_buffer_load_dword s39, s[12:15], s42 ; C2138C2A s_buffer_load_dword s45, s[12:15], s45 ; C2168C2D s_buffer_load_dword s38, s[12:15], s41 ; C2130C29 s_buffer_load_dword s44, s[12:15], s44 ; C2160C2C s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v20, s48, v16 ; 3E282030 v_mac_f32_e32 v20, s47, v18 ; 3E28242F v_mac_f32_e32 v20, s46, v17 ; 3E28222E v_mov_b32_e32 v21, s43 ; 7E2A022B s_load_dwordx4 s[40:43], s[4:5], 0x4 ; C0940504 v_add_f32_e64 v27, 0, v23 clamp ; D206081B 00022E80 v_add_f32_e64 v28, 0, v24 clamp ; D206081C 00023080 v_add_f32_e32 v22, -1.0, v25 ; 062C32F3 v_add_f32_e32 v29, -2.0, v25 ; 063A32F5 v_mov_b32_e32 v30, s39 ; 7E3C0227 v_mov_b32_e32 v31, s45 ; 7E3E022D v_cmp_le_f32_e64 vcc, |v25|, v26 ; D006016A 00023519 v_cndmask_b32_e32 v21, 0, v21 ; 002A2A80 v_cndmask_b32_e32 v30, 0, v30 ; 003C3C80 v_cndmask_b32_e32 v31, 0, v31 ; 003E3E80 v_mov_b32_e32 v32, s44 ; 7E40022C v_cndmask_b32_e32 v32, 0, v32 ; 00404080 v_cmp_le_f32_e64 vcc, |v22|, v26 ; D006016A 00023516 v_mov_b32_e32 v22, s33 ; 7E2C0221 v_cndmask_b32_e32 v21, v21, v22 ; 002A2D15 v_mov_b32_e32 v22, s34 ; 7E2C0222 v_cndmask_b32_e32 v22, v30, v22 ; 002C2D1E v_mov_b32_e32 v30, s31 ; 7E3C021F v_cndmask_b32_e32 v30, v31, v30 ; 003C3D1F v_mov_b32_e32 v31, s32 ; 7E3E0220 v_cndmask_b32_e32 v31, v32, v31 ; 003E3F20 v_cmp_le_f32_e64 vcc, |v29|, v26 ; D006016A 0002351D v_mov_b32_e32 v26, s37 ; 7E340225 v_cndmask_b32_e32 v26, v21, v26 ; 00343515 v_mov_b32_e32 v21, s38 ; 7E2A0226 v_cndmask_b32_e32 v29, v22, v21 ; 003A2B16 v_mov_b32_e32 v21, s35 ; 7E2A0223 v_cndmask_b32_e32 v21, v30, v21 ; 002A2B1E v_mov_b32_e32 v22, s36 ; 7E2C0224 v_cndmask_b32_e32 v22, v31, v22 ; 002C2D1F v_mac_f32_e32 v21, v26, v27 ; 3E2A371A v_mac_f32_e32 v22, v29, v28 ; 3E2C391D v_mov_b32_e32 v26, 0x3a000000 ; 7E3402FF 3A000000 v_add_f32_e32 v28, v26, v21 ; 06382B1A v_add_f32_e32 v29, v26, v22 ; 063A2D1A v_add_f32_e32 v27, 0, v20 ; 06362880 s_mov_b32 s39, 0 ; BEA70380 s_load_dwordx8 s[44:51], s[6:7], 0x8 ; C0D60708 v_mov_b32_e32 v30, s39 ; 7E3C0227 v_mov_b32_e32 v26, 0xba000000 ; 7E3402FF BA000000 v_add_f32_e32 v31, v26, v21 ; 063E2B1A v_mov_b32_e32 v32, v27 ; 7E40031B v_mov_b32_e32 v33, v28 ; 7E42031C v_mov_b32_e32 v34, v29 ; 7E44031D v_mov_b32_e32 v35, v30 ; 7E46031E v_mov_b32_e32 v33, v31 ; 7E42031F v_add_f32_e32 v23, -0.5, v23 ; 062E2EF1 v_add_f32_e32 v24, -0.5, v24 ; 063030F1 v_add_f32_e32 v26, v26, v22 ; 06342D1A v_mov_b32_e32 v36, v27 ; 7E48031B v_mov_b32_e32 v37, v28 ; 7E4A031C v_mov_b32_e32 v38, v29 ; 7E4C031D v_mov_b32_e32 v39, v30 ; 7E4E031E v_sub_f32_e64 v23, |v23|, s54 ; D2080117 00006D17 v_mov_b32_e32 v34, v29 ; 7E44031D v_mov_b32_e32 v38, v26 ; 7E4C031A v_sub_f32_e64 v24, |v24|, s54 ; D2080118 00006D18 v_mov_b32_e32 v35, s39 ; 7E460227 v_mov_b32_e32 v39, s39 ; 7E4E0227 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_c_l v31, 1, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[44:51], s[40:43] ; F0B00100 014B1F1B image_sample_c_l v40, 1, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[44:51], s[40:43] ; F0B00100 014B2820 v_mov_b32_e32 v34, v26 ; 7E44031A image_sample_c_l v36, 1, 0, 0, 0, 0, 0, 0, 0, v[36:39], s[44:51], s[40:43] ; F0B00100 014B2424 v_mov_b32_e32 v35, s39 ; 7E460227 image_sample_c_l v37, 1, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[44:51], s[40:43] ; F0B00100 014B2520 v_add_f32_e32 v34, 0, v22 ; 06442C80 v_mov_b32_e32 v41, v27 ; 7E52031B v_mov_b32_e32 v42, v28 ; 7E54031C v_mov_b32_e32 v43, v29 ; 7E56031D v_mov_b32_e32 v44, v30 ; 7E58031E v_mul_f32_e32 v23, s53, v23 ; 102E2E35 v_mov_b32_e32 v43, v34 ; 7E560322 v_add_f32_e32 v28, 0, v21 ; 06382A80 v_mul_f32_e32 v24, s53, v24 ; 10303035 v_mov_b32_e32 v44, s39 ; 7E580227 v_mov_b32_e32 v45, v27 ; 7E5A031B v_mov_b32_e32 v46, v28 ; 7E5C031C v_mov_b32_e32 v47, v29 ; 7E5E031D v_mov_b32_e32 v48, v30 ; 7E60031E image_sample_c_l v38, 1, 0, 0, 0, 0, 0, 0, 0, v[41:44], s[44:51], s[40:43] ; F0B00100 014B2629 v_mov_b32_e32 v47, v26 ; 7E5E031A v_mov_b32_e32 v35, s39 ; 7E460227 image_sample_c_l v26, 1, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[44:51], s[40:43] ; F0B00100 014B1A20 v_mov_b32_e32 v48, s39 ; 7E600227 image_sample_c_l v32, 1, 0, 0, 0, 0, 0, 0, 0, v[45:48], s[44:51], s[40:43] ; F0B00100 014B202D v_mov_b32_e32 v30, s39 ; 7E3C0227 image_sample_c_l v27, 1, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[44:51], s[40:43] ; F0B00100 014B1B1B v_mov_b32_e32 v28, 0x3d800000 ; 7E3802FF 3D800000 s_waitcnt vmcnt(6) ; BF8C0776 v_mul_f32_e32 v29, v28, v40 ; 103A511C v_mac_f32_e32 v29, v28, v31 ; 3E3A3F1C s_waitcnt vmcnt(5) ; BF8C0775 v_mac_f32_e32 v29, v28, v36 ; 3E3A491C s_waitcnt vmcnt(4) ; BF8C0774 v_mac_f32_e32 v29, v28, v37 ; 3E3A4B1C v_mov_b32_e32 v28, 0x3e000000 ; 7E3802FF 3E000000 s_waitcnt vmcnt(2) ; BF8C0772 v_mul_f32_e32 v30, v28, v26 ; 103C351C v_mac_f32_e32 v30, v28, v38 ; 3E3C4D1C s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v30, v28, v32 ; 3E3C411C s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v30, v28, v27 ; 3E3C371C s_buffer_load_dword s54, s[12:15], s1 ; C21B0C01 s_buffer_load_dword s53, s[12:15], s52 ; C21A8C34 s_buffer_load_dword s52, s[12:15], s0 ; C21A0C00 v_add_f32_e64 v23, 0, v23 clamp ; D2060817 00022E80 v_add_f32_e64 v24, 0, v24 clamp ; D2060818 00023080 v_sub_f32_e32 v23, 1.0, v23 ; 082E2EF2 v_mad_f32 v26, -v24, v23, v23 ; D282001A 245E2F18 v_mov_b32_e32 v23, 0 ; 7E2E0280 v_add_f32_e32 v24, v29, v30 ; 06303D1D image_sample_c_l v21, 1, 0, 0, 0, 0, 0, 0, 0, v[20:23], s[44:51], s[40:43] ; F0B00100 014B1514 s_waitcnt vmcnt(0) ; BF8C0770 v_madmk_f32_e32 v24, v21, v24, 0x3e800000 ; 40303115 3E800000 v_cmp_gt_f32_e32 vcc, 1.0, v26 ; 7C0834F2 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[56:57], vcc ; BEB8246A s_xor_b64 s[56:57], exec, s[56:57] ; 89B8387E s_cbranch_execz BB0_5 ; BF880000 s_movk_i32 s0, 0x510 ; B0000510 s_movk_i32 s1, 0x514 ; B0010514 v_mov_b32_e32 v21, s11 ; 7E2A020B v_mov_b32_e32 v22, s16 ; 7E2C0210 v_mov_b32_e32 v23, s17 ; 7E2E0211 v_mov_b32_e32 v27, s18 ; 7E360212 v_mov_b32_e32 v28, s19 ; 7E380213 v_mov_b32_e32 v29, s20 ; 7E3A0214 v_mov_b32_e32 v30, s21 ; 7E3C0215 v_mov_b32_e32 v31, s22 ; 7E3E0216 v_mov_b32_e32 v32, s23 ; 7E400217 v_mov_b32_e32 v33, s24 ; 7E420218 v_mov_b32_e32 v34, s25 ; 7E440219 v_mov_b32_e32 v35, s26 ; 7E46021A v_mov_b32_e32 v36, s27 ; 7E48021B s_buffer_load_dword s55, s[12:15], s0 ; C21B8C00 s_buffer_load_dword s58, s[12:15], s1 ; C21D0C01 s_movk_i32 s0, 0x518 ; B0000518 s_buffer_load_dword s59, s[12:15], s0 ; C21D8C00 s_movk_i32 s0, 0x51c ; B000051C s_buffer_load_dword s60, s[12:15], s0 ; C21E0C00 s_movk_i32 s0, 0x520 ; B0000520 s_buffer_load_dword s61, s[12:15], s0 ; C21E8C00 v_add_f32_e32 v37, 0, v25 ; 064A3280 v_mov_b32_e32 v38, 0x80000000 ; 7E4C02FF 80000000 v_cmp_le_f32_e64 vcc, |v37|, v38 ; D006016A 00024D25 v_add_f32_e32 v37, -1.0, v25 ; 064A32F3 v_cmp_le_f32_e64 s[0:1], |v37|, v38 ; D0060100 00024D25 v_cndmask_b32_e32 v21, 0, v21 ; 002A2A80 v_cndmask_b32_e64 v21, v21, v32, s[0:1] ; D2000015 00024115 v_cndmask_b32_e32 v22, 0, v22 ; 002C2C80 v_cndmask_b32_e64 v22, v22, v33, s[0:1] ; D2000016 00024316 v_mov_b32_e32 v32, s28 ; 7E40021C v_cndmask_b32_e32 v23, 0, v23 ; 002E2E80 v_cndmask_b32_e64 v23, v23, v34, s[0:1] ; D2000017 00024517 v_mov_b32_e32 v33, s29 ; 7E42021D v_cndmask_b32_e32 v27, 0, v27 ; 00363680 v_cndmask_b32_e64 v27, v27, v35, s[0:1] ; D200001B 0002471B v_mov_b32_e32 v34, s30 ; 7E44021E v_cndmask_b32_e32 v28, 0, v28 ; 00383880 v_cndmask_b32_e64 v28, v28, v36, s[0:1] ; D200001C 0002491C v_mov_b32_e32 v35, s31 ; 7E46021F v_cndmask_b32_e32 v29, 0, v29 ; 003A3A80 v_cndmask_b32_e64 v29, v29, v32, s[0:1] ; D200001D 0002411D v_mov_b32_e32 v32, s33 ; 7E400221 v_cndmask_b32_e32 v30, 0, v30 ; 003C3C80 v_cndmask_b32_e64 v30, v30, v33, s[0:1] ; D200001E 0002431E v_mov_b32_e32 v33, s34 ; 7E420222 v_cndmask_b32_e32 v31, 0, v31 ; 003E3E80 v_cndmask_b32_e64 v31, v31, v34, s[0:1] ; D200001F 0002451F v_mov_b32_e32 v34, s37 ; 7E440225 v_cndmask_b32_e32 v32, 0, v32 ; 00404080 v_cndmask_b32_e64 v32, v32, v34, s[0:1] ; D2000020 00024520 v_mov_b32_e32 v34, s38 ; 7E440226 v_cndmask_b32_e32 v33, 0, v33 ; 00424280 v_cndmask_b32_e64 v33, v33, v34, s[0:1] ; D2000021 00024521 v_mov_b32_e32 v34, s35 ; 7E440223 v_cndmask_b32_e32 v35, 0, v35 ; 00464680 v_cndmask_b32_e64 v34, v35, v34, s[0:1] ; D2000022 00024523 v_mov_b32_e32 v35, s32 ; 7E460220 v_cndmask_b32_e32 v35, 0, v35 ; 00464680 v_mov_b32_e32 v36, s36 ; 7E480224 v_cndmask_b32_e64 v35, v35, v36, s[0:1] ; D2000023 00024923 s_movk_i32 s0, 0x524 ; B0000524 s_buffer_load_dword s0, s[12:15], s0 ; C2000C00 s_movk_i32 s1, 0x528 ; B0010528 s_buffer_load_dword s1, s[12:15], s1 ; C2008C01 s_movk_i32 s62, 0x52c ; B03E052C s_buffer_load_dword s62, s[12:15], s62 ; C21F0C3E s_movk_i32 s63, 0x580 ; B03F0580 s_buffer_load_dword s63, s[12:15], s63 ; C21F8C3F v_add_f32_e32 v25, -2.0, v25 ; 063232F5 v_cmp_le_f32_e64 vcc, |v25|, v38 ; D006016A 00024D19 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v36, s55 ; 7E480237 s_movk_i32 s55, 0x588 ; B0370588 s_buffer_load_dword s55, s[12:15], s55 ; C21B8C37 v_cndmask_b32_e32 v21, v21, v36 ; 002A4915 v_mov_b32_e32 v36, s58 ; 7E48023A s_movk_i32 s58, 0x58c ; B03A058C s_buffer_load_dword s58, s[12:15], s58 ; C21D0C3A v_cndmask_b32_e32 v22, v22, v36 ; 002C4916 v_mov_b32_e32 v36, s59 ; 7E48023B s_movk_i32 s59, 0x584 ; B03B0584 s_buffer_load_dword s59, s[12:15], s59 ; C21D8C3B v_cndmask_b32_e32 v23, v23, v36 ; 002E4917 v_mov_b32_e32 v36, s60 ; 7E48023C v_cndmask_b32_e32 v27, v27, v36 ; 0036491B v_mov_b32_e32 v36, s61 ; 7E48023D v_cndmask_b32_e32 v28, v28, v36 ; 0038491C v_mov_b32_e32 v36, s0 ; 7E480200 v_cndmask_b32_e32 v29, v29, v36 ; 003A491D v_mov_b32_e32 v36, s1 ; 7E480201 v_cndmask_b32_e32 v30, v30, v36 ; 003C491E v_mov_b32_e32 v36, s62 ; 7E48023E v_cndmask_b32_e32 v31, v31, v36 ; 003E491F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v36, s55 ; 7E480237 v_mul_f32_e32 v22, v22, v19 ; 102C2716 v_mac_f32_e32 v22, v21, v16 ; 3E2C2115 v_mov_b32_e32 v21, s58 ; 7E2A023A v_mac_f32_e32 v22, v23, v18 ; 3E2C2517 v_mov_b32_e32 v23, s63 ; 7E2E023F v_mac_f32_e32 v22, v27, v17 ; 3E2C231B v_mov_b32_e32 v27, s59 ; 7E36023B v_add_f32_e64 v37, 0, v22 clamp ; D2060825 00022C80 v_mul_f32_e32 v19, v29, v19 ; 1026271D v_mac_f32_e32 v19, v28, v16 ; 3E26211C v_mac_f32_e32 v19, v30, v18 ; 3E26251E v_mac_f32_e32 v19, v31, v17 ; 3E26231F v_add_f32_e64 v16, 0, v19 clamp ; D2060810 00022680 v_cndmask_b32_e32 v17, v32, v36 ; 00224920 v_cndmask_b32_e32 v18, v33, v21 ; 00242B21 v_cndmask_b32_e32 v21, v34, v23 ; 002A2F22 v_cndmask_b32_e32 v22, v35, v27 ; 002C3723 v_mac_f32_e32 v21, v17, v37 ; 3E2A4B11 v_mac_f32_e32 v22, v18, v16 ; 3E2C2112 v_mov_b32_e32 v16, 0x3a000000 ; 7E2002FF 3A000000 v_add_f32_e32 v28, v16, v21 ; 06382B10 v_add_f32_e32 v29, v16, v22 ; 063A2D10 v_add_f32_e32 v27, 0, v20 ; 06362880 v_mov_b32_e32 v30, s39 ; 7E3C0227 v_mov_b32_e32 v16, 0xba000000 ; 7E2002FF BA000000 v_add_f32_e32 v17, v16, v21 ; 06222B10 v_mov_b32_e32 v31, v27 ; 7E3E031B v_mov_b32_e32 v32, v28 ; 7E40031C v_mov_b32_e32 v33, v29 ; 7E42031D v_mov_b32_e32 v34, v30 ; 7E44031E v_mov_b32_e32 v32, v17 ; 7E400311 v_add_f32_e32 v16, v16, v22 ; 06202D10 v_mov_b32_e32 v33, v29 ; 7E42031D v_mov_b32_e32 v35, v27 ; 7E46031B v_mov_b32_e32 v36, v28 ; 7E48031C v_mov_b32_e32 v37, v29 ; 7E4A031D v_mov_b32_e32 v38, v30 ; 7E4C031E image_sample_c_l v17, 1, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[44:51], s[40:43] ; F0B00100 014B111B v_mov_b32_e32 v34, s39 ; 7E440227 v_mov_b32_e32 v37, v16 ; 7E4A0310 image_sample_c_l v18, 1, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[44:51], s[40:43] ; F0B00100 014B121F v_mov_b32_e32 v38, s39 ; 7E4C0227 v_mov_b32_e32 v33, v16 ; 7E420310 image_sample_c_l v19, 1, 0, 0, 0, 0, 0, 0, 0, v[35:38], s[44:51], s[40:43] ; F0B00100 014B1323 v_mov_b32_e32 v34, s39 ; 7E440227 image_sample_c_l v23, 1, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[44:51], s[40:43] ; F0B00100 014B171F v_mov_b32_e32 v30, 0x3d800000 ; 7E3C02FF 3D800000 s_waitcnt vmcnt(2) ; BF8C0772 v_mul_f32_e32 v18, v30, v18 ; 1024251E v_add_f32_e32 v33, 0, v22 ; 06422C80 v_mov_b32_e32 v34, v27 ; 7E44031B v_mov_b32_e32 v35, v28 ; 7E46031C v_mov_b32_e32 v36, v29 ; 7E48031D v_mov_b32_e32 v37, v30 ; 7E4A031E v_mac_f32_e32 v18, v30, v17 ; 3E24231E v_mov_b32_e32 v36, v33 ; 7E480321 s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v18, v30, v19 ; 3E24271E v_mov_b32_e32 v37, s39 ; 7E4A0227 v_add_f32_e32 v28, 0, v21 ; 06382A80 image_sample_c_l v17, 1, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[44:51], s[40:43] ; F0B00100 014B1122 v_mov_b32_e32 v35, v27 ; 7E46031B v_mov_b32_e32 v36, v28 ; 7E48031C v_mov_b32_e32 v37, v29 ; 7E4A031D v_mov_b32_e32 v38, v30 ; 7E4C031E s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v18, v30, v23 ; 3E242F1E v_mov_b32_e32 v34, s39 ; 7E440227 v_mov_b32_e32 v37, v16 ; 7E4A0310 image_sample_c_l v16, 1, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[44:51], s[40:43] ; F0B00100 014B101F v_mov_b32_e32 v38, s39 ; 7E4C0227 image_sample_c_l v19, 1, 0, 0, 0, 0, 0, 0, 0, v[35:38], s[44:51], s[40:43] ; F0B00100 014B1323 v_mov_b32_e32 v30, s39 ; 7E3C0227 image_sample_c_l v23, 1, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[44:51], s[40:43] ; F0B00100 014B171B v_mov_b32_e32 v27, 0x3e000000 ; 7E3602FF 3E000000 s_waitcnt vmcnt(2) ; BF8C0772 v_mul_f32_e32 v16, v27, v16 ; 1020211B v_mac_f32_e32 v16, v27, v17 ; 3E20231B s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v16, v27, v19 ; 3E20271B s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v16, v27, v23 ; 3E202F1B v_mov_b32_e32 v23, 0 ; 7E2E0280 image_sample_c_l v17, 1, 0, 0, 0, 0, 0, 0, 0, v[20:23], s[44:51], s[40:43] ; F0B00100 014B1114 v_add_f32_e32 v16, v18, v16 ; 06202112 s_waitcnt vmcnt(0) ; BF8C0770 v_madmk_f32_e32 v16, v17, v16, 0x3e800000 ; 40202111 3E800000 v_cmp_le_f32_e32 vcc, 0, v25 ; 7C063280 v_cndmask_b32_e64 v16, v16, 1.0, vcc ; D2000010 01A9E510 v_mad_f32 v16, -v26, v16, v16 ; D2820010 2442211A v_mac_f32_e32 v16, v26, v24 ; 3E20311A v_mov_b32_e32 v24, v16 ; 7E300310 s_or_b64 exec, exec, s[56:57] ; 88FE387E v_subrev_f32_e32 v16, s54, v8 ; 0A201036 v_subrev_f32_e32 v17, s53, v10 ; 0A221435 v_subrev_f32_e32 v18, s52, v11 ; 0A241634 v_mul_f32_e32 v16, v16, v16 ; 10202110 v_mac_f32_e32 v16, v17, v17 ; 3E202311 v_mac_f32_e32 v16, v18, v18 ; 3E202512 v_mad_f32 v1, v1, v16, s10 ; D2820001 002A2101 v_add_f32_e64 v16, 0, v1 clamp ; D2060810 00020280 v_sub_f32_e32 v1, 1.0, v16 ; 080220F2 v_mac_f32_e32 v16, v1, v24 ; 3E203101 s_or_b64 exec, exec, s[8:9] ; 88FE087E s_buffer_load_dword s20, s[12:15], 0x4 ; C20A0D04 s_buffer_load_dword s21, s[12:15], 0x5 ; C20A8D05 s_buffer_load_dword s22, s[12:15], 0x6 ; C20B0D06 s_buffer_load_dword s18, s[12:15], 0x7 ; C2090D07 s_buffer_load_dword s23, s[12:15], 0x30 ; C20B8D30 s_buffer_load_dword s0, s[12:15], 0x31 ; C2000D31 s_buffer_load_dword s19, s[12:15], 0x33 ; C2098D33 s_buffer_load_dword s11, s[12:15], 0x50 ; C2058D50 s_buffer_load_dword s16, s[12:15], 0x51 ; C2080D51 s_buffer_load_dword s17, s[12:15], 0x52 ; C2088D52 s_buffer_load_dword s24, s[12:15], 0x53 ; C20C0D53 s_buffer_load_dword s7, s[12:15], 0x54 ; C2038D54 s_buffer_load_dword s6, s[12:15], 0x56 ; C2030D56 s_buffer_load_dword s1, s[12:15], 0x74 ; C2008D74 s_buffer_load_dword s4, s[12:15], 0x75 ; C2020D75 s_buffer_load_dword s5, s[12:15], 0x76 ; C2028D76 s_buffer_load_dword s10, s[12:15], 0x77 ; C2050D77 v_mov_b32_e32 v17, s3 ; 7E220203 v_mov_b32_e32 v1, s2 ; 7E020202 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v2, v16, v5 ; 3E040B10 v_mac_f32_e32 v3, v16, v6 ; 3E060D10 v_mac_f32_e32 v4, v16, v7 ; 3E080F10 v_mad_f32 v5, v15, s24, -s24 ; D2820005 8060310F v_add_f32_e32 v6, s23, v15 ; 060C1E17 v_add_f32_e64 v6, 0, v6 clamp ; D2060806 00020C80 v_mad_f32 v7, s20, v6, -v6 ; D2820007 841A0C14 v_mad_f32 v15, s21, v6, -v6 ; D282000F 841A0C15 v_mad_f32 v6, s22, v6, -v6 ; D2820006 841A0C16 v_mac_f32_e32 v2, v2, v7 ; 3E040F02 v_mac_f32_e32 v3, v3, v15 ; 3E061F03 v_mac_f32_e32 v4, v4, v6 ; 3E080D04 v_mad_f32 v5, v5, s18, s18 ; D2820005 00482505 v_mad_f32 v6, v5, v9, -v5 ; D2820006 84161305 v_mac_f32_e32 v5, s19, v6 ; 3E0A0C13 v_mul_f32_e32 v2, v2, v12 ; 10041902 v_mul_f32_e32 v3, v3, v13 ; 10061B03 v_mul_f32_e32 v4, v4, v14 ; 10081D04 v_sub_f32_e32 v6, s11, v8 ; 080C100B v_sub_f32_e32 v7, s16, v10 ; 080E1410 v_sub_f32_e32 v8, s17, v11 ; 08101611 v_mul_f32_e32 v6, v6, v6 ; 100C0D06 v_mac_f32_e32 v6, v7, v7 ; 3E0C0F07 v_mac_f32_e32 v6, v8, v8 ; 3E0C1108 v_sqrt_f32_e32 v6, v6 ; 7E0C6706 v_mad_f32 v6, v17, v6, s7 ; D2820006 001E0D11 v_add_f32_e64 v6, 0, v6 clamp ; D2060806 00020C80 v_min_f32_e32 v6, s6, v6 ; 1E0C0C06 v_mul_f32_e32 v7, s2, v2 ; 100E0402 v_mul_f32_e32 v8, s2, v3 ; 10100602 v_mul_f32_e32 v9, s2, v4 ; 10120802 v_mul_f32_e32 v0, s10, v0 ; 1000000A v_mov_b32_e32 v10, 0x80000000 ; 7E1402FF 80000000 v_cmp_le_f32_e64 vcc, |s0|, v10 ; D006016A 00021400 v_cndmask_b32_e32 v0, v0, v5 ; 00000B00 v_mul_f32_e32 v5, v6, v6 ; 100A0D06 v_mad_f32 v2, -v2, v1, s1 ; D2820002 20060302 v_mad_f32 v3, -v3, v1, s4 ; D2820003 20120303 v_mad_f32 v1, -v4, v1, s5 ; D2820001 20160304 v_mac_f32_e32 v7, v2, v5 ; 3E0E0B02 v_mac_f32_e32 v8, v3, v5 ; 3E100B03 v_mac_f32_e32 v9, v1, v5 ; 3E120B01 v_cvt_pkrtz_f16_f32_e32 v1, v7, v8 ; 5E021107 v_cvt_pkrtz_f16_f32_e32 v0, v9, v0 ; 5E000109 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 52 Code Size: 2676 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL CONST[0..95] DCL TEMP[0..2], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 1020.0400, 2.1000} IMM[1] FLT32 { 3.1000, 0.1000, 1.1000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: MAD TEMP[1].x, IMM[0].zzzz, IN[1].zzzz, IMM[0].wwww 2: F2I TEMP[1].x, TEMP[1].xxxx 3: UARL ADDR[0].x, TEMP[1].xxxx 4: UARL ADDR[0].x, TEMP[1].xxxx 5: DP4 TEMP[0].x, IN[2], CONST[ADDR[0].x] 6: MAD TEMP[1].x, IMM[0].zzzz, IN[1].zzzz, IMM[1].xxxx 7: F2I TEMP[1].x, TEMP[1].xxxx 8: UARL ADDR[0].x, TEMP[1].xxxx 9: DP4 TEMP[1].x, IN[2], CONST[ADDR[0].x] 10: MOV TEMP[0].y, TEMP[1].xxxx 11: MAD TEMP[1].x, IMM[0].zzzz, IN[1].zzzz, IMM[1].yyyy 12: F2I TEMP[1].x, TEMP[1].xxxx 13: UARL ADDR[0].x, TEMP[1].xxxx 14: MOV TEMP[1], CONST[ADDR[0].x] 15: MAD TEMP[2].x, IMM[0].zzzz, IN[1].zzzz, IMM[1].zzzz 16: F2I TEMP[2].x, TEMP[2].xxxx 17: UARL ADDR[0].x, TEMP[2].xxxx 18: MOV TEMP[2], CONST[ADDR[0].x] 19: MOV OUT[2], IN[1] 20: MOV OUT[3], TEMP[1] 21: MOV OUT[1], IN[0] 22: MOV OUT[4], TEMP[2] 23: MOV OUT[0], TEMP[0] 24: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 %16 = add i32 %5, %8 %17 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %15, i32 0, i32 %16) %18 = extractelement <4 x float> %17, i32 0 %19 = extractelement <4 x float> %17, i32 1 %20 = extractelement <4 x float> %17, i32 2 %21 = extractelement <4 x float> %17, i32 3 %22 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = add i32 %5, %8 %25 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %23, i32 0, i32 %24) %26 = extractelement <4 x float> %25, i32 0 %27 = extractelement <4 x float> %25, i32 1 %28 = extractelement <4 x float> %25, i32 2 %29 = extractelement <4 x float> %25, i32 3 %30 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = add i32 %5, %8 %33 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %31, i32 0, i32 %32) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 %38 = fmul float %28, 0x408FE051E0000000 %39 = fadd float %38, 0x4000CCCCC0000000 %40 = fptosi float %39 to i32 %41 = shl i32 %40, 4 %42 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %41) %43 = shl i32 %40, 4 %44 = or i32 %43, 4 %45 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %44) %46 = shl i32 %40, 4 %47 = or i32 %46, 8 %48 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %47) %49 = shl i32 %40, 4 %50 = or i32 %49, 12 %51 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %50) %52 = fmul float %34, %42 %53 = fmul float %35, %45 %54 = fadd float %52, %53 %55 = fmul float %36, %48 %56 = fadd float %54, %55 %57 = fmul float %37, %51 %58 = fadd float %56, %57 %59 = fmul float %28, 0x408FE051E0000000 %60 = fadd float %59, 0x4008CCCCC0000000 %61 = fptosi float %60 to i32 %62 = shl i32 %61, 4 %63 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %62) %64 = shl i32 %61, 4 %65 = or i32 %64, 4 %66 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %65) %67 = shl i32 %61, 4 %68 = or i32 %67, 8 %69 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %68) %70 = shl i32 %61, 4 %71 = or i32 %70, 12 %72 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %71) %73 = fmul float %34, %63 %74 = fmul float %35, %66 %75 = fadd float %73, %74 %76 = fmul float %36, %69 %77 = fadd float %75, %76 %78 = fmul float %37, %72 %79 = fadd float %77, %78 %80 = fmul float %28, 0x408FE051E0000000 %81 = fadd float %80, 0x3FB99999A0000000 %82 = fptosi float %81 to i32 %83 = shl i32 %82, 4 %84 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %83) %85 = shl i32 %82, 4 %86 = or i32 %85, 4 %87 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %86) %88 = shl i32 %82, 4 %89 = or i32 %88, 8 %90 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %89) %91 = shl i32 %82, 4 %92 = or i32 %91, 12 %93 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %92) %94 = fmul float %28, 0x408FE051E0000000 %95 = fadd float %94, 0x3FF19999A0000000 %96 = fptosi float %95 to i32 %97 = shl i32 %96, 4 %98 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %97) %99 = shl i32 %96, 4 %100 = or i32 %99, 4 %101 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %100) %102 = shl i32 %96, 4 %103 = or i32 %102, 8 %104 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %103) %105 = shl i32 %96, 4 %106 = or i32 %105, 12 %107 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %106) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %18, float %19, float %20, float %21) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %26, float %27, float %28, float %29) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %84, float %87, float %90, float %93) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %98, float %101, float %104, float %107) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %58, float %79, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 v_mov_b32_e32 v1, 0x447f028f ; 7E0202FF 447F028F v_mov_b32_e32 v2, 1.0 ; 7E0402F2 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[3:6], v0, s[4:7], 0 idxen ; E00C2000 80010300 buffer_load_format_xyzw v[7:10], v0, s[12:15], 0 idxen ; E00C2000 80030700 buffer_load_format_xyzw v[11:14], v0, s[16:19], 0 idxen ; E00C2000 80040B00 s_waitcnt vmcnt(2) ; BF8C0772 exp 15, 32, 0, 0, 0, v3, v4, v5, v6 ; F800020F 06050403 s_waitcnt vmcnt(1) ; BF8C0771 v_madak_f32_e32 v0, v9, v1, 0x40066666 ; 42000309 40066666 s_waitcnt expcnt(0) ; BF8C070F v_madak_f32_e32 v3, v9, v1, 0x40466666 ; 42060309 40466666 v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_cvt_i32_f32_e32 v3, v3 ; 7E061103 v_madak_f32_e32 v4, v9, v1, 0x3dcccccd ; 42080309 3DCCCCCD v_madak_f32_e32 v1, v9, v1, 0x3f8ccccd ; 42020309 3F8CCCCD v_cvt_i32_f32_e32 v4, v4 ; 7E081104 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_lshlrev_b32_e32 v0, 4, v0 ; 34000084 v_lshlrev_b32_e32 v3, 4, v3 ; 34060684 v_lshlrev_b32_e32 v4, 4, v4 ; 34080884 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 buffer_load_dword v5, v0, s[0:3], 0 offen ; E0301000 80000500 v_or_b32_e32 v6, 4, v0 ; 380C0084 v_or_b32_e32 v15, 8, v0 ; 381E0088 v_or_b32_e32 v0, 12, v0 ; 3800008C buffer_load_dword v16, v3, s[0:3], 0 offen ; E0301000 80001003 v_or_b32_e32 v17, 4, v3 ; 38220684 v_or_b32_e32 v18, 8, v3 ; 38240688 v_or_b32_e32 v3, 12, v3 ; 3806068C buffer_load_dword v19, v4, s[0:3], 0 offen ; E0301000 80001304 v_or_b32_e32 v20, 4, v4 ; 38280884 v_or_b32_e32 v21, 8, v4 ; 382A0888 v_or_b32_e32 v4, 12, v4 ; 3808088C buffer_load_dword v22, v1, s[0:3], 0 offen ; E0301000 80001601 v_or_b32_e32 v23, 4, v1 ; 382E0284 v_or_b32_e32 v24, 8, v1 ; 38300288 v_or_b32_e32 v1, 12, v1 ; 3802028C buffer_load_dword v20, v20, s[0:3], 0 offen ; E0301000 80001414 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 buffer_load_dword v4, v4, s[0:3], 0 offen ; E0301000 80000404 buffer_load_dword v6, v6, s[0:3], 0 offen ; E0301000 80000606 buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 buffer_load_dword v23, v23, s[0:3], 0 offen ; E0301000 80001717 buffer_load_dword v24, v24, s[0:3], 0 offen ; E0301000 80001818 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 buffer_load_dword v15, v15, s[0:3], 0 offen ; E0301000 80000F0F buffer_load_dword v18, v18, s[0:3], 0 offen ; E0301000 80001212 buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 exp 15, 33, 0, 0, 0, v7, v8, v9, v10 ; F800021F 0A090807 s_waitcnt vmcnt(9) ; BF8C0779 exp 15, 34, 0, 0, 0, v19, v20, v21, v4 ; F800022F 04151413 s_waitcnt vmcnt(8) expcnt(0) ; BF8C0708 v_mul_f32_e32 v4, v6, v12 ; 10081906 s_waitcnt vmcnt(7) ; BF8C0777 v_mul_f32_e32 v6, v17, v12 ; 100C1911 v_mac_f32_e32 v4, v5, v11 ; 3E081705 v_mac_f32_e32 v6, v16, v11 ; 3E0C1710 s_waitcnt vmcnt(4) ; BF8C0774 exp 15, 35, 0, 0, 0, v22, v23, v24, v1 ; F800023F 01181716 s_waitcnt vmcnt(3) ; BF8C0773 v_mac_f32_e32 v4, v15, v13 ; 3E081B0F s_waitcnt vmcnt(2) ; BF8C0772 v_mac_f32_e32 v6, v18, v13 ; 3E0C1B12 s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v4, v0, v14 ; 3E081D00 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v6, v3, v14 ; 3E0C1D03 v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 12, 0, 1, 0, v4, v6, v0, v2 ; F80008CF 02000604 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 28 Code Size: 424 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0..1], LOCAL 0: MAD TEMP[0], IN[0], IN[3], IN[2] 1: MUL TEMP[1].x, TEMP[0].wwww, IN[1].wwww 2: MOV TEMP[0].w, TEMP[1].xxxx 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %24 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %25 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %26 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %27 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %28 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) %29 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) %30 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) %31 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) %32 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) %33 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) %34 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) %35 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) %36 = fmul float %23, %32 %37 = fadd float %36, %28 %38 = fmul float %24, %33 %39 = fadd float %38, %29 %40 = fmul float %25, %34 %41 = fadd float %40, %30 %42 = fmul float %26, %35 %43 = fadd float %42, %31 %44 = fmul float %43, %27 %45 = call i32 @llvm.SI.packf16(float %37, float %39) %46 = bitcast i32 %45 to float %47 = call i32 @llvm.SI.packf16(float %41, float %44) %48 = bitcast i32 %47 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %46, float %48, float %46, float %48) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s10 ; BEFC030A v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 3, 1, [m0] ; C8180700 v_interp_p2_f32 v6, [v6], v1, 3, 1, [m0] ; C8190701 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v10, v0, 3, 2, [m0] ; C8280B00 v_interp_p2_f32 v10, [v10], v1, 3, 2, [m0] ; C8290B01 v_interp_p1_f32 v11, v0, 0, 3, [m0] ; C82C0C00 v_interp_p2_f32 v11, [v11], v1, 0, 3, [m0] ; C82D0C01 v_interp_p1_f32 v12, v0, 1, 3, [m0] ; C8300D00 v_interp_p2_f32 v12, [v12], v1, 1, 3, [m0] ; C8310D01 v_interp_p1_f32 v13, v0, 2, 3, [m0] ; C8340E00 v_interp_p2_f32 v13, [v13], v1, 2, 3, [m0] ; C8350E01 v_interp_p1_f32 v0, v0, 3, 3, [m0] ; C8000F00 v_interp_p2_f32 v0, [v0], v1, 3, 3, [m0] ; C8010F01 v_mac_f32_e32 v7, v11, v2 ; 3E0E050B v_mac_f32_e32 v8, v12, v3 ; 3E10070C v_mac_f32_e32 v9, v13, v4 ; 3E12090D v_mac_f32_e32 v10, v0, v5 ; 3E140B00 v_mul_f32_e32 v0, v6, v10 ; 10001506 v_cvt_pkrtz_f16_f32_e32 v1, v7, v8 ; 5E021107 v_cvt_pkrtz_f16_f32_e32 v0, v9, v0 ; 5E000109 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Code Size: 148 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..3] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[2], CONST[2] 2: DP4 TEMP[1].x, IN[2], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: MAD TEMP[1], IN[0], CONST[1], CONST[0] 5: MOV TEMP[2].xy, IN[1].xyxx 6: MOV OUT[1], TEMP[1] 7: MOV OUT[0], TEMP[0] 8: MOV OUT[2], TEMP[2] 9: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 12) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 16) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 20) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 24) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 28) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 32) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 36) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 40) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 44) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 48) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 52) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 56) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 60) %30 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = add i32 %5, %8 %33 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %31, i32 0, i32 %32) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 %38 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = add i32 %5, %8 %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %40) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %45 = load <16 x i8>, <16 x i8> addrspace(2)* %44, align 16, !tbaa !0 %46 = add i32 %5, %8 %47 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %45, i32 0, i32 %46) %48 = extractelement <4 x float> %47, i32 0 %49 = extractelement <4 x float> %47, i32 1 %50 = extractelement <4 x float> %47, i32 2 %51 = extractelement <4 x float> %47, i32 3 %52 = fmul float %48, %22 %53 = fmul float %49, %23 %54 = fadd float %52, %53 %55 = fmul float %50, %24 %56 = fadd float %54, %55 %57 = fmul float %51, %25 %58 = fadd float %56, %57 %59 = fmul float %48, %26 %60 = fmul float %49, %27 %61 = fadd float %59, %60 %62 = fmul float %50, %28 %63 = fadd float %61, %62 %64 = fmul float %51, %29 %65 = fadd float %63, %64 %66 = fmul float %34, %18 %67 = fadd float %66, %14 %68 = fmul float %35, %19 %69 = fadd float %68, %15 %70 = fmul float %36, %20 %71 = fadd float %70, %16 %72 = fmul float %37, %21 %73 = fadd float %72, %17 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %67, float %69, float %71, float %73) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %42, float %43, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %58, float %65, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A v_mov_b32_e32 v1, 1.0 ; 7E0202F2 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_buffer_load_dword s9, s[0:3], 0x1 ; C2048101 s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102 s_buffer_load_dword s11, s[0:3], 0x3 ; C2058103 s_buffer_load_dword s20, s[0:3], 0x4 ; C20A0104 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[8:11], v0, s[16:19], 0 idxen ; E00C2000 80040800 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_buffer_load_dword s6, s[0:3], 0x7 ; C2030107 s_buffer_load_dword s7, s[0:3], 0x8 ; C2038108 s_buffer_load_dword s12, s[0:3], 0x9 ; C2060109 s_buffer_load_dword s13, s[0:3], 0xa ; C206810A s_buffer_load_dword s14, s[0:3], 0xb ; C207010B s_buffer_load_dword s15, s[0:3], 0xc ; C207810C s_buffer_load_dword s16, s[0:3], 0xd ; C208010D s_buffer_load_dword s17, s[0:3], 0xe ; C208810E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s8 ; 7E000208 v_mov_b32_e32 v12, s9 ; 7E180209 v_mov_b32_e32 v13, s10 ; 7E1A020A v_mov_b32_e32 v14, s11 ; 7E1C020B v_mac_f32_e32 v0, s20, v2 ; 3E000414 v_mac_f32_e32 v12, s4, v3 ; 3E180604 v_mac_f32_e32 v13, s5, v4 ; 3E1A0805 v_mac_f32_e32 v14, s6, v5 ; 3E1C0A06 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, s12, v9 ; 1004120C exp 15, 32, 0, 0, 0, v0, v12, v13, v14 ; F800020F 0E0D0C00 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s16, v9 ; 10001210 v_mac_f32_e32 v2, s7, v8 ; 3E041007 v_mac_f32_e32 v0, s15, v8 ; 3E00100F v_mac_f32_e32 v2, s13, v10 ; 3E04140D v_mac_f32_e32 v0, s17, v10 ; 3E001411 exp 15, 33, 0, 0, 0, v6, v7, v0, v0 ; F800021F 00000706 v_mac_f32_e32 v2, s14, v11 ; 3E04160E s_waitcnt expcnt(0) ; BF8C070F v_mac_f32_e32 v0, s0, v11 ; 3E001600 v_mov_b32_e32 v3, 0 ; 7E060280 exp 15, 12, 0, 1, 0, v2, v0, v3, v1 ; F80008CF 01030002 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Code Size: 232 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xyz, IN[0].xyzx 1: MOV TEMP[1].xy, IN[1].xyyy 2: TEX TEMP[1].w, TEMP[1], SAMP[0], 2D 3: MUL TEMP[1].x, IN[0].wwww, TEMP[1].wwww 4: MOV TEMP[0].w, TEMP[1].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %29 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %30 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %31 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %32 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %33 = bitcast float %31 to i32 %34 = bitcast float %32 to i32 %35 = insertelement <2 x i32> undef, i32 %33, i32 0 %36 = insertelement <2 x i32> %35, i32 %34, i32 1 %37 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %36, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %38 = extractelement <4 x float> %37, i32 3 %39 = fmul float %30, %38 %40 = call i32 @llvm.SI.packf16(float %27, float %28) %41 = bitcast i32 %40 to float %42 = call i32 @llvm.SI.packf16(float %29, float %39) %43 = bitcast i32 %42 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %41, float %43, float %41, float %43) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s10 ; BEFC030A s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v0, 8, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[8:15], s[0:3] ; F0800800 00020006 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 104 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..47] DCL TEMP[0..1], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 510.0200, 0.1000} IMM[1] FLT32 { 1.1000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: MAD TEMP[1].x, IMM[0].zzzz, IN[1].zzzz, IMM[0].wwww 2: F2I TEMP[1].x, TEMP[1].xxxx 3: UARL ADDR[0].x, TEMP[1].xxxx 4: UARL ADDR[0].x, TEMP[1].xxxx 5: DP4 TEMP[0].x, IN[2], CONST[ADDR[0].x] 6: MAD TEMP[1].x, IMM[0].zzzz, IN[1].zzzz, IMM[1].xxxx 7: F2I TEMP[1].x, TEMP[1].xxxx 8: UARL ADDR[0].x, TEMP[1].xxxx 9: DP4 TEMP[1].x, IN[2], CONST[ADDR[0].x] 10: MOV TEMP[0].y, TEMP[1].xxxx 11: MOV OUT[2], IN[1] 12: MOV OUT[1], IN[0] 13: MOV OUT[0], TEMP[0] 14: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 %16 = add i32 %5, %8 %17 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %15, i32 0, i32 %16) %18 = extractelement <4 x float> %17, i32 0 %19 = extractelement <4 x float> %17, i32 1 %20 = extractelement <4 x float> %17, i32 2 %21 = extractelement <4 x float> %17, i32 3 %22 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = add i32 %5, %8 %25 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %23, i32 0, i32 %24) %26 = extractelement <4 x float> %25, i32 0 %27 = extractelement <4 x float> %25, i32 1 %28 = extractelement <4 x float> %25, i32 2 %29 = extractelement <4 x float> %25, i32 3 %30 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = add i32 %5, %8 %33 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %31, i32 0, i32 %32) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 %38 = fmul float %28, 0x407FE051E0000000 %39 = fadd float %38, 0x3FB99999A0000000 %40 = fptosi float %39 to i32 %41 = shl i32 %40, 4 %42 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %41) %43 = shl i32 %40, 4 %44 = or i32 %43, 4 %45 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %44) %46 = shl i32 %40, 4 %47 = or i32 %46, 8 %48 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %47) %49 = shl i32 %40, 4 %50 = or i32 %49, 12 %51 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %50) %52 = fmul float %34, %42 %53 = fmul float %35, %45 %54 = fadd float %52, %53 %55 = fmul float %36, %48 %56 = fadd float %54, %55 %57 = fmul float %37, %51 %58 = fadd float %56, %57 %59 = fmul float %28, 0x407FE051E0000000 %60 = fadd float %59, 0x3FF19999A0000000 %61 = fptosi float %60 to i32 %62 = shl i32 %61, 4 %63 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %62) %64 = shl i32 %61, 4 %65 = or i32 %64, 4 %66 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %65) %67 = shl i32 %61, 4 %68 = or i32 %67, 8 %69 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %68) %70 = shl i32 %61, 4 %71 = or i32 %70, 12 %72 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %71) %73 = fmul float %34, %63 %74 = fmul float %35, %66 %75 = fadd float %73, %74 %76 = fmul float %36, %69 %77 = fadd float %75, %76 %78 = fmul float %37, %72 %79 = fadd float %77, %78 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %18, float %19, float %20, float %21) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %26, float %27, float %28, float %29) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %58, float %79, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 v_mov_b32_e32 v1, 0x43ff028f ; 7E0202FF 43FF028F v_mov_b32_e32 v2, 1.0 ; 7E0402F2 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[3:6], v0, s[4:7], 0 idxen ; E00C2000 80010300 buffer_load_format_xyzw v[7:10], v0, s[12:15], 0 idxen ; E00C2000 80030700 buffer_load_format_xyzw v[11:14], v0, s[16:19], 0 idxen ; E00C2000 80040B00 s_waitcnt vmcnt(2) ; BF8C0772 exp 15, 32, 0, 0, 0, v3, v4, v5, v6 ; F800020F 06050403 s_waitcnt vmcnt(1) ; BF8C0771 v_madak_f32_e32 v0, v9, v1, 0x3dcccccd ; 42000309 3DCCCCCD v_madak_f32_e32 v1, v9, v1, 0x3f8ccccd ; 42020309 3F8CCCCD v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_lshlrev_b32_e32 v0, 4, v0 ; 34000084 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 s_waitcnt expcnt(0) ; BF8C070F buffer_load_dword v3, v0, s[0:3], 0 offen ; E0301000 80000300 v_or_b32_e32 v4, 4, v0 ; 38080084 v_or_b32_e32 v5, 8, v0 ; 380A0088 v_or_b32_e32 v0, 12, v0 ; 3800008C buffer_load_dword v6, v1, s[0:3], 0 offen ; E0301000 80000601 v_or_b32_e32 v15, 4, v1 ; 381E0284 buffer_load_dword v4, v4, s[0:3], 0 offen ; E0301000 80000404 buffer_load_dword v15, v15, s[0:3], 0 offen ; E0301000 80000F0F v_or_b32_e32 v16, 8, v1 ; 38200288 v_or_b32_e32 v1, 12, v1 ; 3802028C buffer_load_dword v5, v5, s[0:3], 0 offen ; E0301000 80000505 buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 exp 15, 33, 0, 0, 0, v7, v8, v9, v10 ; F800021F 0A090807 s_waitcnt vmcnt(5) ; BF8C0775 v_mul_f32_e32 v4, v4, v12 ; 10081904 s_waitcnt vmcnt(4) expcnt(0) ; BF8C0704 v_mul_f32_e32 v7, v15, v12 ; 100E190F v_mac_f32_e32 v4, v3, v11 ; 3E081703 v_mac_f32_e32 v7, v6, v11 ; 3E0E1706 s_waitcnt vmcnt(3) ; BF8C0773 v_mac_f32_e32 v4, v5, v13 ; 3E081B05 s_waitcnt vmcnt(2) ; BF8C0772 v_mac_f32_e32 v7, v16, v13 ; 3E0E1B10 s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v4, v0, v14 ; 3E081D00 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v7, v1, v14 ; 3E0E1D01 v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 12, 0, 1, 0, v4, v7, v0, v2 ; F80008CF 02000704 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 20 Code Size: 280 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xyz, IN[0].xyzx 1: MUL TEMP[1].x, IN[0].wwww, IN[1].wwww 2: MOV TEMP[0].w, TEMP[1].xxxx 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %24 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %25 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %26 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %27 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %28 = fmul float %26, %27 %29 = call i32 @llvm.SI.packf16(float %23, float %24) %30 = bitcast i32 %29 to float %31 = call i32 @llvm.SI.packf16(float %25, float %28) %32 = bitcast i32 %31 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %30, float %32, float %30, float %32) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s10 ; BEFC030A v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v0, v0, 3, 1, [m0] ; C8000700 v_interp_p2_f32 v0, [v0], v1, 3, 1, [m0] ; C8010701 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 68 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], CLIPVERTEX DCL OUT[2], GENERIC[0] DCL CONST[0..54] DCL TEMP[0..2], LOCAL 0: MUL TEMP[0].xy, CONST[48].xyyy, IN[1].xyyy 1: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 2: ADD TEMP[1].x, TEMP[0].xxxx, CONST[48].wwww 3: MOV TEMP[1].z, TEMP[1].xxxx 4: MUL TEMP[0].xy, CONST[49].xyyy, IN[1].xyyy 5: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 6: ADD TEMP[0].x, TEMP[0].xxxx, CONST[49].wwww 7: MOV TEMP[1].w, TEMP[0].xxxx 8: MOV TEMP[0].xw, IN[0].xxxw 9: MOV TEMP[1].xy, IN[1].xyxx 10: MAD TEMP[2].x, IN[0].zzzz, CONST[0].zzzz, -IN[0].wwww 11: MOV TEMP[0].z, TEMP[2].xxxx 12: MOV TEMP[0].y, -IN[0].yyyy 13: MAD TEMP[0].xy, CONST[54].xyyy, IN[0].wwww, TEMP[0].xyyy 14: MOV OUT[2], TEMP[1] 15: MOV OUT[0], TEMP[0] 16: MOV OUT[1], IN[0] 17: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 768) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 772) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 780) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 784) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 788) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 796) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 864) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 868) %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 %25 = add i32 %5, %8 %26 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %24, i32 0, i32 %25) %27 = extractelement <4 x float> %26, i32 0 %28 = extractelement <4 x float> %26, i32 1 %29 = extractelement <4 x float> %26, i32 2 %30 = extractelement <4 x float> %26, i32 3 %31 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !tbaa !0 %33 = add i32 %5, %8 %34 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %32, i32 0, i32 %33) %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = fmul float %15, %35 %38 = fmul float %16, %36 %39 = fadd float %38, %37 %40 = fadd float %39, %17 %41 = fmul float %18, %35 %42 = fmul float %19, %36 %43 = fadd float %42, %41 %44 = fadd float %43, %20 %45 = fmul float %29, %14 %46 = fsub float %45, %30 %47 = fmul float %21, %30 %48 = fadd float %47, %27 %49 = fmul float %22, %30 %50 = fsub float %49, %28 %51 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 16 %52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0 %53 = call float @llvm.SI.load.const(<16 x i8> %52, i32 0) %54 = fmul float %53, %27 %55 = call float @llvm.SI.load.const(<16 x i8> %52, i32 4) %56 = fmul float %55, %28 %57 = fadd float %54, %56 %58 = call float @llvm.SI.load.const(<16 x i8> %52, i32 8) %59 = fmul float %58, %29 %60 = fadd float %57, %59 %61 = call float @llvm.SI.load.const(<16 x i8> %52, i32 12) %62 = fmul float %61, %30 %63 = fadd float %60, %62 %64 = call float @llvm.SI.load.const(<16 x i8> %52, i32 16) %65 = fmul float %64, %27 %66 = call float @llvm.SI.load.const(<16 x i8> %52, i32 20) %67 = fmul float %66, %28 %68 = fadd float %65, %67 %69 = call float @llvm.SI.load.const(<16 x i8> %52, i32 24) %70 = fmul float %69, %29 %71 = fadd float %68, %70 %72 = call float @llvm.SI.load.const(<16 x i8> %52, i32 28) %73 = fmul float %72, %30 %74 = fadd float %71, %73 %75 = call float @llvm.SI.load.const(<16 x i8> %52, i32 32) %76 = fmul float %75, %27 %77 = call float @llvm.SI.load.const(<16 x i8> %52, i32 36) %78 = fmul float %77, %28 %79 = fadd float %76, %78 %80 = call float @llvm.SI.load.const(<16 x i8> %52, i32 40) %81 = fmul float %80, %29 %82 = fadd float %79, %81 %83 = call float @llvm.SI.load.const(<16 x i8> %52, i32 44) %84 = fmul float %83, %30 %85 = fadd float %82, %84 %86 = call float @llvm.SI.load.const(<16 x i8> %52, i32 48) %87 = fmul float %86, %27 %88 = call float @llvm.SI.load.const(<16 x i8> %52, i32 52) %89 = fmul float %88, %28 %90 = fadd float %87, %89 %91 = call float @llvm.SI.load.const(<16 x i8> %52, i32 56) %92 = fmul float %91, %29 %93 = fadd float %90, %92 %94 = call float @llvm.SI.load.const(<16 x i8> %52, i32 60) %95 = fmul float %94, %30 %96 = fadd float %93, %95 %97 = call float @llvm.SI.load.const(<16 x i8> %52, i32 64) %98 = fmul float %97, %27 %99 = call float @llvm.SI.load.const(<16 x i8> %52, i32 68) %100 = fmul float %99, %28 %101 = fadd float %98, %100 %102 = call float @llvm.SI.load.const(<16 x i8> %52, i32 72) %103 = fmul float %102, %29 %104 = fadd float %101, %103 %105 = call float @llvm.SI.load.const(<16 x i8> %52, i32 76) %106 = fmul float %105, %30 %107 = fadd float %104, %106 %108 = call float @llvm.SI.load.const(<16 x i8> %52, i32 80) %109 = fmul float %108, %27 %110 = call float @llvm.SI.load.const(<16 x i8> %52, i32 84) %111 = fmul float %110, %28 %112 = fadd float %109, %111 %113 = call float @llvm.SI.load.const(<16 x i8> %52, i32 88) %114 = fmul float %113, %29 %115 = fadd float %112, %114 %116 = call float @llvm.SI.load.const(<16 x i8> %52, i32 92) %117 = fmul float %116, %30 %118 = fadd float %115, %117 %119 = call float @llvm.SI.load.const(<16 x i8> %52, i32 96) %120 = fmul float %119, %27 %121 = call float @llvm.SI.load.const(<16 x i8> %52, i32 100) %122 = fmul float %121, %28 %123 = fadd float %120, %122 %124 = call float @llvm.SI.load.const(<16 x i8> %52, i32 104) %125 = fmul float %124, %29 %126 = fadd float %123, %125 %127 = call float @llvm.SI.load.const(<16 x i8> %52, i32 108) %128 = fmul float %127, %30 %129 = fadd float %126, %128 %130 = call float @llvm.SI.load.const(<16 x i8> %52, i32 112) %131 = fmul float %130, %27 %132 = call float @llvm.SI.load.const(<16 x i8> %52, i32 116) %133 = fmul float %132, %28 %134 = fadd float %131, %133 %135 = call float @llvm.SI.load.const(<16 x i8> %52, i32 120) %136 = fmul float %135, %29 %137 = fadd float %134, %136 %138 = call float @llvm.SI.load.const(<16 x i8> %52, i32 124) %139 = fmul float %138, %30 %140 = fadd float %137, %139 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %35, float %36, float %40, float %44) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %48, float %50, float %46, float %30) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 13, i32 0, float %63, float %74, float %85, float %96) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 14, i32 0, float %107, float %118, float %129, float %140) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_load_dwordx4 s[4:7], s[2:3], 0x0 ; C0820300 s_load_dwordx4 s[0:3], s[2:3], 0x40 ; C0800340 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[4:7], 0x2 ; C2040502 s_buffer_load_dword s9, s[4:7], 0xc0 ; C20485C0 s_buffer_load_dword s10, s[4:7], 0xc1 ; C20505C1 s_buffer_load_dword s11, s[4:7], 0xc3 ; C20585C3 s_buffer_load_dword s12, s[4:7], 0xc4 ; C20605C4 s_buffer_load_dword s13, s[4:7], 0xc5 ; C20685C5 s_buffer_load_dword s14, s[4:7], 0xc7 ; C20705C7 s_buffer_load_dword s15, s[4:7], 0xd8 ; C20785D8 s_buffer_load_dword s4, s[4:7], 0xd9 ; C20205D9 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x2 ; C2038102 s_buffer_load_dword s16, s[0:3], 0x3 ; C2080103 s_buffer_load_dword s17, s[0:3], 0x4 ; C2088104 s_buffer_load_dword s18, s[0:3], 0x5 ; C2090105 s_buffer_load_dword s19, s[0:3], 0x6 ; C2098106 s_buffer_load_dword s20, s[0:3], 0x7 ; C20A0107 s_buffer_load_dword s21, s[0:3], 0x8 ; C20A8108 s_buffer_load_dword s22, s[0:3], 0x9 ; C20B0109 s_buffer_load_dword s23, s[0:3], 0xa ; C20B810A s_buffer_load_dword s24, s[0:3], 0xb ; C20C010B s_buffer_load_dword s25, s[0:3], 0xc ; C20C810C s_buffer_load_dword s26, s[0:3], 0xd ; C20D010D s_buffer_load_dword s27, s[0:3], 0xe ; C20D810E s_buffer_load_dword s28, s[0:3], 0xf ; C20E010F s_buffer_load_dword s29, s[0:3], 0x10 ; C20E8110 s_buffer_load_dword s30, s[0:3], 0x11 ; C20F0111 s_buffer_load_dword s31, s[0:3], 0x12 ; C20F8112 s_buffer_load_dword s32, s[0:3], 0x13 ; C2100113 s_buffer_load_dword s33, s[0:3], 0x14 ; C2108114 s_buffer_load_dword s34, s[0:3], 0x15 ; C2110115 s_buffer_load_dword s35, s[0:3], 0x16 ; C2118116 s_buffer_load_dword s36, s[0:3], 0x17 ; C2120117 s_buffer_load_dword s37, s[0:3], 0x18 ; C2128118 s_buffer_load_dword s38, s[0:3], 0x19 ; C2130119 s_buffer_load_dword s39, s[0:3], 0x1a ; C213811A s_buffer_load_dword s40, s[0:3], 0x1b ; C214011B s_buffer_load_dword s41, s[0:3], 0x1c ; C214811C s_buffer_load_dword s42, s[0:3], 0x1d ; C215011D s_buffer_load_dword s43, s[0:3], 0x1e ; C215811E s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s11 ; 7E00020B s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v7, s14 ; 7E0E020E v_mac_f32_e32 v0, s9, v5 ; 3E000A09 v_mac_f32_e32 v7, s12, v5 ; 3E0E0A0C v_mul_f32_e32 v8, s6, v2 ; 10100406 v_mul_f32_e32 v9, s18, v2 ; 10120412 v_mul_f32_e32 v10, s22, v2 ; 10140416 v_mul_f32_e32 v11, s26, v2 ; 1016041A v_mul_f32_e32 v12, s30, v2 ; 1018041E v_mul_f32_e32 v13, s34, v2 ; 101A0422 v_mul_f32_e32 v14, s38, v2 ; 101C0426 v_mul_f32_e32 v15, s42, v2 ; 101E042A v_mac_f32_e32 v0, s10, v6 ; 3E000C0A v_mac_f32_e32 v7, s13, v6 ; 3E0E0C0D v_mac_f32_e32 v8, s5, v1 ; 3E100205 v_mac_f32_e32 v9, s17, v1 ; 3E120211 v_mac_f32_e32 v10, s21, v1 ; 3E140215 v_mac_f32_e32 v11, s25, v1 ; 3E160219 v_mac_f32_e32 v12, s29, v1 ; 3E18021D v_mac_f32_e32 v13, s33, v1 ; 3E1A0221 v_mac_f32_e32 v14, s37, v1 ; 3E1C0225 v_mac_f32_e32 v15, s41, v1 ; 3E1E0229 v_mac_f32_e32 v8, s7, v3 ; 3E100607 v_mac_f32_e32 v9, s19, v3 ; 3E120613 exp 15, 32, 0, 0, 0, v5, v6, v0, v7 ; F800020F 07000605 v_mac_f32_e32 v10, s23, v3 ; 3E140617 v_mac_f32_e32 v11, s27, v3 ; 3E16061B v_mac_f32_e32 v12, s31, v3 ; 3E18061F s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v0, v3, s8, -v4 ; D2820000 84101103 v_mad_f32 v1, s15, v4, v1 ; D2820001 0406080F v_mad_f32 v2, s4, v4, -v2 ; D2820002 840A0804 v_mac_f32_e32 v13, s35, v3 ; 3E1A0623 v_mac_f32_e32 v14, s39, v3 ; 3E1C0627 v_mac_f32_e32 v15, s43, v3 ; 3E1E062B v_mac_f32_e32 v8, s16, v4 ; 3E100810 v_mac_f32_e32 v9, s20, v4 ; 3E120814 v_mac_f32_e32 v10, s24, v4 ; 3E140818 v_mac_f32_e32 v11, s28, v4 ; 3E16081C v_mac_f32_e32 v12, s32, v4 ; 3E180820 v_mac_f32_e32 v13, s36, v4 ; 3E1A0824 v_mac_f32_e32 v14, s40, v4 ; 3E1C0828 v_mac_f32_e32 v15, s0, v4 ; 3E1E0800 exp 15, 12, 0, 0, 0, v1, v2, v0, v4 ; F80000CF 04000201 exp 15, 13, 0, 0, 0, v8, v9, v10, v11 ; F80000DF 0B0A0908 exp 15, 14, 0, 1, 0, v12, v13, v14, v15 ; F80008EF 0F0E0D0C s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 16 Code Size: 432 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL CONST[0..3] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.9960, 1.0000, 0.0000, -1.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0].w, TEMP[0], SAMP[0], 2D 2: ADD TEMP[0].x, -TEMP[0].wwww, IMM[0].xxxx 3: MOV TEMP[1].xy, IN[0].xyyy 4: TEX TEMP[1].x, TEMP[1], SAMP[2], 2D 5: ADD TEMP[1].x, -TEMP[1].xxxx, IMM[0].yyyy 6: FSGE TEMP[0].x, TEMP[0].xxxx, IMM[0].zzzz 7: UIF TEMP[0].xxxx :0 8: MOV TEMP[0].x, TEMP[1].xxxx 9: ELSE :0 10: MOV TEMP[0].x, IMM[0].yyyy 11: ENDIF 12: MOV TEMP[1].xy, IN[0].xyyy 13: TEX TEMP[1].xyz, TEMP[1], SAMP[1], 2D 14: ADD TEMP[2].x, TEMP[1].xxxx, -CONST[3].yyyy 15: MAD TEMP[2].x, TEMP[0].xxxx, TEMP[2].xxxx, CONST[3].yyyy 16: ADD TEMP[3].x, TEMP[1].yyyy, IMM[0].wwww 17: MAD TEMP[1].xy, TEMP[1].zzzz, IMM[0].yzzz, IMM[0].zyyy 18: MOV TEMP[2].zw, TEMP[1].yyxy 19: MAD TEMP[0].x, TEMP[0].xxxx, TEMP[3].xxxx, IMM[0].yyyy 20: MOV TEMP[2].y, TEMP[0].xxxx 21: MOV OUT[0], TEMP[2] 22: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 52) %26 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %27 = load <8 x i32>, <8 x i32> addrspace(2)* %26, align 32, !tbaa !0 %28 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %29 = load <4 x i32>, <4 x i32> addrspace(2)* %28, align 16, !tbaa !0 %30 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %31 = load <8 x i32>, <8 x i32> addrspace(2)* %30, align 32, !tbaa !0 %32 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %33 = load <4 x i32>, <4 x i32> addrspace(2)* %32, align 16, !tbaa !0 %34 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %35 = load <8 x i32>, <8 x i32> addrspace(2)* %34, align 32, !tbaa !0 %36 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %37 = load <4 x i32>, <4 x i32> addrspace(2)* %36, align 16, !tbaa !0 %38 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %39 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %40 = bitcast float %38 to i32 %41 = bitcast float %39 to i32 %42 = insertelement <2 x i32> undef, i32 %40, i32 0 %43 = insertelement <2 x i32> %42, i32 %41, i32 1 %44 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %43, <8 x i32> %27, <4 x i32> %29, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %45 = extractelement <4 x float> %44, i32 3 %46 = fsub float 0x3FEFDF3B60000000, %45 %47 = bitcast float %38 to i32 %48 = bitcast float %39 to i32 %49 = insertelement <2 x i32> undef, i32 %47, i32 0 %50 = insertelement <2 x i32> %49, i32 %48, i32 1 %51 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %50, <8 x i32> %35, <4 x i32> %37, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %52 = extractelement <4 x float> %51, i32 0 %53 = fsub float 1.000000e+00, %52 %54 = fcmp oge float %46, 0.000000e+00 %. = select i1 %54, float %53, float 1.000000e+00 %55 = bitcast float %38 to i32 %56 = bitcast float %39 to i32 %57 = insertelement <2 x i32> undef, i32 %55, i32 0 %58 = insertelement <2 x i32> %57, i32 %56, i32 1 %59 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %58, <8 x i32> %31, <4 x i32> %33, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %60 = extractelement <4 x float> %59, i32 0 %61 = extractelement <4 x float> %59, i32 1 %62 = extractelement <4 x float> %59, i32 2 %63 = fsub float %60, %25 %64 = fmul float %., %63 %65 = fadd float %64, %25 %66 = fadd float %61, -1.000000e+00 %67 = fadd float %62, 0.000000e+00 %68 = fmul float %62, 0.000000e+00 %69 = fadd float %68, 1.000000e+00 %70 = fmul float %., %66 %71 = fadd float %70, 1.000000e+00 %72 = call i32 @llvm.SI.packf16(float %65, float %71) %73 = bitcast i32 %72 to float %74 = call i32 @llvm.SI.packf16(float %67, float %69) %75 = bitcast i32 %74 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %73, float %75, float %73, float %75) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx8 s[20:27], s[6:7], 0x10 ; C0CA0710 s_load_dwordx4 s[28:31], s[4:5], 0x0 ; C08E0500 s_load_dwordx4 s[32:35], s[4:5], 0x4 ; C0900504 s_load_dwordx4 s[36:39], s[4:5], 0x8 ; C0920508 s_load_dwordx8 s[40:47], s[6:7], 0x8 ; C0D40708 s_mov_b32 m0, s10 ; BEFC030A s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[0:3], 0xd ; C200010D v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 image_sample v0, 8, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[28:31] ; F0800800 00E30002 image_sample v1, 1, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[20:27], s[36:39] ; F0800100 01250102 image_sample v[2:4], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[40:47], s[32:35] ; F0800700 010A0202 s_waitcnt vmcnt(2) ; BF8C0772 v_sub_f32_e32 v0, 0x3f7ef9db, v0 ; 080000FF 3F7EF9DB s_waitcnt vmcnt(1) ; BF8C0771 v_sub_f32_e32 v1, 1.0, v1 ; 080202F2 v_cmp_le_f32_e32 vcc, 0, v0 ; 7C060080 v_cndmask_b32_e32 v0, 1.0, v1 ; 000002F2 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_subrev_f32_e32 v1, s0, v2 ; 0A020400 v_mad_f32 v1, v0, v1, s0 ; D2820001 00020300 v_add_f32_e32 v2, -1.0, v3 ; 060406F3 v_add_f32_e32 v3, 0, v4 ; 06060880 v_mad_f32 v4, 0, v4, 1.0 ; D2820004 03CA0880 v_mad_f32 v0, v0, v2, 1.0 ; D2820000 03CA0500 v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101 v_cvt_pkrtz_f16_f32_e32 v1, v3, v4 ; 5E020903 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 8 Code Size: 172 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], CLIPVERTEX DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL CONST[0..54] DCL TEMP[0..3], LOCAL 0: MUL TEMP[0].xy, CONST[48].xyyy, IN[1].xyyy 1: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 2: ADD TEMP[1].x, TEMP[0].xxxx, CONST[48].wwww 3: MOV TEMP[1].z, TEMP[1].xxxx 4: MUL TEMP[0].xy, CONST[49].xyyy, IN[1].xyyy 5: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 6: ADD TEMP[2].x, TEMP[0].xxxx, CONST[49].wwww 7: MOV TEMP[1].w, TEMP[2].xxxx 8: MUL TEMP[0].xy, CONST[50].xyyy, IN[1].xyyy 9: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 10: ADD TEMP[2].x, TEMP[0].xxxx, CONST[50].wwww 11: MUL TEMP[0].xy, CONST[51].xyyy, IN[1].xyyy 12: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 13: ADD TEMP[3].x, TEMP[0].xxxx, CONST[51].wwww 14: MOV TEMP[2].y, TEMP[3].xxxx 15: MUL TEMP[0].xy, CONST[52].xyyy, IN[1].xyyy 16: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 17: ADD TEMP[3].x, TEMP[0].xxxx, CONST[52].wwww 18: MOV TEMP[2].z, TEMP[3].xxxx 19: MUL TEMP[0].xy, CONST[53].xyyy, IN[1].xyyy 20: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 21: ADD TEMP[0].x, TEMP[0].xxxx, CONST[53].wwww 22: MOV TEMP[2].w, TEMP[0].xxxx 23: MOV TEMP[0].xw, IN[0].xxxw 24: MOV TEMP[1].xy, IN[1].xyxx 25: MAD TEMP[3].x, IN[0].zzzz, CONST[0].zzzz, -IN[0].wwww 26: MOV TEMP[0].z, TEMP[3].xxxx 27: MOV TEMP[0].y, -IN[0].yyyy 28: MAD TEMP[0].xy, CONST[54].xyyy, IN[0].wwww, TEMP[0].xyyy 29: MOV OUT[2], TEMP[1] 30: MOV OUT[3], TEMP[2] 31: MOV OUT[0], TEMP[0] 32: MOV OUT[1], IN[0] 33: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 768) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 772) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 780) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 784) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 788) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 796) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 800) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 804) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 812) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 816) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 820) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 828) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 832) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 836) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 844) %30 = call float @llvm.SI.load.const(<16 x i8> %13, i32 848) %31 = call float @llvm.SI.load.const(<16 x i8> %13, i32 852) %32 = call float @llvm.SI.load.const(<16 x i8> %13, i32 860) %33 = call float @llvm.SI.load.const(<16 x i8> %13, i32 864) %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 868) %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 %37 = add i32 %5, %8 %38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = add i32 %5, %8 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = fmul float %15, %47 %50 = fmul float %16, %48 %51 = fadd float %50, %49 %52 = fadd float %51, %17 %53 = fmul float %18, %47 %54 = fmul float %19, %48 %55 = fadd float %54, %53 %56 = fadd float %55, %20 %57 = fmul float %21, %47 %58 = fmul float %22, %48 %59 = fadd float %58, %57 %60 = fadd float %59, %23 %61 = fmul float %24, %47 %62 = fmul float %25, %48 %63 = fadd float %62, %61 %64 = fadd float %63, %26 %65 = fmul float %27, %47 %66 = fmul float %28, %48 %67 = fadd float %66, %65 %68 = fadd float %67, %29 %69 = fmul float %30, %47 %70 = fmul float %31, %48 %71 = fadd float %70, %69 %72 = fadd float %71, %32 %73 = fmul float %41, %14 %74 = fsub float %73, %42 %75 = fmul float %33, %42 %76 = fadd float %75, %39 %77 = fmul float %34, %42 %78 = fsub float %77, %40 %79 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 16 %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 %81 = call float @llvm.SI.load.const(<16 x i8> %80, i32 0) %82 = fmul float %81, %39 %83 = call float @llvm.SI.load.const(<16 x i8> %80, i32 4) %84 = fmul float %83, %40 %85 = fadd float %82, %84 %86 = call float @llvm.SI.load.const(<16 x i8> %80, i32 8) %87 = fmul float %86, %41 %88 = fadd float %85, %87 %89 = call float @llvm.SI.load.const(<16 x i8> %80, i32 12) %90 = fmul float %89, %42 %91 = fadd float %88, %90 %92 = call float @llvm.SI.load.const(<16 x i8> %80, i32 16) %93 = fmul float %92, %39 %94 = call float @llvm.SI.load.const(<16 x i8> %80, i32 20) %95 = fmul float %94, %40 %96 = fadd float %93, %95 %97 = call float @llvm.SI.load.const(<16 x i8> %80, i32 24) %98 = fmul float %97, %41 %99 = fadd float %96, %98 %100 = call float @llvm.SI.load.const(<16 x i8> %80, i32 28) %101 = fmul float %100, %42 %102 = fadd float %99, %101 %103 = call float @llvm.SI.load.const(<16 x i8> %80, i32 32) %104 = fmul float %103, %39 %105 = call float @llvm.SI.load.const(<16 x i8> %80, i32 36) %106 = fmul float %105, %40 %107 = fadd float %104, %106 %108 = call float @llvm.SI.load.const(<16 x i8> %80, i32 40) %109 = fmul float %108, %41 %110 = fadd float %107, %109 %111 = call float @llvm.SI.load.const(<16 x i8> %80, i32 44) %112 = fmul float %111, %42 %113 = fadd float %110, %112 %114 = call float @llvm.SI.load.const(<16 x i8> %80, i32 48) %115 = fmul float %114, %39 %116 = call float @llvm.SI.load.const(<16 x i8> %80, i32 52) %117 = fmul float %116, %40 %118 = fadd float %115, %117 %119 = call float @llvm.SI.load.const(<16 x i8> %80, i32 56) %120 = fmul float %119, %41 %121 = fadd float %118, %120 %122 = call float @llvm.SI.load.const(<16 x i8> %80, i32 60) %123 = fmul float %122, %42 %124 = fadd float %121, %123 %125 = call float @llvm.SI.load.const(<16 x i8> %80, i32 64) %126 = fmul float %125, %39 %127 = call float @llvm.SI.load.const(<16 x i8> %80, i32 68) %128 = fmul float %127, %40 %129 = fadd float %126, %128 %130 = call float @llvm.SI.load.const(<16 x i8> %80, i32 72) %131 = fmul float %130, %41 %132 = fadd float %129, %131 %133 = call float @llvm.SI.load.const(<16 x i8> %80, i32 76) %134 = fmul float %133, %42 %135 = fadd float %132, %134 %136 = call float @llvm.SI.load.const(<16 x i8> %80, i32 80) %137 = fmul float %136, %39 %138 = call float @llvm.SI.load.const(<16 x i8> %80, i32 84) %139 = fmul float %138, %40 %140 = fadd float %137, %139 %141 = call float @llvm.SI.load.const(<16 x i8> %80, i32 88) %142 = fmul float %141, %41 %143 = fadd float %140, %142 %144 = call float @llvm.SI.load.const(<16 x i8> %80, i32 92) %145 = fmul float %144, %42 %146 = fadd float %143, %145 %147 = call float @llvm.SI.load.const(<16 x i8> %80, i32 96) %148 = fmul float %147, %39 %149 = call float @llvm.SI.load.const(<16 x i8> %80, i32 100) %150 = fmul float %149, %40 %151 = fadd float %148, %150 %152 = call float @llvm.SI.load.const(<16 x i8> %80, i32 104) %153 = fmul float %152, %41 %154 = fadd float %151, %153 %155 = call float @llvm.SI.load.const(<16 x i8> %80, i32 108) %156 = fmul float %155, %42 %157 = fadd float %154, %156 %158 = call float @llvm.SI.load.const(<16 x i8> %80, i32 112) %159 = fmul float %158, %39 %160 = call float @llvm.SI.load.const(<16 x i8> %80, i32 116) %161 = fmul float %160, %40 %162 = fadd float %159, %161 %163 = call float @llvm.SI.load.const(<16 x i8> %80, i32 120) %164 = fmul float %163, %41 %165 = fadd float %162, %164 %166 = call float @llvm.SI.load.const(<16 x i8> %80, i32 124) %167 = fmul float %166, %42 %168 = fadd float %165, %167 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %47, float %48, float %52, float %56) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %60, float %64, float %68, float %72) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %76, float %78, float %74, float %42) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 13, i32 0, float %91, float %102, float %113, float %124) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 14, i32 0, float %135, float %146, float %157, float %168) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_load_dwordx4 s[4:7], s[2:3], 0x0 ; C0820300 s_load_dwordx4 s[0:3], s[2:3], 0x40 ; C0800340 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[4:7], 0x2 ; C2040502 s_buffer_load_dword s9, s[4:7], 0xc0 ; C20485C0 s_buffer_load_dword s10, s[4:7], 0xc1 ; C20505C1 s_buffer_load_dword s11, s[4:7], 0xc3 ; C20585C3 s_buffer_load_dword s12, s[4:7], 0xc4 ; C20605C4 s_buffer_load_dword s13, s[4:7], 0xc5 ; C20685C5 s_buffer_load_dword s14, s[4:7], 0xc7 ; C20705C7 s_buffer_load_dword s15, s[4:7], 0xc8 ; C20785C8 s_buffer_load_dword s16, s[4:7], 0xc9 ; C20805C9 s_buffer_load_dword s17, s[4:7], 0xcb ; C20885CB s_buffer_load_dword s18, s[4:7], 0xcc ; C20905CC s_buffer_load_dword s19, s[4:7], 0xcd ; C20985CD s_buffer_load_dword s20, s[4:7], 0xcf ; C20A05CF s_buffer_load_dword s21, s[4:7], 0xd0 ; C20A85D0 s_buffer_load_dword s22, s[4:7], 0xd1 ; C20B05D1 s_buffer_load_dword s23, s[4:7], 0xd3 ; C20B85D3 s_buffer_load_dword s24, s[4:7], 0xd4 ; C20C05D4 s_buffer_load_dword s25, s[4:7], 0xd5 ; C20C85D5 s_buffer_load_dword s26, s[4:7], 0xd7 ; C20D05D7 s_buffer_load_dword s27, s[4:7], 0xd8 ; C20D85D8 s_buffer_load_dword s4, s[4:7], 0xd9 ; C20205D9 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x2 ; C2038102 s_buffer_load_dword s28, s[0:3], 0x3 ; C20E0103 s_buffer_load_dword s29, s[0:3], 0x4 ; C20E8104 s_buffer_load_dword s30, s[0:3], 0x5 ; C20F0105 s_buffer_load_dword s31, s[0:3], 0x6 ; C20F8106 s_buffer_load_dword s32, s[0:3], 0x7 ; C2100107 s_buffer_load_dword s33, s[0:3], 0x8 ; C2108108 s_buffer_load_dword s34, s[0:3], 0x9 ; C2110109 s_buffer_load_dword s35, s[0:3], 0xa ; C211810A s_buffer_load_dword s36, s[0:3], 0xb ; C212010B s_buffer_load_dword s37, s[0:3], 0xc ; C212810C s_buffer_load_dword s38, s[0:3], 0xd ; C213010D s_buffer_load_dword s39, s[0:3], 0xe ; C213810E s_buffer_load_dword s40, s[0:3], 0xf ; C214010F s_buffer_load_dword s41, s[0:3], 0x10 ; C2148110 s_buffer_load_dword s42, s[0:3], 0x11 ; C2150111 s_buffer_load_dword s43, s[0:3], 0x12 ; C2158112 s_buffer_load_dword s44, s[0:3], 0x13 ; C2160113 s_buffer_load_dword s45, s[0:3], 0x14 ; C2168114 s_buffer_load_dword s46, s[0:3], 0x15 ; C2170115 s_buffer_load_dword s47, s[0:3], 0x16 ; C2178116 s_buffer_load_dword s48, s[0:3], 0x17 ; C2180117 s_buffer_load_dword s49, s[0:3], 0x18 ; C2188118 s_buffer_load_dword s50, s[0:3], 0x19 ; C2190119 s_buffer_load_dword s51, s[0:3], 0x1a ; C219811A s_buffer_load_dword s52, s[0:3], 0x1b ; C21A011B s_buffer_load_dword s53, s[0:3], 0x1c ; C21A811C s_buffer_load_dword s54, s[0:3], 0x1d ; C21B011D s_buffer_load_dword s55, s[0:3], 0x1e ; C21B811E s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s11 ; 7E00020B s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v7, s14 ; 7E0E020E v_mov_b32_e32 v8, s17 ; 7E100211 v_mov_b32_e32 v9, s20 ; 7E120214 v_mov_b32_e32 v10, s23 ; 7E140217 v_mov_b32_e32 v11, s26 ; 7E16021A v_mac_f32_e32 v0, s9, v5 ; 3E000A09 v_mac_f32_e32 v7, s12, v5 ; 3E0E0A0C v_mac_f32_e32 v8, s15, v5 ; 3E100A0F v_mac_f32_e32 v9, s18, v5 ; 3E120A12 v_mac_f32_e32 v10, s21, v5 ; 3E140A15 v_mac_f32_e32 v11, s24, v5 ; 3E160A18 v_mac_f32_e32 v0, s10, v6 ; 3E000C0A v_mac_f32_e32 v7, s13, v6 ; 3E0E0C0D v_mac_f32_e32 v8, s16, v6 ; 3E100C10 v_mac_f32_e32 v9, s19, v6 ; 3E120C13 exp 15, 32, 0, 0, 0, v5, v6, v0, v7 ; F800020F 07000605 v_mac_f32_e32 v10, s22, v6 ; 3E140C16 v_mac_f32_e32 v11, s25, v6 ; 3E160C19 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v0, v3, s8, -v4 ; D2820000 84101103 v_mad_f32 v5, s27, v4, v1 ; D2820005 0406081B v_mad_f32 v6, s4, v4, -v2 ; D2820006 840A0804 v_mul_f32_e32 v7, s6, v2 ; 100E0406 exp 15, 33, 0, 0, 0, v8, v9, v10, v11 ; F800021F 0B0A0908 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v8, s30, v2 ; 1010041E v_mac_f32_e32 v7, s5, v1 ; 3E0E0205 exp 15, 12, 0, 0, 0, v5, v6, v0, v4 ; F80000CF 04000605 v_mac_f32_e32 v8, s29, v1 ; 3E10021D s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s34, v2 ; 10000422 v_mac_f32_e32 v0, s33, v1 ; 3E000221 v_mul_f32_e32 v5, s38, v2 ; 100A0426 v_mac_f32_e32 v5, s37, v1 ; 3E0A0225 v_mul_f32_e32 v6, s42, v2 ; 100C042A v_mul_f32_e32 v9, s46, v2 ; 1012042E v_mul_f32_e32 v10, s50, v2 ; 10140432 v_mul_f32_e32 v2, s54, v2 ; 10040436 v_mac_f32_e32 v6, s41, v1 ; 3E0C0229 v_mac_f32_e32 v9, s45, v1 ; 3E12022D v_mac_f32_e32 v10, s49, v1 ; 3E140231 v_mac_f32_e32 v2, s53, v1 ; 3E040235 v_mac_f32_e32 v7, s7, v3 ; 3E0E0607 v_mac_f32_e32 v8, s31, v3 ; 3E10061F v_mac_f32_e32 v0, s35, v3 ; 3E000623 v_mac_f32_e32 v5, s39, v3 ; 3E0A0627 v_mac_f32_e32 v6, s43, v3 ; 3E0C062B v_mac_f32_e32 v9, s47, v3 ; 3E12062F v_mac_f32_e32 v10, s51, v3 ; 3E140633 v_mac_f32_e32 v2, s55, v3 ; 3E040637 v_mac_f32_e32 v7, s28, v4 ; 3E0E081C v_mac_f32_e32 v8, s32, v4 ; 3E100820 v_mac_f32_e32 v0, s36, v4 ; 3E000824 v_mac_f32_e32 v5, s40, v4 ; 3E0A0828 v_mac_f32_e32 v6, s44, v4 ; 3E0C082C v_mac_f32_e32 v9, s48, v4 ; 3E120830 v_mac_f32_e32 v10, s52, v4 ; 3E140834 v_mac_f32_e32 v2, s0, v4 ; 3E040800 exp 15, 13, 0, 0, 0, v7, v8, v0, v5 ; F80000DF 05000807 exp 15, 14, 0, 1, 0, v6, v9, v10, v2 ; F80008EF 020A0906 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 12 Code Size: 544 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL SVIEW[5], 2D, FLOAT DCL CONST[0..3] DCL TEMP[0..9], LOCAL IMM[0] FLT32 { 1.0000, 0.1000, 0.7500, 0.0800} IMM[1] FLT32 { 6.6667, -2.0000, 3.0000, -0.1000} IMM[2] FLT32 { 0.3000, 0.5900, 0.1100, 10.0000} IMM[3] FLT32 { 0.2500, -1.0000, -0.0800, 14.2857} IMM[4] FLT32 { 0.0050, 2.0000, 0.0000, 0.0000} 0: SQRT TEMP[0].x, CONST[3].wwww 1: ADD TEMP[1].x, -CONST[1].xxxx, CONST[0].wwww 2: ADD TEMP[2].x, -CONST[1].yyyy, CONST[1].wwww 3: MOV TEMP[1].y, TEMP[2].xxxx 4: ADD TEMP[3].x, -CONST[1].zzzz, CONST[2].wwww 5: MOV TEMP[1].z, TEMP[3].xxxx 6: MAD TEMP[0].xyz, TEMP[0].xxxx, TEMP[1].xyzz, CONST[1].xyzz 7: ADD TEMP[1].xyz, -CONST[1].xyzz, CONST[2].xyzz 8: MAD TEMP[2].xyz, CONST[3].wwww, TEMP[1].xyzz, CONST[1].xyzz 9: MOV TEMP[3].xy, IN[1].zwww 10: TEX TEMP[3], TEMP[3], SAMP[4], 2D 11: MUL TEMP[4].x, TEMP[3].yyyy, TEMP[3].xxxx 12: MUL TEMP[5].xy, IMM[0].zyyy, CONST[3].wwww 13: MOV TEMP[6].xy, IN[0].xyyy 14: TEX TEMP[6].xyz, TEMP[6], SAMP[0], 2D 15: MAD TEMP[7].x, TEMP[6].xxxx, TEMP[6].yyyy, -TEMP[5].yyyy 16: MOV_SAT TEMP[7].x, TEMP[7].xxxx 17: MAD TEMP[4].x, TEMP[4].xxxx, -TEMP[3].zzzz, TEMP[7].xxxx 18: ADD TEMP[4].x, TEMP[4].xxxx, IMM[0].wwww 19: MUL TEMP[4].x, TEMP[4].xxxx, IMM[1].xxxx 20: MOV_SAT TEMP[4].x, TEMP[4].xxxx 21: MAD TEMP[7].x, TEMP[4].xxxx, IMM[1].yyyy, IMM[1].zzzz 22: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[4].xxxx 23: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[7].xxxx 24: LRP TEMP[7].xyz, TEMP[4].xxxx, TEMP[2].xyzz, TEMP[0].xyzz 25: MOV TEMP[8].xy, IN[0].zwww 26: TEX TEMP[8].xyz, TEMP[8], SAMP[5], 2D 27: MUL TEMP[0].xyz, TEMP[8].xyzz, TEMP[7].xyzz 28: DP3 TEMP[8].x, TEMP[8].xyzz, IMM[2].xyzz 29: MAD TEMP[2].xyz, CONST[0].xyzz, TEMP[8].xxxx, -TEMP[0].xyzz 30: MOV TEMP[8].xy, IN[1].xyyy 31: TEX TEMP[8].y, TEMP[8], SAMP[1], 2D 32: MUL TEMP[8].x, TEMP[6].yyyy, TEMP[8].yyyy 33: MUL TEMP[9].x, TEMP[6].xxxx, TEMP[6].xxxx 34: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx 35: MAD TEMP[8].x, TEMP[8].xxxx, CONST[3].wwww, IMM[1].wwww 36: MUL TEMP[8].x, TEMP[8].xxxx, IMM[2].wwww 37: MOV_SAT TEMP[8].x, TEMP[8].xxxx 38: MAD TEMP[9].x, TEMP[8].xxxx, IMM[1].yyyy, IMM[1].zzzz 39: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[8].xxxx 40: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx 41: MAD TEMP[0].xyz, TEMP[8].xxxx, TEMP[2].xyzz, TEMP[0].xyzz 42: ADD TEMP[2].x, -TEMP[6].xxxx, IMM[0].xxxx 43: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx 44: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx 45: MAD TEMP[2].x, TEMP[2].xxxx, IMM[3].xxxx, TEMP[5].xxxx 46: ADD TEMP[3], TEMP[3], IMM[3].yyyy 47: MAD TEMP[7], TEMP[2].xxxx, TEMP[3], IMM[0].xxxx 48: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[7].xyzz 49: DP3 TEMP[3].x, TEMP[0].xyzz, IMM[2].xyzz 50: ADD TEMP[2].x, TEMP[3].xxxx, IMM[3].zzzz 51: MUL TEMP[2].x, TEMP[2].xxxx, IMM[3].wwww 52: MOV_SAT TEMP[3].x, TEMP[2].xxxx 53: MAD TEMP[5].x, TEMP[3].xxxx, IMM[1].yyyy, IMM[1].zzzz 54: MUL TEMP[2].x, TEMP[3].xxxx, TEMP[3].xxxx 55: MAD TEMP[2].x, TEMP[5].xxxx, -TEMP[2].xxxx, IMM[0].xxxx 56: MAD TEMP[2].xyz, TEMP[2].xxxx, IMM[4].xxxx, TEMP[0].xyzz 57: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[6].zzzz 58: MAD TEMP[0].xyz, TEMP[2].xyzz, IMM[4].yyyy, TEMP[0].xyzz 59: MOV_SAT TEMP[3].xyz, TEMP[0].xyzz 60: MUL TEMP[2].xyz, TEMP[6].yyyy, TEMP[3].xyzz 61: MOV TEMP[5].xy, IN[0].xyyy 62: TEX TEMP[5], TEMP[5], SAMP[2], 2D 63: MAD TEMP[0].xyz, TEMP[3].xyzz, -TEMP[6].yyyy, TEMP[5].xyzz 64: MUL TEMP[1].x, TEMP[6].yyyy, CONST[3].zzzz 65: MUL TEMP[1].x, TEMP[7].wwww, TEMP[1].xxxx 66: MOV TEMP[3].xy, IN[0].xyyy 67: TEX TEMP[3].x, TEMP[3], SAMP[3], 2D 68: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx 69: MAD TEMP[2].xyz, TEMP[3].xxxx, TEMP[0].xyzz, TEMP[2].xyzz 70: MAD TEMP[0].x, TEMP[8].xxxx, -CONST[3].wwww, IMM[0].xxxx 71: MUL TEMP[6].x, TEMP[0].xxxx, TEMP[4].xxxx 72: MAD TEMP[0].x, TEMP[4].xxxx, -TEMP[0].xxxx, IMM[0].xxxx 73: MAD TEMP[0].x, TEMP[8].xxxx, TEMP[0].xxxx, TEMP[6].xxxx 74: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[1].xxxx 75: MAD TEMP[1].x, TEMP[0].xxxx, -CONST[3].xxxx, TEMP[5].wwww 76: MUL TEMP[0].x, TEMP[0].xxxx, CONST[3].xxxx 77: MAD TEMP[0].x, TEMP[3].xxxx, TEMP[1].xxxx, TEMP[0].xxxx 78: MOV TEMP[2].w, TEMP[0].xxxx 79: MOV OUT[0], TEMP[2] 80: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0) %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 4) %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 8) %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 12) %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 16) %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 20) %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 24) %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 28) %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 32) %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 36) %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 40) %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 44) %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 48) %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 56) %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 60) %40 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %41 = load <8 x i32>, <8 x i32> addrspace(2)* %40, align 32, !tbaa !0 %42 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %43 = load <4 x i32>, <4 x i32> addrspace(2)* %42, align 16, !tbaa !0 %44 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %45 = load <8 x i32>, <8 x i32> addrspace(2)* %44, align 32, !tbaa !0 %46 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %47 = load <4 x i32>, <4 x i32> addrspace(2)* %46, align 16, !tbaa !0 %48 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %49 = load <8 x i32>, <8 x i32> addrspace(2)* %48, align 32, !tbaa !0 %50 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %51 = load <4 x i32>, <4 x i32> addrspace(2)* %50, align 16, !tbaa !0 %52 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %53 = load <8 x i32>, <8 x i32> addrspace(2)* %52, align 32, !tbaa !0 %54 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %55 = load <4 x i32>, <4 x i32> addrspace(2)* %54, align 16, !tbaa !0 %56 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %57 = load <8 x i32>, <8 x i32> addrspace(2)* %56, align 32, !tbaa !0 %58 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %59 = load <4 x i32>, <4 x i32> addrspace(2)* %58, align 16, !tbaa !0 %60 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %61 = load <8 x i32>, <8 x i32> addrspace(2)* %60, align 32, !tbaa !0 %62 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %63 = load <4 x i32>, <4 x i32> addrspace(2)* %62, align 16, !tbaa !0 %64 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %65 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %66 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %67 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %68 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %69 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %70 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) %71 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %72 = call float @llvm.sqrt.f32(float %39) %73 = fsub float %28, %29 %74 = fsub float %32, %30 %75 = fsub float %36, %31 %76 = fmul float %72, %73 %77 = fadd float %76, %29 %78 = fmul float %72, %74 %79 = fadd float %78, %30 %80 = fmul float %72, %75 %81 = fadd float %80, %31 %82 = fsub float %33, %29 %83 = fsub float %34, %30 %84 = fsub float %35, %31 %85 = fmul float %39, %82 %86 = fadd float %85, %29 %87 = fmul float %39, %83 %88 = fadd float %87, %30 %89 = fmul float %39, %84 %90 = fadd float %89, %31 %91 = bitcast float %70 to i32 %92 = bitcast float %71 to i32 %93 = insertelement <2 x i32> undef, i32 %91, i32 0 %94 = insertelement <2 x i32> %93, i32 %92, i32 1 %95 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %94, <8 x i32> %57, <4 x i32> %59, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %96 = extractelement <4 x float> %95, i32 0 %97 = extractelement <4 x float> %95, i32 1 %98 = extractelement <4 x float> %95, i32 2 %99 = extractelement <4 x float> %95, i32 3 %100 = fmul float %97, %96 %101 = fmul float %39, 7.500000e-01 %102 = fmul float %39, 0x3FB99999A0000000 %103 = bitcast float %64 to i32 %104 = bitcast float %65 to i32 %105 = insertelement <2 x i32> undef, i32 %103, i32 0 %106 = insertelement <2 x i32> %105, i32 %104, i32 1 %107 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %106, <8 x i32> %41, <4 x i32> %43, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %108 = extractelement <4 x float> %107, i32 0 %109 = extractelement <4 x float> %107, i32 1 %110 = extractelement <4 x float> %107, i32 2 %111 = fmul float %108, %109 %112 = fsub float %111, %102 %113 = call float @llvm.AMDIL.clamp.(float %112, float 0.000000e+00, float 1.000000e+00) %114 = fmul float %98, %100 %115 = fsub float %113, %114 %116 = fadd float %115, 0x3FB47AE140000000 %117 = fmul float %116, 0x401AAAAAA0000000 %118 = call float @llvm.AMDIL.clamp.(float %117, float 0.000000e+00, float 1.000000e+00) %119 = fmul float %118, -2.000000e+00 %120 = fadd float %119, 3.000000e+00 %121 = fmul float %118, %118 %122 = fmul float %121, %120 %123 = fsub float 1.000000e+00, %122 %124 = fmul float %86, %122 %125 = fmul float %77, %123 %126 = fadd float %124, %125 %127 = fsub float 1.000000e+00, %122 %128 = fmul float %88, %122 %129 = fmul float %79, %127 %130 = fadd float %128, %129 %131 = fsub float 1.000000e+00, %122 %132 = fmul float %90, %122 %133 = fmul float %81, %131 %134 = fadd float %132, %133 %135 = bitcast float %66 to i32 %136 = bitcast float %67 to i32 %137 = insertelement <2 x i32> undef, i32 %135, i32 0 %138 = insertelement <2 x i32> %137, i32 %136, i32 1 %139 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %138, <8 x i32> %61, <4 x i32> %63, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %140 = extractelement <4 x float> %139, i32 0 %141 = extractelement <4 x float> %139, i32 1 %142 = extractelement <4 x float> %139, i32 2 %143 = fmul float %140, %126 %144 = fmul float %141, %130 %145 = fmul float %142, %134 %146 = fmul float %140, 0x3FD3333340000000 %147 = fmul float %141, 0x3FE2E147A0000000 %148 = fadd float %147, %146 %149 = fmul float %142, 0x3FBC28F5C0000000 %150 = fadd float %148, %149 %151 = fmul float %25, %150 %152 = fsub float %151, %143 %153 = fmul float %26, %150 %154 = fsub float %153, %144 %155 = fmul float %27, %150 %156 = fsub float %155, %145 %157 = bitcast float %68 to i32 %158 = bitcast float %69 to i32 %159 = insertelement <2 x i32> undef, i32 %157, i32 0 %160 = insertelement <2 x i32> %159, i32 %158, i32 1 %161 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %160, <8 x i32> %45, <4 x i32> %47, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %162 = extractelement <4 x float> %161, i32 1 %163 = fmul float %109, %162 %164 = fmul float %108, %108 %165 = fmul float %163, %164 %166 = fmul float %165, %39 %167 = fadd float %166, 0xBFB99999A0000000 %168 = fmul float %167, 1.000000e+01 %169 = call float @llvm.AMDIL.clamp.(float %168, float 0.000000e+00, float 1.000000e+00) %170 = fmul float %169, -2.000000e+00 %171 = fadd float %170, 3.000000e+00 %172 = fmul float %169, %169 %173 = fmul float %172, %171 %174 = fmul float %173, %152 %175 = fadd float %174, %143 %176 = fmul float %173, %154 %177 = fadd float %176, %144 %178 = fmul float %173, %156 %179 = fadd float %178, %145 %180 = fsub float 1.000000e+00, %108 %181 = fmul float %180, %180 %182 = fmul float %181, %181 %183 = fmul float %182, 2.500000e-01 %184 = fadd float %183, %101 %185 = fadd float %96, -1.000000e+00 %186 = fadd float %97, -1.000000e+00 %187 = fadd float %98, -1.000000e+00 %188 = fadd float %99, -1.000000e+00 %189 = fmul float %184, %185 %190 = fadd float %189, 1.000000e+00 %191 = fmul float %184, %186 %192 = fadd float %191, 1.000000e+00 %193 = fmul float %184, %187 %194 = fadd float %193, 1.000000e+00 %195 = fmul float %184, %188 %196 = fadd float %195, 1.000000e+00 %197 = fmul float %175, %190 %198 = fmul float %177, %192 %199 = fmul float %179, %194 %200 = fmul float %197, 0x3FD3333340000000 %201 = fmul float %198, 0x3FE2E147A0000000 %202 = fadd float %201, %200 %203 = fmul float %199, 0x3FBC28F5C0000000 %204 = fadd float %202, %203 %205 = fadd float %204, 0xBFB47AE140000000 %206 = fmul float %205, 0x402C924920000000 %207 = call float @llvm.AMDIL.clamp.(float %206, float 0.000000e+00, float 1.000000e+00) %208 = fmul float %207, -2.000000e+00 %209 = fadd float %208, 3.000000e+00 %210 = fmul float %207, %207 %211 = fmul float %210, %209 %212 = fsub float 1.000000e+00, %211 %213 = fmul float %212, 0x3F747AE140000000 %214 = fadd float %213, %197 %215 = fmul float %212, 0x3F747AE140000000 %216 = fadd float %215, %198 %217 = fmul float %212, 0x3F747AE140000000 %218 = fadd float %217, %199 %219 = fmul float %214, %110 %220 = fmul float %216, %110 %221 = fmul float %218, %110 %222 = fmul float %219, 2.000000e+00 %223 = fadd float %222, %197 %224 = fmul float %220, 2.000000e+00 %225 = fadd float %224, %198 %226 = fmul float %221, 2.000000e+00 %227 = fadd float %226, %199 %228 = call float @llvm.AMDIL.clamp.(float %223, float 0.000000e+00, float 1.000000e+00) %229 = call float @llvm.AMDIL.clamp.(float %225, float 0.000000e+00, float 1.000000e+00) %230 = call float @llvm.AMDIL.clamp.(float %227, float 0.000000e+00, float 1.000000e+00) %231 = fmul float %109, %228 %232 = fmul float %109, %229 %233 = fmul float %109, %230 %234 = bitcast float %64 to i32 %235 = bitcast float %65 to i32 %236 = insertelement <2 x i32> undef, i32 %234, i32 0 %237 = insertelement <2 x i32> %236, i32 %235, i32 1 %238 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %237, <8 x i32> %49, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %239 = extractelement <4 x float> %238, i32 0 %240 = extractelement <4 x float> %238, i32 1 %241 = extractelement <4 x float> %238, i32 2 %242 = extractelement <4 x float> %238, i32 3 %243 = fmul float %109, %228 %244 = fsub float %239, %243 %245 = fmul float %109, %229 %246 = fsub float %240, %245 %247 = fmul float %109, %230 %248 = fsub float %241, %247 %249 = fmul float %109, %38 %250 = fmul float %196, %249 %251 = bitcast float %64 to i32 %252 = bitcast float %65 to i32 %253 = insertelement <2 x i32> undef, i32 %251, i32 0 %254 = insertelement <2 x i32> %253, i32 %252, i32 1 %255 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %254, <8 x i32> %53, <4 x i32> %55, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %256 = extractelement <4 x float> %255, i32 0 %257 = fsub float 1.000000e+00, %256 %258 = fmul float %257, %244 %259 = fadd float %258, %231 %260 = fmul float %257, %246 %261 = fadd float %260, %232 %262 = fmul float %257, %248 %263 = fadd float %262, %233 %264 = fmul float %39, %173 %265 = fsub float 1.000000e+00, %264 %266 = fmul float %265, %122 %267 = fmul float %265, %122 %268 = fsub float 1.000000e+00, %267 %269 = fmul float %173, %268 %270 = fadd float %269, %266 %271 = fmul float %270, %250 %272 = fmul float %37, %271 %273 = fsub float %242, %272 %274 = fmul float %271, %37 %275 = fmul float %257, %273 %276 = fadd float %275, %274 %277 = call i32 @llvm.SI.packf16(float %259, float %261) %278 = bitcast i32 %277 to float %279 = call i32 @llvm.SI.packf16(float %263, float %276) %280 = bitcast i32 %279 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %278, float %280, float %278, float %280) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s10 ; BEFC030A v_mov_b32_e32 v2, 0x3f400000 ; 7E0402FF 3F400000 v_mov_b32_e32 v3, 0xbdcccccd ; 7E0602FF BDCCCCCD v_mov_b32_e32 v4, 0x40400000 ; 7E0802FF 40400000 v_mov_b32_e32 v5, 0x3e99999a ; 7E0A02FF 3E99999A v_mov_b32_e32 v6, 0x3f170a3d ; 7E0C02FF 3F170A3D v_mov_b32_e32 v7, 0x3de147ae ; 7E0E02FF 3DE147AE v_mov_b32_e32 v8, 0xbda3d70a ; 7E1002FF BDA3D70A v_mov_b32_e32 v9, 0x3ba3d70a ; 7E1202FF 3BA3D70A v_interp_p1_f32 v10, v0, 0, 0, [m0] ; C8280000 v_interp_p2_f32 v10, [v10], v1, 0, 0, [m0] ; C8290001 v_interp_p1_f32 v11, v0, 1, 0, [m0] ; C82C0100 v_interp_p2_f32 v11, [v11], v1, 1, 0, [m0] ; C82D0101 v_interp_p1_f32 v12, v0, 2, 0, [m0] ; C8300200 v_interp_p2_f32 v12, [v12], v1, 2, 0, [m0] ; C8310201 v_interp_p1_f32 v13, v0, 3, 0, [m0] ; C8340300 v_interp_p2_f32 v13, [v13], v1, 3, 0, [m0] ; C8350301 v_interp_p1_f32 v14, v0, 0, 1, [m0] ; C8380400 v_interp_p2_f32 v14, [v14], v1, 0, 1, [m0] ; C8390401 v_interp_p1_f32 v15, v0, 1, 1, [m0] ; C83C0500 v_interp_p2_f32 v15, [v15], v1, 1, 1, [m0] ; C83D0501 v_interp_p1_f32 v16, v0, 2, 1, [m0] ; C8400600 v_interp_p2_f32 v16, [v16], v1, 2, 1, [m0] ; C8410601 v_interp_p1_f32 v17, v0, 3, 1, [m0] ; C8440700 v_interp_p2_f32 v17, [v17], v1, 3, 1, [m0] ; C8450701 s_load_dwordx4 s[28:31], s[2:3], 0x0 ; C08E0300 s_load_dwordx4 s[56:59], s[4:5], 0x0 ; C09C0500 s_load_dwordx4 s[32:35], s[4:5], 0x4 ; C0900504 s_load_dwordx4 s[12:15], s[4:5], 0x8 ; C0860508 s_load_dwordx4 s[8:11], s[4:5], 0xc ; C084050C s_load_dwordx4 s[24:27], s[4:5], 0x10 ; C08C0510 s_load_dwordx4 s[44:47], s[4:5], 0x14 ; C0960514 s_load_dwordx8 s[60:67], s[6:7], 0x0 ; C0DE0700 s_load_dwordx8 s[36:43], s[6:7], 0x8 ; C0D20708 s_load_dwordx8 s[16:23], s[6:7], 0x10 ; C0C80710 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s1, s[28:31], 0x0 ; C2009D00 s_buffer_load_dword s2, s[28:31], 0x1 ; C2011D01 s_buffer_load_dword s3, s[28:31], 0x2 ; C2019D02 s_buffer_load_dword s4, s[28:31], 0x3 ; C2021D03 s_buffer_load_dword s5, s[28:31], 0x4 ; C2029D04 s_buffer_load_dword s76, s[28:31], 0x5 ; C2261D05 s_buffer_load_dword s77, s[28:31], 0x6 ; C2269D06 s_buffer_load_dword s78, s[28:31], 0x7 ; C2271D07 s_buffer_load_dword s79, s[28:31], 0x8 ; C2279D08 s_buffer_load_dword s80, s[28:31], 0x9 ; C2281D09 s_buffer_load_dword s81, s[28:31], 0xa ; C2289D0A s_buffer_load_dword s82, s[28:31], 0xb ; C2291D0B s_buffer_load_dword s0, s[28:31], 0xc ; C2001D0C s_buffer_load_dword s83, s[28:31], 0xe ; C2299D0E s_buffer_load_dword s84, s[28:31], 0xf ; C22A1D0F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s5 ; 7E000205 v_sub_f32_e32 v1, s4, v0 ; 08020004 v_mov_b32_e32 v18, s76 ; 7E24024C v_mov_b32_e32 v19, s77 ; 7E26024D s_load_dwordx8 s[68:75], s[6:7], 0x20 ; C0E20720 s_load_dwordx8 s[48:55], s[6:7], 0x28 ; C0D80728 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[20:23], 15, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[68:75], s[24:27] ; F0800F00 00D11410 s_load_dwordx8 s[24:31], s[6:7], 0x18 ; C0CC0718 image_sample v[24:26], 7, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[60:67], s[56:59] ; F0800700 01CF180A image_sample v[27:29], 7, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[48:55], s[44:47] ; F0800700 016C1B0C image_sample v12, 2, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[36:43], s[32:35] ; F0800200 01090C0E v_sqrt_f32_e32 v13, s84 ; 7E1A6654 v_mad_f32 v1, v13, v1, s5 ; D2820001 0016030D v_sub_f32_e32 v14, s78, v18 ; 081C244E v_mad_f32 v14, v13, v14, s76 ; D282000E 01321D0D v_sub_f32_e32 v15, s82, v19 ; 081E2652 v_mad_f32 v13, v13, v15, s77 ; D282000D 01361F0D v_sub_f32_e32 v15, s79, v0 ; 081E004F v_mad_f32 v0, s84, v15, v0 ; D2820000 04021E54 v_sub_f32_e32 v15, s80, v18 ; 081E2450 v_mad_f32 v15, s84, v15, v18 ; D282000F 044A1E54 v_sub_f32_e32 v16, s81, v19 ; 08202651 v_mad_f32 v16, s84, v16, v19 ; D2820010 044E2054 v_mul_f32_e32 v17, s84, v3 ; 10220654 s_waitcnt vmcnt(2) ; BF8C0772 v_mac_f32_e32 v17, v25, v24 ; 3E223119 v_add_f32_e64 v17, 0, v17 clamp ; D2060811 00022280 v_mul_f32_e32 v18, v20, v21 ; 10242B14 v_mad_f32 v17, -v22, v18, v17 ; D2820011 24462516 v_add_f32_e32 v17, 0x3da3d70a, v17 ; 062222FF 3DA3D70A v_mul_f32_e32 v17, 0x40d55555, v17 ; 102222FF 40D55555 v_add_f32_e64 v17, 0, v17 clamp ; D2060811 00022280 v_mad_f32 v18, -2.0, v17, v4 ; D2820012 041222F5 v_mul_f32_e32 v17, v17, v17 ; 10222311 v_mul_f32_e32 v17, v18, v17 ; 10222312 v_mad_f32 v1, -v17, v1, v1 ; D2820001 24060311 v_mac_f32_e32 v1, v17, v0 ; 3E020111 v_mad_f32 v0, -v17, v14, v14 ; D2820000 243A1D11 v_mac_f32_e32 v0, v17, v15 ; 3E001F11 v_mad_f32 v13, -v17, v13, v13 ; D282000D 24361B11 v_mac_f32_e32 v13, v17, v16 ; 3E1A2111 s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v1, v1, v27 ; 10023701 v_mul_f32_e32 v0, v0, v28 ; 10003900 v_mul_f32_e32 v13, v13, v29 ; 101A3B0D v_mul_f32_e32 v14, v5, v27 ; 101C3705 v_mac_f32_e32 v14, v6, v28 ; 3E1C3906 v_mac_f32_e32 v14, v7, v29 ; 3E1C3B07 v_mul_f32_e32 v2, s84, v2 ; 10040454 v_sub_f32_e32 v15, 1.0, v24 ; 081E30F2 v_mad_f32 v15, -v24, v15, v15 ; D282000F 243E1F18 v_mul_f32_e32 v15, v15, v15 ; 101E1F0F v_madmk_f32_e32 v2, v15, v2, 0x3e800000 ; 4004050F 3E800000 v_add_f32_e32 v15, -1.0, v20 ; 061E28F3 v_add_f32_e32 v16, -1.0, v21 ; 06202AF3 v_add_f32_e32 v18, -1.0, v22 ; 06242CF3 v_mad_f32 v19, v20, v2, -v2 ; D2820013 840A0514 v_mad_f32 v20, v21, v2, -v2 ; D2820014 840A0515 v_mad_f32 v21, v22, v2, -v2 ; D2820015 840A0516 v_mad_f32 v22, v23, v2, -v2 ; D2820016 840A0517 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v12, v12, v25 ; 1018330C v_mul_f32_e32 v23, v24, v24 ; 102E3118 v_mul_f32_e32 v12, v23, v12 ; 10181917 v_mac_f32_e32 v3, s84, v12 ; 3E061854 v_mul_f32_e32 v3, 0x41200000, v3 ; 100606FF 41200000 v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680 v_mad_f32 v12, -2.0, v3, v4 ; D282000C 041206F5 v_mul_f32_e32 v3, v3, v3 ; 10060703 v_mul_f32_e32 v3, v12, v3 ; 1006070C v_mad_f32 v12, s1, v14, -v1 ; D282000C 84061C01 v_mac_f32_e32 v1, v12, v3 ; 3E02070C v_mad_f32 v12, s2, v14, -v0 ; D282000C 84021C02 v_mac_f32_e32 v0, v12, v3 ; 3E00070C v_mad_f32 v12, v1, v19, v1 ; D282000C 04062701 v_mul_f32_e32 v5, v5, v12 ; 100A1905 v_mad_f32 v12, v0, v20, v0 ; D282000C 04022900 v_mac_f32_e32 v5, v6, v12 ; 3E0A1906 v_mad_f32 v6, s3, v14, -v13 ; D2820006 84361C03 v_mac_f32_e32 v13, v6, v3 ; 3E1A0706 v_mad_f32 v6, v2, v15, 1.0 ; D2820006 03CA1F02 v_mad_f32 v12, v2, v16, 1.0 ; D282000C 03CA2102 v_mad_f32 v2, v2, v18, 1.0 ; D2820002 03CA2502 v_mad_f32 v14, v13, v21, v13 ; D282000E 04362B0D v_mac_f32_e32 v5, v7, v14 ; 3E0A1D07 v_add_f32_e32 v5, v5, v8 ; 060A1105 v_mul_f32_e32 v5, 0x41649249, v5 ; 100A0AFF 41649249 v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80 v_mac_f32_e32 v4, -2.0, v5 ; 3E080AF5 v_mul_f32_e32 v5, v5, v5 ; 100A0B05 v_mul_f32_e32 v4, v4, v5 ; 10080B04 v_mad_f32 v4, -v4, v9, v9 ; D2820004 24261304 v_mad_f32 v5, v6, v1, v4 ; D2820005 04120306 v_mad_f32 v7, v12, v0, v4 ; D2820007 0412010C v_mad_f32 v4, v2, v13, v4 ; D2820004 04121B02 v_mul_f32_e32 v8, v26, v5 ; 10100B1A v_mul_f32_e32 v9, v26, v7 ; 10120F1A v_mul_f32_e32 v14, v26, v4 ; 101C091A v_mac_f32_e32 v8, v26, v5 ; 3E100B1A v_mac_f32_e32 v8, v6, v1 ; 3E100306 v_mac_f32_e32 v9, v26, v7 ; 3E120F1A v_mac_f32_e32 v9, v12, v0 ; 3E12010C v_mac_f32_e32 v14, v26, v4 ; 3E1C091A v_mac_f32_e32 v14, v2, v13 ; 3E1C1B02 v_add_f32_e64 v0, 0, v8 clamp ; D2060800 00021080 v_add_f32_e64 v1, 0, v9 clamp ; D2060801 00021280 v_add_f32_e64 v2, 0, v14 clamp ; D2060802 00021C80 v_mul_f32_e32 v4, v0, v25 ; 10083300 v_mul_f32_e32 v5, v1, v25 ; 100A3301 v_mul_f32_e32 v6, v2, v25 ; 100C3302 image_sample v[12:15], 15, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[16:23], s[12:15] ; F0800F00 00640C0A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, -v25, v0, v12 ; D2820000 24320119 v_mad_f32 v1, -v25, v1, v13 ; D2820001 24360319 v_mad_f32 v2, -v25, v2, v14 ; D2820002 243A0519 v_mul_f32_e32 v7, s83, v25 ; 100E3253 v_mac_f32_e32 v7, v7, v22 ; 3E0E2D07 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v8, 1, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[24:31], s[8:11] ; F0800100 0046080A s_waitcnt vmcnt(0) ; BF8C0770 v_sub_f32_e32 v8, 1.0, v8 ; 081010F2 v_mac_f32_e32 v4, v0, v8 ; 3E081100 v_mac_f32_e32 v5, v1, v8 ; 3E0A1101 v_mac_f32_e32 v6, v2, v8 ; 3E0C1102 v_mul_f32_e32 v0, s84, v3 ; 10000654 v_mad_f32 v1, -s84, v3, 1.0 ; D2820001 23CA0654 v_mad_f32 v0, -v0, v17, v17 ; D2820000 24462300 v_mad_f32 v1, -v1, v17, 1.0 ; D2820001 23CA2301 v_mac_f32_e32 v0, v1, v3 ; 3E000701 v_mul_f32_e32 v0, v7, v0 ; 10000107 v_mul_f32_e32 v1, s0, v0 ; 10020000 v_mad_f32 v0, -s0, v0, v15 ; D2820000 243E0000 v_mac_f32_e32 v1, v0, v8 ; 3E021100 v_cvt_pkrtz_f16_f32_e32 v0, v4, v5 ; 5E000B04 v_cvt_pkrtz_f16_f32_e32 v1, v6, v1 ; 5E020306 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 88 VGPRS: 32 Code Size: 1020 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL CONST[0..5] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[0], CONST[2] 2: DP4 TEMP[1].x, IN[0], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[0], CONST[4] 5: DP4 TEMP[2].x, IN[0], CONST[5] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[1], CONST[0] 9: MOV OUT[2], CONST[1] 10: MOV OUT[0], TEMP[0] 11: MOV OUT[3], TEMP[1] 12: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 12) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 16) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 20) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 24) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 28) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 32) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 36) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 40) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 44) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 48) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 52) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 56) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 60) %30 = call float @llvm.SI.load.const(<16 x i8> %13, i32 64) %31 = call float @llvm.SI.load.const(<16 x i8> %13, i32 68) %32 = call float @llvm.SI.load.const(<16 x i8> %13, i32 72) %33 = call float @llvm.SI.load.const(<16 x i8> %13, i32 76) %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 80) %35 = call float @llvm.SI.load.const(<16 x i8> %13, i32 84) %36 = call float @llvm.SI.load.const(<16 x i8> %13, i32 88) %37 = call float @llvm.SI.load.const(<16 x i8> %13, i32 92) %38 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = add i32 %5, %8 %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %40) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = fmul float %42, %22 %47 = fmul float %43, %23 %48 = fadd float %46, %47 %49 = fmul float %44, %24 %50 = fadd float %48, %49 %51 = fmul float %45, %25 %52 = fadd float %50, %51 %53 = fmul float %42, %26 %54 = fmul float %43, %27 %55 = fadd float %53, %54 %56 = fmul float %44, %28 %57 = fadd float %55, %56 %58 = fmul float %45, %29 %59 = fadd float %57, %58 %60 = fmul float %42, %30 %61 = fmul float %43, %31 %62 = fadd float %60, %61 %63 = fmul float %44, %32 %64 = fadd float %62, %63 %65 = fmul float %45, %33 %66 = fadd float %64, %65 %67 = fmul float %42, %34 %68 = fmul float %43, %35 %69 = fadd float %67, %68 %70 = fmul float %44, %36 %71 = fadd float %69, %70 %72 = fmul float %45, %37 %73 = fadd float %71, %72 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %14, float %15, float %16, float %17) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %18, float %19, float %20, float %21) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %66, float %73, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %52, float %59, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s19, s[0:3], 0xf ; C209810F s_buffer_load_dword s20, s[0:3], 0x10 ; C20A0110 s_buffer_load_dword s21, s[0:3], 0x11 ; C20A8111 s_buffer_load_dword s22, s[0:3], 0x14 ; C20B0114 s_buffer_load_dword s23, s[0:3], 0x15 ; C20B8115 s_buffer_load_dword s24, s[0:3], 0x12 ; C20C0112 s_buffer_load_dword s25, s[0:3], 0x13 ; C20C8113 s_buffer_load_dword s26, s[0:3], 0x16 ; C20D0116 s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s4 ; 7E080204 v_mov_b32_e32 v5, s5 ; 7E0A0205 v_mov_b32_e32 v6, s6 ; 7E0C0206 v_mov_b32_e32 v7, s7 ; 7E0E0207 exp 15, 32, 0, 0, 0, v4, v5, v6, v7 ; F800020F 07060504 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v4, s8 ; 7E080208 v_mov_b32_e32 v5, s9 ; 7E0A0209 v_mov_b32_e32 v6, s10 ; 7E0C020A v_mov_b32_e32 v7, s11 ; 7E0E020B exp 15, 33, 0, 0, 0, v4, v5, v6, v7 ; F800021F 07060504 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mul_f32_e32 v4, s13, v1 ; 1008020D v_mul_f32_e32 v5, s17, v1 ; 100A0211 v_mul_f32_e32 v6, s21, v1 ; 100C0215 v_mul_f32_e32 v1, s23, v1 ; 10020217 v_mac_f32_e32 v4, s12, v0 ; 3E08000C v_mac_f32_e32 v5, s16, v0 ; 3E0A0010 v_mac_f32_e32 v6, s20, v0 ; 3E0C0014 v_mac_f32_e32 v1, s22, v0 ; 3E020016 v_mac_f32_e32 v4, s14, v2 ; 3E08040E v_mac_f32_e32 v5, s18, v2 ; 3E0A0412 v_mac_f32_e32 v6, s24, v2 ; 3E0C0418 v_mac_f32_e32 v1, s26, v2 ; 3E02041A v_mac_f32_e32 v4, s15, v3 ; 3E08060F v_mac_f32_e32 v5, s19, v3 ; 3E0A0613 v_mac_f32_e32 v6, s25, v3 ; 3E0C0619 v_mac_f32_e32 v1, s0, v3 ; 3E020600 v_mov_b32_e32 v0, 1.0 ; 7E0002F2 exp 15, 34, 0, 0, 0, v6, v1, v0, v0 ; F800022F 00000106 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 0 ; 7E020280 exp 15, 12, 0, 1, 0, v4, v5, v1, v0 ; F80008CF 00010504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 8 Code Size: 280 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[0] DCL CONST[2] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[2].xyxx 1: MOV TEMP[1], IMM[0].xxxx 2: MOV TEMP[2], IMM[0].xxxx 3: MOV TEMP[3].x, -CONST[0].xxxx 4: BGNLOOP :0 5: FSLT TEMP[4].x, CONST[0].xxxx, TEMP[3].xxxx 6: UIF TEMP[4].xxxx :0 7: BRK 8: ENDIF 9: MAD TEMP[5].xy, TEMP[3].xxxx, CONST[2].xyyy, TEMP[0].xyyy 10: MOV TEMP[6].xy, TEMP[5].xyyy 11: MOV TEMP[6].w, IMM[0].xxxx 12: TXB TEMP[7], TEMP[6], SAMP[0], 2D 13: ADD TEMP[2], TEMP[2], TEMP[7] 14: ADD TEMP[3].x, TEMP[3].xxxx, IMM[0].yyyy 15: ENDLOOP :0 16: MUL TEMP[1], TEMP[2], CONST[0].wwww 17: MOV TEMP[0].w, IMM[0].yyyy 18: MOV TEMP[0].xyz, IN[1].xyzx 19: MUL TEMP[0], TEMP[1], TEMP[0] 20: MUL TEMP[1], TEMP[0], IN[1].wwww 21: MAD TEMP[1], IN[0], TEMP[1].wwww, TEMP[1] 22: MOV OUT[0], TEMP[1] 23: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0) %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 12) %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 32) %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 36) %29 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %30 = load <8 x i32>, <8 x i32> addrspace(2)* %29, align 32, !tbaa !0 %31 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %32 = load <4 x i32>, <4 x i32> addrspace(2)* %31, align 16, !tbaa !0 %33 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %34 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %35 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %36 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %37 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %38 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %39 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) %40 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %41 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) %42 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) %43 = fsub float -0.000000e+00, %25 br label %LOOP LOOP: ; preds = %ENDIF, %main_body %temp8.0 = phi float [ 0.000000e+00, %main_body ], [ %81, %ENDIF ] %temp9.0 = phi float [ 0.000000e+00, %main_body ], [ %82, %ENDIF ] %temp10.0 = phi float [ 0.000000e+00, %main_body ], [ %83, %ENDIF ] %temp11.0 = phi float [ 0.000000e+00, %main_body ], [ %84, %ENDIF ] %temp12.0 = phi float [ %43, %main_body ], [ %85, %ENDIF ] %44 = fcmp olt float %25, %temp12.0 br i1 %44, label %IF, label %ENDIF IF: ; preds = %LOOP %45 = fmul float %temp8.0, %26 %46 = fmul float %temp9.0, %26 %47 = fmul float %temp10.0, %26 %48 = fmul float %temp11.0, %26 %49 = fmul float %45, %37 %50 = fmul float %46, %38 %51 = fmul float %47, %39 %52 = fmul float %49, %40 %53 = fmul float %50, %40 %54 = fmul float %51, %40 %55 = fmul float %48, %40 %56 = fmul float %33, %55 %57 = fadd float %56, %52 %58 = fmul float %34, %55 %59 = fadd float %58, %53 %60 = fmul float %35, %55 %61 = fadd float %60, %54 %62 = fmul float %36, %55 %63 = fadd float %62, %55 %64 = call i32 @llvm.SI.packf16(float %57, float %59) %65 = bitcast i32 %64 to float %66 = call i32 @llvm.SI.packf16(float %61, float %63) %67 = bitcast i32 %66 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %65, float %67, float %65, float %67) ret void ENDIF: ; preds = %LOOP %68 = fmul float %temp12.0, %27 %69 = fadd float %68, %41 %70 = fmul float %temp12.0, %28 %71 = fadd float %70, %42 %72 = bitcast float %69 to i32 %73 = bitcast float %71 to i32 %74 = insertelement <4 x i32> , i32 %72, i32 1 %75 = insertelement <4 x i32> %74, i32 %73, i32 2 %76 = call <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32> %75, <8 x i32> %30, <4 x i32> %32, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %77 = extractelement <4 x float> %76, i32 0 %78 = extractelement <4 x float> %76, i32 1 %79 = extractelement <4 x float> %76, i32 2 %80 = extractelement <4 x float> %76, i32 3 %81 = fadd float %temp8.0, %77 %82 = fadd float %temp9.0, %78 %83 = fadd float %temp10.0, %79 %84 = fadd float %temp11.0, %80 %85 = fadd float %temp12.0, 1.000000e+00 br label %LOOP } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[16:19], s[2:3], 0x0 ; C0880300 s_mov_b32 m0, s10 ; BEFC030A s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s1, s[16:19], 0x0 ; C2009100 s_buffer_load_dword s0, s[16:19], 0x3 ; C2001103 s_buffer_load_dword s2, s[16:19], 0x8 ; C2011108 s_buffer_load_dword s3, s[16:19], 0x9 ; C2019109 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700 v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701 v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800 s_load_dwordx4 s[4:7], s[4:5], 0x0 ; C0820500 v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801 v_interp_p1_f32 v0, v0, 1, 2, [m0] ; C8000900 v_interp_p2_f32 v0, [v0], v1, 1, 2, [m0] ; C8010901 v_mov_b32_e32 v1, 0x80000000 ; 7E0202FF 80000000 s_waitcnt lgkmcnt(0) ; BF8C007F v_xor_b32_e32 v11, s1, v1 ; 3A160201 v_mov_b32_e32 v15, 0 ; 7E1E0280 s_mov_b64 s[16:17], 0 ; BE900480 v_mov_b32_e32 v16, 0 ; 7E200280 v_mov_b32_e32 v17, 0 ; 7E220280 v_mov_b32_e32 v18, 0 ; 7E240280 v_mov_b32_e32 v1, v18 ; 7E020312 v_mov_b32_e32 v12, v17 ; 7E180311 v_mov_b32_e32 v13, v16 ; 7E1A0310 v_mov_b32_e32 v14, v15 ; 7E1C030F v_cmp_nlt_f32_e32 vcc, s1, v11 ; 7C1C1601 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_mad_f32 v16, s2, v11, v10 ; D2820010 042A1602 v_mad_f32 v17, s3, v11, v0 ; D2820011 04021603 v_mov_b32_e32 v15, 0 ; 7E1E0280 image_sample_b v[18:21], 15, 0, 0, 0, 0, 0, 0, 0, v[15:18], s[8:15], s[4:7] ; F0940F00 0022120F s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v15, v18, v14 ; 061E1D12 v_add_f32_e32 v16, v19, v13 ; 06201B13 v_add_f32_e32 v17, v20, v12 ; 06221914 v_add_f32_e32 v18, v21, v1 ; 06240315 v_add_f32_e32 v11, 1.0, v11 ; 061616F2 s_or_b64 exec, exec, s[18:19] ; 88FE127E s_or_b64 s[16:17], s[18:19], s[16:17] ; 88901012 s_andn2_b64 exec, exec, s[16:17] ; 8AFE107E s_cbranch_execnz BB0_1 ; BF890000 s_or_b64 exec, exec, s[16:17] ; 88FE107E v_mul_f32_e32 v0, s0, v14 ; 10001C00 v_mul_f32_e32 v10, s0, v13 ; 10141A00 v_mul_f32_e32 v11, s0, v12 ; 10161800 v_mul_f32_e32 v1, s0, v1 ; 10020200 v_mul_f32_e32 v0, v6, v0 ; 10000106 v_mul_f32_e32 v6, v7, v10 ; 100C1507 v_mul_f32_e32 v7, v8, v11 ; 100E1708 v_mul_f32_e32 v0, v9, v0 ; 10000109 v_mul_f32_e32 v6, v9, v6 ; 100C0D09 v_mul_f32_e32 v7, v9, v7 ; 100E0F09 v_mul_f32_e32 v1, v9, v1 ; 10020309 v_mac_f32_e32 v0, v1, v2 ; 3E000501 v_mac_f32_e32 v6, v1, v3 ; 3E0C0701 v_mac_f32_e32 v7, v1, v4 ; 3E0E0901 v_mac_f32_e32 v1, v1, v5 ; 3E020B01 v_cvt_pkrtz_f16_f32_e32 v0, v0, v6 ; 5E000D00 v_cvt_pkrtz_f16_f32_e32 v1, v7, v1 ; 5E020307 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 24 Code Size: 336 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL CONST[0..5] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[0], CONST[2] 2: DP4 TEMP[1].x, IN[0], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[0], CONST[4] 5: DP4 TEMP[2].x, IN[0], CONST[5] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[1], CONST[0] 9: MOV OUT[2], CONST[1] 10: MOV OUT[0], TEMP[0] 11: MOV OUT[3], TEMP[1] 12: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 12) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 16) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 20) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 24) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 28) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 32) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 36) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 40) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 44) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 48) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 52) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 56) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 60) %30 = call float @llvm.SI.load.const(<16 x i8> %13, i32 64) %31 = call float @llvm.SI.load.const(<16 x i8> %13, i32 68) %32 = call float @llvm.SI.load.const(<16 x i8> %13, i32 72) %33 = call float @llvm.SI.load.const(<16 x i8> %13, i32 76) %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 80) %35 = call float @llvm.SI.load.const(<16 x i8> %13, i32 84) %36 = call float @llvm.SI.load.const(<16 x i8> %13, i32 88) %37 = call float @llvm.SI.load.const(<16 x i8> %13, i32 92) %38 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = add i32 %5, %8 %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %40) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = fmul float %42, %22 %47 = fmul float %43, %23 %48 = fadd float %46, %47 %49 = fmul float %44, %24 %50 = fadd float %48, %49 %51 = fmul float %45, %25 %52 = fadd float %50, %51 %53 = fmul float %42, %26 %54 = fmul float %43, %27 %55 = fadd float %53, %54 %56 = fmul float %44, %28 %57 = fadd float %55, %56 %58 = fmul float %45, %29 %59 = fadd float %57, %58 %60 = fmul float %42, %30 %61 = fmul float %43, %31 %62 = fadd float %60, %61 %63 = fmul float %44, %32 %64 = fadd float %62, %63 %65 = fmul float %45, %33 %66 = fadd float %64, %65 %67 = fmul float %42, %34 %68 = fmul float %43, %35 %69 = fadd float %67, %68 %70 = fmul float %44, %36 %71 = fadd float %69, %70 %72 = fmul float %45, %37 %73 = fadd float %71, %72 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %14, float %15, float %16, float %17) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %18, float %19, float %20, float %21) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %66, float %73, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %52, float %59, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s19, s[0:3], 0xf ; C209810F s_buffer_load_dword s20, s[0:3], 0x10 ; C20A0110 s_buffer_load_dword s21, s[0:3], 0x11 ; C20A8111 s_buffer_load_dword s22, s[0:3], 0x14 ; C20B0114 s_buffer_load_dword s23, s[0:3], 0x15 ; C20B8115 s_buffer_load_dword s24, s[0:3], 0x12 ; C20C0112 s_buffer_load_dword s25, s[0:3], 0x13 ; C20C8113 s_buffer_load_dword s26, s[0:3], 0x16 ; C20D0116 s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s4 ; 7E080204 v_mov_b32_e32 v5, s5 ; 7E0A0205 v_mov_b32_e32 v6, s6 ; 7E0C0206 v_mov_b32_e32 v7, s7 ; 7E0E0207 exp 15, 32, 0, 0, 0, v4, v5, v6, v7 ; F800020F 07060504 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v4, s8 ; 7E080208 v_mov_b32_e32 v5, s9 ; 7E0A0209 v_mov_b32_e32 v6, s10 ; 7E0C020A v_mov_b32_e32 v7, s11 ; 7E0E020B exp 15, 33, 0, 0, 0, v4, v5, v6, v7 ; F800021F 07060504 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mul_f32_e32 v4, s13, v1 ; 1008020D v_mul_f32_e32 v5, s17, v1 ; 100A0211 v_mul_f32_e32 v6, s21, v1 ; 100C0215 v_mul_f32_e32 v1, s23, v1 ; 10020217 v_mac_f32_e32 v4, s12, v0 ; 3E08000C v_mac_f32_e32 v5, s16, v0 ; 3E0A0010 v_mac_f32_e32 v6, s20, v0 ; 3E0C0014 v_mac_f32_e32 v1, s22, v0 ; 3E020016 v_mac_f32_e32 v4, s14, v2 ; 3E08040E v_mac_f32_e32 v5, s18, v2 ; 3E0A0412 v_mac_f32_e32 v6, s24, v2 ; 3E0C0418 v_mac_f32_e32 v1, s26, v2 ; 3E02041A v_mac_f32_e32 v4, s15, v3 ; 3E08060F v_mac_f32_e32 v5, s19, v3 ; 3E0A0613 v_mac_f32_e32 v6, s25, v3 ; 3E0C0619 v_mac_f32_e32 v1, s0, v3 ; 3E020600 v_mov_b32_e32 v0, 1.0 ; 7E0002F2 exp 15, 34, 0, 0, 0, v6, v1, v0, v0 ; F800022F 00000106 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 0 ; 7E020280 exp 15, 12, 0, 1, 0, v4, v5, v1, v0 ; F80008CF 00010504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 8 Code Size: 280 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[0..2] DCL CONST[4] DCL CONST[6] DCL TEMP[0..10], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[2].xyxx 1: MOV TEMP[1], IMM[0].xxxx 2: MOV TEMP[2], IMM[0].xxxx 3: MOV TEMP[3].y, IMM[0].xxxx 4: MOV TEMP[3].x, -CONST[0].xxxx 5: BGNLOOP :0 6: FSLT TEMP[4].x, CONST[0].xxxx, TEMP[3].xxxx 7: UIF TEMP[4].xxxx :0 8: BRK 9: ENDIF 10: MOV TEMP[3].y, -CONST[0].yyyy 11: BGNLOOP :0 12: FSLT TEMP[5].x, CONST[0].yyyy, TEMP[3].yyyy 13: UIF TEMP[5].xxxx :0 14: BRK 15: ENDIF 16: ADD TEMP[6].xy, CONST[1].xyyy, TEMP[3].xyyy 17: MAD TEMP[7].xy, TEMP[6].xyyy, CONST[6].xyyy, TEMP[0].xyyy 18: MOV TEMP[8].xy, TEMP[7].xyyy 19: MOV TEMP[8].w, IMM[0].xxxx 20: TXB TEMP[9], TEMP[8], SAMP[1], 2D 21: ADD TEMP[2], TEMP[2], TEMP[9] 22: ADD TEMP[10].x, TEMP[3].yyyy, IMM[0].yyyy 23: MOV TEMP[3].y, TEMP[10].xxxx 24: ENDLOOP :0 25: ADD TEMP[3].x, TEMP[3].xxxx, IMM[0].yyyy 26: ENDLOOP :0 27: MUL TEMP[1].w, TEMP[2], CONST[0].wwww 28: MUL TEMP[0].xy, IN[2].xyyy, CONST[4].xyyy 29: MOV TEMP[0].xy, TEMP[0].xyyy 30: MOV TEMP[0].w, IMM[0].xxxx 31: TXB TEMP[0], TEMP[0], SAMP[0], 2D 32: ADD TEMP[2].x, IMM[0].yyyy, -TEMP[0].wwww 33: MUL TEMP[2].x, TEMP[1].wwww, TEMP[2].xxxx 34: MUL TEMP[2].x, TEMP[2].xxxx, CONST[0].zzzz 35: MOV_SAT TEMP[2].x, TEMP[2].xxxx 36: MAD TEMP[1], CONST[2], TEMP[2].xxxx, TEMP[0] 37: MOV TEMP[0].w, IMM[0].yyyy 38: MOV TEMP[0].xyz, IN[1].xyzx 39: MUL TEMP[0], TEMP[1], TEMP[0] 40: MUL TEMP[1], TEMP[0], IN[1].wwww 41: MAD TEMP[1], IN[0], TEMP[1].wwww, TEMP[1] 42: MOV OUT[0], TEMP[1] 43: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0) %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 4) %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 8) %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 12) %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 16) %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 20) %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 32) %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 36) %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 40) %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 44) %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 64) %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 68) %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 96) %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 100) %39 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %40 = load <8 x i32>, <8 x i32> addrspace(2)* %39, align 32, !tbaa !0 %41 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %42 = load <4 x i32>, <4 x i32> addrspace(2)* %41, align 16, !tbaa !0 %43 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %44 = load <8 x i32>, <8 x i32> addrspace(2)* %43, align 32, !tbaa !0 %45 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %46 = load <4 x i32>, <4 x i32> addrspace(2)* %45, align 16, !tbaa !0 %47 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %48 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %49 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %50 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %51 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %52 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %53 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) %54 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %55 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) %56 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) %57 = fsub float -0.000000e+00, %25 %58 = fsub float -0.000000e+00, %26 br label %LOOP LOOP: ; preds = %IF47, %main_body %temp11.0 = phi float [ 0.000000e+00, %main_body ], [ %temp11.1, %IF47 ] %temp12.0 = phi float [ %57, %main_body ], [ %109, %IF47 ] %59 = fcmp olt float %25, %temp12.0 br i1 %59, label %IF, label %ENDIF IF: ; preds = %LOOP %60 = fmul float %temp11.0, %28 %61 = fmul float %55, %35 %62 = fmul float %56, %36 %63 = bitcast float %61 to i32 %64 = bitcast float %62 to i32 %65 = insertelement <4 x i32> , i32 %63, i32 1 %66 = insertelement <4 x i32> %65, i32 %64, i32 2 %67 = call <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32> %66, <8 x i32> %40, <4 x i32> %42, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %68 = extractelement <4 x float> %67, i32 0 %69 = extractelement <4 x float> %67, i32 1 %70 = extractelement <4 x float> %67, i32 2 %71 = extractelement <4 x float> %67, i32 3 %72 = fsub float 1.000000e+00, %71 %73 = fmul float %60, %72 %74 = fmul float %73, %27 %75 = call float @llvm.AMDIL.clamp.(float %74, float 0.000000e+00, float 1.000000e+00) %76 = fmul float %31, %75 %77 = fadd float %76, %68 %78 = fmul float %32, %75 %79 = fadd float %78, %69 %80 = fmul float %33, %75 %81 = fadd float %80, %70 %82 = fmul float %34, %75 %83 = fadd float %82, %71 %84 = fmul float %77, %51 %85 = fmul float %79, %52 %86 = fmul float %81, %53 %87 = fmul float %84, %54 %88 = fmul float %85, %54 %89 = fmul float %86, %54 %90 = fmul float %83, %54 %91 = fmul float %47, %90 %92 = fadd float %91, %87 %93 = fmul float %48, %90 %94 = fadd float %93, %88 %95 = fmul float %49, %90 %96 = fadd float %95, %89 %97 = fmul float %50, %90 %98 = fadd float %97, %90 %99 = call i32 @llvm.SI.packf16(float %92, float %94) %100 = bitcast i32 %99 to float %101 = call i32 @llvm.SI.packf16(float %96, float %98) %102 = bitcast i32 %101 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %100, float %102, float %100, float %102) ret void ENDIF: ; preds = %LOOP %103 = fadd float %29, %temp12.0 %104 = fmul float %103, %37 %105 = fadd float %104, %55 %106 = bitcast float %105 to i32 %107 = insertelement <4 x i32> , i32 %106, i32 1 br label %LOOP45 LOOP45: ; preds = %ENDIF46, %ENDIF %temp11.1 = phi float [ %temp11.0, %ENDIF ], [ %117, %ENDIF46 ] %temp13.0 = phi float [ %58, %ENDIF ], [ %118, %ENDIF46 ] %108 = fcmp olt float %26, %temp13.0 br i1 %108, label %IF47, label %ENDIF46 IF47: ; preds = %LOOP45 %109 = fadd float %temp12.0, 1.000000e+00 br label %LOOP ENDIF46: ; preds = %LOOP45 %110 = fadd float %30, %temp13.0 %111 = fmul float %110, %38 %112 = fadd float %111, %56 %113 = bitcast float %112 to i32 %114 = insertelement <4 x i32> %107, i32 %113, i32 2 %115 = call <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32> %114, <8 x i32> %44, <4 x i32> %46, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %116 = extractelement <4 x float> %115, i32 3 %117 = fadd float %temp11.1, %116 %118 = fadd float %temp13.0, 1.000000e+00 br label %LOOP45 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[32:35], s[2:3], 0x0 ; C0900300 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx8 s[20:27], s[6:7], 0x8 ; C0CA0708 s_mov_b32 m0, s10 ; BEFC030A v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v2, v0, 2, 0, [m0] ; C8080200 v_interp_p2_f32 v2, [v2], v1, 2, 0, [m0] ; C8090201 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[32:35], 0x0 ; C2042100 s_buffer_load_dword s9, s[32:35], 0x1 ; C204A101 s_buffer_load_dword s3, s[32:35], 0x2 ; C201A102 s_buffer_load_dword s7, s[32:35], 0x3 ; C203A103 s_buffer_load_dword s28, s[32:35], 0x4 ; C20E2104 s_buffer_load_dword s29, s[32:35], 0x5 ; C20EA105 s_buffer_load_dword s6, s[32:35], 0x8 ; C2032108 s_buffer_load_dword s2, s[32:35], 0x9 ; C2012109 s_buffer_load_dword s1, s[32:35], 0xa ; C200A10A s_buffer_load_dword s0, s[32:35], 0xb ; C200210B s_buffer_load_dword s10, s[32:35], 0x10 ; C2052110 s_buffer_load_dword s11, s[32:35], 0x11 ; C205A111 s_buffer_load_dword s30, s[32:35], 0x18 ; C20F2118 s_buffer_load_dword s31, s[32:35], 0x19 ; C20FA119 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700 v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701 s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 s_load_dwordx4 s[36:39], s[4:5], 0x4 ; C0920504 v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800 v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801 v_interp_p1_f32 v0, v0, 1, 2, [m0] ; C8000900 v_interp_p2_f32 v0, [v0], v1, 1, 2, [m0] ; C8010901 v_mov_b32_e32 v1, 0x80000000 ; 7E0202FF 80000000 s_waitcnt lgkmcnt(0) ; BF8C007F v_xor_b32_e32 v11, s8, v1 ; 3A160208 v_xor_b32_e32 v12, s9, v1 ; 3A180209 v_mov_b32_e32 v13, 0 ; 7E1A0280 s_mov_b64 s[4:5], 0 ; BE840480 v_mov_b32_e32 v1, v13 ; 7E02030D v_cmp_nlt_f32_e32 vcc, s8, v11 ; 7C1C1608 s_and_saveexec_b64 s[40:41], vcc ; BEA8246A s_xor_b64 s[40:41], exec, s[40:41] ; 89A8287E s_cbranch_execz BB0_4 ; BF880000 v_add_f32_e32 v13, s28, v11 ; 061A161C v_mad_f32 v15, s30, v13, v10 ; D282000F 042A1A1E v_mov_b32_e32 v14, 0 ; 7E1C0280 s_mov_b64 s[42:43], 0 ; BEAA0480 v_mov_b32_e32 v16, v1 ; 7E200301 v_mov_b32_e32 v17, v12 ; 7E22030C v_mov_b32_e32 v13, v16 ; 7E1A0310 v_cmp_nlt_f32_e32 vcc, s9, v17 ; 7C1C2209 s_and_saveexec_b64 s[44:45], vcc ; BEAC246A s_xor_b64 s[44:45], exec, s[44:45] ; 89AC2C7E v_add_f32_e32 v16, s29, v17 ; 0620221D v_mad_f32 v16, s31, v16, v0 ; D2820010 0402201F image_sample_b v16, 8, 0, 0, 0, 0, 0, 0, 0, v[14:17], s[20:27], s[36:39] ; F0940800 0125100E s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v16, v16, v13 ; 06201B10 v_add_f32_e32 v17, 1.0, v17 ; 062222F2 s_or_b64 exec, exec, s[44:45] ; 88FE2C7E s_or_b64 s[42:43], s[44:45], s[42:43] ; 88AA2A2C s_andn2_b64 exec, exec, s[42:43] ; 8AFE2A7E s_cbranch_execnz BB0_5 ; BF890000 s_or_b64 exec, exec, s[42:43] ; 88FE2A7E v_add_f32_e32 v11, 1.0, v11 ; 061616F2 s_or_b64 exec, exec, s[40:41] ; 88FE287E s_or_b64 s[4:5], s[40:41], s[4:5] ; 88840428 s_andn2_b64 exec, exec, s[4:5] ; 8AFE047E s_cbranch_execnz BB0_1 ; BF890000 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_mul_f32_e32 v1, s7, v1 ; 10020207 v_mul_f32_e32 v11, s10, v10 ; 1016140A v_mul_f32_e32 v12, s11, v0 ; 1018000B v_mov_b32_e32 v10, 0 ; 7E140280 image_sample_b v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[12:19], s[32:35] ; F0940F00 01030A0A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, -v13, v1, v1 ; D2820000 2406030D v_mul_f32_e32 v0, s3, v0 ; 10000003 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mad_f32 v1, s6, v0, v10 ; D2820001 042A0006 v_mad_f32 v10, s2, v0, v11 ; D282000A 042E0002 v_mad_f32 v11, s1, v0, v12 ; D282000B 04320001 v_mac_f32_e32 v13, s0, v0 ; 3E1A0000 v_mul_f32_e32 v0, v6, v1 ; 10000306 v_mul_f32_e32 v1, v7, v10 ; 10021507 v_mul_f32_e32 v6, v8, v11 ; 100C1708 v_mul_f32_e32 v0, v9, v0 ; 10000109 v_mul_f32_e32 v1, v9, v1 ; 10020309 v_mul_f32_e32 v6, v9, v6 ; 100C0D09 v_mul_f32_e32 v7, v9, v13 ; 100E1B09 v_mac_f32_e32 v0, v7, v4 ; 3E000907 v_mac_f32_e32 v1, v7, v3 ; 3E020707 v_mac_f32_e32 v6, v7, v2 ; 3E0C0507 v_mac_f32_e32 v7, v7, v5 ; 3E0E0B07 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v6, v7 ; 5E020F06 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 20 Code Size: 476 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL CONST[0..47] DCL TEMP[0..1], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 2.0000, 0.1000} IMM[1] FLT32 { 1.1000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: MAD TEMP[1].x, IN[1].xxxx, IMM[0].zzzz, IMM[0].wwww 2: F2I TEMP[1].x, TEMP[1].xxxx 3: UARL ADDR[0].x, TEMP[1].xxxx 4: UARL ADDR[0].x, TEMP[1].xxxx 5: DP4 TEMP[0].x, IN[0], CONST[ADDR[0].x] 6: MAD TEMP[1].x, IN[1].xxxx, IMM[0].zzzz, IMM[1].xxxx 7: F2I TEMP[1].x, TEMP[1].xxxx 8: UARL ADDR[0].x, TEMP[1].xxxx 9: DP4 TEMP[1].x, IN[0], CONST[ADDR[0].x] 10: MOV TEMP[0].y, TEMP[1].xxxx 11: MOV OUT[0], TEMP[0] 12: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 %16 = add i32 %5, %8 %17 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %15, i32 0, i32 %16) %18 = extractelement <4 x float> %17, i32 0 %19 = extractelement <4 x float> %17, i32 1 %20 = extractelement <4 x float> %17, i32 2 %21 = extractelement <4 x float> %17, i32 3 %22 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = add i32 %5, %8 %25 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %23, i32 0, i32 %24) %26 = extractelement <4 x float> %25, i32 0 %27 = fmul float %26, 2.000000e+00 %28 = fadd float %27, 0x3FB99999A0000000 %29 = fptosi float %28 to i32 %30 = shl i32 %29, 4 %31 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %30) %32 = shl i32 %29, 4 %33 = or i32 %32, 4 %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %33) %35 = shl i32 %29, 4 %36 = or i32 %35, 8 %37 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %36) %38 = shl i32 %29, 4 %39 = or i32 %38, 12 %40 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %39) %41 = fmul float %18, %31 %42 = fmul float %19, %34 %43 = fadd float %41, %42 %44 = fmul float %20, %37 %45 = fadd float %43, %44 %46 = fmul float %21, %40 %47 = fadd float %45, %46 %48 = fmul float %26, 2.000000e+00 %49 = fadd float %48, 0x3FF19999A0000000 %50 = fptosi float %49 to i32 %51 = shl i32 %50, 4 %52 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %51) %53 = shl i32 %50, 4 %54 = or i32 %53, 4 %55 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %54) %56 = shl i32 %50, 4 %57 = or i32 %56, 8 %58 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %57) %59 = shl i32 %50, 4 %60 = or i32 %59, 12 %61 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %60) %62 = fmul float %18, %52 %63 = fmul float %19, %55 %64 = fadd float %62, %63 %65 = fmul float %20, %58 %66 = fadd float %64, %65 %67 = fmul float %21, %61 %68 = fadd float %66, %67 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %47, float %68, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_mov_b32_e32 v1, 1.0 ; 7E0202F2 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 s_waitcnt vmcnt(0) ; BF8C0770 v_madak_f32_e32 v0, 2.0, v6, 0x3dcccccd ; 42000CF4 3DCCCCCD v_madak_f32_e32 v6, 2.0, v6, 0x3f8ccccd ; 420C0CF4 3F8CCCCD v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_cvt_i32_f32_e32 v6, v6 ; 7E0C1106 v_lshlrev_b32_e32 v0, 4, v0 ; 34000084 v_lshlrev_b32_e32 v6, 4, v6 ; 340C0C84 buffer_load_dword v7, v0, s[0:3], 0 offen ; E0301000 80000700 v_or_b32_e32 v8, 4, v0 ; 38100084 v_or_b32_e32 v9, 8, v0 ; 38120088 v_or_b32_e32 v0, 12, v0 ; 3800008C buffer_load_dword v10, v6, s[0:3], 0 offen ; E0301000 80000A06 v_or_b32_e32 v11, 4, v6 ; 38160C84 buffer_load_dword v8, v8, s[0:3], 0 offen ; E0301000 80000808 v_or_b32_e32 v12, 8, v6 ; 38180C88 buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B v_or_b32_e32 v6, 12, v6 ; 380C0C8C buffer_load_dword v9, v9, s[0:3], 0 offen ; E0301000 80000909 buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 buffer_load_dword v6, v6, s[0:3], 0 offen ; E0301000 80000606 s_waitcnt vmcnt(5) ; BF8C0775 v_mul_f32_e32 v8, v8, v3 ; 10100708 v_mac_f32_e32 v8, v7, v2 ; 3E100507 s_waitcnt vmcnt(4) ; BF8C0774 v_mul_f32_e32 v3, v11, v3 ; 1006070B v_mac_f32_e32 v3, v10, v2 ; 3E06050A s_waitcnt vmcnt(3) ; BF8C0773 v_mac_f32_e32 v8, v9, v4 ; 3E100909 s_waitcnt vmcnt(2) ; BF8C0772 v_mac_f32_e32 v3, v12, v4 ; 3E06090C s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v8, v0, v5 ; 3E100B00 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v3, v6, v5 ; 3E060B06 v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 12, 0, 1, 0, v8, v3, v0, v1 ; F80008CF 01000308 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Code Size: 236 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL OUT[0], COLOR DCL CONST[0] 0: MOV OUT[0], CONST[0] 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0) %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 4) %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 8) %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 12) %29 = call i32 @llvm.SI.packf16(float %25, float %26) %30 = bitcast i32 %29 to float %31 = call i32 @llvm.SI.packf16(float %27, float %28) %32 = bitcast i32 %31 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %30, float %32, float %30, float %32) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s0, s[0:3], 0x3 ; C2000103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s5 ; 7E000205 v_cvt_pkrtz_f16_f32_e32 v0, s4, v0 ; 5E000004 v_mov_b32_e32 v1, s0 ; 7E020200 v_cvt_pkrtz_f16_f32_e32 v1, s6, v1 ; 5E020206 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 8 VGPRS: 4 Code Size: 56 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL CONST[0..1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[0], CONST[0] 2: DP4 TEMP[1].x, IN[0], CONST[1] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: MOV OUT[0], TEMP[0] 5: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 12) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 16) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 20) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 24) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 28) %22 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = add i32 %5, %8 %25 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %23, i32 0, i32 %24) %26 = extractelement <4 x float> %25, i32 0 %27 = extractelement <4 x float> %25, i32 1 %28 = extractelement <4 x float> %25, i32 2 %29 = extractelement <4 x float> %25, i32 3 %30 = fmul float %26, %14 %31 = fmul float %27, %15 %32 = fadd float %30, %31 %33 = fmul float %28, %16 %34 = fadd float %32, %33 %35 = fmul float %29, %17 %36 = fadd float %34, %35 %37 = fmul float %26, %18 %38 = fmul float %27, %19 %39 = fadd float %37, %38 %40 = fmul float %28, %20 %41 = fadd float %39, %40 %42 = fmul float %29, %21 %43 = fadd float %41, %42 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %36, float %43, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100 s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104 s_buffer_load_dword s8, s[0:3], 0x2 ; C2040102 s_buffer_load_dword s9, s[0:3], 0x6 ; C2048106 s_buffer_load_dword s10, s[0:3], 0x3 ; C2050103 s_buffer_load_dword s0, s[0:3], 0x7 ; C2000107 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v4, s4, v1 ; 10080204 v_mul_f32_e32 v1, s5, v1 ; 10020205 v_mac_f32_e32 v4, s6, v0 ; 3E080006 v_mac_f32_e32 v1, s7, v0 ; 3E020007 v_mac_f32_e32 v4, s8, v2 ; 3E080408 v_mac_f32_e32 v1, s9, v2 ; 3E020409 v_mac_f32_e32 v4, s10, v3 ; 3E08060A v_mac_f32_e32 v1, s0, v3 ; 3E020600 v_mov_b32_e32 v0, 1.0 ; 7E0002F2 v_mov_b32_e32 v2, 0 ; 7E040280 exp 15, 12, 0, 1, 0, v4, v1, v2, v0 ; F80008CF 00020104 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 116 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL OUT[0], COLOR DCL CONST[0] 0: MOV OUT[0], CONST[0] 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0) %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 4) %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 8) %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 12) %29 = call i32 @llvm.SI.packf16(float %25, float %26) %30 = bitcast i32 %29 to float %31 = call i32 @llvm.SI.packf16(float %27, float %28) %32 = bitcast i32 %31 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %30, float %32, float %30, float %32) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s0, s[0:3], 0x3 ; C2000103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s5 ; 7E000205 v_cvt_pkrtz_f16_f32_e32 v0, s4, v0 ; 5E000004 v_mov_b32_e32 v1, s0 ; 7E020200 v_cvt_pkrtz_f16_f32_e32 v1, s6, v1 ; 5E020206 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 8 VGPRS: 4 Code Size: 56 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[2], CONST[0] 2: DP4 TEMP[1].x, IN[2], CONST[1] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: MOV OUT[2], IN[1] 5: MOV OUT[1], IN[0] 6: MOV OUT[0], TEMP[0] 7: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 12) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 16) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 20) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 24) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 28) %22 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = add i32 %5, %8 %25 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %23, i32 0, i32 %24) %26 = extractelement <4 x float> %25, i32 0 %27 = extractelement <4 x float> %25, i32 1 %28 = extractelement <4 x float> %25, i32 2 %29 = extractelement <4 x float> %25, i32 3 %30 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = add i32 %5, %8 %33 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %31, i32 0, i32 %32) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 %38 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = add i32 %5, %8 %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %40) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = fmul float %42, %14 %47 = fmul float %43, %15 %48 = fadd float %46, %47 %49 = fmul float %44, %16 %50 = fadd float %48, %49 %51 = fmul float %45, %17 %52 = fadd float %50, %51 %53 = fmul float %42, %18 %54 = fmul float %43, %19 %55 = fadd float %53, %54 %56 = fmul float %44, %20 %57 = fadd float %55, %56 %58 = fmul float %45, %21 %59 = fadd float %57, %58 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %26, float %27, float %28, float %29) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %34, float %35, float %36, float %37) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %52, float %59, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A v_mov_b32_e32 v1, 1.0 ; 7E0202F2 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_buffer_load_dword s9, s[0:3], 0x1 ; C2048101 s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102 s_buffer_load_dword s11, s[0:3], 0x3 ; C2058103 s_buffer_load_dword s20, s[0:3], 0x4 ; C20A0104 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 buffer_load_format_xyzw v[10:13], v0, s[16:19], 0 idxen ; E00C2000 80040A00 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_buffer_load_dword s0, s[0:3], 0x7 ; C2000107 s_waitcnt vmcnt(2) ; BF8C0772 exp 15, 32, 0, 0, 0, v2, v3, v4, v5 ; F800020F 05040302 s_waitcnt vmcnt(1) ; BF8C0771 exp 15, 33, 0, 0, 0, v6, v7, v8, v9 ; F800021F 09080706 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s9, v11 ; 10001609 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v2, s4, v11 ; 10041604 v_mac_f32_e32 v0, s8, v10 ; 3E001408 v_mac_f32_e32 v2, s20, v10 ; 3E041414 v_mac_f32_e32 v0, s10, v12 ; 3E00180A v_mac_f32_e32 v2, s5, v12 ; 3E041805 v_mac_f32_e32 v0, s11, v13 ; 3E001A0B v_mac_f32_e32 v2, s0, v13 ; 3E041A00 v_mov_b32_e32 v3, 0 ; 7E060280 exp 15, 12, 0, 1, 0, v0, v2, v3, v1 ; F80008CF 01030200 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Code Size: 164 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xyz, IN[0].xyzx 1: MUL TEMP[1].x, IN[0].wwww, IN[1].wwww 2: MOV TEMP[0].w, TEMP[1].xxxx 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %24 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %25 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %26 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %27 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %28 = fmul float %26, %27 %29 = call i32 @llvm.SI.packf16(float %23, float %24) %30 = bitcast i32 %29 to float %31 = call i32 @llvm.SI.packf16(float %25, float %28) %32 = bitcast i32 %31 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %30, float %32, float %30, float %32) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s10 ; BEFC030A v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v0, v0, 3, 1, [m0] ; C8000700 v_interp_p2_f32 v0, [v0], v1, 3, 1, [m0] ; C8010701 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 68 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..95] DCL TEMP[0..2], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 4.0000, 2.1000} IMM[1] FLT32 { 3.1000, 1.1000, 0.1000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: MAD TEMP[1].x, IN[3].xxxx, IMM[0].zzzz, IMM[0].wwww 2: F2I TEMP[1].x, TEMP[1].xxxx 3: UARL ADDR[0].x, TEMP[1].xxxx 4: UARL ADDR[0].x, TEMP[1].xxxx 5: DP4 TEMP[0].x, IN[2], CONST[ADDR[0].x] 6: MAD TEMP[1].x, IN[3].xxxx, IMM[0].zzzz, IMM[1].xxxx 7: F2I TEMP[1].x, TEMP[1].xxxx 8: UARL ADDR[0].x, TEMP[1].xxxx 9: DP4 TEMP[1].x, IN[2], CONST[ADDR[0].x] 10: MOV TEMP[0].y, TEMP[1].xxxx 11: MAD TEMP[1].x, IN[3].xxxx, IMM[0].zzzz, IMM[1].yyyy 12: F2I TEMP[1].x, TEMP[1].xxxx 13: MAD TEMP[2].x, IN[3].xxxx, IMM[0].zzzz, IMM[1].zzzz 14: F2I TEMP[2].x, TEMP[2].xxxx 15: UARL ADDR[0].x, TEMP[2].xxxx 16: UARL ADDR[0].x, TEMP[2].xxxx 17: MOV TEMP[2], CONST[ADDR[0].x] 18: UARL ADDR[0].x, TEMP[1].xxxx 19: UARL ADDR[0].x, TEMP[1].xxxx 20: MAD TEMP[1], IN[0], CONST[ADDR[0].x], TEMP[2] 21: MOV TEMP[2].xy, IN[1].xyxx 22: MOV OUT[1], TEMP[1] 23: MOV OUT[0], TEMP[0] 24: MOV OUT[2], TEMP[2] 25: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 %16 = add i32 %5, %8 %17 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %15, i32 0, i32 %16) %18 = extractelement <4 x float> %17, i32 0 %19 = extractelement <4 x float> %17, i32 1 %20 = extractelement <4 x float> %17, i32 2 %21 = extractelement <4 x float> %17, i32 3 %22 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = add i32 %5, %8 %25 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %23, i32 0, i32 %24) %26 = extractelement <4 x float> %25, i32 0 %27 = extractelement <4 x float> %25, i32 1 %28 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %29 = load <16 x i8>, <16 x i8> addrspace(2)* %28, align 16, !tbaa !0 %30 = add i32 %5, %8 %31 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %29, i32 0, i32 %30) %32 = extractelement <4 x float> %31, i32 0 %33 = extractelement <4 x float> %31, i32 1 %34 = extractelement <4 x float> %31, i32 2 %35 = extractelement <4 x float> %31, i32 3 %36 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %37 = load <16 x i8>, <16 x i8> addrspace(2)* %36, align 16, !tbaa !0 %38 = add i32 %5, %8 %39 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %37, i32 0, i32 %38) %40 = extractelement <4 x float> %39, i32 0 %41 = fmul float %40, 4.000000e+00 %42 = fadd float %41, 0x4000CCCCC0000000 %43 = fptosi float %42 to i32 %44 = shl i32 %43, 4 %45 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %44) %46 = shl i32 %43, 4 %47 = or i32 %46, 4 %48 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %47) %49 = shl i32 %43, 4 %50 = or i32 %49, 8 %51 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %50) %52 = shl i32 %43, 4 %53 = or i32 %52, 12 %54 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %53) %55 = fmul float %32, %45 %56 = fmul float %33, %48 %57 = fadd float %55, %56 %58 = fmul float %34, %51 %59 = fadd float %57, %58 %60 = fmul float %35, %54 %61 = fadd float %59, %60 %62 = fmul float %40, 4.000000e+00 %63 = fadd float %62, 0x4008CCCCC0000000 %64 = fptosi float %63 to i32 %65 = shl i32 %64, 4 %66 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %65) %67 = shl i32 %64, 4 %68 = or i32 %67, 4 %69 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %68) %70 = shl i32 %64, 4 %71 = or i32 %70, 8 %72 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %71) %73 = shl i32 %64, 4 %74 = or i32 %73, 12 %75 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %74) %76 = fmul float %32, %66 %77 = fmul float %33, %69 %78 = fadd float %76, %77 %79 = fmul float %34, %72 %80 = fadd float %78, %79 %81 = fmul float %35, %75 %82 = fadd float %80, %81 %83 = fmul float %40, 4.000000e+00 %84 = fadd float %83, 0x3FF19999A0000000 %85 = fptosi float %84 to i32 %86 = fmul float %40, 4.000000e+00 %87 = fadd float %86, 0x3FB99999A0000000 %88 = fptosi float %87 to i32 %89 = shl i32 %88, 4 %90 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %89) %91 = shl i32 %88, 4 %92 = or i32 %91, 4 %93 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %92) %94 = shl i32 %88, 4 %95 = or i32 %94, 8 %96 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %95) %97 = shl i32 %88, 4 %98 = or i32 %97, 12 %99 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %98) %100 = shl i32 %85, 4 %101 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %100) %102 = fmul float %18, %101 %103 = fadd float %102, %90 %104 = shl i32 %85, 4 %105 = or i32 %104, 4 %106 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %105) %107 = fmul float %19, %106 %108 = fadd float %107, %93 %109 = shl i32 %85, 4 %110 = or i32 %109, 8 %111 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %110) %112 = fmul float %20, %111 %113 = fadd float %112, %96 %114 = shl i32 %85, 4 %115 = or i32 %114, 12 %116 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %115) %117 = fmul float %21, %116 %118 = fadd float %117, %99 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %103, float %108, float %113, float %118) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %26, float %27, float %96, float %99) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %61, float %82, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[20:23], s[8:9], 0xc ; C08A090C v_mov_b32_e32 v1, 1.0 ; 7E0202F2 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[8:11], v0, s[16:19], 0 idxen ; E00C2000 80040800 buffer_load_format_xyzw v[12:15], v0, s[20:23], 0 idxen ; E00C2000 80050C00 s_waitcnt vmcnt(0) ; BF8C0770 v_madak_f32_e32 v0, 4.0, v12, 0x40066666 ; 420018F6 40066666 v_madak_f32_e32 v13, 4.0, v12, 0x40466666 ; 421A18F6 40466666 v_madak_f32_e32 v14, 4.0, v12, 0x3f8ccccd ; 421C18F6 3F8CCCCD v_madak_f32_e32 v12, 4.0, v12, 0x3dcccccd ; 421818F6 3DCCCCCD v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_cvt_i32_f32_e32 v13, v13 ; 7E1A110D v_cvt_i32_f32_e32 v12, v12 ; 7E18110C v_cvt_i32_f32_e32 v14, v14 ; 7E1C110E v_lshlrev_b32_e32 v0, 4, v0 ; 34000084 v_lshlrev_b32_e32 v13, 4, v13 ; 341A1A84 v_lshlrev_b32_e32 v12, 4, v12 ; 34181884 v_lshlrev_b32_e32 v14, 4, v14 ; 341C1C84 buffer_load_dword v15, v0, s[0:3], 0 offen ; E0301000 80000F00 v_or_b32_e32 v16, 4, v0 ; 38200084 v_or_b32_e32 v17, 8, v0 ; 38220088 v_or_b32_e32 v0, 12, v0 ; 3800008C buffer_load_dword v18, v12, s[0:3], 0 offen ; E0301000 8000120C v_or_b32_e32 v19, 4, v12 ; 38261884 v_or_b32_e32 v20, 8, v12 ; 38281888 v_or_b32_e32 v12, 12, v12 ; 3818188C buffer_load_dword v21, v14, s[0:3], 0 offen ; E0301000 8000150E v_or_b32_e32 v22, 4, v14 ; 382C1C84 v_or_b32_e32 v23, 8, v14 ; 382E1C88 v_or_b32_e32 v14, 12, v14 ; 381C1C8C buffer_load_dword v19, v19, s[0:3], 0 offen ; E0301000 80001313 buffer_load_dword v20, v20, s[0:3], 0 offen ; E0301000 80001414 buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C buffer_load_dword v22, v22, s[0:3], 0 offen ; E0301000 80001616 buffer_load_dword v23, v23, s[0:3], 0 offen ; E0301000 80001717 buffer_load_dword v14, v14, s[0:3], 0 offen ; E0301000 80000E0E buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 v_or_b32_e32 v24, 4, v13 ; 38301A84 buffer_load_dword v24, v24, s[0:3], 0 offen ; E0301000 80001818 buffer_load_dword v25, v13, s[0:3], 0 offen ; E0301000 8000190D buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 v_or_b32_e32 v26, 8, v13 ; 38341A88 buffer_load_dword v26, v26, s[0:3], 0 offen ; E0301000 80001A1A v_or_b32_e32 v13, 12, v13 ; 381A1A8C buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 buffer_load_dword v13, v13, s[0:3], 0 offen ; E0301000 80000D0D s_waitcnt vmcnt(13) ; BF8C077D v_mac_f32_e32 v18, v21, v2 ; 3E240515 s_waitcnt vmcnt(9) ; BF8C0779 v_mac_f32_e32 v19, v22, v3 ; 3E260716 s_waitcnt vmcnt(8) ; BF8C0778 v_mad_f32 v2, v23, v4, v20 ; D2820002 04520917 s_waitcnt vmcnt(7) ; BF8C0777 v_mad_f32 v3, v14, v5, v12 ; D2820003 04320B0E s_waitcnt vmcnt(6) ; BF8C0776 v_mul_f32_e32 v4, v16, v9 ; 10081310 v_mac_f32_e32 v4, v15, v8 ; 3E08110F s_waitcnt vmcnt(5) ; BF8C0775 v_mul_f32_e32 v5, v24, v9 ; 100A1318 s_waitcnt vmcnt(4) ; BF8C0774 v_mac_f32_e32 v5, v25, v8 ; 3E0A1119 exp 15, 32, 0, 0, 0, v18, v19, v2, v3 ; F800020F 03021312 s_waitcnt vmcnt(3) ; BF8C0773 v_mac_f32_e32 v4, v17, v10 ; 3E081511 s_waitcnt vmcnt(2) ; BF8C0772 v_mac_f32_e32 v5, v26, v10 ; 3E0A151A exp 15, 33, 0, 0, 0, v6, v7, v20, v12 ; F800021F 0C140706 s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v4, v0, v11 ; 3E081700 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v5, v13, v11 ; 3E0A170D v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 12, 0, 1, 0, v4, v5, v0, v1 ; F80008CF 01000504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 28 Code Size: 444 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xyz, IN[0].xyzx 1: MOV TEMP[1].xy, IN[1].xyyy 2: TEX TEMP[1].w, TEMP[1], SAMP[0], 2D 3: MUL TEMP[1].x, IN[0].wwww, TEMP[1].wwww 4: MOV TEMP[0].w, TEMP[1].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %29 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %30 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %31 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %32 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %33 = bitcast float %31 to i32 %34 = bitcast float %32 to i32 %35 = insertelement <2 x i32> undef, i32 %33, i32 0 %36 = insertelement <2 x i32> %35, i32 %34, i32 1 %37 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %36, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %38 = extractelement <4 x float> %37, i32 3 %39 = fmul float %30, %38 %40 = call i32 @llvm.SI.packf16(float %27, float %28) %41 = bitcast i32 %40 to float %42 = call i32 @llvm.SI.packf16(float %29, float %39) %43 = bitcast i32 %42 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %41, float %43, float %41, float %43) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s10 ; BEFC030A s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v0, 8, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[8:15], s[0:3] ; F0800800 00020006 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 104 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL CONST[0..5] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[0], CONST[2] 2: DP4 TEMP[1].x, IN[0], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[0], CONST[4] 5: DP4 TEMP[2].x, IN[0], CONST[5] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[1], CONST[0] 9: MOV OUT[2], CONST[1] 10: MOV OUT[0], TEMP[0] 11: MOV OUT[3], TEMP[1] 12: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 12) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 16) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 20) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 24) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 28) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 32) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 36) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 40) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 44) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 48) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 52) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 56) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 60) %30 = call float @llvm.SI.load.const(<16 x i8> %13, i32 64) %31 = call float @llvm.SI.load.const(<16 x i8> %13, i32 68) %32 = call float @llvm.SI.load.const(<16 x i8> %13, i32 72) %33 = call float @llvm.SI.load.const(<16 x i8> %13, i32 76) %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 80) %35 = call float @llvm.SI.load.const(<16 x i8> %13, i32 84) %36 = call float @llvm.SI.load.const(<16 x i8> %13, i32 88) %37 = call float @llvm.SI.load.const(<16 x i8> %13, i32 92) %38 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = add i32 %5, %8 %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %40) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = fmul float %42, %22 %47 = fmul float %43, %23 %48 = fadd float %46, %47 %49 = fmul float %44, %24 %50 = fadd float %48, %49 %51 = fmul float %45, %25 %52 = fadd float %50, %51 %53 = fmul float %42, %26 %54 = fmul float %43, %27 %55 = fadd float %53, %54 %56 = fmul float %44, %28 %57 = fadd float %55, %56 %58 = fmul float %45, %29 %59 = fadd float %57, %58 %60 = fmul float %42, %30 %61 = fmul float %43, %31 %62 = fadd float %60, %61 %63 = fmul float %44, %32 %64 = fadd float %62, %63 %65 = fmul float %45, %33 %66 = fadd float %64, %65 %67 = fmul float %42, %34 %68 = fmul float %43, %35 %69 = fadd float %67, %68 %70 = fmul float %44, %36 %71 = fadd float %69, %70 %72 = fmul float %45, %37 %73 = fadd float %71, %72 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %14, float %15, float %16, float %17) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %18, float %19, float %20, float %21) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %66, float %73, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %52, float %59, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s19, s[0:3], 0xf ; C209810F s_buffer_load_dword s20, s[0:3], 0x10 ; C20A0110 s_buffer_load_dword s21, s[0:3], 0x11 ; C20A8111 s_buffer_load_dword s22, s[0:3], 0x14 ; C20B0114 s_buffer_load_dword s23, s[0:3], 0x15 ; C20B8115 s_buffer_load_dword s24, s[0:3], 0x12 ; C20C0112 s_buffer_load_dword s25, s[0:3], 0x13 ; C20C8113 s_buffer_load_dword s26, s[0:3], 0x16 ; C20D0116 s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s4 ; 7E080204 v_mov_b32_e32 v5, s5 ; 7E0A0205 v_mov_b32_e32 v6, s6 ; 7E0C0206 v_mov_b32_e32 v7, s7 ; 7E0E0207 exp 15, 32, 0, 0, 0, v4, v5, v6, v7 ; F800020F 07060504 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v4, s8 ; 7E080208 v_mov_b32_e32 v5, s9 ; 7E0A0209 v_mov_b32_e32 v6, s10 ; 7E0C020A v_mov_b32_e32 v7, s11 ; 7E0E020B exp 15, 33, 0, 0, 0, v4, v5, v6, v7 ; F800021F 07060504 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mul_f32_e32 v4, s13, v1 ; 1008020D v_mul_f32_e32 v5, s17, v1 ; 100A0211 v_mul_f32_e32 v6, s21, v1 ; 100C0215 v_mul_f32_e32 v1, s23, v1 ; 10020217 v_mac_f32_e32 v4, s12, v0 ; 3E08000C v_mac_f32_e32 v5, s16, v0 ; 3E0A0010 v_mac_f32_e32 v6, s20, v0 ; 3E0C0014 v_mac_f32_e32 v1, s22, v0 ; 3E020016 v_mac_f32_e32 v4, s14, v2 ; 3E08040E v_mac_f32_e32 v5, s18, v2 ; 3E0A0412 v_mac_f32_e32 v6, s24, v2 ; 3E0C0418 v_mac_f32_e32 v1, s26, v2 ; 3E02041A v_mac_f32_e32 v4, s15, v3 ; 3E08060F v_mac_f32_e32 v5, s19, v3 ; 3E0A0613 v_mac_f32_e32 v6, s25, v3 ; 3E0C0619 v_mac_f32_e32 v1, s0, v3 ; 3E020600 v_mov_b32_e32 v0, 1.0 ; 7E0002F2 exp 15, 34, 0, 0, 0, v6, v1, v0, v0 ; F800022F 00000106 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 0 ; 7E020280 exp 15, 12, 0, 1, 0, v4, v5, v1, v0 ; F80008CF 00010504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 8 Code Size: 280 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].w, IMM[0].xxxx 1: MOV TEMP[0].xyz, IN[1].xyzx 2: MOV TEMP[1].xy, IN[2].xyyy 3: TEX TEMP[1], TEMP[1], SAMP[0], 2D 4: MUL TEMP[0], TEMP[1], TEMP[0] 5: MUL TEMP[0], TEMP[0], IN[1].wwww 6: MAD TEMP[0], IN[0], TEMP[0].wwww, TEMP[0] 7: MOV OUT[0], TEMP[0] 8: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %29 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %30 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %31 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %32 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %33 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) %34 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %35 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) %36 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) %37 = bitcast float %35 to i32 %38 = bitcast float %36 to i32 %39 = insertelement <2 x i32> undef, i32 %37, i32 0 %40 = insertelement <2 x i32> %39, i32 %38, i32 1 %41 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %40, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = fmul float %42, %31 %47 = fmul float %43, %32 %48 = fmul float %44, %33 %49 = fmul float %46, %34 %50 = fmul float %47, %34 %51 = fmul float %48, %34 %52 = fmul float %45, %34 %53 = fmul float %27, %52 %54 = fadd float %53, %49 %55 = fmul float %28, %52 %56 = fadd float %55, %50 %57 = fmul float %29, %52 %58 = fadd float %57, %51 %59 = fmul float %30, %52 %60 = fadd float %59, %52 %61 = call i32 @llvm.SI.packf16(float %54, float %56) %62 = bitcast i32 %61 to float %63 = call i32 @llvm.SI.packf16(float %58, float %60) %64 = bitcast i32 %63 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %62, float %64, float %62, float %64) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s10 ; BEFC030A s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700 v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701 v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800 v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801 v_interp_p1_f32 v11, v0, 1, 2, [m0] ; C82C0900 v_interp_p2_f32 v11, [v11], v1, 1, 2, [m0] ; C82D0901 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[8:15], s[0:3] ; F0800F00 00020A0A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v6, v10 ; 10001506 v_mul_f32_e32 v1, v7, v11 ; 10021707 v_mul_f32_e32 v6, v8, v12 ; 100C1908 v_mul_f32_e32 v0, v9, v0 ; 10000109 v_mul_f32_e32 v1, v9, v1 ; 10020309 v_mul_f32_e32 v6, v9, v6 ; 100C0D09 v_mul_f32_e32 v7, v9, v13 ; 100E1B09 v_mac_f32_e32 v0, v7, v2 ; 3E000507 v_mac_f32_e32 v1, v7, v3 ; 3E020707 v_mac_f32_e32 v6, v7, v4 ; 3E0C0907 v_mac_f32_e32 v7, v7, v5 ; 3E0E0B07 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v6, v7 ; 5E020F06 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Code Size: 176 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL CONST[0..143] DCL TEMP[0..3], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 1530.0599, 2.1000} IMM[1] FLT32 { 3.1000, 4.1000, 5.1000, 0.1000} IMM[2] FLT32 { 1.1000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: MAD TEMP[1].x, IMM[0].zzzz, IN[0].zzzz, IMM[0].wwww 2: F2I TEMP[1].x, TEMP[1].xxxx 3: UARL ADDR[0].x, TEMP[1].xxxx 4: UARL ADDR[0].x, TEMP[1].xxxx 5: DP4 TEMP[0].x, IN[1], CONST[ADDR[0].x] 6: MAD TEMP[1].x, IMM[0].zzzz, IN[0].zzzz, IMM[1].xxxx 7: F2I TEMP[1].x, TEMP[1].xxxx 8: UARL ADDR[0].x, TEMP[1].xxxx 9: DP4 TEMP[1].x, IN[1], CONST[ADDR[0].x] 10: MOV TEMP[0].y, TEMP[1].xxxx 11: MAD TEMP[1].x, IMM[0].zzzz, IN[0].zzzz, IMM[1].yyyy 12: F2I TEMP[1].x, TEMP[1].xxxx 13: UARL ADDR[0].x, TEMP[1].xxxx 14: UARL ADDR[0].x, TEMP[1].xxxx 15: DP4 TEMP[1].x, IN[1], CONST[ADDR[0].x] 16: MAD TEMP[2].x, IMM[0].zzzz, IN[0].zzzz, IMM[1].zzzz 17: F2I TEMP[2].x, TEMP[2].xxxx 18: UARL ADDR[0].x, TEMP[2].xxxx 19: DP4 TEMP[2].x, IN[1], CONST[ADDR[0].x] 20: MOV TEMP[1].y, TEMP[2].xxxx 21: MAD TEMP[2].x, IMM[0].zzzz, IN[0].zzzz, IMM[1].wwww 22: F2I TEMP[2].x, TEMP[2].xxxx 23: UARL ADDR[0].x, TEMP[2].xxxx 24: MOV TEMP[2], CONST[ADDR[0].x] 25: MAD TEMP[3].x, IMM[0].zzzz, IN[0].zzzz, IMM[2].xxxx 26: F2I TEMP[3].x, TEMP[3].xxxx 27: UARL ADDR[0].x, TEMP[3].xxxx 28: MOV TEMP[3], CONST[ADDR[0].x] 29: MOV TEMP[1].xy, TEMP[1].xyxx 30: MOV OUT[1], IN[0] 31: MOV OUT[2], TEMP[2] 32: MOV OUT[3], TEMP[3] 33: MOV OUT[0], TEMP[0] 34: MOV OUT[4], TEMP[1] 35: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 %16 = add i32 %5, %8 %17 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %15, i32 0, i32 %16) %18 = extractelement <4 x float> %17, i32 0 %19 = extractelement <4 x float> %17, i32 1 %20 = extractelement <4 x float> %17, i32 2 %21 = extractelement <4 x float> %17, i32 3 %22 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = add i32 %5, %8 %25 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %23, i32 0, i32 %24) %26 = extractelement <4 x float> %25, i32 0 %27 = extractelement <4 x float> %25, i32 1 %28 = extractelement <4 x float> %25, i32 2 %29 = extractelement <4 x float> %25, i32 3 %30 = fmul float %20, 0x4097E83D60000000 %31 = fadd float %30, 0x4000CCCCC0000000 %32 = fptosi float %31 to i32 %33 = shl i32 %32, 4 %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %33) %35 = shl i32 %32, 4 %36 = or i32 %35, 4 %37 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %36) %38 = shl i32 %32, 4 %39 = or i32 %38, 8 %40 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %39) %41 = shl i32 %32, 4 %42 = or i32 %41, 12 %43 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %42) %44 = fmul float %26, %34 %45 = fmul float %27, %37 %46 = fadd float %44, %45 %47 = fmul float %28, %40 %48 = fadd float %46, %47 %49 = fmul float %29, %43 %50 = fadd float %48, %49 %51 = fmul float %20, 0x4097E83D60000000 %52 = fadd float %51, 0x4008CCCCC0000000 %53 = fptosi float %52 to i32 %54 = shl i32 %53, 4 %55 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %54) %56 = shl i32 %53, 4 %57 = or i32 %56, 4 %58 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %57) %59 = shl i32 %53, 4 %60 = or i32 %59, 8 %61 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %60) %62 = shl i32 %53, 4 %63 = or i32 %62, 12 %64 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %63) %65 = fmul float %26, %55 %66 = fmul float %27, %58 %67 = fadd float %65, %66 %68 = fmul float %28, %61 %69 = fadd float %67, %68 %70 = fmul float %29, %64 %71 = fadd float %69, %70 %72 = fmul float %20, 0x4097E83D60000000 %73 = fadd float %72, 0x4010666660000000 %74 = fptosi float %73 to i32 %75 = shl i32 %74, 4 %76 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %75) %77 = shl i32 %74, 4 %78 = or i32 %77, 4 %79 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %78) %80 = shl i32 %74, 4 %81 = or i32 %80, 8 %82 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %81) %83 = shl i32 %74, 4 %84 = or i32 %83, 12 %85 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %84) %86 = fmul float %26, %76 %87 = fmul float %27, %79 %88 = fadd float %86, %87 %89 = fmul float %28, %82 %90 = fadd float %88, %89 %91 = fmul float %29, %85 %92 = fadd float %90, %91 %93 = fmul float %20, 0x4097E83D60000000 %94 = fadd float %93, 0x4014666660000000 %95 = fptosi float %94 to i32 %96 = shl i32 %95, 4 %97 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %96) %98 = shl i32 %95, 4 %99 = or i32 %98, 4 %100 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %99) %101 = shl i32 %95, 4 %102 = or i32 %101, 8 %103 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %102) %104 = shl i32 %95, 4 %105 = or i32 %104, 12 %106 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %105) %107 = fmul float %26, %97 %108 = fmul float %27, %100 %109 = fadd float %107, %108 %110 = fmul float %28, %103 %111 = fadd float %109, %110 %112 = fmul float %29, %106 %113 = fadd float %111, %112 %114 = fmul float %20, 0x4097E83D60000000 %115 = fadd float %114, 0x3FB99999A0000000 %116 = fptosi float %115 to i32 %117 = shl i32 %116, 4 %118 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %117) %119 = shl i32 %116, 4 %120 = or i32 %119, 4 %121 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %120) %122 = shl i32 %116, 4 %123 = or i32 %122, 8 %124 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %123) %125 = shl i32 %116, 4 %126 = or i32 %125, 12 %127 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %126) %128 = fmul float %20, 0x4097E83D60000000 %129 = fadd float %128, 0x3FF19999A0000000 %130 = fptosi float %129 to i32 %131 = shl i32 %130, 4 %132 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %131) %133 = shl i32 %130, 4 %134 = or i32 %133, 4 %135 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %134) %136 = shl i32 %130, 4 %137 = or i32 %136, 8 %138 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %137) %139 = shl i32 %130, 4 %140 = or i32 %139, 12 %141 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %140) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %18, float %19, float %20, float %21) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %118, float %121, float %124, float %127) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %132, float %135, float %138, float %141) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %92, float %113, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %50, float %71, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_mov_b32_e32 v1, 0x44bf41eb ; 7E0202FF 44BF41EB v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 s_waitcnt vmcnt(1) ; BF8C0771 v_madak_f32_e32 v0, v4, v1, 0x40833333 ; 42000304 40833333 v_madak_f32_e32 v10, v4, v1, 0x40a33333 ; 42140304 40A33333 v_madak_f32_e32 v11, v4, v1, 0x40066666 ; 42160304 40066666 v_madak_f32_e32 v12, v4, v1, 0x40466666 ; 42180304 40466666 v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_cvt_i32_f32_e32 v10, v10 ; 7E14110A v_madak_f32_e32 v13, v4, v1, 0x3dcccccd ; 421A0304 3DCCCCCD v_madak_f32_e32 v1, v4, v1, 0x3f8ccccd ; 42020304 3F8CCCCD v_cvt_i32_f32_e32 v11, v11 ; 7E16110B v_cvt_i32_f32_e32 v12, v12 ; 7E18110C v_cvt_i32_f32_e32 v13, v13 ; 7E1A110D v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_lshlrev_b32_e32 v11, 4, v11 ; 34161684 v_lshlrev_b32_e32 v12, 4, v12 ; 34181884 v_lshlrev_b32_e32 v0, 4, v0 ; 34000084 v_lshlrev_b32_e32 v10, 4, v10 ; 34141484 v_lshlrev_b32_e32 v13, 4, v13 ; 341A1A84 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 buffer_load_dword v14, v11, s[0:3], 0 offen ; E0301000 80000E0B v_or_b32_e32 v15, 4, v11 ; 381E1684 v_or_b32_e32 v16, 8, v11 ; 38201688 v_or_b32_e32 v11, 12, v11 ; 3816168C buffer_load_dword v17, v12, s[0:3], 0 offen ; E0301000 8000110C v_or_b32_e32 v18, 4, v12 ; 38241884 v_or_b32_e32 v19, 8, v12 ; 38261888 v_or_b32_e32 v12, 12, v12 ; 3818188C buffer_load_dword v20, v13, s[0:3], 0 offen ; E0301000 8000140D v_or_b32_e32 v21, 4, v13 ; 382A1A84 v_or_b32_e32 v22, 8, v13 ; 382C1A88 v_or_b32_e32 v13, 12, v13 ; 381A1A8C buffer_load_dword v23, v1, s[0:3], 0 offen ; E0301000 80001701 v_or_b32_e32 v24, 4, v1 ; 38300284 v_or_b32_e32 v25, 8, v1 ; 38320288 v_or_b32_e32 v1, 12, v1 ; 3802028C buffer_load_dword v15, v15, s[0:3], 0 offen ; E0301000 80000F0F buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 buffer_load_dword v22, v22, s[0:3], 0 offen ; E0301000 80001616 buffer_load_dword v13, v13, s[0:3], 0 offen ; E0301000 80000D0D buffer_load_dword v24, v24, s[0:3], 0 offen ; E0301000 80001818 buffer_load_dword v25, v25, s[0:3], 0 offen ; E0301000 80001919 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 buffer_load_dword v26, v0, s[0:3], 0 offen ; E0301000 80001A00 v_or_b32_e32 v27, 4, v0 ; 38360084 v_or_b32_e32 v28, 8, v0 ; 38380088 v_or_b32_e32 v0, 12, v0 ; 3800008C buffer_load_dword v18, v18, s[0:3], 0 offen ; E0301000 80001212 buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 buffer_load_dword v27, v27, s[0:3], 0 offen ; E0301000 80001B1B v_or_b32_e32 v29, 4, v10 ; 383A1484 buffer_load_dword v29, v29, s[0:3], 0 offen ; E0301000 80001D1D buffer_load_dword v30, v10, s[0:3], 0 offen ; E0301000 80001E0A v_or_b32_e32 v31, 8, v10 ; 383E1488 v_or_b32_e32 v10, 12, v10 ; 3814148C buffer_load_dword v19, v19, s[0:3], 0 offen ; E0301000 80001313 buffer_load_dword v28, v28, s[0:3], 0 offen ; E0301000 80001C1C buffer_load_dword v31, v31, s[0:3], 0 offen ; E0301000 80001F1F buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A exp 15, 32, 0, 0, 0, v2, v3, v4, v5 ; F800020F 05040302 s_waitcnt ; BF8C077F exp 15, 33, 0, 0, 0, v20, v21, v22, v13 ; F800021F 0D161514 s_waitcnt vmcnt(13) ; BF8C077D exp 15, 34, 0, 0, 0, v23, v24, v25, v1 ; F800022F 01191817 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v1, v15, v7 ; 10020F0F v_mac_f32_e32 v1, v14, v6 ; 3E020D0E s_waitcnt vmcnt(11) ; BF8C077B v_mul_f32_e32 v2, v18, v7 ; 10040F12 v_mac_f32_e32 v2, v17, v6 ; 3E040D11 s_waitcnt vmcnt(9) ; BF8C0779 v_mul_f32_e32 v3, v27, v7 ; 10060F1B v_mac_f32_e32 v3, v26, v6 ; 3E060D1A s_waitcnt vmcnt(8) ; BF8C0778 v_mul_f32_e32 v4, v29, v7 ; 10080F1D s_waitcnt vmcnt(7) ; BF8C0777 v_mac_f32_e32 v4, v30, v6 ; 3E080D1E v_mac_f32_e32 v1, v16, v8 ; 3E021110 v_mov_b32_e32 v5, 1.0 ; 7E0A02F2 s_waitcnt vmcnt(6) ; BF8C0776 v_mac_f32_e32 v2, v19, v8 ; 3E041113 s_waitcnt vmcnt(5) ; BF8C0775 v_mac_f32_e32 v3, v28, v8 ; 3E06111C s_waitcnt vmcnt(4) ; BF8C0774 v_mac_f32_e32 v4, v31, v8 ; 3E08111F s_waitcnt vmcnt(3) ; BF8C0773 v_mac_f32_e32 v1, v11, v9 ; 3E02130B s_waitcnt vmcnt(2) ; BF8C0772 v_mac_f32_e32 v2, v12, v9 ; 3E04130C s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v3, v0, v9 ; 3E061300 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v4, v10, v9 ; 3E08130A exp 15, 35, 0, 0, 0, v3, v4, v0, v0 ; F800023F 00000403 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 12, 0, 1, 0, v1, v2, v0, v5 ; F80008CF 05000201 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 32 Code Size: 584 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[3].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MAD TEMP[0], TEMP[0], IN[2], IN[1] 3: MUL TEMP[1].x, TEMP[0].wwww, IN[0].wwww 4: MOV TEMP[0].w, TEMP[1].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %28 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %29 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %30 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) %31 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %32 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) %33 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) %34 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) %35 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) %36 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) %37 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) %38 = bitcast float %36 to i32 %39 = bitcast float %37 to i32 %40 = insertelement <2 x i32> undef, i32 %38, i32 0 %41 = insertelement <2 x i32> %40, i32 %39, i32 1 %42 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %41, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = extractelement <4 x float> %42, i32 2 %46 = extractelement <4 x float> %42, i32 3 %47 = fmul float %43, %32 %48 = fadd float %47, %28 %49 = fmul float %44, %33 %50 = fadd float %49, %29 %51 = fmul float %45, %34 %52 = fadd float %51, %30 %53 = fmul float %46, %35 %54 = fadd float %53, %31 %55 = fmul float %54, %27 %56 = call i32 @llvm.SI.packf16(float %48, float %50) %57 = bitcast i32 %56 to float %58 = call i32 @llvm.SI.packf16(float %52, float %55) %59 = bitcast i32 %58 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %57, float %59, float %57, float %59) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s10 ; BEFC030A s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 v_interp_p1_f32 v5, v0, 2, 1, [m0] ; C8140600 v_interp_p2_f32 v5, [v5], v1, 2, 1, [m0] ; C8150601 v_interp_p1_f32 v6, v0, 3, 1, [m0] ; C8180700 v_interp_p2_f32 v6, [v6], v1, 3, 1, [m0] ; C8190701 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v10, v0, 3, 2, [m0] ; C8280B00 v_interp_p2_f32 v10, [v10], v1, 3, 2, [m0] ; C8290B01 v_interp_p1_f32 v11, v0, 0, 3, [m0] ; C82C0C00 v_interp_p2_f32 v11, [v11], v1, 0, 3, [m0] ; C82D0C01 v_interp_p1_f32 v12, v0, 1, 3, [m0] ; C8300D00 v_interp_p2_f32 v12, [v12], v1, 1, 3, [m0] ; C8310D01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[11:14], 15, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[8:15], s[0:3] ; F0800F00 00020B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v3, v7, v11 ; 3E061707 v_mac_f32_e32 v4, v8, v12 ; 3E081908 v_mac_f32_e32 v5, v9, v13 ; 3E0A1B09 v_mac_f32_e32 v6, v10, v14 ; 3E0C1D0A v_mul_f32_e32 v0, v2, v6 ; 10000D02 v_cvt_pkrtz_f16_f32_e32 v1, v3, v4 ; 5E020903 v_cvt_pkrtz_f16_f32_e32 v0, v5, v0 ; 5E000105 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Code Size: 160 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL CONST[0..3] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[2], CONST[0] 2: DP4 TEMP[1].x, IN[2], CONST[1] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[2], CONST[2] 5: DP4 TEMP[2].x, IN[2], CONST[3] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[2], IN[1] 9: MOV OUT[1], IN[0] 10: MOV OUT[0], TEMP[0] 11: MOV OUT[3], TEMP[1] 12: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 12) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 16) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 20) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 24) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 28) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 32) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 36) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 40) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 44) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 48) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 52) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 56) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 60) %30 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = add i32 %5, %8 %33 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %31, i32 0, i32 %32) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 %38 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = add i32 %5, %8 %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %40) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 %48 = add i32 %5, %8 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = extractelement <4 x float> %49, i32 2 %53 = extractelement <4 x float> %49, i32 3 %54 = fmul float %50, %14 %55 = fmul float %51, %15 %56 = fadd float %54, %55 %57 = fmul float %52, %16 %58 = fadd float %56, %57 %59 = fmul float %53, %17 %60 = fadd float %58, %59 %61 = fmul float %50, %18 %62 = fmul float %51, %19 %63 = fadd float %61, %62 %64 = fmul float %52, %20 %65 = fadd float %63, %64 %66 = fmul float %53, %21 %67 = fadd float %65, %66 %68 = fmul float %50, %22 %69 = fmul float %51, %23 %70 = fadd float %68, %69 %71 = fmul float %52, %24 %72 = fadd float %70, %71 %73 = fmul float %53, %25 %74 = fadd float %72, %73 %75 = fmul float %50, %26 %76 = fmul float %51, %27 %77 = fadd float %75, %76 %78 = fmul float %52, %28 %79 = fadd float %77, %78 %80 = fmul float %53, %29 %81 = fadd float %79, %80 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %34, float %35, float %36, float %37) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %42, float %43, float %44, float %45) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %74, float %81, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %60, float %67, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A v_mov_b32_e32 v1, 1.0 ; 7E0202F2 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105 s_buffer_load_dword s9, s[0:3], 0x6 ; C2048106 s_buffer_load_dword s10, s[0:3], 0x7 ; C2050107 s_buffer_load_dword s11, s[0:3], 0x8 ; C2058108 s_buffer_load_dword s20, s[0:3], 0x9 ; C20A0109 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 buffer_load_format_xyzw v[10:13], v0, s[16:19], 0 idxen ; E00C2000 80040A00 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_buffer_load_dword s5, s[0:3], 0xb ; C202810B s_buffer_load_dword s6, s[0:3], 0xc ; C203010C s_buffer_load_dword s7, s[0:3], 0xd ; C203810D s_buffer_load_dword s12, s[0:3], 0xe ; C206010E s_buffer_load_dword s13, s[0:3], 0x0 ; C2068100 s_buffer_load_dword s14, s[0:3], 0x1 ; C2070101 s_buffer_load_dword s15, s[0:3], 0x2 ; C2078102 s_buffer_load_dword s16, s[0:3], 0x3 ; C2080103 s_buffer_load_dword s17, s[0:3], 0x4 ; C2088104 s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(2) ; BF8C0772 exp 15, 32, 0, 0, 0, v2, v3, v4, v5 ; F800020F 05040302 s_waitcnt vmcnt(1) ; BF8C0771 exp 15, 33, 0, 0, 0, v6, v7, v8, v9 ; F800021F 09080706 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s14, v11 ; 1000160E s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v2, s8, v11 ; 10041608 v_mul_f32_e32 v3, s20, v11 ; 10061614 v_mul_f32_e32 v4, s7, v11 ; 10081607 v_mac_f32_e32 v0, s13, v10 ; 3E00140D v_mac_f32_e32 v2, s17, v10 ; 3E041411 v_mac_f32_e32 v3, s11, v10 ; 3E06140B v_mac_f32_e32 v4, s6, v10 ; 3E081406 v_mac_f32_e32 v0, s15, v12 ; 3E00180F v_mac_f32_e32 v2, s9, v12 ; 3E041809 v_mac_f32_e32 v3, s4, v12 ; 3E061804 v_mac_f32_e32 v4, s12, v12 ; 3E08180C v_mac_f32_e32 v0, s16, v13 ; 3E001A10 v_mac_f32_e32 v2, s10, v13 ; 3E041A0A v_mac_f32_e32 v3, s5, v13 ; 3E061A05 v_mac_f32_e32 v4, s0, v13 ; 3E081A00 exp 15, 34, 0, 0, 0, v3, v4, v0, v0 ; F800022F 00000403 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v3, 0 ; 7E060280 exp 15, 12, 0, 1, 0, v0, v2, v3, v1 ; F80008CF 01030200 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Code Size: 240 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[2].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: LRP TEMP[0], IN[1].xxxx, TEMP[0], IN[0] 3: MOV TEMP[1].xyz, TEMP[0].xyzx 4: MUL TEMP[0].x, TEMP[0].wwww, IN[1].wwww 5: MOV TEMP[1].w, TEMP[0].xxxx 6: MOV OUT[0], TEMP[1] 7: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %29 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %30 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %31 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %32 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %33 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) %34 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) %35 = bitcast float %33 to i32 %36 = bitcast float %34 to i32 %37 = insertelement <2 x i32> undef, i32 %35, i32 0 %38 = insertelement <2 x i32> %37, i32 %36, i32 1 %39 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %38, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %40 = extractelement <4 x float> %39, i32 0 %41 = extractelement <4 x float> %39, i32 1 %42 = extractelement <4 x float> %39, i32 2 %43 = extractelement <4 x float> %39, i32 3 %44 = fsub float 1.000000e+00, %31 %45 = fmul float %40, %31 %46 = fmul float %27, %44 %47 = fadd float %45, %46 %48 = fsub float 1.000000e+00, %31 %49 = fmul float %41, %31 %50 = fmul float %28, %48 %51 = fadd float %49, %50 %52 = fsub float 1.000000e+00, %31 %53 = fmul float %42, %31 %54 = fmul float %29, %52 %55 = fadd float %53, %54 %56 = fsub float 1.000000e+00, %31 %57 = fmul float %43, %31 %58 = fmul float %30, %56 %59 = fadd float %57, %58 %60 = fmul float %59, %32 %61 = call i32 @llvm.SI.packf16(float %47, float %51) %62 = bitcast i32 %61 to float %63 = call i32 @llvm.SI.packf16(float %55, float %60) %64 = bitcast i32 %63 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %62, float %64, float %62, float %64) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s10 ; BEFC030A s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 3, 1, [m0] ; C81C0700 v_interp_p2_f32 v7, [v7], v1, 3, 1, [m0] ; C81D0701 v_interp_p1_f32 v8, v0, 0, 2, [m0] ; C8200800 v_interp_p2_f32 v8, [v8], v1, 0, 2, [m0] ; C8210801 v_interp_p1_f32 v9, v0, 1, 2, [m0] ; C8240900 v_interp_p2_f32 v9, [v9], v1, 1, 2, [m0] ; C8250901 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[8:15], s[0:3] ; F0800F00 00020808 v_mad_f32 v0, -v6, v2, v2 ; D2820000 240A0506 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v0, v6, v8 ; 3E001106 v_mad_f32 v1, -v6, v3, v3 ; D2820001 240E0706 v_mac_f32_e32 v1, v6, v9 ; 3E021306 v_mad_f32 v2, -v6, v4, v4 ; D2820002 24120906 v_mac_f32_e32 v2, v6, v10 ; 3E041506 v_mad_f32 v3, -v6, v5, v5 ; D2820003 24160B06 v_mac_f32_e32 v3, v6, v11 ; 3E061706 v_mul_f32_e32 v3, v7, v3 ; 10060707 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 12 Code Size: 168 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL OUT[5], GENERIC[4] DCL CONST[0..5] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[2], CONST[2] 2: DP4 TEMP[1].x, IN[2], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[2], CONST[4] 5: DP4 TEMP[2].x, IN[2], CONST[5] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[2], IN[1] 9: MOV OUT[3], CONST[0] 10: MOV OUT[1], IN[0] 11: MOV OUT[4], CONST[1] 12: MOV OUT[0], TEMP[0] 13: MOV OUT[5], TEMP[1] 14: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 12) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 16) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 20) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 24) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 28) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 32) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 36) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 40) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 44) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 48) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 52) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 56) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 60) %30 = call float @llvm.SI.load.const(<16 x i8> %13, i32 64) %31 = call float @llvm.SI.load.const(<16 x i8> %13, i32 68) %32 = call float @llvm.SI.load.const(<16 x i8> %13, i32 72) %33 = call float @llvm.SI.load.const(<16 x i8> %13, i32 76) %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 80) %35 = call float @llvm.SI.load.const(<16 x i8> %13, i32 84) %36 = call float @llvm.SI.load.const(<16 x i8> %13, i32 88) %37 = call float @llvm.SI.load.const(<16 x i8> %13, i32 92) %38 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = add i32 %5, %8 %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %40) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 %48 = add i32 %5, %8 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = extractelement <4 x float> %49, i32 2 %53 = extractelement <4 x float> %49, i32 3 %54 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0 %56 = add i32 %5, %8 %57 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %56) %58 = extractelement <4 x float> %57, i32 0 %59 = extractelement <4 x float> %57, i32 1 %60 = extractelement <4 x float> %57, i32 2 %61 = extractelement <4 x float> %57, i32 3 %62 = fmul float %58, %22 %63 = fmul float %59, %23 %64 = fadd float %62, %63 %65 = fmul float %60, %24 %66 = fadd float %64, %65 %67 = fmul float %61, %25 %68 = fadd float %66, %67 %69 = fmul float %58, %26 %70 = fmul float %59, %27 %71 = fadd float %69, %70 %72 = fmul float %60, %28 %73 = fadd float %71, %72 %74 = fmul float %61, %29 %75 = fadd float %73, %74 %76 = fmul float %58, %30 %77 = fmul float %59, %31 %78 = fadd float %76, %77 %79 = fmul float %60, %32 %80 = fadd float %78, %79 %81 = fmul float %61, %33 %82 = fadd float %80, %81 %83 = fmul float %58, %34 %84 = fmul float %59, %35 %85 = fadd float %83, %84 %86 = fmul float %60, %36 %87 = fadd float %85, %86 %88 = fmul float %61, %37 %89 = fadd float %87, %88 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %42, float %43, float %44, float %45) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %50, float %51, float %52, float %53) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %14, float %15, float %16, float %17) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %18, float %19, float %20, float %21) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %82, float %89, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %68, float %75, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A v_mov_b32_e32 v1, 1.0 ; 7E0202F2 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0xf ; C204010F s_buffer_load_dword s9, s[0:3], 0x10 ; C2048110 s_buffer_load_dword s10, s[0:3], 0x11 ; C2050111 s_buffer_load_dword s11, s[0:3], 0x12 ; C2058112 s_buffer_load_dword s20, s[0:3], 0x13 ; C20A0113 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 buffer_load_format_xyzw v[10:13], v0, s[16:19], 0 idxen ; E00C2000 80040A00 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_buffer_load_dword s5, s[0:3], 0x15 ; C2028115 s_buffer_load_dword s6, s[0:3], 0x16 ; C2030116 s_buffer_load_dword s7, s[0:3], 0x17 ; C2038117 s_buffer_load_dword s12, s[0:3], 0x5 ; C2060105 s_buffer_load_dword s13, s[0:3], 0x6 ; C2068106 s_buffer_load_dword s14, s[0:3], 0x7 ; C2070107 s_buffer_load_dword s15, s[0:3], 0x8 ; C2078108 s_buffer_load_dword s16, s[0:3], 0x9 ; C2080109 s_buffer_load_dword s17, s[0:3], 0xa ; C208810A s_buffer_load_dword s18, s[0:3], 0xb ; C209010B s_buffer_load_dword s19, s[0:3], 0xc ; C209810C s_buffer_load_dword s21, s[0:3], 0xd ; C20A810D s_buffer_load_dword s22, s[0:3], 0xe ; C20B010E s_buffer_load_dword s23, s[0:3], 0x0 ; C20B8100 s_buffer_load_dword s24, s[0:3], 0x1 ; C20C0101 s_buffer_load_dword s25, s[0:3], 0x2 ; C20C8102 s_buffer_load_dword s26, s[0:3], 0x3 ; C20D0103 s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s12 ; 7E00020C v_mov_b32_e32 v14, s23 ; 7E1C0217 v_mov_b32_e32 v15, s24 ; 7E1E0218 v_mov_b32_e32 v16, s25 ; 7E200219 v_mov_b32_e32 v17, s26 ; 7E22021A v_mov_b32_e32 v18, s0 ; 7E240200 s_waitcnt vmcnt(2) ; BF8C0772 exp 15, 32, 0, 0, 0, v2, v3, v4, v5 ; F800020F 05040302 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v2, s13 ; 7E04020D s_waitcnt vmcnt(1) ; BF8C0771 exp 15, 33, 0, 0, 0, v6, v7, v8, v9 ; F800021F 09080706 v_mov_b32_e32 v3, s14 ; 7E06020E s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v4, s16, v11 ; 10081610 v_mul_f32_e32 v5, s21, v11 ; 100A1615 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v6, s10, v11 ; 100C160A exp 15, 34, 0, 0, 0, v14, v15, v16, v17 ; F800022F 11100F0E v_mul_f32_e32 v7, s5, v11 ; 100E1605 v_mac_f32_e32 v4, s15, v10 ; 3E08140F v_mac_f32_e32 v5, s19, v10 ; 3E0A1413 v_mac_f32_e32 v6, s9, v10 ; 3E0C1409 v_mac_f32_e32 v7, s4, v10 ; 3E0E1404 v_mac_f32_e32 v4, s17, v12 ; 3E081811 v_mac_f32_e32 v5, s22, v12 ; 3E0A1816 v_mac_f32_e32 v6, s11, v12 ; 3E0C180B v_mac_f32_e32 v7, s6, v12 ; 3E0E1806 v_mac_f32_e32 v4, s18, v13 ; 3E081A12 v_mac_f32_e32 v5, s8, v13 ; 3E0A1A08 v_mac_f32_e32 v6, s20, v13 ; 3E0C1A14 v_mac_f32_e32 v7, s7, v13 ; 3E0E1A07 exp 15, 35, 0, 0, 0, v18, v0, v2, v3 ; F800023F 03020012 exp 15, 36, 0, 0, 0, v6, v7, v0, v0 ; F800024F 00000706 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 12, 0, 1, 0, v4, v5, v0, v1 ; F80008CF 01000504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 20 Code Size: 328 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[4], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[4].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: LRP TEMP[0], IN[1].xxxx, TEMP[0], IN[0] 3: MAD TEMP[0], TEMP[0], IN[3], IN[2] 4: MUL TEMP[1].x, TEMP[0].wwww, IN[1].wwww 5: MOV TEMP[0].w, TEMP[1].xxxx 6: MOV OUT[0], TEMP[0] 7: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %29 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %30 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %31 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %32 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %33 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) %34 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) %35 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) %36 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) %37 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) %38 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) %39 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) %40 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) %41 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, <2 x i32> %8) %42 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) %43 = bitcast float %41 to i32 %44 = bitcast float %42 to i32 %45 = insertelement <2 x i32> undef, i32 %43, i32 0 %46 = insertelement <2 x i32> %45, i32 %44, i32 1 %47 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %46, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %48 = extractelement <4 x float> %47, i32 0 %49 = extractelement <4 x float> %47, i32 1 %50 = extractelement <4 x float> %47, i32 2 %51 = extractelement <4 x float> %47, i32 3 %52 = fsub float 1.000000e+00, %31 %53 = fmul float %48, %31 %54 = fmul float %27, %52 %55 = fadd float %53, %54 %56 = fsub float 1.000000e+00, %31 %57 = fmul float %49, %31 %58 = fmul float %28, %56 %59 = fadd float %57, %58 %60 = fsub float 1.000000e+00, %31 %61 = fmul float %50, %31 %62 = fmul float %29, %60 %63 = fadd float %61, %62 %64 = fsub float 1.000000e+00, %31 %65 = fmul float %51, %31 %66 = fmul float %30, %64 %67 = fadd float %65, %66 %68 = fmul float %55, %37 %69 = fadd float %68, %33 %70 = fmul float %59, %38 %71 = fadd float %70, %34 %72 = fmul float %63, %39 %73 = fadd float %72, %35 %74 = fmul float %67, %40 %75 = fadd float %74, %36 %76 = fmul float %75, %32 %77 = call i32 @llvm.SI.packf16(float %69, float %71) %78 = bitcast i32 %77 to float %79 = call i32 @llvm.SI.packf16(float %73, float %76) %80 = bitcast i32 %79 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %78, float %80, float %78, float %80) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s10 ; BEFC030A s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 3, 1, [m0] ; C81C0700 v_interp_p2_f32 v7, [v7], v1, 3, 1, [m0] ; C81D0701 v_interp_p1_f32 v8, v0, 0, 2, [m0] ; C8200800 v_interp_p2_f32 v8, [v8], v1, 0, 2, [m0] ; C8210801 v_interp_p1_f32 v9, v0, 1, 2, [m0] ; C8240900 v_interp_p2_f32 v9, [v9], v1, 1, 2, [m0] ; C8250901 v_interp_p1_f32 v10, v0, 2, 2, [m0] ; C8280A00 v_interp_p2_f32 v10, [v10], v1, 2, 2, [m0] ; C8290A01 v_interp_p1_f32 v11, v0, 3, 2, [m0] ; C82C0B00 v_interp_p2_f32 v11, [v11], v1, 3, 2, [m0] ; C82D0B01 v_interp_p1_f32 v12, v0, 0, 3, [m0] ; C8300C00 v_interp_p2_f32 v12, [v12], v1, 0, 3, [m0] ; C8310C01 v_interp_p1_f32 v13, v0, 1, 3, [m0] ; C8340D00 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v13, [v13], v1, 1, 3, [m0] ; C8350D01 v_interp_p1_f32 v14, v0, 2, 3, [m0] ; C8380E00 v_interp_p2_f32 v14, [v14], v1, 2, 3, [m0] ; C8390E01 v_interp_p1_f32 v15, v0, 3, 3, [m0] ; C83C0F00 v_interp_p2_f32 v15, [v15], v1, 3, 3, [m0] ; C83D0F01 v_interp_p1_f32 v16, v0, 0, 4, [m0] ; C8401000 v_interp_p2_f32 v16, [v16], v1, 0, 4, [m0] ; C8411001 v_interp_p1_f32 v17, v0, 1, 4, [m0] ; C8441100 v_interp_p2_f32 v17, [v17], v1, 1, 4, [m0] ; C8451101 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[16:19], 15, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[8:15], s[0:3] ; F0800F00 00021010 v_mad_f32 v0, -v6, v2, v2 ; D2820000 240A0506 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v0, v6, v16 ; 3E002106 v_mad_f32 v1, -v6, v3, v3 ; D2820001 240E0706 v_mac_f32_e32 v1, v6, v17 ; 3E022306 v_mad_f32 v2, -v6, v4, v4 ; D2820002 24120906 v_mac_f32_e32 v2, v6, v18 ; 3E042506 v_mad_f32 v3, -v6, v5, v5 ; D2820003 24160B06 v_mac_f32_e32 v3, v6, v19 ; 3E062706 v_mac_f32_e32 v8, v12, v0 ; 3E10010C v_mac_f32_e32 v9, v13, v1 ; 3E12030D v_mac_f32_e32 v10, v14, v2 ; 3E14050E v_mac_f32_e32 v11, v15, v3 ; 3E16070F v_mul_f32_e32 v0, v7, v11 ; 10001707 v_cvt_pkrtz_f16_f32_e32 v1, v8, v9 ; 5E021308 v_cvt_pkrtz_f16_f32_e32 v0, v10, v0 ; 5E00010A exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 20 Code Size: 248 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL CONST[0..3] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[1], CONST[2] 2: DP4 TEMP[1].x, IN[1], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: MOV TEMP[1].xy, IN[0].xyxx 5: MOV OUT[1], CONST[0] 6: MOV OUT[2], CONST[1] 7: MOV OUT[0], TEMP[0] 8: MOV OUT[3], TEMP[1] 9: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 12) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 16) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 20) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 24) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 28) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 32) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 36) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 40) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 44) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 48) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 52) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 56) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 60) %30 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = add i32 %5, %8 %33 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %31, i32 0, i32 %32) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %37 = load <16 x i8>, <16 x i8> addrspace(2)* %36, align 16, !tbaa !0 %38 = add i32 %5, %8 %39 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %37, i32 0, i32 %38) %40 = extractelement <4 x float> %39, i32 0 %41 = extractelement <4 x float> %39, i32 1 %42 = extractelement <4 x float> %39, i32 2 %43 = extractelement <4 x float> %39, i32 3 %44 = fmul float %40, %22 %45 = fmul float %41, %23 %46 = fadd float %44, %45 %47 = fmul float %42, %24 %48 = fadd float %46, %47 %49 = fmul float %43, %25 %50 = fadd float %48, %49 %51 = fmul float %40, %26 %52 = fmul float %41, %27 %53 = fadd float %51, %52 %54 = fmul float %42, %28 %55 = fadd float %53, %54 %56 = fmul float %43, %29 %57 = fadd float %55, %56 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %14, float %15, float %16, float %17) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %18, float %19, float %20, float %21) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %34, float %35, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %50, float %57, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A v_mov_b32_e32 v1, 1.0 ; 7E0202F2 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105 s_buffer_load_dword s9, s[0:3], 0x6 ; C2048106 s_buffer_load_dword s10, s[0:3], 0x7 ; C2050107 s_buffer_load_dword s11, s[0:3], 0x8 ; C2058108 s_buffer_load_dword s16, s[0:3], 0x9 ; C2080109 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[4:7], v0, s[12:15], 0 idxen ; E00C2000 80030400 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_buffer_load_dword s5, s[0:3], 0xb ; C202810B s_buffer_load_dword s6, s[0:3], 0xc ; C203010C s_buffer_load_dword s7, s[0:3], 0xd ; C203810D s_buffer_load_dword s12, s[0:3], 0xe ; C206010E s_buffer_load_dword s13, s[0:3], 0x0 ; C2068100 s_buffer_load_dword s14, s[0:3], 0x1 ; C2070101 s_buffer_load_dword s15, s[0:3], 0x2 ; C2078102 s_buffer_load_dword s17, s[0:3], 0x3 ; C2088103 s_buffer_load_dword s18, s[0:3], 0x4 ; C2090104 s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s8 ; 7E000208 v_mov_b32_e32 v8, s13 ; 7E10020D v_mov_b32_e32 v9, s14 ; 7E12020E v_mov_b32_e32 v10, s15 ; 7E14020F v_mov_b32_e32 v11, s17 ; 7E160211 exp 15, 32, 0, 0, 0, v8, v9, v10, v11 ; F800020F 0B0A0908 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v8, s18 ; 7E100212 v_mov_b32_e32 v9, s9 ; 7E120209 v_mov_b32_e32 v10, s10 ; 7E14020A exp 15, 33, 0, 0, 0, v8, v0, v9, v10 ; F800021F 0A090008 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mul_f32_e32 v0, s16, v5 ; 10000A10 exp 15, 34, 0, 0, 0, v2, v3, v0, v0 ; F800022F 00000302 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v2, s7, v5 ; 10040A07 v_mac_f32_e32 v0, s11, v4 ; 3E00080B v_mac_f32_e32 v2, s6, v4 ; 3E040806 v_mac_f32_e32 v0, s4, v6 ; 3E000C04 v_mac_f32_e32 v2, s12, v6 ; 3E040C0C v_mac_f32_e32 v0, s5, v7 ; 3E000E05 v_mac_f32_e32 v2, s0, v7 ; 3E040E00 v_mov_b32_e32 v3, 0 ; 7E060280 exp 15, 12, 0, 1, 0, v0, v2, v3, v1 ; F80008CF 01030200 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 228 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0], LOCAL 0: MOV TEMP[0].xy, IN[2].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MAD TEMP[0], TEMP[0], IN[1], IN[0] 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %29 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %30 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %31 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %32 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %33 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) %34 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %35 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) %36 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) %37 = bitcast float %35 to i32 %38 = bitcast float %36 to i32 %39 = insertelement <2 x i32> undef, i32 %37, i32 0 %40 = insertelement <2 x i32> %39, i32 %38, i32 1 %41 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %40, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = fmul float %42, %31 %47 = fadd float %46, %27 %48 = fmul float %43, %32 %49 = fadd float %48, %28 %50 = fmul float %44, %33 %51 = fadd float %50, %29 %52 = fmul float %45, %34 %53 = fadd float %52, %30 %54 = call i32 @llvm.SI.packf16(float %47, float %49) %55 = bitcast i32 %54 to float %56 = call i32 @llvm.SI.packf16(float %51, float %53) %57 = bitcast i32 %56 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %55, float %57, float %55, float %57) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s10 ; BEFC030A s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700 v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701 v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800 v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801 v_interp_p1_f32 v11, v0, 1, 2, [m0] ; C82C0900 v_interp_p2_f32 v11, [v11], v1, 1, 2, [m0] ; C82D0901 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[8:15], s[0:3] ; F0800F00 00020A0A s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v2, v6, v10 ; 3E041506 v_mac_f32_e32 v3, v7, v11 ; 3E061707 v_mac_f32_e32 v4, v8, v12 ; 3E081908 v_mac_f32_e32 v5, v9, v13 ; 3E0A1B09 v_cvt_pkrtz_f16_f32_e32 v0, v2, v3 ; 5E000702 v_cvt_pkrtz_f16_f32_e32 v1, v4, v5 ; 5E020B04 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Code Size: 148 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x0 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG 0: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: call void @llvm.SI.export(i32 0, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) ret void } declare void @llvm.SI.export(i32, i32, i32, i32, i32, i32, i32, i32, i32) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } Shader Disassembly: v_mov_b32_e32 v0, 0 ; 7E000280 exp 0, 0, 0, 1, 1, v0, v0, v0, v0 ; F8001800 00000000 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 8 VGPRS: 4 Code Size: 16 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** Looking up breakpad interfaces from steamclient Calling BreakpadMiniDumpSystemInit Steam_SetMinidumpSteamID: Caching Steam ID: 76561198008816698 [API loaded yes] Steam_SetMinidumpSteamID: Setting Steam ID: 76561198008816698 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL CONST[0..5] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[0], CONST[2] 2: DP4 TEMP[1].x, IN[0], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[0], CONST[4] 5: DP4 TEMP[2].x, IN[0], CONST[5] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[1], CONST[0] 9: MOV OUT[2], CONST[1] 10: MOV OUT[0], TEMP[0] 11: MOV OUT[3], TEMP[1] 12: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 12) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 16) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 20) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 24) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 28) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 32) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 36) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 40) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 44) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 48) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 52) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 56) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 60) %30 = call float @llvm.SI.load.const(<16 x i8> %13, i32 64) %31 = call float @llvm.SI.load.const(<16 x i8> %13, i32 68) %32 = call float @llvm.SI.load.const(<16 x i8> %13, i32 72) %33 = call float @llvm.SI.load.const(<16 x i8> %13, i32 76) %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 80) %35 = call float @llvm.SI.load.const(<16 x i8> %13, i32 84) %36 = call float @llvm.SI.load.const(<16 x i8> %13, i32 88) %37 = call float @llvm.SI.load.const(<16 x i8> %13, i32 92) %38 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = add i32 %5, %8 %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %40) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = fmul float %42, %22 %47 = fmul float %43, %23 %48 = fadd float %46, %47 %49 = fmul float %44, %24 %50 = fadd float %48, %49 %51 = fmul float %45, %25 %52 = fadd float %50, %51 %53 = fmul float %42, %26 %54 = fmul float %43, %27 %55 = fadd float %53, %54 %56 = fmul float %44, %28 %57 = fadd float %55, %56 %58 = fmul float %45, %29 %59 = fadd float %57, %58 %60 = fmul float %42, %30 %61 = fmul float %43, %31 %62 = fadd float %60, %61 %63 = fmul float %44, %32 %64 = fadd float %62, %63 %65 = fmul float %45, %33 %66 = fadd float %64, %65 %67 = fmul float %42, %34 %68 = fmul float %43, %35 %69 = fadd float %67, %68 %70 = fmul float %44, %36 %71 = fadd float %69, %70 %72 = fmul float %45, %37 %73 = fadd float %71, %72 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %14, float %15, float %16, float %17) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %18, float %19, float %20, float %21) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %66, float %73, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %52, float %59, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s19, s[0:3], 0xf ; C209810F s_buffer_load_dword s20, s[0:3], 0x10 ; C20A0110 s_buffer_load_dword s21, s[0:3], 0x11 ; C20A8111 s_buffer_load_dword s22, s[0:3], 0x14 ; C20B0114 s_buffer_load_dword s23, s[0:3], 0x15 ; C20B8115 s_buffer_load_dword s24, s[0:3], 0x12 ; C20C0112 s_buffer_load_dword s25, s[0:3], 0x13 ; C20C8113 s_buffer_load_dword s26, s[0:3], 0x16 ; C20D0116 s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s4 ; 7E080204 v_mov_b32_e32 v5, s5 ; 7E0A0205 v_mov_b32_e32 v6, s6 ; 7E0C0206 v_mov_b32_e32 v7, s7 ; 7E0E0207 exp 15, 32, 0, 0, 0, v4, v5, v6, v7 ; F800020F 07060504 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v4, s8 ; 7E080208 v_mov_b32_e32 v5, s9 ; 7E0A0209 v_mov_b32_e32 v6, s10 ; 7E0C020A v_mov_b32_e32 v7, s11 ; 7E0E020B exp 15, 33, 0, 0, 0, v4, v5, v6, v7 ; F800021F 07060504 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mul_f32_e32 v4, s13, v1 ; 1008020D v_mul_f32_e32 v5, s17, v1 ; 100A0211 v_mul_f32_e32 v6, s21, v1 ; 100C0215 v_mul_f32_e32 v1, s23, v1 ; 10020217 v_mac_f32_e32 v4, s12, v0 ; 3E08000C v_mac_f32_e32 v5, s16, v0 ; 3E0A0010 v_mac_f32_e32 v6, s20, v0 ; 3E0C0014 v_mac_f32_e32 v1, s22, v0 ; 3E020016 v_mac_f32_e32 v4, s14, v2 ; 3E08040E v_mac_f32_e32 v5, s18, v2 ; 3E0A0412 v_mac_f32_e32 v6, s24, v2 ; 3E0C0418 v_mac_f32_e32 v1, s26, v2 ; 3E02041A v_mac_f32_e32 v4, s15, v3 ; 3E08060F v_mac_f32_e32 v5, s19, v3 ; 3E0A0613 v_mac_f32_e32 v6, s25, v3 ; 3E0C0619 v_mac_f32_e32 v1, s0, v3 ; 3E020600 v_mov_b32_e32 v0, 1.0 ; 7E0002F2 exp 15, 34, 0, 0, 0, v6, v1, v0, v0 ; F800022F 00000106 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 0 ; 7E020280 exp 15, 12, 0, 1, 0, v4, v5, v1, v0 ; F80008CF 00010504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 8 Code Size: 280 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[0] DCL CONST[2] DCL TEMP[0..9], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[2].xyxx 1: MOV TEMP[1], IMM[0].xxxx 2: MOV TEMP[2], IMM[0].xxxx 3: MOV TEMP[3].y, IMM[0].xxxx 4: MOV TEMP[3].x, -CONST[0].xxxx 5: BGNLOOP :0 6: FSLT TEMP[4].x, CONST[0].xxxx, TEMP[3].xxxx 7: UIF TEMP[4].xxxx :0 8: BRK 9: ENDIF 10: MOV TEMP[3].y, -CONST[0].yyyy 11: BGNLOOP :0 12: FSLT TEMP[5].x, CONST[0].yyyy, TEMP[3].yyyy 13: UIF TEMP[5].xxxx :0 14: BRK 15: ENDIF 16: MAD TEMP[6].xy, TEMP[3].xyyy, CONST[2].xyyy, TEMP[0].xyyy 17: MOV TEMP[7].xy, TEMP[6].xyyy 18: MOV TEMP[7].w, IMM[0].xxxx 19: TXB TEMP[8], TEMP[7], SAMP[0], 2D 20: ADD TEMP[2], TEMP[2], TEMP[8] 21: ADD TEMP[9].x, TEMP[3].yyyy, IMM[0].yyyy 22: MOV TEMP[3].y, TEMP[9].xxxx 23: ENDLOOP :0 24: ADD TEMP[3].x, TEMP[3].xxxx, IMM[0].yyyy 25: ENDLOOP :0 26: MUL TEMP[1], TEMP[2], CONST[0].wwww 27: MOV TEMP[0].w, IMM[0].yyyy 28: MOV TEMP[0].xyz, IN[1].xyzx 29: MUL TEMP[0], TEMP[1], TEMP[0] 30: MUL TEMP[1], TEMP[0], IN[1].wwww 31: MAD TEMP[1], IN[0], TEMP[1].wwww, TEMP[1] 32: MOV OUT[0], TEMP[1] 33: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0) %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 4) %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 12) %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 32) %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 36) %30 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %31 = load <8 x i32>, <8 x i32> addrspace(2)* %30, align 32, !tbaa !0 %32 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %33 = load <4 x i32>, <4 x i32> addrspace(2)* %32, align 16, !tbaa !0 %34 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %35 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %36 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %37 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %38 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %39 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %40 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) %41 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %42 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) %43 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) %44 = fsub float -0.000000e+00, %25 %45 = fsub float -0.000000e+00, %26 br label %LOOP LOOP: ; preds = %IF43, %main_body %temp9.0 = phi float [ 0.000000e+00, %main_body ], [ %temp9.1, %IF43 ] %temp10.0 = phi float [ 0.000000e+00, %main_body ], [ %temp10.1, %IF43 ] %temp11.0 = phi float [ 0.000000e+00, %main_body ], [ %temp11.1, %IF43 ] %temp12.0 = phi float [ %44, %main_body ], [ %75, %IF43 ] %temp8.0 = phi float [ 0.000000e+00, %main_body ], [ %temp8.1, %IF43 ] %46 = fcmp olt float %25, %temp12.0 br i1 %46, label %IF, label %ENDIF IF: ; preds = %LOOP %47 = fmul float %temp8.0, %27 %48 = fmul float %temp9.0, %27 %49 = fmul float %temp10.0, %27 %50 = fmul float %temp11.0, %27 %51 = fmul float %47, %38 %52 = fmul float %48, %39 %53 = fmul float %49, %40 %54 = fmul float %51, %41 %55 = fmul float %52, %41 %56 = fmul float %53, %41 %57 = fmul float %50, %41 %58 = fmul float %34, %57 %59 = fadd float %58, %54 %60 = fmul float %35, %57 %61 = fadd float %60, %55 %62 = fmul float %36, %57 %63 = fadd float %62, %56 %64 = fmul float %37, %57 %65 = fadd float %64, %57 %66 = call i32 @llvm.SI.packf16(float %59, float %61) %67 = bitcast i32 %66 to float %68 = call i32 @llvm.SI.packf16(float %63, float %65) %69 = bitcast i32 %68 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %67, float %69, float %67, float %69) ret void ENDIF: ; preds = %LOOP %70 = fmul float %temp12.0, %28 %71 = fadd float %70, %42 %72 = bitcast float %71 to i32 %73 = insertelement <4 x i32> , i32 %72, i32 1 br label %LOOP41 LOOP41: ; preds = %ENDIF42, %ENDIF %temp9.1 = phi float [ %temp9.0, %ENDIF ], [ %86, %ENDIF42 ] %temp10.1 = phi float [ %temp10.0, %ENDIF ], [ %87, %ENDIF42 ] %temp11.1 = phi float [ %temp11.0, %ENDIF ], [ %88, %ENDIF42 ] %temp13.0 = phi float [ %45, %ENDIF ], [ %89, %ENDIF42 ] %temp8.1 = phi float [ %temp8.0, %ENDIF ], [ %85, %ENDIF42 ] %74 = fcmp olt float %26, %temp13.0 br i1 %74, label %IF43, label %ENDIF42 IF43: ; preds = %LOOP41 %75 = fadd float %temp12.0, 1.000000e+00 br label %LOOP ENDIF42: ; preds = %LOOP41 %76 = fmul float %temp13.0, %29 %77 = fadd float %76, %43 %78 = bitcast float %77 to i32 %79 = insertelement <4 x i32> %73, i32 %78, i32 2 %80 = call <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32> %79, <8 x i32> %31, <4 x i32> %33, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %81 = extractelement <4 x float> %80, i32 0 %82 = extractelement <4 x float> %80, i32 1 %83 = extractelement <4 x float> %80, i32 2 %84 = extractelement <4 x float> %80, i32 3 %85 = fadd float %temp8.1, %81 %86 = fadd float %temp9.1, %82 %87 = fadd float %temp10.1, %83 %88 = fadd float %temp11.1, %84 %89 = fadd float %temp13.0, 1.000000e+00 br label %LOOP41 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[16:19], s[2:3], 0x0 ; C0880300 s_mov_b32 m0, s10 ; BEFC030A s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s1, s[16:19], 0x0 ; C2009100 s_buffer_load_dword s2, s[16:19], 0x1 ; C2011101 s_buffer_load_dword s0, s[16:19], 0x3 ; C2001103 s_buffer_load_dword s3, s[16:19], 0x8 ; C2019108 s_buffer_load_dword s6, s[16:19], 0x9 ; C2031109 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700 v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701 v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800 s_load_dwordx4 s[16:19], s[4:5], 0x0 ; C0880500 v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801 v_interp_p1_f32 v0, v0, 1, 2, [m0] ; C8000900 v_interp_p2_f32 v0, [v0], v1, 1, 2, [m0] ; C8010901 v_mov_b32_e32 v11, 0x80000000 ; 7E1602FF 80000000 s_waitcnt lgkmcnt(0) ; BF8C007F v_xor_b32_e32 v1, s1, v11 ; 3A021601 v_xor_b32_e32 v12, s2, v11 ; 3A181602 v_mov_b32_e32 v16, 0 ; 7E200280 s_mov_b64 s[4:5], 0 ; BE840480 v_mov_b32_e32 v17, 0 ; 7E220280 v_mov_b32_e32 v18, 0 ; 7E240280 v_mov_b32_e32 v19, 0 ; 7E260280 v_mov_b32_e32 v13, v19 ; 7E1A0313 v_mov_b32_e32 v11, v18 ; 7E160312 v_mov_b32_e32 v14, v17 ; 7E1C0311 v_mov_b32_e32 v15, v16 ; 7E1E0310 v_cmp_nlt_f32_e32 vcc, s1, v1 ; 7C1C0201 s_and_saveexec_b64 s[20:21], vcc ; BE94246A s_xor_b64 s[20:21], exec, s[20:21] ; 8994147E s_cbranch_execz BB0_4 ; BF880000 v_mad_f32 v21, s3, v1, v10 ; D2820015 042A0203 v_mov_b32_e32 v20, 0 ; 7E280280 s_mov_b64 s[22:23], 0 ; BE960480 v_mov_b32_e32 v22, v15 ; 7E2C030F v_mov_b32_e32 v24, v14 ; 7E30030E v_mov_b32_e32 v26, v11 ; 7E34030B v_mov_b32_e32 v23, v12 ; 7E2E030C v_mov_b32_e32 v25, v13 ; 7E32030D v_mov_b32_e32 v19, v25 ; 7E260319 v_mov_b32_e32 v18, v26 ; 7E24031A v_mov_b32_e32 v17, v24 ; 7E220318 v_mov_b32_e32 v16, v22 ; 7E200316 v_cmp_nlt_f32_e32 vcc, s2, v23 ; 7C1C2E02 s_and_saveexec_b64 s[24:25], vcc ; BE98246A s_xor_b64 s[24:25], exec, s[24:25] ; 8998187E v_mad_f32 v22, s6, v23, v0 ; D2820016 04022E06 image_sample_b v[26:29], 15, 0, 0, 0, 0, 0, 0, 0, v[20:23], s[8:15], s[16:19] ; F0940F00 00821A14 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v25, v26, v19 ; 0632271A v_add_f32_e32 v22, v27, v16 ; 062C211B v_add_f32_e32 v24, v28, v17 ; 0630231C v_add_f32_e32 v26, v29, v18 ; 0634251D v_add_f32_e32 v23, 1.0, v23 ; 062E2EF2 s_or_b64 exec, exec, s[24:25] ; 88FE187E s_or_b64 s[22:23], s[24:25], s[22:23] ; 88961618 s_andn2_b64 exec, exec, s[22:23] ; 8AFE167E s_cbranch_execnz BB0_5 ; BF890000 s_or_b64 exec, exec, s[22:23] ; 88FE167E v_add_f32_e32 v1, 1.0, v1 ; 060202F2 s_or_b64 exec, exec, s[20:21] ; 88FE147E s_or_b64 s[4:5], s[20:21], s[4:5] ; 88840414 s_andn2_b64 exec, exec, s[4:5] ; 8AFE047E s_cbranch_execnz BB0_1 ; BF890000 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_mul_f32_e32 v0, s0, v13 ; 10001A00 v_mul_f32_e32 v1, s0, v15 ; 10021E00 v_mul_f32_e32 v10, s0, v14 ; 10141C00 v_mul_f32_e32 v11, s0, v11 ; 10161600 v_mul_f32_e32 v0, v6, v0 ; 10000106 v_mul_f32_e32 v1, v7, v1 ; 10020307 v_mul_f32_e32 v6, v8, v10 ; 100C1508 v_mul_f32_e32 v0, v9, v0 ; 10000109 v_mul_f32_e32 v1, v9, v1 ; 10020309 v_mul_f32_e32 v6, v9, v6 ; 100C0D09 v_mul_f32_e32 v7, v9, v11 ; 100E1709 v_mac_f32_e32 v0, v7, v2 ; 3E000507 v_mac_f32_e32 v1, v7, v3 ; 3E020707 v_mac_f32_e32 v6, v7, v4 ; 3E0C0907 v_mac_f32_e32 v7, v7, v5 ; 3E0E0B07 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v6, v7 ; 5E020F06 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 32 Code Size: 424 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x0 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT 0: TEX OUT[0], IN[0], SAMP[0], 2D 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %12) %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %12) %29 = bitcast float %27 to i32 %30 = bitcast float %28 to i32 %31 = insertelement <2 x i32> undef, i32 %29, i32 0 %32 = insertelement <2 x i32> %31, i32 %30, i32 1 %33 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %32, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %34, float %35, float %36, float %37) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s10 ; BEFC030A v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030002 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 0, 0, 1, 1, v0, v1, v2, v3 ; F800180F 03020100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 4 Code Size: 60 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x0 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], CUBE, UINT 0: TEX OUT[0], IN[0], SAMP[0], CUBE 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %12) %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %12) %29 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %12) %30 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %12) %31 = insertelement <4 x float> undef, float %27, i32 0 %32 = insertelement <4 x float> %31, float %28, i32 1 %33 = insertelement <4 x float> %32, float %29, i32 2 %34 = insertelement <4 x float> %33, float %30, i32 3 %35 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %34) %36 = extractelement <4 x float> %35, i32 0 %37 = extractelement <4 x float> %35, i32 1 %38 = extractelement <4 x float> %35, i32 2 %39 = call float @llvm.fabs.f32(float %38) %40 = fdiv float 1.000000e+00, %39 %41 = fmul float %36, %40 %42 = fadd float %41, 1.500000e+00 %43 = fmul float %37, %40 %44 = fadd float %43, 1.500000e+00 %45 = bitcast float %44 to i32 %46 = bitcast float %42 to i32 %bc = bitcast <4 x float> %35 to <4 x i32> %47 = insertelement <4 x i32> undef, i32 %45, i32 0 %48 = insertelement <4 x i32> %47, i32 %46, i32 1 %49 = shufflevector <4 x i32> %48, <4 x i32> %bc, <4 x i32> %50 = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %49, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %51 = extractelement <4 x float> %50, i32 0 %52 = extractelement <4 x float> %50, i32 1 %53 = extractelement <4 x float> %50, i32 2 %54 = extractelement <4 x float> %50, i32 3 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %51, float %52, float %53, float %54) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s10 ; BEFC030A s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v0, v0, 3, 0, [m0] ; C8000300 v_interp_p2_f32 v0, [v0], v1, 3, 0, [m0] ; C8010301 v_cubeid_f32 v7, v2, v3, v4 ; D2880007 04120702 v_cubema_f32 v0, v2, v3, v4 ; D28E0000 04120702 v_rcp_f32_e64 v0, |v0| ; D3540100 00000100 v_cubesc_f32 v1, v2, v3, v4 ; D28A0001 04120702 v_cubetc_f32 v2, v2, v3, v4 ; D28C0002 04120702 v_mov_b32_e32 v5, 0x3fc00000 ; 7E0A02FF 3FC00000 v_mad_f32 v6, v0, v2, v5 ; D2820006 04160500 v_mac_f32_e32 v5, v0, v1 ; 3E0A0300 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[5:8], s[8:15], s[0:3] ; F0800F00 00020005 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 0, 0, 1, 1, v0, v1, v2, v3 ; F800180F 03020100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 12 Code Size: 136 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL IN[5] DCL OUT[0], POSITION DCL OUT[1], CLIPVERTEX DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL OUT[4], GENERIC[2] DCL OUT[5], GENERIC[3] DCL OUT[6], GENERIC[4] DCL OUT[7], GENERIC[5] DCL CONST[0..57] DCL TEMP[0..10], LOCAL IMM[0] FLT32 { -128.0000, 1.0000, -2.0000, -64.0000} IMM[1] FLT32 { -0.0159, 0.0159, 2.2000, 0.2125} IMM[2] FLT32 { 0.2125, 0.7154, 0.0721, 0.0000} 0: ADD TEMP[0].xy, IMM[0].xxxx, IN[1].xyyy 1: FSLT TEMP[1].xy, TEMP[0].xyyy, CONST[0].xxxx 2: AND TEMP[1].xy, TEMP[1].xyyy, IMM[0].yyyy 3: ABS TEMP[2].xy, TEMP[0].xyyy 4: ADD TEMP[0].xy, -TEMP[1].xyyy, TEMP[2].xyyy 5: MAD TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[0].yyyy 6: ADD TEMP[0].xy, TEMP[0].xyyy, IMM[0].wwww 7: FSLT TEMP[2].xy, TEMP[0].xyyy, CONST[0].xxxx 8: AND TEMP[2].xy, TEMP[2].xyyy, IMM[0].yyyy 9: ABS TEMP[3].xy, TEMP[0].xyyy 10: ADD TEMP[0].xy, TEMP[3].xyyy, -TEMP[2].xyyy 11: MAD TEMP[2].xy, TEMP[2].xyyy, IMM[0].zzzz, IMM[0].yyyy 12: MAD TEMP[3].x, TEMP[0].xxxx, IMM[1].xxxx, IMM[0].yyyy 13: MAD TEMP[3].x, TEMP[0].yyyy, IMM[1].xxxx, TEMP[3].xxxx 14: MOV TEMP[3].z, TEMP[3].xxxx 15: MUL TEMP[3].xy, TEMP[0].xyyy, IMM[1].yyyy 16: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 17: RSQ TEMP[4].x, TEMP[4].xxxx 18: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 19: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[3].xyyy 20: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[3].zzzz 21: MOV TEMP[2].z, TEMP[1].xxxx 22: MAD TEMP[0].xyz, IN[5].xyzz, CONST[13].xxxx, TEMP[2].xyzz 23: DP3 TEMP[2].x, TEMP[0].xyzz, CONST[54].xyzz 24: DP3 TEMP[1].x, TEMP[0].xyzz, CONST[55].xyzz 25: MOV TEMP[2].y, TEMP[1].xxxx 26: DP3 TEMP[1].x, TEMP[0].xyzz, CONST[56].xyzz 27: MOV TEMP[2].z, TEMP[1].xxxx 28: DP3 TEMP[0].x, TEMP[2].xyzz, TEMP[2].xyzz 29: RSQ TEMP[1].x, TEMP[0].xxxx 30: MUL TEMP[1].xyz, TEMP[1].xxxx, TEMP[2].xyzz 31: ADD TEMP[0].xyz, IN[2].xyzz, IN[2].xyzz 32: LG2 TEMP[2].x, TEMP[0].xxxx 33: LG2 TEMP[3].x, TEMP[0].yyyy 34: MOV TEMP[2].y, TEMP[3].xxxx 35: LG2 TEMP[3].x, TEMP[0].zzzz 36: MOV TEMP[2].z, TEMP[3].xxxx 37: MUL TEMP[0].xyz, TEMP[2].xyzz, IMM[1].zzzz 38: EX2 TEMP[2].x, TEMP[0].xxxx 39: EX2 TEMP[3].x, TEMP[0].yyyy 40: MOV TEMP[2].y, TEMP[3].xxxx 41: EX2 TEMP[3].x, TEMP[0].zzzz 42: MOV TEMP[2].z, TEMP[3].xxxx 43: ABS TEMP[3].x, CONST[50].xxxx 44: FSLT TEMP[3].x, -TEMP[3].xxxx, TEMP[3].xxxx 45: AND TEMP[0].x, TEMP[3].xxxx, IMM[0].yyyy 46: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[0].xxxx 47: DP3 TEMP[4].x, TEMP[2].xyzz, IMM[2].xyzz 48: ADD TEMP[5].x, CONST[0].yyyy, -IN[2].wwww 49: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 50: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[0].xxxx 51: MOV TEMP[4].w, TEMP[4].xxxx 52: DP4 TEMP[5].x, IN[3], CONST[48] 53: DP4 TEMP[6].x, IN[3], CONST[49] 54: MOV TEMP[5].y, TEMP[6].xxxx 55: MOV TEMP[0].w, IN[0].wwww 56: MAD TEMP[0].xyz, IN[4].xyzz, CONST[13].xxxx, IN[0].xyzz 57: DP4 TEMP[2].x, TEMP[0], CONST[54] 58: DP4 TEMP[6].x, TEMP[0], CONST[55] 59: MOV TEMP[2].y, TEMP[6].xxxx 60: DP4 TEMP[0].x, TEMP[0], CONST[56] 61: MOV TEMP[2].z, TEMP[0].xxxx 62: ADD TEMP[0].xyz, -TEMP[2].xyzz, CONST[2].xyzz 63: MOV TEMP[2].w, CONST[0].yyyy 64: DP4 TEMP[6].x, TEMP[2], CONST[8] 65: DP4 TEMP[7].x, TEMP[2], CONST[9] 66: MOV TEMP[6].y, TEMP[7].xxxx 67: DP4 TEMP[8].x, TEMP[2], CONST[11] 68: MOV TEMP[6].w, TEMP[8].xxxx 69: DP4 TEMP[9].x, TEMP[2], CONST[10] 70: MOV TEMP[2].xyz, TEMP[2].xyzx 71: MOV TEMP[6].z, TEMP[9].xxxx 72: MOV TEMP[2].w, TEMP[9].xxxx 73: MOV TEMP[5].zw, CONST[0].xxxx 74: MOV TEMP[0].w, CONST[0].xxxx 75: MOV TEMP[1].w, CONST[0].xxxx 76: MOV TEMP[3].w, CONST[0].xxxx 77: MOV TEMP[4].xyz, CONST[0].xxxx 78: MOV TEMP[10], TEMP[6] 79: MAD TEMP[9].x, TEMP[9].xxxx, CONST[0].zzzz, -TEMP[8].xxxx 80: MOV TEMP[6].z, TEMP[9].xxxx 81: MOV TEMP[6].y, -TEMP[7].xxxx 82: MAD TEMP[6].xy, CONST[57].xyyy, TEMP[8].xxxx, TEMP[6].xyyy 83: MOV OUT[2], TEMP[5] 84: MOV OUT[3], TEMP[4] 85: MOV OUT[4], TEMP[0] 86: MOV OUT[5], TEMP[1] 87: MOV OUT[0], TEMP[6] 88: MOV OUT[1], TEMP[10] 89: MOV OUT[6], TEMP[3] 90: MOV OUT[7], TEMP[2] 91: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 32) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 36) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 40) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 128) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 132) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 136) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 140) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 144) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 148) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 152) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 156) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 160) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 164) %30 = call float @llvm.SI.load.const(<16 x i8> %13, i32 168) %31 = call float @llvm.SI.load.const(<16 x i8> %13, i32 172) %32 = call float @llvm.SI.load.const(<16 x i8> %13, i32 176) %33 = call float @llvm.SI.load.const(<16 x i8> %13, i32 180) %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 184) %35 = call float @llvm.SI.load.const(<16 x i8> %13, i32 188) %36 = call float @llvm.SI.load.const(<16 x i8> %13, i32 208) %37 = call float @llvm.SI.load.const(<16 x i8> %13, i32 768) %38 = call float @llvm.SI.load.const(<16 x i8> %13, i32 772) %39 = call float @llvm.SI.load.const(<16 x i8> %13, i32 776) %40 = call float @llvm.SI.load.const(<16 x i8> %13, i32 780) %41 = call float @llvm.SI.load.const(<16 x i8> %13, i32 784) %42 = call float @llvm.SI.load.const(<16 x i8> %13, i32 788) %43 = call float @llvm.SI.load.const(<16 x i8> %13, i32 792) %44 = call float @llvm.SI.load.const(<16 x i8> %13, i32 796) %45 = call float @llvm.SI.load.const(<16 x i8> %13, i32 800) %46 = call float @llvm.SI.load.const(<16 x i8> %13, i32 864) %47 = call float @llvm.SI.load.const(<16 x i8> %13, i32 868) %48 = call float @llvm.SI.load.const(<16 x i8> %13, i32 872) %49 = call float @llvm.SI.load.const(<16 x i8> %13, i32 876) %50 = call float @llvm.SI.load.const(<16 x i8> %13, i32 880) %51 = call float @llvm.SI.load.const(<16 x i8> %13, i32 884) %52 = call float @llvm.SI.load.const(<16 x i8> %13, i32 888) %53 = call float @llvm.SI.load.const(<16 x i8> %13, i32 892) %54 = call float @llvm.SI.load.const(<16 x i8> %13, i32 896) %55 = call float @llvm.SI.load.const(<16 x i8> %13, i32 900) %56 = call float @llvm.SI.load.const(<16 x i8> %13, i32 904) %57 = call float @llvm.SI.load.const(<16 x i8> %13, i32 908) %58 = call float @llvm.SI.load.const(<16 x i8> %13, i32 912) %59 = call float @llvm.SI.load.const(<16 x i8> %13, i32 916) %60 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %61 = load <16 x i8>, <16 x i8> addrspace(2)* %60, align 16, !tbaa !0 %62 = add i32 %5, %8 %63 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %61, i32 0, i32 %62) %64 = extractelement <4 x float> %63, i32 0 %65 = extractelement <4 x float> %63, i32 1 %66 = extractelement <4 x float> %63, i32 2 %67 = extractelement <4 x float> %63, i32 3 %68 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %69 = load <16 x i8>, <16 x i8> addrspace(2)* %68, align 16, !tbaa !0 %70 = add i32 %5, %8 %71 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %69, i32 0, i32 %70) %72 = extractelement <4 x float> %71, i32 0 %73 = extractelement <4 x float> %71, i32 1 %74 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %75 = load <16 x i8>, <16 x i8> addrspace(2)* %74, align 16, !tbaa !0 %76 = add i32 %5, %8 %77 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %75, i32 0, i32 %76) %78 = extractelement <4 x float> %77, i32 0 %79 = extractelement <4 x float> %77, i32 1 %80 = extractelement <4 x float> %77, i32 2 %81 = extractelement <4 x float> %77, i32 3 %82 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %83 = load <16 x i8>, <16 x i8> addrspace(2)* %82, align 16, !tbaa !0 %84 = add i32 %5, %8 %85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %83, i32 0, i32 %84) %86 = extractelement <4 x float> %85, i32 0 %87 = extractelement <4 x float> %85, i32 1 %88 = extractelement <4 x float> %85, i32 2 %89 = extractelement <4 x float> %85, i32 3 %90 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0 %92 = add i32 %5, %8 %93 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %91, i32 0, i32 %92) %94 = extractelement <4 x float> %93, i32 0 %95 = extractelement <4 x float> %93, i32 1 %96 = extractelement <4 x float> %93, i32 2 %97 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 5 %98 = load <16 x i8>, <16 x i8> addrspace(2)* %97, align 16, !tbaa !0 %99 = add i32 %5, %8 %100 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %98, i32 0, i32 %99) %101 = extractelement <4 x float> %100, i32 0 %102 = extractelement <4 x float> %100, i32 1 %103 = extractelement <4 x float> %100, i32 2 %104 = fadd float %72, -1.280000e+02 %105 = fadd float %73, -1.280000e+02 %106 = fcmp olt float %104, %14 %107 = fcmp olt float %105, %14 %108 = select i1 %106, float 1.000000e+00, float 0.000000e+00 %109 = call float @llvm.fabs.f32(float %104) %110 = call float @llvm.fabs.f32(float %105) %111 = fsub float %109, %108 %112 = select i1 %107, float -1.000000e+00, float -0.000000e+00 %113 = fadd float %112, %110 %114 = fmul float %108, -2.000000e+00 %115 = fadd float %114, 1.000000e+00 %116 = fadd float %111, -6.400000e+01 %117 = fadd float %113, -6.400000e+01 %118 = fcmp olt float %116, %14 %119 = fcmp olt float %117, %14 %120 = select i1 %118, float 1.000000e+00, float 0.000000e+00 %121 = select i1 %119, float 1.000000e+00, float 0.000000e+00 %122 = call float @llvm.fabs.f32(float %116) %123 = call float @llvm.fabs.f32(float %117) %124 = fsub float %122, %120 %125 = fsub float %123, %121 %126 = fmul float %120, -2.000000e+00 %127 = fadd float %126, 1.000000e+00 %128 = fmul float %121, -2.000000e+00 %129 = fadd float %128, 1.000000e+00 %130 = fmul float %124, 0xBF90410420000000 %131 = fadd float %130, 1.000000e+00 %132 = fmul float %125, 0xBF90410420000000 %133 = fadd float %132, %131 %134 = fmul float %124, 0x3F90410420000000 %135 = fmul float %125, 0x3F90410420000000 %136 = fmul float %134, %134 %137 = fmul float %135, %135 %138 = fadd float %137, %136 %139 = fmul float %133, %133 %140 = fadd float %138, %139 %141 = call float @llvm.AMDGPU.rsq.clamped.f32(float %140) %142 = fmul float %134, %141 %143 = fmul float %135, %141 %144 = fmul float %133, %141 %145 = fmul float %127, %142 %146 = fmul float %129, %143 %147 = fmul float %115, %144 %148 = fmul float %101, %36 %149 = fadd float %148, %145 %150 = fmul float %102, %36 %151 = fadd float %150, %146 %152 = fmul float %103, %36 %153 = fadd float %152, %147 %154 = fmul float %149, %46 %155 = fmul float %151, %47 %156 = fadd float %155, %154 %157 = fmul float %153, %48 %158 = fadd float %156, %157 %159 = fmul float %149, %50 %160 = fmul float %151, %51 %161 = fadd float %160, %159 %162 = fmul float %153, %52 %163 = fadd float %161, %162 %164 = fmul float %149, %54 %165 = fmul float %151, %55 %166 = fadd float %165, %164 %167 = fmul float %153, %56 %168 = fadd float %166, %167 %169 = fmul float %158, %158 %170 = fmul float %163, %163 %171 = fadd float %170, %169 %172 = fmul float %168, %168 %173 = fadd float %171, %172 %174 = call float @llvm.AMDGPU.rsq.clamped.f32(float %173) %175 = fmul float %174, %158 %176 = fmul float %174, %163 %177 = fmul float %174, %168 %178 = fadd float %78, %78 %179 = fadd float %79, %79 %180 = fadd float %80, %80 %181 = call float @llvm.log2.f32(float %178) %182 = call float @llvm.log2.f32(float %179) %183 = call float @llvm.log2.f32(float %180) %184 = fmul float %181, 0x40019999A0000000 %185 = fmul float %182, 0x40019999A0000000 %186 = fmul float %183, 0x40019999A0000000 %187 = call float @llvm.AMDIL.exp.(float %184) %188 = call float @llvm.AMDIL.exp.(float %185) %189 = call float @llvm.AMDIL.exp.(float %186) %190 = call float @llvm.fabs.f32(float %45) %191 = fsub float -0.000000e+00, %190 %192 = fcmp ogt float %190, %191 %193 = select i1 %192, float 1.000000e+00, float 0.000000e+00 %194 = fmul float %187, %193 %195 = fmul float %188, %193 %196 = fmul float %189, %193 %197 = fmul float %187, 0x3FCB333340000000 %198 = fmul float %188, 0x3FE6E48E80000000 %199 = fadd float %198, %197 %200 = fmul float %189, 0x3FB2752540000000 %201 = fadd float %199, %200 %202 = fsub float %15, %81 %203 = fmul float %201, %202 %204 = fmul float %203, %193 %205 = fmul float %86, %37 %206 = fmul float %87, %38 %207 = fadd float %205, %206 %208 = fmul float %88, %39 %209 = fadd float %207, %208 %210 = fmul float %89, %40 %211 = fadd float %209, %210 %212 = fmul float %86, %41 %213 = fmul float %87, %42 %214 = fadd float %212, %213 %215 = fmul float %88, %43 %216 = fadd float %214, %215 %217 = fmul float %89, %44 %218 = fadd float %216, %217 %219 = fmul float %94, %36 %220 = fadd float %219, %64 %221 = fmul float %95, %36 %222 = fadd float %221, %65 %223 = fmul float %96, %36 %224 = fadd float %223, %66 %225 = fmul float %220, %46 %226 = fmul float %222, %47 %227 = fadd float %225, %226 %228 = fmul float %224, %48 %229 = fadd float %227, %228 %230 = fmul float %67, %49 %231 = fadd float %229, %230 %232 = fmul float %220, %50 %233 = fmul float %222, %51 %234 = fadd float %232, %233 %235 = fmul float %224, %52 %236 = fadd float %234, %235 %237 = fmul float %67, %53 %238 = fadd float %236, %237 %239 = fmul float %220, %54 %240 = fmul float %222, %55 %241 = fadd float %239, %240 %242 = fmul float %224, %56 %243 = fadd float %241, %242 %244 = fmul float %67, %57 %245 = fadd float %243, %244 %246 = fsub float %17, %231 %247 = fsub float %18, %238 %248 = fsub float %19, %245 %249 = fmul float %231, %20 %250 = fmul float %238, %21 %251 = fadd float %249, %250 %252 = fmul float %245, %22 %253 = fadd float %251, %252 %254 = fmul float %15, %23 %255 = fadd float %253, %254 %256 = fmul float %231, %24 %257 = fmul float %238, %25 %258 = fadd float %256, %257 %259 = fmul float %245, %26 %260 = fadd float %258, %259 %261 = fmul float %15, %27 %262 = fadd float %260, %261 %263 = fmul float %231, %32 %264 = fmul float %238, %33 %265 = fadd float %263, %264 %266 = fmul float %245, %34 %267 = fadd float %265, %266 %268 = fmul float %15, %35 %269 = fadd float %267, %268 %270 = fmul float %231, %28 %271 = fmul float %238, %29 %272 = fadd float %270, %271 %273 = fmul float %245, %30 %274 = fadd float %272, %273 %275 = fmul float %15, %31 %276 = fadd float %274, %275 %277 = fmul float %276, %16 %278 = fsub float %277, %269 %279 = fmul float %58, %269 %280 = fadd float %279, %255 %281 = fmul float %59, %269 %282 = fsub float %281, %262 %283 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 16 %284 = load <16 x i8>, <16 x i8> addrspace(2)* %283, align 16, !tbaa !0 %285 = call float @llvm.SI.load.const(<16 x i8> %284, i32 0) %286 = fmul float %285, %255 %287 = call float @llvm.SI.load.const(<16 x i8> %284, i32 4) %288 = fmul float %287, %262 %289 = fadd float %286, %288 %290 = call float @llvm.SI.load.const(<16 x i8> %284, i32 8) %291 = fmul float %290, %276 %292 = fadd float %289, %291 %293 = call float @llvm.SI.load.const(<16 x i8> %284, i32 12) %294 = fmul float %293, %269 %295 = fadd float %292, %294 %296 = call float @llvm.SI.load.const(<16 x i8> %284, i32 16) %297 = fmul float %296, %255 %298 = call float @llvm.SI.load.const(<16 x i8> %284, i32 20) %299 = fmul float %298, %262 %300 = fadd float %297, %299 %301 = call float @llvm.SI.load.const(<16 x i8> %284, i32 24) %302 = fmul float %301, %276 %303 = fadd float %300, %302 %304 = call float @llvm.SI.load.const(<16 x i8> %284, i32 28) %305 = fmul float %304, %269 %306 = fadd float %303, %305 %307 = call float @llvm.SI.load.const(<16 x i8> %284, i32 32) %308 = fmul float %307, %255 %309 = call float @llvm.SI.load.const(<16 x i8> %284, i32 36) %310 = fmul float %309, %262 %311 = fadd float %308, %310 %312 = call float @llvm.SI.load.const(<16 x i8> %284, i32 40) %313 = fmul float %312, %276 %314 = fadd float %311, %313 %315 = call float @llvm.SI.load.const(<16 x i8> %284, i32 44) %316 = fmul float %315, %269 %317 = fadd float %314, %316 %318 = call float @llvm.SI.load.const(<16 x i8> %284, i32 48) %319 = fmul float %318, %255 %320 = call float @llvm.SI.load.const(<16 x i8> %284, i32 52) %321 = fmul float %320, %262 %322 = fadd float %319, %321 %323 = call float @llvm.SI.load.const(<16 x i8> %284, i32 56) %324 = fmul float %323, %276 %325 = fadd float %322, %324 %326 = call float @llvm.SI.load.const(<16 x i8> %284, i32 60) %327 = fmul float %326, %269 %328 = fadd float %325, %327 %329 = call float @llvm.SI.load.const(<16 x i8> %284, i32 64) %330 = fmul float %329, %255 %331 = call float @llvm.SI.load.const(<16 x i8> %284, i32 68) %332 = fmul float %331, %262 %333 = fadd float %330, %332 %334 = call float @llvm.SI.load.const(<16 x i8> %284, i32 72) %335 = fmul float %334, %276 %336 = fadd float %333, %335 %337 = call float @llvm.SI.load.const(<16 x i8> %284, i32 76) %338 = fmul float %337, %269 %339 = fadd float %336, %338 %340 = call float @llvm.SI.load.const(<16 x i8> %284, i32 80) %341 = fmul float %340, %255 %342 = call float @llvm.SI.load.const(<16 x i8> %284, i32 84) %343 = fmul float %342, %262 %344 = fadd float %341, %343 %345 = call float @llvm.SI.load.const(<16 x i8> %284, i32 88) %346 = fmul float %345, %276 %347 = fadd float %344, %346 %348 = call float @llvm.SI.load.const(<16 x i8> %284, i32 92) %349 = fmul float %348, %269 %350 = fadd float %347, %349 %351 = call float @llvm.SI.load.const(<16 x i8> %284, i32 96) %352 = fmul float %351, %255 %353 = call float @llvm.SI.load.const(<16 x i8> %284, i32 100) %354 = fmul float %353, %262 %355 = fadd float %352, %354 %356 = call float @llvm.SI.load.const(<16 x i8> %284, i32 104) %357 = fmul float %356, %276 %358 = fadd float %355, %357 %359 = call float @llvm.SI.load.const(<16 x i8> %284, i32 108) %360 = fmul float %359, %269 %361 = fadd float %358, %360 %362 = call float @llvm.SI.load.const(<16 x i8> %284, i32 112) %363 = fmul float %362, %255 %364 = call float @llvm.SI.load.const(<16 x i8> %284, i32 116) %365 = fmul float %364, %262 %366 = fadd float %363, %365 %367 = call float @llvm.SI.load.const(<16 x i8> %284, i32 120) %368 = fmul float %367, %276 %369 = fadd float %366, %368 %370 = call float @llvm.SI.load.const(<16 x i8> %284, i32 124) %371 = fmul float %370, %269 %372 = fadd float %369, %371 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %211, float %218, float %14, float %14) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %14, float %14, float %14, float %204) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %246, float %247, float %248, float %14) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %175, float %176, float %177, float %14) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %194, float %195, float %196, float %14) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %231, float %238, float %245, float %276) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %280, float %282, float %278, float %269) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 13, i32 0, float %295, float %306, float %317, float %328) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 14, i32 0, float %339, float %350, float %361, float %372) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0xc3000000 ; 7E0202FF C3000000 v_mov_b32_e32 v2, 0x80000000 ; 7E0402FF 80000000 v_mov_b32_e32 v3, 0xc2800000 ; 7E0602FF C2800000 v_mov_b32_e32 v4, 0x3c820821 ; 7E0802FF 3C820821 v_mov_b32_e32 v5, 0xbc820821 ; 7E0A02FF BC820821 v_mov_b32_e32 v6, 0x400ccccd ; 7E0C02FF 400CCCCD v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[20:23], s[8:9], 0xc ; C08A090C s_load_dwordx4 s[24:27], s[8:9], 0x10 ; C08C0910 s_load_dwordx4 s[8:11], s[8:9], 0x14 ; C0840914 s_load_dwordx4 s[28:31], s[2:3], 0x0 ; C08E0300 s_load_dwordx4 s[60:63], s[2:3], 0x40 ; C09E0340 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[7:10], v0, s[4:7], 0 idxen ; E00C2000 80010700 buffer_load_format_xyzw v[11:14], v0, s[12:15], 0 idxen ; E00C2000 80030B00 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[13:16], v0, s[16:19], 0 idxen ; E00C2000 80040D00 buffer_load_format_xyzw v[17:20], v0, s[20:23], 0 idxen ; E00C2000 80051100 buffer_load_format_xyzw v[21:24], v0, s[24:27], 0 idxen ; E00C2000 80061500 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[24:27], v0, s[8:11], 0 idxen ; E00C2000 80021800 s_buffer_load_dword s0, s[28:31], 0x0 ; C2001D00 s_buffer_load_dword s43, s[28:31], 0x1 ; C2159D01 s_buffer_load_dword s2, s[28:31], 0x2 ; C2011D02 s_buffer_load_dword s44, s[28:31], 0x8 ; C2161D08 s_buffer_load_dword s45, s[28:31], 0x9 ; C2169D09 s_buffer_load_dword s47, s[28:31], 0x2e ; C2179D2E s_buffer_load_dword s1, s[28:31], 0x2f ; C2009D2F s_buffer_load_dword s38, s[28:31], 0x34 ; C2131D34 s_buffer_load_dword s58, s[28:31], 0xc0 ; C21D1DC0 s_buffer_load_dword s59, s[28:31], 0xc1 ; C21D9DC1 s_buffer_load_dword s64, s[28:31], 0xc7 ; C2201DC7 s_buffer_load_dword s6, s[28:31], 0xc8 ; C2031DC8 s_buffer_load_dword s9, s[28:31], 0xd8 ; C2049DD8 s_buffer_load_dword s7, s[28:31], 0xd9 ; C2039DD9 s_buffer_load_dword s3, s[28:31], 0xda ; C2019DDA s_buffer_load_dword s65, s[28:31], 0xdb ; C2209DDB s_buffer_load_dword s11, s[28:31], 0xdc ; C2059DDC s_buffer_load_dword s8, s[28:31], 0xdd ; C2041DDD s_buffer_load_dword s4, s[28:31], 0xde ; C2021DDE s_buffer_load_dword s66, s[28:31], 0xdf ; C2211DDF s_buffer_load_dword s12, s[28:31], 0xe0 ; C2061DE0 s_buffer_load_dword s10, s[28:31], 0xe1 ; C2051DE1 s_buffer_load_dword s5, s[28:31], 0xe2 ; C2029DE2 s_buffer_load_dword s67, s[28:31], 0xe3 ; C2219DE3 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_gt_f32_e64 s[14:15], |s6|, -|s6| ; D008030E 40000C06 s_buffer_load_dword s6, s[28:31], 0xe4 ; C2031DE4 s_buffer_load_dword s68, s[28:31], 0xc2 ; C2221DC2 s_buffer_load_dword s69, s[28:31], 0xc3 ; C2229DC3 s_buffer_load_dword s70, s[28:31], 0xc4 ; C2231DC4 v_cndmask_b32_e64 v0, 0, 1.0, s[14:15] ; D2000000 0039E480 s_buffer_load_dword s71, s[28:31], 0xc5 ; C2239DC5 s_buffer_load_dword s72, s[28:31], 0xc6 ; C2241DC6 s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v27, s1 ; 7E360201 s_buffer_load_dword s57, s[28:31], 0x24 ; C21C9D24 v_mov_b32_e32 v28, s0 ; 7E380200 s_buffer_load_dword s73, s[28:31], 0x25 ; C2249D25 s_buffer_load_dword s50, s[28:31], 0x26 ; C2191D26 s_buffer_load_dword s1, s[28:31], 0x27 ; C2009D27 s_buffer_load_dword s51, s[28:31], 0x28 ; C2199D28 s_buffer_load_dword s48, s[28:31], 0xa ; C2181D0A s_buffer_load_dword s74, s[28:31], 0x20 ; C2251D20 s_buffer_load_dword s75, s[28:31], 0x21 ; C2259D21 s_buffer_load_dword s52, s[28:31], 0x22 ; C21A1D22 s_buffer_load_dword s13, s[28:31], 0x23 ; C2069D23 s_buffer_load_dword s55, s[28:31], 0x29 ; C21B9D29 s_buffer_load_dword s53, s[28:31], 0x2a ; C21A9D2A s_buffer_load_dword s14, s[28:31], 0x2b ; C2071D2B s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v29, s1 ; 7E3A0201 s_buffer_load_dword s76, s[28:31], 0x2c ; C2261D2C s_buffer_load_dword s77, s[28:31], 0x2d ; C2269D2D s_buffer_load_dword s21, s[28:31], 0xe5 ; C20A9DE5 s_buffer_load_dword s26, s[60:63], 0x0 ; C20D3D00 s_buffer_load_dword s56, s[60:63], 0x1 ; C21C3D01 s_buffer_load_dword s22, s[60:63], 0x2 ; C20B3D02 v_mov_b32_e32 v30, s13 ; 7E3C020D s_buffer_load_dword s18, s[60:63], 0x3 ; C2093D03 s_buffer_load_dword s28, s[60:63], 0x4 ; C20E3D04 s_buffer_load_dword s54, s[60:63], 0x5 ; C21B3D05 v_mov_b32_e32 v31, s14 ; 7E3E020E s_buffer_load_dword s23, s[60:63], 0x6 ; C20BBD06 s_buffer_load_dword s13, s[60:63], 0x7 ; C206BD07 s_buffer_load_dword s31, s[60:63], 0x8 ; C20FBD08 s_buffer_load_dword s49, s[60:63], 0x9 ; C218BD09 s_buffer_load_dword s24, s[60:63], 0xa ; C20C3D0A s_buffer_load_dword s14, s[60:63], 0xb ; C2073D0B s_buffer_load_dword s33, s[60:63], 0xc ; C210BD0C s_buffer_load_dword s46, s[60:63], 0xd ; C2173D0D s_buffer_load_dword s25, s[60:63], 0xe ; C20CBD0E s_buffer_load_dword s16, s[60:63], 0xf ; C2083D0F s_buffer_load_dword s34, s[60:63], 0x10 ; C2113D10 s_buffer_load_dword s42, s[60:63], 0x11 ; C2153D11 s_buffer_load_dword s27, s[60:63], 0x12 ; C20DBD12 s_buffer_load_dword s15, s[60:63], 0x13 ; C207BD13 s_buffer_load_dword s35, s[60:63], 0x14 ; C211BD14 s_buffer_load_dword s41, s[60:63], 0x15 ; C214BD15 s_buffer_load_dword s29, s[60:63], 0x16 ; C20EBD16 s_buffer_load_dword s17, s[60:63], 0x17 ; C208BD17 s_buffer_load_dword s36, s[60:63], 0x18 ; C2123D18 s_buffer_load_dword s40, s[60:63], 0x19 ; C2143D19 s_buffer_load_dword s30, s[60:63], 0x1a ; C20F3D1A s_buffer_load_dword s19, s[60:63], 0x1b ; C209BD1B s_buffer_load_dword s37, s[60:63], 0x1c ; C212BD1C s_buffer_load_dword s39, s[60:63], 0x1d ; C213BD1D s_buffer_load_dword s32, s[60:63], 0x1e ; C2103D1E s_buffer_load_dword s20, s[60:63], 0x1f ; C20A3D1F v_add_f32_e32 v12, v1, v12 ; 06181901 v_cmp_gt_f32_e32 vcc, s0, v12 ; 7C081800 v_cndmask_b32_e64 v2, v2, -1.0, vcc ; D2000002 01A9E702 v_add_f32_e32 v1, v1, v11 ; 06021701 v_cmp_gt_f32_e32 vcc, s0, v1 ; 7C080200 v_cndmask_b32_e64 v11, 0, 1.0, vcc ; D200000B 01A9E480 v_add_f32_e64 v2, v2, |v12| ; D2060202 00021902 v_sub_f32_e64 v1, |v1|, v11 ; D2080101 00021701 v_add_f32_e32 v1, v3, v1 ; 06020303 v_add_f32_e32 v2, v3, v2 ; 06040503 v_cmp_gt_f32_e32 vcc, s0, v1 ; 7C080200 v_cmp_gt_f32_e64 s[0:1], s0, v2 ; D0080000 00020400 v_mul_f32_e32 v3, s59, v18 ; 1006243B v_mul_f32_e32 v12, s71, v18 ; 10182447 v_mac_f32_e32 v3, s58, v17 ; 3E06223A v_mac_f32_e32 v12, s70, v17 ; 3E182246 v_mac_f32_e32 v3, s68, v19 ; 3E062644 v_mac_f32_e32 v12, s72, v19 ; 3E182648 v_mac_f32_e32 v3, s69, v20 ; 3E062845 v_mac_f32_e32 v12, s64, v20 ; 3E182840 v_mad_f32 v7, s38, v21, v7 ; D2820007 041E2A26 v_mad_f32 v8, s38, v22, v8 ; D2820008 04222C26 v_mad_f32 v9, s38, v23, v9 ; D2820009 04262E26 v_mul_f32_e32 v17, s7, v8 ; 10221007 v_mac_f32_e32 v17, s9, v7 ; 3E220E09 v_mac_f32_e32 v17, s3, v9 ; 3E221203 v_mac_f32_e32 v17, s65, v10 ; 3E221441 v_mul_f32_e32 v18, s8, v8 ; 10241008 v_mac_f32_e32 v18, s11, v7 ; 3E240E0B v_mac_f32_e32 v18, s4, v9 ; 3E241204 v_mac_f32_e32 v18, s66, v10 ; 3E241442 v_mul_f32_e32 v8, s10, v8 ; 1010100A v_mac_f32_e32 v8, s12, v7 ; 3E100E0C v_mac_f32_e32 v8, s5, v9 ; 3E101205 v_mac_f32_e32 v8, s67, v10 ; 3E101443 v_mul_f32_e32 v7, s75, v18 ; 100E244B v_mac_f32_e32 v7, s74, v17 ; 3E0E224A v_mul_f32_e32 v9, s73, v18 ; 10122449 v_mac_f32_e32 v9, s57, v17 ; 3E122239 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v10, s77, v18 ; 1014244D v_mac_f32_e32 v10, s76, v17 ; 3E14224C v_mul_f32_e32 v19, s55, v18 ; 10262437 v_mac_f32_e32 v19, s51, v17 ; 3E262233 v_mac_f32_e32 v7, s52, v8 ; 3E0E1034 v_mac_f32_e32 v9, s50, v8 ; 3E121032 v_mac_f32_e32 v10, s47, v8 ; 3E14102F v_mac_f32_e32 v19, s53, v8 ; 3E261035 v_mac_f32_e32 v7, s43, v30 ; 3E0E3C2B v_mac_f32_e32 v9, s43, v29 ; 3E123A2B v_mac_f32_e32 v10, s43, v27 ; 3E14362B v_mac_f32_e32 v19, s43, v31 ; 3E263E2B v_sub_f32_e32 v16, s43, v16 ; 0820202B v_sub_f32_e32 v20, s44, v17 ; 0828222C v_sub_f32_e32 v21, s45, v18 ; 082A242D v_sub_f32_e32 v22, s48, v8 ; 082C1030 v_mul_f32_e32 v23, s56, v9 ; 102E1238 v_mul_f32_e32 v27, s54, v9 ; 10361236 v_cndmask_b32_e64 v29, 0, 1.0, vcc ; D200001D 01A9E480 v_cndmask_b32_e64 v30, 0, 1.0, s[0:1] ; D200001E 0001E480 v_sub_f32_e64 v1, |v1|, v29 ; D2080101 00023B01 v_sub_f32_e64 v2, |v2|, v30 ; D2080102 00023D02 v_mad_f32 v31, v1, v5, 1.0 ; D282001F 03CA0B01 v_mac_f32_e32 v31, v5, v2 ; 3E3E0505 v_mul_f32_e32 v1, v4, v1 ; 10020304 v_mul_f32_e32 v2, v4, v2 ; 10040504 v_mul_f32_e32 v4, v1, v1 ; 10080301 v_mac_f32_e32 v4, v2, v2 ; 3E080502 v_mac_f32_e32 v4, v31, v31 ; 3E083F1F v_rsq_clamp_f32_e32 v4, v4 ; 7E085904 v_mul_f32_e32 v5, s49, v9 ; 100A1231 v_mul_f32_e32 v32, s46, v9 ; 1040122E v_mul_f32_e32 v33, s42, v9 ; 1042122A v_mul_f32_e32 v1, v4, v1 ; 10020304 v_mul_f32_e32 v2, v4, v2 ; 10040504 v_mul_f32_e32 v4, v4, v31 ; 10083F04 v_mul_f32_e32 v29, -2.0, v29 ; 103A3AF5 v_mac_f32_e32 v1, v1, v29 ; 3E023B01 v_mul_f32_e32 v29, -2.0, v30 ; 103A3CF5 v_mac_f32_e32 v2, v2, v29 ; 3E043B02 v_mul_f32_e32 v11, -2.0, v11 ; 101616F5 v_mac_f32_e32 v4, v4, v11 ; 3E081704 v_mac_f32_e32 v1, s38, v24 ; 3E023026 v_mac_f32_e32 v2, s38, v25 ; 3E043226 v_mac_f32_e32 v4, s38, v26 ; 3E083426 v_mul_f32_e32 v11, s41, v9 ; 10161229 v_mul_f32_e32 v24, s40, v9 ; 10301228 v_add_f32_e32 v13, v13, v13 ; 061A1B0D v_add_f32_e32 v14, v14, v14 ; 061C1D0E v_add_f32_e32 v15, v15, v15 ; 061E1F0F v_log_f32_e32 v13, v13 ; 7E1A4F0D v_log_f32_e32 v14, v14 ; 7E1C4F0E v_log_f32_e32 v15, v15 ; 7E1E4F0F exp 15, 32, 0, 0, 0, v3, v12, v28, v28 ; F800020F 1C1C0C03 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v3, v6, v13 ; 10061B06 v_mul_f32_e32 v12, v6, v14 ; 10181D06 v_mul_f32_e32 v6, v6, v15 ; 100C1F06 v_exp_f32_e32 v3, v3 ; 7E064B03 v_exp_f32_e32 v12, v12 ; 7E184B0C v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_mul_f32_e32 v13, 0x3e59999a, v3 ; 101A06FF 3E59999A v_madmk_f32_e32 v13, v12, v13, 0x3f372474 ; 401A1B0C 3F372474 v_madmk_f32_e32 v13, v6, v13, 0x3d93a92a ; 401A1B06 3D93A92A v_mul_f32_e32 v13, v16, v13 ; 101A1B10 v_mul_f32_e32 v13, v0, v13 ; 101A1B00 exp 15, 33, 0, 0, 0, v28, v28, v28, v13 ; F800021F 0D1C1C1C exp 15, 34, 0, 0, 0, v20, v21, v22, v28 ; F800022F 1C161514 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v13, s9, v1 ; 101A0209 v_mul_f32_e32 v14, s11, v1 ; 101C020B v_mul_f32_e32 v1, s12, v1 ; 1002020C v_mac_f32_e32 v13, s7, v2 ; 3E1A0407 v_mac_f32_e32 v14, s8, v2 ; 3E1C0408 v_mac_f32_e32 v1, s10, v2 ; 3E02040A v_mul_f32_e32 v2, s39, v9 ; 10041227 v_mac_f32_e32 v23, s26, v7 ; 3E2E0E1A v_mac_f32_e32 v27, s28, v7 ; 3E360E1C v_mac_f32_e32 v5, s31, v7 ; 3E0A0E1F v_mac_f32_e32 v32, s33, v7 ; 3E400E21 v_mac_f32_e32 v33, s34, v7 ; 3E420E22 v_mac_f32_e32 v11, s35, v7 ; 3E160E23 v_mac_f32_e32 v24, s36, v7 ; 3E300E24 v_mac_f32_e32 v2, s37, v7 ; 3E040E25 v_mad_f32 v7, s6, v10, v7 ; D2820007 041E1406 v_mad_f32 v9, s21, v10, -v9 ; D2820009 84261415 v_mac_f32_e32 v23, s22, v19 ; 3E2E2616 v_mac_f32_e32 v27, s23, v19 ; 3E362617 v_mac_f32_e32 v5, s24, v19 ; 3E0A2618 v_mac_f32_e32 v32, s25, v19 ; 3E402619 v_mac_f32_e32 v33, s27, v19 ; 3E42261B v_mac_f32_e32 v11, s29, v19 ; 3E16261D v_mac_f32_e32 v24, s30, v19 ; 3E30261E v_mac_f32_e32 v2, s32, v19 ; 3E042620 v_mac_f32_e32 v23, s18, v10 ; 3E2E1412 v_mac_f32_e32 v13, s3, v4 ; 3E1A0803 v_mac_f32_e32 v14, s4, v4 ; 3E1C0804 v_mac_f32_e32 v1, s5, v4 ; 3E020805 v_mul_f32_e32 v4, v13, v13 ; 10081B0D v_mac_f32_e32 v4, v14, v14 ; 3E081D0E v_mac_f32_e32 v4, v1, v1 ; 3E080301 v_rsq_clamp_f32_e32 v4, v4 ; 7E085904 v_mac_f32_e32 v27, s13, v10 ; 3E36140D v_mac_f32_e32 v5, s14, v10 ; 3E0A140E v_mac_f32_e32 v32, s16, v10 ; 3E401410 v_mul_f32_e32 v13, v13, v4 ; 101A090D v_mul_f32_e32 v14, v14, v4 ; 101C090E v_mul_f32_e32 v1, v1, v4 ; 10020901 exp 15, 35, 0, 0, 0, v13, v14, v1, v28 ; F800023F 1C010E0D v_mac_f32_e32 v33, s15, v10 ; 3E42140F v_mac_f32_e32 v11, s17, v10 ; 3E161411 v_mac_f32_e32 v24, s19, v10 ; 3E301413 v_mac_f32_e32 v2, s20, v10 ; 3E041414 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v1, v0, v3 ; 10020700 v_mul_f32_e32 v3, v0, v12 ; 10061900 v_mad_f32 v4, v19, s2, -v10 ; D2820004 84280513 v_mul_f32_e32 v0, v0, v6 ; 10000D00 exp 15, 36, 0, 0, 0, v1, v3, v0, v28 ; F800024F 1C000301 exp 15, 37, 0, 0, 0, v17, v18, v8, v19 ; F800025F 13081211 exp 15, 12, 0, 0, 0, v7, v9, v4, v10 ; F80000CF 0A040907 exp 15, 13, 0, 0, 0, v23, v27, v5, v32 ; F80000DF 20051B17 exp 15, 14, 0, 1, 0, v33, v11, v24, v2 ; F80008EF 02180B21 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 80 VGPRS: 36 Code Size: 1272 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[4], PERSPECTIVE DCL IN[5], GENERIC[5], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], CUBE, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], SHADOW2D, FLOAT DCL CONST[0..90] DCL TEMP[0..13], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 2.0000, -0.5000} IMM[1] FLT32 { -0.0000, -1.0000, -2.0000, 0.0625} IMM[2] FLT32 { 0.0005, 0.0000, -0.0005, 0.1250} IMM[3] FLT32 { 0.2500, 0.0000, -1.0000, -2.0000} IMM[4] FLT32 { 0.2125, 0.7154, 0.0721, 0.5000} IMM[5] FLT32 { 0.2990, 0.5870, 0.1140, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].xy, IN[0].xyyy 3: TEX TEMP[1], TEMP[1], SAMP[2], 2D 4: MOV TEMP[1], TEMP[1] 5: UIF CONST[90].xxxx :0 6: MAD TEMP[2], IN[5].xyzx, IMM[0].xxxy, IMM[0].yyyx 7: DP4 TEMP[3].x, TEMP[2], CONST[69] 8: DP4 TEMP[4].x, TEMP[2], CONST[70] 9: MOV TEMP[3].y, TEMP[4].xxxx 10: MOV_SAT TEMP[5].xy, TEMP[3].xyyy 11: ADD TEMP[5].xy, -TEMP[3].xyyy, TEMP[5].xyyy 12: MOV TEMP[1].yz, TEMP[5].yxyy 13: DP2 TEMP[5].x, TEMP[5].xyyy, IMM[0].xxxx 14: MOV TEMP[1].y, TEMP[5].xxxx 15: DP4 TEMP[6].x, TEMP[2], CONST[73] 16: DP4 TEMP[7].x, TEMP[2], CONST[74] 17: MOV TEMP[6].y, TEMP[7].xxxx 18: MOV_SAT TEMP[8].xy, TEMP[6].xyyy 19: ADD TEMP[8].xy, -TEMP[6].xyyy, TEMP[8].xyyy 20: MOV TEMP[1].zw, TEMP[8].yyxy 21: DP2 TEMP[8].x, TEMP[8].xyyy, IMM[0].xxxx 22: MOV TEMP[1].z, TEMP[8].xxxx 23: DP4 TEMP[9].x, TEMP[2], CONST[77] 24: DP4 TEMP[10].x, TEMP[2], CONST[78] 25: MOV TEMP[6].z, IMM[0].xxxx 26: MOV TEMP[9].z, IMM[0].zzzz 27: MOV TEMP[11].w, TEMP[6] 28: ABS TEMP[12].x, TEMP[8].xxxx 29: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 30: UIF TEMP[12].xxxx :0 31: MOV TEMP[12].x, TEMP[6].xxxx 32: ELSE :0 33: MOV TEMP[12].x, TEMP[9].xxxx 34: ENDIF 35: MOV TEMP[11].x, TEMP[12].xxxx 36: ABS TEMP[12].x, TEMP[8].xxxx 37: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 38: UIF TEMP[12].xxxx :0 39: MOV TEMP[7].x, TEMP[7].xxxx 40: ELSE :0 41: MOV TEMP[7].x, TEMP[10].xxxx 42: ENDIF 43: MOV TEMP[11].y, TEMP[7].xxxx 44: ABS TEMP[7].x, TEMP[8].xxxx 45: FSGE TEMP[7].x, -TEMP[7].xxxx, IMM[0].yyyy 46: UIF TEMP[7].xxxx :0 47: MOV TEMP[7].x, IMM[0].xxxx 48: ELSE :0 49: MOV TEMP[7].x, IMM[0].zzzz 50: ENDIF 51: MOV TEMP[11].z, TEMP[7].xxxx 52: MOV TEMP[6].xyz, TEMP[11] 53: MOV TEMP[3].w, IMM[0].yyyy 54: MOV TEMP[11].x, TEMP[1] 55: ABS TEMP[7].x, TEMP[5].xxxx 56: FSGE TEMP[7].x, -TEMP[7].xxxx, IMM[0].yyyy 57: UIF TEMP[7].xxxx :0 58: MOV TEMP[7].x, TEMP[3].xxxx 59: ELSE :0 60: MOV TEMP[7].x, TEMP[6].xxxx 61: ENDIF 62: MOV TEMP[11].y, TEMP[7].xxxx 63: ABS TEMP[7].x, TEMP[5].xxxx 64: FSGE TEMP[7].x, -TEMP[7].xxxx, IMM[0].yyyy 65: UIF TEMP[7].xxxx :0 66: MOV TEMP[4].x, TEMP[4].xxxx 67: ELSE :0 68: MOV TEMP[4].x, TEMP[6].yyyy 69: ENDIF 70: MOV TEMP[11].z, TEMP[4].xxxx 71: ABS TEMP[4].x, TEMP[5].xxxx 72: FSGE TEMP[4].x, -TEMP[4].xxxx, IMM[0].yyyy 73: UIF TEMP[4].xxxx :0 74: MOV TEMP[4].x, IMM[0].yyyy 75: ELSE :0 76: MOV TEMP[4].x, TEMP[6].zzzz 77: ENDIF 78: MOV TEMP[11].w, TEMP[4].xxxx 79: MOV TEMP[1].xw, TEMP[11].xxxw 80: DP4 TEMP[5].x, TEMP[2], CONST[71] 81: MOV TEMP[3].z, TEMP[5].xxxx 82: ADD TEMP[6].xy, TEMP[11].yzzz, IMM[0].wwww 83: ABS TEMP[7].xy, TEMP[6].xyyy 84: ADD TEMP[6].xy, TEMP[7].xyyy, -CONST[67].zzzz 85: MUL TEMP[6].xy, TEMP[6].xyyy, CONST[67].wwww 86: MOV_SAT TEMP[7].xy, TEMP[6].xyyy 87: ADD TEMP[6].xy, -TEMP[7].xyyy, IMM[0].xxxx 88: MUL TEMP[6].x, TEMP[6].yyyy, TEMP[6].xxxx 89: MOV_SAT TEMP[7].xy, TEMP[11].yzzz 90: ADD TEMP[4].xyz, TEMP[4].xxxx, IMM[1].xyzz 91: MOV TEMP[9].y, IMM[0].yyyy 92: ABS TEMP[8].x, TEMP[4].xxxx 93: FSGE TEMP[8].x, -TEMP[8].xxxx, IMM[0].yyyy 94: UIF TEMP[8].xxxx :0 95: MOV TEMP[8].x, CONST[85].zzzz 96: ELSE :0 97: MOV TEMP[8].x, IMM[0].yyyy 98: ENDIF 99: ABS TEMP[10].x, TEMP[4].xxxx 100: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 101: UIF TEMP[10].xxxx :0 102: MOV TEMP[10].x, CONST[85].wwww 103: ELSE :0 104: MOV TEMP[10].x, IMM[0].yyyy 105: ENDIF 106: MOV TEMP[11].y, TEMP[10].xxxx 107: ABS TEMP[10].x, TEMP[4].xxxx 108: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 109: UIF TEMP[10].xxxx :0 110: MOV TEMP[10].x, CONST[85].xxxx 111: ELSE :0 112: MOV TEMP[10].x, IMM[0].yyyy 113: ENDIF 114: MOV TEMP[11].z, TEMP[10].xxxx 115: ABS TEMP[10].x, TEMP[4].xxxx 116: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 117: UIF TEMP[10].xxxx :0 118: MOV TEMP[10].x, CONST[85].yyyy 119: ELSE :0 120: MOV TEMP[10].x, IMM[0].yyyy 121: ENDIF 122: MOV TEMP[11].w, TEMP[10].xxxx 123: ABS TEMP[10].x, TEMP[4].yyyy 124: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 125: UIF TEMP[10].xxxx :0 126: MOV TEMP[10].x, CONST[86].zzzz 127: ELSE :0 128: MOV TEMP[10].x, TEMP[8].xxxx 129: ENDIF 130: ABS TEMP[8].x, TEMP[4].yyyy 131: FSGE TEMP[8].x, -TEMP[8].xxxx, IMM[0].yyyy 132: UIF TEMP[8].xxxx :0 133: MOV TEMP[8].x, CONST[86].wwww 134: ELSE :0 135: MOV TEMP[8].x, TEMP[11].yyyy 136: ENDIF 137: MOV TEMP[11].y, TEMP[8].xxxx 138: ABS TEMP[8].x, TEMP[4].yyyy 139: FSGE TEMP[8].x, -TEMP[8].xxxx, IMM[0].yyyy 140: UIF TEMP[8].xxxx :0 141: MOV TEMP[8].x, CONST[86].xxxx 142: ELSE :0 143: MOV TEMP[8].x, TEMP[11].zzzz 144: ENDIF 145: MOV TEMP[11].z, TEMP[8].xxxx 146: ABS TEMP[8].x, TEMP[4].yyyy 147: FSGE TEMP[8].x, -TEMP[8].xxxx, IMM[0].yyyy 148: UIF TEMP[8].xxxx :0 149: MOV TEMP[8].x, CONST[86].yyyy 150: ELSE :0 151: MOV TEMP[8].x, TEMP[11].wwww 152: ENDIF 153: MOV TEMP[11].w, TEMP[8].xxxx 154: ABS TEMP[8].x, TEMP[4].zzzz 155: FSGE TEMP[8].x, -TEMP[8].xxxx, IMM[0].yyyy 156: UIF TEMP[8].xxxx :0 157: MOV TEMP[8].x, CONST[87].zzzz 158: ELSE :0 159: MOV TEMP[8].x, TEMP[10].xxxx 160: ENDIF 161: MOV TEMP[11].x, TEMP[8].xxxx 162: ABS TEMP[8].x, TEMP[4].zzzz 163: FSGE TEMP[8].x, -TEMP[8].xxxx, IMM[0].yyyy 164: UIF TEMP[8].xxxx :0 165: MOV TEMP[8].x, CONST[87].wwww 166: ELSE :0 167: MOV TEMP[8].x, TEMP[11].yyyy 168: ENDIF 169: MOV TEMP[11].y, TEMP[8].xxxx 170: ABS TEMP[8].x, TEMP[4].zzzz 171: FSGE TEMP[8].x, -TEMP[8].xxxx, IMM[0].yyyy 172: UIF TEMP[8].xxxx :0 173: MOV TEMP[8].x, CONST[87].xxxx 174: ELSE :0 175: MOV TEMP[8].x, TEMP[11].zzzz 176: ENDIF 177: MOV TEMP[11].z, TEMP[8].xxxx 178: ABS TEMP[8].x, TEMP[4].zzzz 179: FSGE TEMP[8].x, -TEMP[8].xxxx, IMM[0].yyyy 180: UIF TEMP[8].xxxx :0 181: MOV TEMP[8].x, CONST[87].yyyy 182: ELSE :0 183: MOV TEMP[8].x, TEMP[11].wwww 184: ENDIF 185: MOV TEMP[11].w, TEMP[8].xxxx 186: MAD TEMP[3].xy, TEMP[7].xyyy, TEMP[11].xyyy, TEMP[11].zwww 187: ADD TEMP[4], TEMP[3], IMM[2].xxyy 188: TXL TEMP[7].x, TEMP[4], SAMP[3], SHADOW2D 189: MOV TEMP[4].x, TEMP[7].xxxx 190: ADD TEMP[7], TEMP[3], IMM[2].zxyy 191: ADD TEMP[8], TEMP[3], IMM[2].xzyy 192: ADD TEMP[10], TEMP[3], IMM[2].zzyy 193: TXL TEMP[12].x, TEMP[7], SAMP[3], SHADOW2D 194: MOV TEMP[4].y, TEMP[12].xxxx 195: TXL TEMP[12].x, TEMP[8], SAMP[3], SHADOW2D 196: MOV TEMP[4].z, TEMP[12].xxxx 197: TXL TEMP[12].x, TEMP[10], SAMP[3], SHADOW2D 198: MOV TEMP[4].w, TEMP[12].xxxx 199: DP4 TEMP[12].x, TEMP[4], IMM[1].wwww 200: ADD TEMP[4], TEMP[3], IMM[2].xyyy 201: TXL TEMP[13].x, TEMP[4], SAMP[3], SHADOW2D 202: MOV TEMP[4].x, TEMP[13].xxxx 203: ADD TEMP[7], TEMP[3], IMM[2].zyyy 204: TXL TEMP[13], TEMP[7], SAMP[3], SHADOW2D 205: MOV TEMP[7], TEMP[13] 206: ADD TEMP[8], TEMP[3], IMM[2].yzyy 207: TXL TEMP[8].x, TEMP[8], SAMP[3], SHADOW2D 208: ADD TEMP[10], TEMP[3], IMM[2].yxyy 209: TXL TEMP[10].x, TEMP[10], SAMP[3], SHADOW2D 210: MOV TEMP[4].y, TEMP[13].xxxx 211: MOV TEMP[4].z, TEMP[8].xxxx 212: MOV TEMP[4].w, TEMP[10].xxxx 213: DP4 TEMP[8].x, TEMP[4], IMM[2].wwww 214: MOV TEMP[1].z, TEMP[8].xxxx 215: MOV TEMP[10].xy, TEMP[3].xyyy 216: MOV TEMP[10].z, TEMP[5].xxxx 217: MOV TEMP[10].w, IMM[0].yyyy 218: TXL TEMP[10], TEMP[10], SAMP[3], SHADOW2D 219: MOV TEMP[4], TEMP[10] 220: ADD TEMP[8].x, TEMP[8].xxxx, TEMP[12].xxxx 221: MAD TEMP[8].x, TEMP[10].xxxx, IMM[3].xxxx, TEMP[8].xxxx 222: MOV TEMP[1].y, TEMP[8].xxxx 223: FSLT TEMP[8].x, TEMP[6].xxxx, IMM[0].xxxx 224: UIF TEMP[8].xxxx :0 225: ADD TEMP[8].xyz, TEMP[1].wwww, IMM[3].yzww 226: ABS TEMP[10].x, TEMP[8].xxxx 227: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 228: UIF TEMP[10].xxxx :0 229: MOV TEMP[10].x, CONST[73].xxxx 230: ELSE :0 231: MOV TEMP[10].x, IMM[0].yyyy 232: ENDIF 233: MOV TEMP[11].x, TEMP[10].xxxx 234: ABS TEMP[10].x, TEMP[8].xxxx 235: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 236: UIF TEMP[10].xxxx :0 237: MOV TEMP[10].x, CONST[73].yyyy 238: ELSE :0 239: MOV TEMP[10].x, IMM[0].yyyy 240: ENDIF 241: MOV TEMP[11].y, TEMP[10].xxxx 242: ABS TEMP[10].x, TEMP[8].xxxx 243: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 244: UIF TEMP[10].xxxx :0 245: MOV TEMP[10].x, CONST[73].zzzz 246: ELSE :0 247: MOV TEMP[10].x, IMM[0].yyyy 248: ENDIF 249: MOV TEMP[11].z, TEMP[10].xxxx 250: ABS TEMP[10].x, TEMP[8].xxxx 251: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 252: UIF TEMP[10].xxxx :0 253: MOV TEMP[10].x, CONST[73].wwww 254: ELSE :0 255: MOV TEMP[10].x, IMM[0].yyyy 256: ENDIF 257: MOV TEMP[11].w, TEMP[10].xxxx 258: MOV TEMP[4], TEMP[11] 259: ABS TEMP[10].x, TEMP[8].xxxx 260: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 261: UIF TEMP[10].xxxx :0 262: MOV TEMP[10].x, CONST[74].xxxx 263: ELSE :0 264: MOV TEMP[10].x, IMM[0].yyyy 265: ENDIF 266: MOV TEMP[11].x, TEMP[10].xxxx 267: ABS TEMP[10].x, TEMP[8].xxxx 268: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 269: UIF TEMP[10].xxxx :0 270: MOV TEMP[10].x, CONST[74].yyyy 271: ELSE :0 272: MOV TEMP[10].x, IMM[0].yyyy 273: ENDIF 274: MOV TEMP[11].y, TEMP[10].xxxx 275: ABS TEMP[10].x, TEMP[8].xxxx 276: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 277: UIF TEMP[10].xxxx :0 278: MOV TEMP[10].x, CONST[74].zzzz 279: ELSE :0 280: MOV TEMP[10].x, IMM[0].yyyy 281: ENDIF 282: MOV TEMP[11].z, TEMP[10].xxxx 283: ABS TEMP[10].x, TEMP[8].xxxx 284: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 285: UIF TEMP[10].xxxx :0 286: MOV TEMP[10].x, CONST[74].wwww 287: ELSE :0 288: MOV TEMP[10].x, IMM[0].yyyy 289: ENDIF 290: MOV TEMP[11].w, TEMP[10].xxxx 291: MOV TEMP[7], TEMP[11] 292: ABS TEMP[10].x, TEMP[8].yyyy 293: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 294: UIF TEMP[10].xxxx :0 295: MOV TEMP[10].x, CONST[77].xxxx 296: ELSE :0 297: MOV TEMP[10].x, TEMP[4].xxxx 298: ENDIF 299: MOV TEMP[11].x, TEMP[10].xxxx 300: ABS TEMP[10].x, TEMP[8].yyyy 301: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 302: UIF TEMP[10].xxxx :0 303: MOV TEMP[10].x, CONST[77].yyyy 304: ELSE :0 305: MOV TEMP[10].x, TEMP[4].yyyy 306: ENDIF 307: MOV TEMP[11].y, TEMP[10].xxxx 308: ABS TEMP[10].x, TEMP[8].yyyy 309: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 310: UIF TEMP[10].xxxx :0 311: MOV TEMP[10].x, CONST[77].zzzz 312: ELSE :0 313: MOV TEMP[10].x, TEMP[4].zzzz 314: ENDIF 315: MOV TEMP[11].z, TEMP[10].xxxx 316: ABS TEMP[10].x, TEMP[8].yyyy 317: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 318: UIF TEMP[10].xxxx :0 319: MOV TEMP[10].x, CONST[77].wwww 320: ELSE :0 321: MOV TEMP[10].x, TEMP[4].wwww 322: ENDIF 323: MOV TEMP[11].w, TEMP[10].xxxx 324: MOV TEMP[4], TEMP[11] 325: ABS TEMP[10].x, TEMP[8].yyyy 326: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 327: UIF TEMP[10].xxxx :0 328: MOV TEMP[10].x, CONST[78].xxxx 329: ELSE :0 330: MOV TEMP[10].x, TEMP[7].xxxx 331: ENDIF 332: MOV TEMP[11].x, TEMP[10].xxxx 333: ABS TEMP[10].x, TEMP[8].yyyy 334: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 335: UIF TEMP[10].xxxx :0 336: MOV TEMP[10].x, CONST[78].yyyy 337: ELSE :0 338: MOV TEMP[10].x, TEMP[7].yyyy 339: ENDIF 340: MOV TEMP[11].y, TEMP[10].xxxx 341: ABS TEMP[10].x, TEMP[8].yyyy 342: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 343: UIF TEMP[10].xxxx :0 344: MOV TEMP[10].x, CONST[78].zzzz 345: ELSE :0 346: MOV TEMP[10].x, TEMP[7].zzzz 347: ENDIF 348: MOV TEMP[11].z, TEMP[10].xxxx 349: ABS TEMP[10].x, TEMP[8].yyyy 350: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 351: UIF TEMP[10].xxxx :0 352: MOV TEMP[10].x, CONST[78].wwww 353: ELSE :0 354: MOV TEMP[10].x, TEMP[7].wwww 355: ENDIF 356: MOV TEMP[11].w, TEMP[10].xxxx 357: MOV TEMP[7], TEMP[11] 358: ABS TEMP[10].x, TEMP[8].zzzz 359: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 360: UIF TEMP[10].xxxx :0 361: MOV TEMP[10].x, CONST[81].xxxx 362: ELSE :0 363: MOV TEMP[10].x, TEMP[4].xxxx 364: ENDIF 365: MOV TEMP[11].x, TEMP[10].xxxx 366: ABS TEMP[10].x, TEMP[8].zzzz 367: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 368: UIF TEMP[10].xxxx :0 369: MOV TEMP[10].x, CONST[81].yyyy 370: ELSE :0 371: MOV TEMP[10].x, TEMP[4].yyyy 372: ENDIF 373: MOV TEMP[11].y, TEMP[10].xxxx 374: ABS TEMP[10].x, TEMP[8].zzzz 375: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 376: UIF TEMP[10].xxxx :0 377: MOV TEMP[10].x, CONST[81].zzzz 378: ELSE :0 379: MOV TEMP[10].x, TEMP[4].zzzz 380: ENDIF 381: MOV TEMP[11].z, TEMP[10].xxxx 382: ABS TEMP[10].x, TEMP[8].zzzz 383: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 384: UIF TEMP[10].xxxx :0 385: MOV TEMP[10].x, CONST[81].wwww 386: ELSE :0 387: MOV TEMP[10].x, TEMP[4].wwww 388: ENDIF 389: MOV TEMP[11].w, TEMP[10].xxxx 390: MOV TEMP[4], TEMP[11] 391: ABS TEMP[10].x, TEMP[8].zzzz 392: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 393: UIF TEMP[10].xxxx :0 394: MOV TEMP[10].x, CONST[82].xxxx 395: ELSE :0 396: MOV TEMP[10].x, TEMP[7].xxxx 397: ENDIF 398: MOV TEMP[11].x, TEMP[10].xxxx 399: ABS TEMP[10].x, TEMP[8].zzzz 400: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 401: UIF TEMP[10].xxxx :0 402: MOV TEMP[10].x, CONST[82].yyyy 403: ELSE :0 404: MOV TEMP[10].x, TEMP[7].yyyy 405: ENDIF 406: MOV TEMP[11].y, TEMP[10].xxxx 407: ABS TEMP[10].x, TEMP[8].zzzz 408: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 409: UIF TEMP[10].xxxx :0 410: MOV TEMP[10].x, CONST[82].zzzz 411: ELSE :0 412: MOV TEMP[10].x, TEMP[7].zzzz 413: ENDIF 414: MOV TEMP[11].z, TEMP[10].xxxx 415: ABS TEMP[10].x, TEMP[8].zzzz 416: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 417: UIF TEMP[10].xxxx :0 418: MOV TEMP[10].x, CONST[82].wwww 419: ELSE :0 420: MOV TEMP[10].x, TEMP[7].wwww 421: ENDIF 422: MOV TEMP[11].w, TEMP[10].xxxx 423: DP4 TEMP[4].x, TEMP[2], TEMP[4] 424: MOV_SAT TEMP[4].x, TEMP[4].xxxx 425: DP4 TEMP[10].x, TEMP[2], TEMP[11] 426: MOV_SAT TEMP[10].x, TEMP[10].xxxx 427: MOV TEMP[4].y, TEMP[10].xxxx 428: ABS TEMP[10].x, TEMP[8].xxxx 429: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 430: UIF TEMP[10].xxxx :0 431: MOV TEMP[10].x, CONST[86].zzzz 432: ELSE :0 433: MOV TEMP[10].x, IMM[0].yyyy 434: ENDIF 435: ABS TEMP[12].x, TEMP[8].xxxx 436: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 437: UIF TEMP[12].xxxx :0 438: MOV TEMP[12].x, CONST[86].wwww 439: ELSE :0 440: MOV TEMP[12].x, IMM[0].yyyy 441: ENDIF 442: MOV TEMP[11].y, TEMP[12].xxxx 443: ABS TEMP[12].x, TEMP[8].xxxx 444: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 445: UIF TEMP[12].xxxx :0 446: MOV TEMP[12].x, CONST[86].xxxx 447: ELSE :0 448: MOV TEMP[12].x, IMM[0].yyyy 449: ENDIF 450: MOV TEMP[11].z, TEMP[12].xxxx 451: ABS TEMP[12].x, TEMP[8].xxxx 452: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 453: UIF TEMP[12].xxxx :0 454: MOV TEMP[12].x, CONST[86].yyyy 455: ELSE :0 456: MOV TEMP[12].x, IMM[0].yyyy 457: ENDIF 458: MOV TEMP[11].w, TEMP[12].xxxx 459: ABS TEMP[12].x, TEMP[8].yyyy 460: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 461: UIF TEMP[12].xxxx :0 462: MOV TEMP[12].x, CONST[87].zzzz 463: ELSE :0 464: MOV TEMP[12].x, TEMP[10].xxxx 465: ENDIF 466: ABS TEMP[10].x, TEMP[8].yyyy 467: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 468: UIF TEMP[10].xxxx :0 469: MOV TEMP[10].x, CONST[87].wwww 470: ELSE :0 471: MOV TEMP[10].x, TEMP[11].yyyy 472: ENDIF 473: MOV TEMP[11].y, TEMP[10].xxxx 474: ABS TEMP[10].x, TEMP[8].yyyy 475: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 476: UIF TEMP[10].xxxx :0 477: MOV TEMP[10].x, CONST[87].xxxx 478: ELSE :0 479: MOV TEMP[10].x, TEMP[11].zzzz 480: ENDIF 481: MOV TEMP[11].z, TEMP[10].xxxx 482: ABS TEMP[10].x, TEMP[8].yyyy 483: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 484: UIF TEMP[10].xxxx :0 485: MOV TEMP[10].x, CONST[87].yyyy 486: ELSE :0 487: MOV TEMP[10].x, TEMP[11].wwww 488: ENDIF 489: MOV TEMP[11].w, TEMP[10].xxxx 490: ABS TEMP[10].x, TEMP[8].zzzz 491: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 492: UIF TEMP[10].xxxx :0 493: MOV TEMP[10].x, CONST[88].zzzz 494: ELSE :0 495: MOV TEMP[10].x, TEMP[12].xxxx 496: ENDIF 497: MOV TEMP[11].x, TEMP[10].xxxx 498: ABS TEMP[10].x, TEMP[8].zzzz 499: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 500: UIF TEMP[10].xxxx :0 501: MOV TEMP[10].x, CONST[88].wwww 502: ELSE :0 503: MOV TEMP[10].x, TEMP[11].yyyy 504: ENDIF 505: MOV TEMP[11].y, TEMP[10].xxxx 506: ABS TEMP[10].x, TEMP[8].zzzz 507: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 508: UIF TEMP[10].xxxx :0 509: MOV TEMP[10].x, CONST[88].xxxx 510: ELSE :0 511: MOV TEMP[10].x, TEMP[11].zzzz 512: ENDIF 513: MOV TEMP[11].z, TEMP[10].xxxx 514: ABS TEMP[10].x, TEMP[8].zzzz 515: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 516: UIF TEMP[10].xxxx :0 517: MOV TEMP[10].x, CONST[88].yyyy 518: ELSE :0 519: MOV TEMP[10].x, TEMP[11].wwww 520: ENDIF 521: MOV TEMP[11].w, TEMP[10].xxxx 522: MAD TEMP[3].xy, TEMP[4].xyyy, TEMP[11].xyyy, TEMP[11].zwww 523: ADD TEMP[2], TEMP[3], IMM[2].xxyy 524: TXL TEMP[10].x, TEMP[2], SAMP[3], SHADOW2D 525: MOV TEMP[2].x, TEMP[10].xxxx 526: ADD TEMP[9], TEMP[3], IMM[2].zxyy 527: ADD TEMP[4], TEMP[3], IMM[2].xzyy 528: ADD TEMP[7], TEMP[3], IMM[2].zzyy 529: TXL TEMP[10].x, TEMP[9], SAMP[3], SHADOW2D 530: MOV TEMP[2].y, TEMP[10].xxxx 531: TXL TEMP[10].x, TEMP[4], SAMP[3], SHADOW2D 532: MOV TEMP[2].z, TEMP[10].xxxx 533: TXL TEMP[10].x, TEMP[7], SAMP[3], SHADOW2D 534: MOV TEMP[2].w, TEMP[10].xxxx 535: DP4 TEMP[10].x, TEMP[2], IMM[1].wwww 536: ADD TEMP[2], TEMP[3], IMM[2].xyyy 537: TXL TEMP[12].x, TEMP[2], SAMP[3], SHADOW2D 538: MOV TEMP[2].x, TEMP[12].xxxx 539: ADD TEMP[9], TEMP[3], IMM[2].zyyy 540: TXL TEMP[9].x, TEMP[9], SAMP[3], SHADOW2D 541: ADD TEMP[4], TEMP[3], IMM[2].yzyy 542: TXL TEMP[4].x, TEMP[4], SAMP[3], SHADOW2D 543: ADD TEMP[7], TEMP[3], IMM[2].yxyy 544: TXL TEMP[7].x, TEMP[7], SAMP[3], SHADOW2D 545: MOV TEMP[2].y, TEMP[9].xxxx 546: MOV TEMP[2].z, TEMP[4].xxxx 547: MOV TEMP[2].w, TEMP[7].xxxx 548: DP4 TEMP[4].x, TEMP[2], IMM[2].wwww 549: MOV TEMP[1].w, TEMP[4].xxxx 550: MOV TEMP[7].xy, TEMP[3].xyyy 551: MOV TEMP[7].z, TEMP[5].xxxx 552: MOV TEMP[7].w, IMM[0].yyyy 553: TXL TEMP[5].x, TEMP[7], SAMP[3], SHADOW2D 554: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[10].xxxx 555: MOV TEMP[1].z, TEMP[4].xxxx 556: MAD TEMP[4].x, TEMP[5].xxxx, IMM[3].xxxx, TEMP[4].xxxx 557: MOV TEMP[1].z, TEMP[4].xxxx 558: MOV TEMP[11].xw, TEMP[1] 559: FSGE TEMP[5].x, TEMP[8].zzzz, IMM[0].yyyy 560: UIF TEMP[5].xxxx :0 561: MOV TEMP[5].x, IMM[0].xxxx 562: ELSE :0 563: MOV TEMP[5].x, TEMP[4].xxxx 564: ENDIF 565: MOV TEMP[11].z, TEMP[5].xxxx 566: MOV TEMP[1].x, TEMP[11].xxzw 567: LRP TEMP[2].x, TEMP[6].xxxx, TEMP[1].yyyy, TEMP[5].xxxx 568: MOV TEMP[1].y, TEMP[2].xxxx 569: ENDIF 570: ADD TEMP[2].xyz, -CONST[89].xyzz, IN[5].xyzz 571: DP3 TEMP[4].x, TEMP[2].xyzz, TEMP[2].xyzz 572: MAD TEMP[4].x, TEMP[4].xxxx, CONST[68].yyyy, CONST[68].xxxx 573: MOV_SAT TEMP[4].x, TEMP[4].xxxx 574: LRP TEMP[2].x, TEMP[4].xxxx, IMM[0].xxxx, TEMP[1].yyyy 575: ELSE :0 576: MOV TEMP[2].x, IMM[0].xxxx 577: ENDIF 578: MAD TEMP[4].xyz, IN[1].xyzz, TEMP[2].xxxx, IN[4].xyzz 579: MOV TEMP[1].yzw, TEMP[4].yxyz 580: UIF CONST[90].xxxx :0 581: DP3 TEMP[5].x, TEMP[4].xyzz, IMM[4].xyzz 582: RCP TEMP[5].x, TEMP[5].xxxx 583: MUL TEMP[5].x, TEMP[5].xxxx, IN[1].wwww 584: ADD TEMP[2].x, -TEMP[2].xxxx, IMM[0].xxxx 585: MAD TEMP[2].x, TEMP[5].xxxx, -TEMP[2].xxxx, IMM[0].xxxx 586: MUL TEMP[4].xyz, TEMP[4].zyxx, TEMP[2].xxxx 587: MAD TEMP[2].x, TEMP[2].xxxx, IMM[4].wwww, IMM[4].wwww 588: LRP TEMP[4].xyz, TEMP[2].xxxx, TEMP[4].zyxx, TEMP[4].xyzz 589: MOV TEMP[1].yzw, TEMP[4].yxyz 590: ENDIF 591: ADD TEMP[2].x, TEMP[0].wwww, IMM[1].yyyy 592: MAD TEMP[2].x, CONST[20].wwww, TEMP[2].xxxx, IMM[0].xxxx 593: ADD TEMP[4].x, TEMP[0].wwww, CONST[12].xxxx 594: ADD TEMP[5].xyz, IMM[1].yyyy, CONST[1].xyzz 595: MOV_SAT TEMP[4].x, TEMP[4].xxxx 596: MAD TEMP[4].xyz, TEMP[4].xxxx, TEMP[5].xyzz, IMM[0].xxxx 597: MUL TEMP[3].xyz, TEMP[1].yzww, TEMP[4].xyzz 598: MUL TEMP[5].x, TEMP[2].xxxx, CONST[1].wwww 599: MAD TEMP[2].x, TEMP[5].xxxx, IN[4].wwww, -TEMP[5].xxxx 600: MAD TEMP[5].x, CONST[12].wwww, TEMP[2].xxxx, TEMP[5].xxxx 601: DP3 TEMP[7].x, IN[3].xyzz, IN[2].xyzz 602: ADD TEMP[2].x, TEMP[7].xxxx, TEMP[7].xxxx 603: DP3 TEMP[7].x, IN[3].xyzz, IN[3].xyzz 604: MUL TEMP[6].xyz, TEMP[7].xxxx, IN[2].xyzz 605: MAD TEMP[6].xyz, TEMP[2].xxxx, IN[3].xyzz, -TEMP[6].xyzz 606: MOV TEMP[7].xyz, TEMP[6].xyzz 607: TEX TEMP[7].xyz, TEMP[7], SAMP[1], CUBE 608: MUL TEMP[6].xyz, TEMP[7].xyzz, CONST[30].zzzz 609: MUL TEMP[6].xyz, TEMP[1].xxxx, TEMP[6].xyzz 610: MUL TEMP[6].xyz, TEMP[6].xyzz, CONST[0].xyzz 611: MAD TEMP[1].xyz, TEMP[1].yzww, TEMP[4].xyzz, -CONST[19].zzzz 612: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[19].wwww 613: MOV_SAT TEMP[4].xyz, TEMP[1].xyzz 614: MAD TEMP[1].xyz, TEMP[6].xyzz, TEMP[4].xyzz, -TEMP[6].xyzz 615: MAD TEMP[1].xyz, CONST[19].yyyy, TEMP[1].xyzz, TEMP[6].xyzz 616: MAD TEMP[2].xyz, TEMP[1].xyzz, TEMP[1].xyzz, -TEMP[1].xyzz 617: MAD TEMP[1].xyz, CONST[19].xxxx, TEMP[2].xyzz, TEMP[1].xyzz 618: DP3 TEMP[4].x, TEMP[1].xyzz, IMM[5].xyzz 619: LRP TEMP[2].xyz, CONST[3].xyzz, TEMP[1].xyzz, TEMP[4].xxxx 620: MAD TEMP[0].xyz, TEMP[0].xyzz, TEMP[3].xyzz, TEMP[2].xyzz 621: ADD TEMP[1].xyz, CONST[20].xyzz, -IN[5].xyzz 622: DP3 TEMP[1].x, TEMP[1].xyzz, TEMP[1].xyzz 623: SQRT TEMP[1].x, TEMP[1].xxxx 624: MAD TEMP[1].x, TEMP[1].xxxx, CONST[21].wwww, CONST[21].xxxx 625: MOV_SAT TEMP[1].x, TEMP[1].xxxx 626: MIN TEMP[1].x, TEMP[1].xxxx, CONST[21].zzzz 627: ABS TEMP[2].x, CONST[12].yyyy 628: MUL TEMP[3].xyz, TEMP[0].xyzz, CONST[30].xxxx 629: MUL TEMP[4].x, CONST[29].wwww, IN[5].wwww 630: FSGE TEMP[6].x, -TEMP[2].xxxx, IMM[0].yyyy 631: UIF TEMP[6].xxxx :0 632: MOV TEMP[5].x, TEMP[5].xxxx 633: ELSE :0 634: MOV TEMP[5].x, TEMP[4].xxxx 635: ENDIF 636: MOV TEMP[2].w, TEMP[5].xxxx 637: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[1].xxxx 638: MAD TEMP[0].xyz, TEMP[0].xyzz, -CONST[30].xxxx, CONST[29].xyzz 639: MAD TEMP[2].xyz, TEMP[1].xxxx, TEMP[0].xyzz, TEMP[3].xyzz 640: MOV OUT[0], TEMP[2] 641: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0) %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 4) %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 8) %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 16) %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 20) %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 24) %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 28) %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 48) %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 52) %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 56) %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 192) %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 196) %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 204) %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 304) %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 308) %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 312) %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 316) %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 320) %43 = call float @llvm.SI.load.const(<16 x i8> %24, i32 324) %44 = call float @llvm.SI.load.const(<16 x i8> %24, i32 328) %45 = call float @llvm.SI.load.const(<16 x i8> %24, i32 332) %46 = call float @llvm.SI.load.const(<16 x i8> %24, i32 336) %47 = call float @llvm.SI.load.const(<16 x i8> %24, i32 344) %48 = call float @llvm.SI.load.const(<16 x i8> %24, i32 348) %49 = call float @llvm.SI.load.const(<16 x i8> %24, i32 464) %50 = call float @llvm.SI.load.const(<16 x i8> %24, i32 468) %51 = call float @llvm.SI.load.const(<16 x i8> %24, i32 472) %52 = call float @llvm.SI.load.const(<16 x i8> %24, i32 476) %53 = call float @llvm.SI.load.const(<16 x i8> %24, i32 480) %54 = call float @llvm.SI.load.const(<16 x i8> %24, i32 488) %55 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1080) %56 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1084) %57 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1088) %58 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1092) %59 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1168) %60 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1172) %61 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1176) %62 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1180) %63 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1184) %64 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1188) %65 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1192) %66 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1196) %67 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1232) %68 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1236) %69 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1240) %70 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1244) %71 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1248) %72 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1252) %73 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1256) %74 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1260) %75 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1296) %76 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1300) %77 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1304) %78 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1308) %79 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1312) %80 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1316) %81 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1320) %82 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1324) %83 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1376) %84 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1380) %85 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1384) %86 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1388) %87 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1392) %88 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1396) %89 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1400) %90 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1404) %91 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1408) %92 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1412) %93 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1416) %94 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1420) %95 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1424) %96 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1428) %97 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1432) %98 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1440) %99 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %100 = load <8 x i32>, <8 x i32> addrspace(2)* %99, align 32, !tbaa !0 %101 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %102 = load <4 x i32>, <4 x i32> addrspace(2)* %101, align 16, !tbaa !0 %103 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %104 = load <8 x i32>, <8 x i32> addrspace(2)* %103, align 32, !tbaa !0 %105 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %106 = load <4 x i32>, <4 x i32> addrspace(2)* %105, align 16, !tbaa !0 %107 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %108 = load <8 x i32>, <8 x i32> addrspace(2)* %107, align 32, !tbaa !0 %109 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %110 = load <4 x i32>, <4 x i32> addrspace(2)* %109, align 16, !tbaa !0 %111 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %112 = load <8 x i32>, <8 x i32> addrspace(2)* %111, align 32, !tbaa !0 %113 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %114 = load <4 x i32>, <4 x i32> addrspace(2)* %113, align 16, !tbaa !0 %115 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %116 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %117 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %118 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %119 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) %120 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %121 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) %122 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) %123 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) %124 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) %125 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) %126 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) %127 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, <2 x i32> %8) %128 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) %129 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %6, <2 x i32> %8) %130 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %6, <2 x i32> %8) %131 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %6, <2 x i32> %8) %132 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %6, <2 x i32> %8) %133 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %6, <2 x i32> %8) %134 = call float @llvm.SI.fs.interp(i32 3, i32 5, i32 %6, <2 x i32> %8) %135 = bitcast float %115 to i32 %136 = bitcast float %116 to i32 %137 = insertelement <2 x i32> undef, i32 %135, i32 0 %138 = insertelement <2 x i32> %137, i32 %136, i32 1 %139 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %138, <8 x i32> %100, <4 x i32> %102, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %140 = extractelement <4 x float> %139, i32 0 %141 = extractelement <4 x float> %139, i32 1 %142 = extractelement <4 x float> %139, i32 2 %143 = extractelement <4 x float> %139, i32 3 %144 = bitcast float %115 to i32 %145 = bitcast float %116 to i32 %146 = insertelement <2 x i32> undef, i32 %144, i32 0 %147 = insertelement <2 x i32> %146, i32 %145, i32 1 %148 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %147, <8 x i32> %108, <4 x i32> %110, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %149 = extractelement <4 x float> %148, i32 0 %150 = bitcast float %98 to i32 %151 = icmp eq i32 %150, 0 br i1 %151, label %ENDIF, label %IF IF: ; preds = %main_body %152 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1372) %153 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1368) %154 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1364) %155 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1360) %156 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1148) %157 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1144) %158 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1140) %159 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1136) %160 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1132) %161 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1128) %162 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1124) %163 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1120) %164 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1116) %165 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1112) %166 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1108) %167 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1104) %168 = fadd float %131, 0.000000e+00 %169 = fadd float %132, 0.000000e+00 %170 = fadd float %133, 0.000000e+00 %171 = fmul float %131, 0.000000e+00 %172 = fadd float %171, 1.000000e+00 %173 = fmul float %168, %167 %174 = fmul float %169, %166 %175 = fadd float %173, %174 %176 = fmul float %170, %165 %177 = fadd float %175, %176 %178 = fmul float %172, %164 %179 = fadd float %177, %178 %180 = fmul float %168, %163 %181 = fmul float %169, %162 %182 = fadd float %180, %181 %183 = fmul float %170, %161 %184 = fadd float %182, %183 %185 = fmul float %172, %160 %186 = fadd float %184, %185 %187 = call float @llvm.AMDIL.clamp.(float %179, float 0.000000e+00, float 1.000000e+00) %188 = call float @llvm.AMDIL.clamp.(float %186, float 0.000000e+00, float 1.000000e+00) %189 = fsub float %187, %179 %190 = fsub float %188, %186 %191 = fadd float %189, %190 %192 = fmul float %168, %59 %193 = fmul float %169, %60 %194 = fadd float %192, %193 %195 = fmul float %170, %61 %196 = fadd float %194, %195 %197 = fmul float %172, %62 %198 = fadd float %196, %197 %199 = fmul float %168, %63 %200 = fmul float %169, %64 %201 = fadd float %199, %200 %202 = fmul float %170, %65 %203 = fadd float %201, %202 %204 = fmul float %172, %66 %205 = fadd float %203, %204 %206 = call float @llvm.AMDIL.clamp.(float %198, float 0.000000e+00, float 1.000000e+00) %207 = call float @llvm.AMDIL.clamp.(float %205, float 0.000000e+00, float 1.000000e+00) %208 = fsub float %206, %198 %209 = fsub float %207, %205 %210 = fadd float %208, %209 %211 = fmul float %168, %67 %212 = fmul float %169, %68 %213 = fadd float %211, %212 %214 = fmul float %170, %69 %215 = fadd float %213, %214 %216 = fmul float %172, %70 %217 = fadd float %215, %216 %218 = fmul float %168, %71 %219 = fmul float %169, %72 %220 = fadd float %218, %219 %221 = fmul float %170, %73 %222 = fadd float %220, %221 %223 = fmul float %172, %74 %224 = fadd float %222, %223 %225 = call float @llvm.fabs.f32(float %210) %226 = fcmp ole float %225, -0.000000e+00 %. = select i1 %226, float %198, float %217 %227 = call float @llvm.fabs.f32(float %210) %228 = fcmp ole float %227, -0.000000e+00 %temp28.0 = select i1 %228, float %205, float %224 %229 = call float @llvm.fabs.f32(float %210) %230 = fcmp ole float %229, -0.000000e+00 %.230 = select i1 %230, float 1.000000e+00, float 2.000000e+00 %231 = call float @llvm.fabs.f32(float %191) %232 = fcmp ole float %231, -0.000000e+00 %temp28.2 = select i1 %232, float %179, float %. %233 = call float @llvm.fabs.f32(float %191) %234 = fcmp ole float %233, -0.000000e+00 %.temp28.0 = select i1 %234, float %186, float %temp28.0 %235 = call float @llvm.fabs.f32(float %191) %236 = fcmp ole float %235, -0.000000e+00 %temp16.1 = select i1 %236, float 0.000000e+00, float %.230 %237 = fmul float %168, %159 %238 = fmul float %169, %158 %239 = fadd float %237, %238 %240 = fmul float %170, %157 %241 = fadd float %239, %240 %242 = fmul float %172, %156 %243 = fadd float %241, %242 %244 = fadd float %temp28.2, -5.000000e-01 %245 = fadd float %.temp28.0, -5.000000e-01 %246 = call float @llvm.fabs.f32(float %244) %247 = call float @llvm.fabs.f32(float %245) %248 = fsub float %246, %55 %249 = fsub float %247, %55 %250 = fmul float %248, %56 %251 = fmul float %249, %56 %252 = call float @llvm.AMDIL.clamp.(float %250, float 0.000000e+00, float 1.000000e+00) %253 = call float @llvm.AMDIL.clamp.(float %251, float 0.000000e+00, float 1.000000e+00) %254 = fsub float 1.000000e+00, %252 %255 = fsub float 1.000000e+00, %253 %256 = fmul float %255, %254 %257 = call float @llvm.AMDIL.clamp.(float %temp28.2, float 0.000000e+00, float 1.000000e+00) %258 = call float @llvm.AMDIL.clamp.(float %.temp28.0, float 0.000000e+00, float 1.000000e+00) %259 = fadd float %temp16.1, -1.000000e+00 %260 = fadd float %temp16.1, -2.000000e+00 %261 = call float @llvm.fabs.f32(float %temp16.1) %262 = fcmp ole float %261, -0.000000e+00 %.231 = select i1 %262, float %153, float 0.000000e+00 %263 = call float @llvm.fabs.f32(float %temp16.1) %264 = fcmp ole float %263, -0.000000e+00 %temp40.0 = select i1 %264, float %152, float 0.000000e+00 %265 = call float @llvm.fabs.f32(float %temp16.1) %266 = fcmp ole float %265, -0.000000e+00 %.232 = select i1 %266, float %155, float 0.000000e+00 %267 = call float @llvm.fabs.f32(float %temp16.1) %268 = fcmp ole float %267, -0.000000e+00 %temp40.2 = select i1 %268, float %154, float 0.000000e+00 %269 = call float @llvm.fabs.f32(float %259) %270 = fcmp ole float %269, -0.000000e+00 %..231 = select i1 %270, float %85, float %.231 %271 = call float @llvm.fabs.f32(float %259) %272 = fcmp ole float %271, -0.000000e+00 %temp32.1 = select i1 %272, float %86, float %temp40.0 %273 = call float @llvm.fabs.f32(float %259) %274 = fcmp ole float %273, -0.000000e+00 %..232 = select i1 %274, float %83, float %.232 %275 = call float @llvm.fabs.f32(float %259) %276 = fcmp ole float %275, -0.000000e+00 %temp32.3 = select i1 %276, float %84, float %temp40.2 %277 = call float @llvm.fabs.f32(float %260) %278 = fcmp ole float %277, -0.000000e+00 %...231 = select i1 %278, float %89, float %..231 %279 = call float @llvm.fabs.f32(float %260) %280 = fcmp ole float %279, -0.000000e+00 %temp32.5 = select i1 %280, float %90, float %temp32.1 %281 = call float @llvm.fabs.f32(float %260) %282 = fcmp ole float %281, -0.000000e+00 %...232 = select i1 %282, float %87, float %..232 %283 = call float @llvm.fabs.f32(float %260) %284 = fcmp ole float %283, -0.000000e+00 %temp32.7 = select i1 %284, float %88, float %temp32.3 %285 = fmul float %257, %...231 %286 = fadd float %285, %...232 %287 = fmul float %258, %temp32.5 %288 = fadd float %287, %temp32.7 %289 = fadd float %286, 0x3F40000000000000 %290 = fadd float %288, 0x3F40000000000000 %291 = fadd float %243, 0.000000e+00 %292 = bitcast float %291 to i32 %293 = bitcast float %289 to i32 %294 = bitcast float %290 to i32 %295 = insertelement <4 x i32> undef, i32 %292, i32 0 %296 = insertelement <4 x i32> %295, i32 %293, i32 1 %297 = insertelement <4 x i32> %296, i32 %294, i32 2 %298 = insertelement <4 x i32> %297, i32 0, i32 3 %299 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %298, <8 x i32> %112, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %300 = extractelement <4 x float> %299, i32 0 %301 = fadd float %286, 0xBF40000000000000 %302 = fadd float %288, 0x3F40000000000000 %303 = fadd float %243, 0.000000e+00 %304 = fadd float %286, 0x3F40000000000000 %305 = fadd float %288, 0xBF40000000000000 %306 = fadd float %243, 0.000000e+00 %307 = fadd float %286, 0xBF40000000000000 %308 = fadd float %288, 0xBF40000000000000 %309 = fadd float %243, 0.000000e+00 %310 = bitcast float %303 to i32 %311 = bitcast float %301 to i32 %312 = bitcast float %302 to i32 %313 = insertelement <4 x i32> undef, i32 %310, i32 0 %314 = insertelement <4 x i32> %313, i32 %311, i32 1 %315 = insertelement <4 x i32> %314, i32 %312, i32 2 %316 = insertelement <4 x i32> %315, i32 0, i32 3 %317 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %316, <8 x i32> %112, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %318 = extractelement <4 x float> %317, i32 0 %319 = bitcast float %306 to i32 %320 = bitcast float %304 to i32 %321 = bitcast float %305 to i32 %322 = insertelement <4 x i32> undef, i32 %319, i32 0 %323 = insertelement <4 x i32> %322, i32 %320, i32 1 %324 = insertelement <4 x i32> %323, i32 %321, i32 2 %325 = insertelement <4 x i32> %324, i32 0, i32 3 %326 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %325, <8 x i32> %112, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %327 = extractelement <4 x float> %326, i32 0 %328 = bitcast float %309 to i32 %329 = bitcast float %307 to i32 %330 = bitcast float %308 to i32 %331 = insertelement <4 x i32> undef, i32 %328, i32 0 %332 = insertelement <4 x i32> %331, i32 %329, i32 1 %333 = insertelement <4 x i32> %332, i32 %330, i32 2 %334 = insertelement <4 x i32> %333, i32 0, i32 3 %335 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %334, <8 x i32> %112, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %336 = extractelement <4 x float> %335, i32 0 %337 = fmul float %300, 6.250000e-02 %338 = fmul float %318, 6.250000e-02 %339 = fadd float %337, %338 %340 = fmul float %327, 6.250000e-02 %341 = fadd float %339, %340 %342 = fmul float %336, 6.250000e-02 %343 = fadd float %341, %342 %344 = fadd float %286, 0x3F40000000000000 %345 = fadd float %288, 0.000000e+00 %346 = fadd float %243, 0.000000e+00 %347 = bitcast float %346 to i32 %348 = bitcast float %344 to i32 %349 = bitcast float %345 to i32 %350 = insertelement <4 x i32> undef, i32 %347, i32 0 %351 = insertelement <4 x i32> %350, i32 %348, i32 1 %352 = insertelement <4 x i32> %351, i32 %349, i32 2 %353 = insertelement <4 x i32> %352, i32 0, i32 3 %354 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %353, <8 x i32> %112, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %355 = extractelement <4 x float> %354, i32 0 %356 = fadd float %286, 0xBF40000000000000 %357 = fadd float %288, 0.000000e+00 %358 = fadd float %243, 0.000000e+00 %359 = bitcast float %358 to i32 %360 = bitcast float %356 to i32 %361 = bitcast float %357 to i32 %362 = insertelement <4 x i32> undef, i32 %359, i32 0 %363 = insertelement <4 x i32> %362, i32 %360, i32 1 %364 = insertelement <4 x i32> %363, i32 %361, i32 2 %365 = insertelement <4 x i32> %364, i32 0, i32 3 %366 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %365, <8 x i32> %112, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %367 = extractelement <4 x float> %366, i32 0 %368 = extractelement <4 x float> %366, i32 3 %369 = fadd float %286, 0.000000e+00 %370 = fadd float %288, 0xBF40000000000000 %371 = fadd float %243, 0.000000e+00 %372 = bitcast float %371 to i32 %373 = bitcast float %369 to i32 %374 = bitcast float %370 to i32 %375 = insertelement <4 x i32> undef, i32 %372, i32 0 %376 = insertelement <4 x i32> %375, i32 %373, i32 1 %377 = insertelement <4 x i32> %376, i32 %374, i32 2 %378 = insertelement <4 x i32> %377, i32 0, i32 3 %379 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %378, <8 x i32> %112, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %380 = extractelement <4 x float> %379, i32 0 %381 = fadd float %286, 0.000000e+00 %382 = fadd float %288, 0x3F40000000000000 %383 = fadd float %243, 0.000000e+00 %384 = bitcast float %383 to i32 %385 = bitcast float %381 to i32 %386 = bitcast float %382 to i32 %387 = insertelement <4 x i32> undef, i32 %384, i32 0 %388 = insertelement <4 x i32> %387, i32 %385, i32 1 %389 = insertelement <4 x i32> %388, i32 %386, i32 2 %390 = insertelement <4 x i32> %389, i32 0, i32 3 %391 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %390, <8 x i32> %112, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %392 = extractelement <4 x float> %391, i32 0 %393 = fmul float %355, 1.250000e-01 %394 = fmul float %367, 1.250000e-01 %395 = fadd float %393, %394 %396 = fmul float %380, 1.250000e-01 %397 = fadd float %395, %396 %398 = fmul float %392, 1.250000e-01 %399 = fadd float %397, %398 %400 = bitcast float %243 to i32 %401 = bitcast float %286 to i32 %402 = bitcast float %288 to i32 %403 = insertelement <4 x i32> undef, i32 %400, i32 0 %404 = insertelement <4 x i32> %403, i32 %401, i32 1 %405 = insertelement <4 x i32> %404, i32 %402, i32 2 %406 = insertelement <4 x i32> %405, i32 0, i32 3 %407 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %406, <8 x i32> %112, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %408 = extractelement <4 x float> %407, i32 0 %409 = fadd float %399, %343 %410 = fmul float %408, 2.500000e-01 %411 = fadd float %410, %409 %412 = fcmp olt float %256, 1.000000e+00 br i1 %412, label %IF111, label %ENDIF110 ENDIF: ; preds = %main_body, %ENDIF110 %temp8.0 = phi float [ %656, %ENDIF110 ], [ 1.000000e+00, %main_body ] %temp31.0 = phi float [ %temp31.1, %ENDIF110 ], [ undef, %main_body ] %413 = fmul float %117, %temp8.0 %414 = fadd float %413, %127 %415 = fmul float %118, %temp8.0 %416 = fadd float %415, %128 %417 = fmul float %119, %temp8.0 %418 = fadd float %417, %129 %419 = bitcast float %98 to i32 %420 = icmp eq i32 %419, 0 br i1 %420, label %ENDIF224, label %IF225 IF111: ; preds = %IF %421 = fadd float %temp16.1, 0.000000e+00 %422 = fadd float %temp16.1, -1.000000e+00 %423 = fadd float %temp16.1, -2.000000e+00 %424 = call float @llvm.fabs.f32(float %421) %425 = fcmp ole float %424, -0.000000e+00 %.233 = select i1 %425, float %59, float 0.000000e+00 %426 = call float @llvm.fabs.f32(float %421) %427 = fcmp ole float %426, -0.000000e+00 %temp40.5 = select i1 %427, float %60, float 0.000000e+00 %428 = call float @llvm.fabs.f32(float %421) %429 = fcmp ole float %428, -0.000000e+00 %.234 = select i1 %429, float %61, float 0.000000e+00 %430 = call float @llvm.fabs.f32(float %421) %431 = fcmp ole float %430, -0.000000e+00 %temp40.7 = select i1 %431, float %62, float 0.000000e+00 %432 = call float @llvm.fabs.f32(float %421) %433 = fcmp ole float %432, -0.000000e+00 %.235 = select i1 %433, float %63, float 0.000000e+00 %434 = call float @llvm.fabs.f32(float %421) %435 = fcmp ole float %434, -0.000000e+00 %temp40.9 = select i1 %435, float %64, float 0.000000e+00 %436 = call float @llvm.fabs.f32(float %421) %437 = fcmp ole float %436, -0.000000e+00 %.236 = select i1 %437, float %65, float 0.000000e+00 %438 = call float @llvm.fabs.f32(float %421) %439 = fcmp ole float %438, -0.000000e+00 %temp40.11 = select i1 %439, float %66, float 0.000000e+00 %440 = call float @llvm.fabs.f32(float %422) %441 = fcmp ole float %440, -0.000000e+00 %..233 = select i1 %441, float %67, float %.233 %442 = call float @llvm.fabs.f32(float %422) %443 = fcmp ole float %442, -0.000000e+00 %temp40.13 = select i1 %443, float %68, float %temp40.5 %444 = call float @llvm.fabs.f32(float %422) %445 = fcmp ole float %444, -0.000000e+00 %..234 = select i1 %445, float %69, float %.234 %446 = call float @llvm.fabs.f32(float %422) %447 = fcmp ole float %446, -0.000000e+00 %temp40.15 = select i1 %447, float %70, float %temp40.7 %448 = call float @llvm.fabs.f32(float %422) %449 = fcmp ole float %448, -0.000000e+00 %..235 = select i1 %449, float %71, float %.235 %450 = call float @llvm.fabs.f32(float %422) %451 = fcmp ole float %450, -0.000000e+00 %temp40.17 = select i1 %451, float %72, float %temp40.9 %452 = call float @llvm.fabs.f32(float %422) %453 = fcmp ole float %452, -0.000000e+00 %..236 = select i1 %453, float %73, float %.236 %454 = call float @llvm.fabs.f32(float %422) %455 = fcmp ole float %454, -0.000000e+00 %temp40.19 = select i1 %455, float %74, float %temp40.11 %456 = call float @llvm.fabs.f32(float %423) %457 = fcmp ole float %456, -0.000000e+00 %...233 = select i1 %457, float %75, float %..233 %458 = call float @llvm.fabs.f32(float %423) %459 = fcmp ole float %458, -0.000000e+00 %temp40.21 = select i1 %459, float %76, float %temp40.13 %460 = call float @llvm.fabs.f32(float %423) %461 = fcmp ole float %460, -0.000000e+00 %...234 = select i1 %461, float %77, float %..234 %462 = call float @llvm.fabs.f32(float %423) %463 = fcmp ole float %462, -0.000000e+00 %temp40.23 = select i1 %463, float %78, float %temp40.15 %464 = call float @llvm.fabs.f32(float %423) %465 = fcmp ole float %464, -0.000000e+00 %...235 = select i1 %465, float %79, float %..235 %466 = call float @llvm.fabs.f32(float %423) %467 = fcmp ole float %466, -0.000000e+00 %temp40.25 = select i1 %467, float %80, float %temp40.17 %468 = call float @llvm.fabs.f32(float %423) %469 = fcmp ole float %468, -0.000000e+00 %...236 = select i1 %469, float %81, float %..236 %470 = call float @llvm.fabs.f32(float %423) %471 = fcmp ole float %470, -0.000000e+00 %temp40.27 = select i1 %471, float %82, float %temp40.19 %472 = fmul float %168, %...233 %473 = fmul float %169, %temp40.21 %474 = fadd float %472, %473 %475 = fmul float %170, %...234 %476 = fadd float %474, %475 %477 = fmul float %172, %temp40.23 %478 = fadd float %476, %477 %479 = call float @llvm.AMDIL.clamp.(float %478, float 0.000000e+00, float 1.000000e+00) %480 = fmul float %168, %...235 %481 = fmul float %169, %temp40.25 %482 = fadd float %480, %481 %483 = fmul float %170, %...236 %484 = fadd float %482, %483 %485 = fmul float %172, %temp40.27 %486 = fadd float %484, %485 %487 = call float @llvm.AMDIL.clamp.(float %486, float 0.000000e+00, float 1.000000e+00) %488 = call float @llvm.fabs.f32(float %421) %489 = fcmp ole float %488, -0.000000e+00 %.237 = select i1 %489, float %85, float 0.000000e+00 %490 = call float @llvm.fabs.f32(float %421) %491 = fcmp ole float %490, -0.000000e+00 %temp48.1 = select i1 %491, float %86, float 0.000000e+00 %492 = call float @llvm.fabs.f32(float %421) %493 = fcmp ole float %492, -0.000000e+00 %.238 = select i1 %493, float %83, float 0.000000e+00 %494 = call float @llvm.fabs.f32(float %421) %495 = fcmp ole float %494, -0.000000e+00 %temp48.3 = select i1 %495, float %84, float 0.000000e+00 %496 = call float @llvm.fabs.f32(float %422) %497 = fcmp ole float %496, -0.000000e+00 %..237 = select i1 %497, float %89, float %.237 %498 = call float @llvm.fabs.f32(float %422) %499 = fcmp ole float %498, -0.000000e+00 %temp40.29 = select i1 %499, float %90, float %temp48.1 %500 = call float @llvm.fabs.f32(float %422) %501 = fcmp ole float %500, -0.000000e+00 %..238 = select i1 %501, float %87, float %.238 %502 = call float @llvm.fabs.f32(float %422) %503 = fcmp ole float %502, -0.000000e+00 %temp40.31 = select i1 %503, float %88, float %temp48.3 %504 = call float @llvm.fabs.f32(float %423) %505 = fcmp ole float %504, -0.000000e+00 %...237 = select i1 %505, float %93, float %..237 %506 = call float @llvm.fabs.f32(float %423) %507 = fcmp ole float %506, -0.000000e+00 %temp40.33 = select i1 %507, float %94, float %temp40.29 %508 = call float @llvm.fabs.f32(float %423) %509 = fcmp ole float %508, -0.000000e+00 %...238 = select i1 %509, float %91, float %..238 %510 = call float @llvm.fabs.f32(float %423) %511 = fcmp ole float %510, -0.000000e+00 %temp40.35 = select i1 %511, float %92, float %temp40.31 %512 = fmul float %479, %...237 %513 = fadd float %512, %...238 %514 = fmul float %487, %temp40.33 %515 = fadd float %514, %temp40.35 %516 = fadd float %513, 0x3F40000000000000 %517 = fadd float %515, 0x3F40000000000000 %518 = fadd float %243, 0.000000e+00 %519 = bitcast float %518 to i32 %520 = bitcast float %516 to i32 %521 = bitcast float %517 to i32 %522 = insertelement <4 x i32> undef, i32 %519, i32 0 %523 = insertelement <4 x i32> %522, i32 %520, i32 1 %524 = insertelement <4 x i32> %523, i32 %521, i32 2 %525 = insertelement <4 x i32> %524, i32 0, i32 3 %526 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %525, <8 x i32> %112, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %527 = extractelement <4 x float> %526, i32 0 %528 = fadd float %513, 0xBF40000000000000 %529 = fadd float %515, 0x3F40000000000000 %530 = fadd float %243, 0.000000e+00 %531 = fadd float %513, 0x3F40000000000000 %532 = fadd float %515, 0xBF40000000000000 %533 = fadd float %243, 0.000000e+00 %534 = fadd float %513, 0xBF40000000000000 %535 = fadd float %515, 0xBF40000000000000 %536 = fadd float %243, 0.000000e+00 %537 = bitcast float %530 to i32 %538 = bitcast float %528 to i32 %539 = bitcast float %529 to i32 %540 = insertelement <4 x i32> undef, i32 %537, i32 0 %541 = insertelement <4 x i32> %540, i32 %538, i32 1 %542 = insertelement <4 x i32> %541, i32 %539, i32 2 %543 = insertelement <4 x i32> %542, i32 0, i32 3 %544 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %543, <8 x i32> %112, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %545 = extractelement <4 x float> %544, i32 0 %546 = bitcast float %533 to i32 %547 = bitcast float %531 to i32 %548 = bitcast float %532 to i32 %549 = insertelement <4 x i32> undef, i32 %546, i32 0 %550 = insertelement <4 x i32> %549, i32 %547, i32 1 %551 = insertelement <4 x i32> %550, i32 %548, i32 2 %552 = insertelement <4 x i32> %551, i32 0, i32 3 %553 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %552, <8 x i32> %112, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %554 = extractelement <4 x float> %553, i32 0 %555 = bitcast float %536 to i32 %556 = bitcast float %534 to i32 %557 = bitcast float %535 to i32 %558 = insertelement <4 x i32> undef, i32 %555, i32 0 %559 = insertelement <4 x i32> %558, i32 %556, i32 1 %560 = insertelement <4 x i32> %559, i32 %557, i32 2 %561 = insertelement <4 x i32> %560, i32 0, i32 3 %562 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %561, <8 x i32> %112, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %563 = extractelement <4 x float> %562, i32 0 %564 = fmul float %527, 6.250000e-02 %565 = fmul float %545, 6.250000e-02 %566 = fadd float %564, %565 %567 = fmul float %554, 6.250000e-02 %568 = fadd float %566, %567 %569 = fmul float %563, 6.250000e-02 %570 = fadd float %568, %569 %571 = fadd float %513, 0x3F40000000000000 %572 = fadd float %515, 0.000000e+00 %573 = fadd float %243, 0.000000e+00 %574 = bitcast float %573 to i32 %575 = bitcast float %571 to i32 %576 = bitcast float %572 to i32 %577 = insertelement <4 x i32> undef, i32 %574, i32 0 %578 = insertelement <4 x i32> %577, i32 %575, i32 1 %579 = insertelement <4 x i32> %578, i32 %576, i32 2 %580 = insertelement <4 x i32> %579, i32 0, i32 3 %581 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %580, <8 x i32> %112, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %582 = extractelement <4 x float> %581, i32 0 %583 = fadd float %513, 0xBF40000000000000 %584 = fadd float %515, 0.000000e+00 %585 = fadd float %243, 0.000000e+00 %586 = bitcast float %585 to i32 %587 = bitcast float %583 to i32 %588 = bitcast float %584 to i32 %589 = insertelement <4 x i32> undef, i32 %586, i32 0 %590 = insertelement <4 x i32> %589, i32 %587, i32 1 %591 = insertelement <4 x i32> %590, i32 %588, i32 2 %592 = insertelement <4 x i32> %591, i32 0, i32 3 %593 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %592, <8 x i32> %112, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %594 = extractelement <4 x float> %593, i32 0 %595 = fadd float %513, 0.000000e+00 %596 = fadd float %515, 0xBF40000000000000 %597 = fadd float %243, 0.000000e+00 %598 = bitcast float %597 to i32 %599 = bitcast float %595 to i32 %600 = bitcast float %596 to i32 %601 = insertelement <4 x i32> undef, i32 %598, i32 0 %602 = insertelement <4 x i32> %601, i32 %599, i32 1 %603 = insertelement <4 x i32> %602, i32 %600, i32 2 %604 = insertelement <4 x i32> %603, i32 0, i32 3 %605 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %604, <8 x i32> %112, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %606 = extractelement <4 x float> %605, i32 0 %607 = fadd float %513, 0.000000e+00 %608 = fadd float %515, 0x3F40000000000000 %609 = fadd float %243, 0.000000e+00 %610 = bitcast float %609 to i32 %611 = bitcast float %607 to i32 %612 = bitcast float %608 to i32 %613 = insertelement <4 x i32> undef, i32 %610, i32 0 %614 = insertelement <4 x i32> %613, i32 %611, i32 1 %615 = insertelement <4 x i32> %614, i32 %612, i32 2 %616 = insertelement <4 x i32> %615, i32 0, i32 3 %617 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %616, <8 x i32> %112, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %618 = extractelement <4 x float> %617, i32 0 %619 = fmul float %582, 1.250000e-01 %620 = fmul float %594, 1.250000e-01 %621 = fadd float %619, %620 %622 = fmul float %606, 1.250000e-01 %623 = fadd float %621, %622 %624 = fmul float %618, 1.250000e-01 %625 = fadd float %623, %624 %626 = bitcast float %243 to i32 %627 = bitcast float %513 to i32 %628 = bitcast float %515 to i32 %629 = insertelement <4 x i32> undef, i32 %626, i32 0 %630 = insertelement <4 x i32> %629, i32 %627, i32 1 %631 = insertelement <4 x i32> %630, i32 %628, i32 2 %632 = insertelement <4 x i32> %631, i32 0, i32 3 %633 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %632, <8 x i32> %112, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %634 = extractelement <4 x float> %633, i32 0 %635 = fadd float %625, %570 %636 = fmul float %634, 2.500000e-01 %637 = fadd float %636, %635 %638 = fcmp oge float %423, 0.000000e+00 %.239 = select i1 %638, float 1.000000e+00, float %637 %639 = fsub float 1.000000e+00, %256 %640 = fmul float %411, %256 %641 = fmul float %.239, %639 %642 = fadd float %640, %641 br label %ENDIF110 ENDIF110: ; preds = %IF, %IF111 %temp5.0 = phi float [ %642, %IF111 ], [ %411, %IF ] %temp31.1 = phi float [ 0.000000e+00, %IF111 ], [ %368, %IF ] %643 = fsub float %131, %95 %644 = fsub float %132, %96 %645 = fsub float %133, %97 %646 = fmul float %643, %643 %647 = fmul float %644, %644 %648 = fadd float %647, %646 %649 = fmul float %645, %645 %650 = fadd float %648, %649 %651 = fmul float %650, %58 %652 = fadd float %651, %57 %653 = call float @llvm.AMDIL.clamp.(float %652, float 0.000000e+00, float 1.000000e+00) %654 = fsub float 1.000000e+00, %653 %655 = fmul float %temp5.0, %654 %656 = fadd float %653, %655 br label %ENDIF IF225: ; preds = %ENDIF %657 = fmul float %414, 0x3FCB333340000000 %658 = fmul float %416, 0x3FE6E48E80000000 %659 = fadd float %658, %657 %660 = fmul float %418, 0x3FB2752540000000 %661 = fadd float %659, %660 %662 = fdiv float 1.000000e+00, %661 %663 = fmul float %662, %120 %664 = fsub float 1.000000e+00, %temp8.0 %665 = fmul float %664, %663 %666 = fsub float 1.000000e+00, %665 %667 = fmul float %418, %666 %668 = fmul float %416, %666 %669 = fmul float %414, %666 %670 = fmul float %666, 5.000000e-01 %671 = fadd float %670, 5.000000e-01 %672 = fsub float 1.000000e+00, %671 %673 = fmul float %669, %671 %674 = fmul float %667, %672 %675 = fadd float %673, %674 %676 = fsub float 1.000000e+00, %671 %677 = fmul float %668, %671 %678 = fmul float %668, %676 %679 = fadd float %677, %678 %680 = fsub float 1.000000e+00, %671 %681 = fmul float %667, %671 %682 = fmul float %669, %680 %683 = fadd float %681, %682 br label %ENDIF224 ENDIF224: ; preds = %ENDIF, %IF225 %temp5.1 = phi float [ %675, %IF225 ], [ %414, %ENDIF ] %temp6.0 = phi float [ %679, %IF225 ], [ %416, %ENDIF ] %temp7.0 = phi float [ %683, %IF225 ], [ %418, %ENDIF ] %684 = fadd float %143, -1.000000e+00 %685 = fmul float %45, %684 %686 = fadd float %685, 1.000000e+00 %687 = fadd float %143, %35 %688 = fadd float %28, -1.000000e+00 %689 = fadd float %29, -1.000000e+00 %690 = fadd float %30, -1.000000e+00 %691 = call float @llvm.AMDIL.clamp.(float %687, float 0.000000e+00, float 1.000000e+00) %692 = fmul float %691, %688 %693 = fadd float %692, 1.000000e+00 %694 = fmul float %691, %689 %695 = fadd float %694, 1.000000e+00 %696 = fmul float %691, %690 %697 = fadd float %696, 1.000000e+00 %698 = fmul float %temp5.1, %693 %699 = fmul float %temp6.0, %695 %700 = fmul float %temp7.0, %697 %701 = fmul float %686, %31 %702 = fmul float %701, %130 %703 = fsub float %702, %701 %704 = fmul float %37, %703 %705 = fadd float %704, %701 %706 = fmul float %124, %121 %707 = fmul float %125, %122 %708 = fadd float %707, %706 %709 = fmul float %126, %123 %710 = fadd float %708, %709 %711 = fadd float %710, %710 %712 = fmul float %124, %124 %713 = fmul float %125, %125 %714 = fadd float %713, %712 %715 = fmul float %126, %126 %716 = fadd float %714, %715 %717 = fmul float %716, %121 %718 = fmul float %716, %122 %719 = fmul float %716, %123 %720 = fmul float %711, %124 %721 = fsub float %720, %717 %722 = fmul float %711, %125 %723 = fsub float %722, %718 %724 = fmul float %711, %126 %725 = fsub float %724, %719 %726 = insertelement <4 x float> undef, float %721, i32 0 %727 = insertelement <4 x float> %726, float %723, i32 1 %728 = insertelement <4 x float> %727, float %725, i32 2 %729 = insertelement <4 x float> %728, float %temp31.0, i32 3 %730 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %729) %731 = extractelement <4 x float> %730, i32 0 %732 = extractelement <4 x float> %730, i32 1 %733 = extractelement <4 x float> %730, i32 2 %734 = call float @llvm.fabs.f32(float %733) %735 = fdiv float 1.000000e+00, %734 %736 = fmul float %731, %735 %737 = fadd float %736, 1.500000e+00 %738 = fmul float %732, %735 %739 = fadd float %738, 1.500000e+00 %740 = bitcast float %739 to i32 %741 = bitcast float %737 to i32 %bc = bitcast <4 x float> %730 to <4 x i32> %742 = insertelement <4 x i32> undef, i32 %740, i32 0 %743 = insertelement <4 x i32> %742, i32 %741, i32 1 %744 = shufflevector <4 x i32> %743, <4 x i32> %bc, <4 x i32> %745 = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %744, <8 x i32> %104, <4 x i32> %106, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %746 = extractelement <4 x float> %745, i32 0 %747 = extractelement <4 x float> %745, i32 1 %748 = extractelement <4 x float> %745, i32 2 %749 = fmul float %746, %54 %750 = fmul float %747, %54 %751 = fmul float %748, %54 %752 = fmul float %149, %749 %753 = fmul float %149, %750 %754 = fmul float %149, %751 %755 = fmul float %752, %25 %756 = fmul float %753, %26 %757 = fmul float %754, %27 %758 = fmul float %temp5.1, %693 %759 = fsub float %758, %40 %760 = fmul float %temp6.0, %695 %761 = fsub float %760, %40 %762 = fmul float %temp7.0, %697 %763 = fsub float %762, %40 %764 = fmul float %759, %41 %765 = fmul float %761, %41 %766 = fmul float %763, %41 %767 = call float @llvm.AMDIL.clamp.(float %764, float 0.000000e+00, float 1.000000e+00) %768 = call float @llvm.AMDIL.clamp.(float %765, float 0.000000e+00, float 1.000000e+00) %769 = call float @llvm.AMDIL.clamp.(float %766, float 0.000000e+00, float 1.000000e+00) %770 = fmul float %755, %767 %771 = fsub float %770, %755 %772 = fmul float %756, %768 %773 = fsub float %772, %756 %774 = fmul float %757, %769 %775 = fsub float %774, %757 %776 = fmul float %39, %771 %777 = fadd float %776, %755 %778 = fmul float %39, %773 %779 = fadd float %778, %756 %780 = fmul float %39, %775 %781 = fadd float %780, %757 %782 = fmul float %777, %777 %783 = fsub float %782, %777 %784 = fmul float %779, %779 %785 = fsub float %784, %779 %786 = fmul float %781, %781 %787 = fsub float %786, %781 %788 = fmul float %38, %783 %789 = fadd float %788, %777 %790 = fmul float %38, %785 %791 = fadd float %790, %779 %792 = fmul float %38, %787 %793 = fadd float %792, %781 %794 = fmul float %789, 0x3FD322D0E0000000 %795 = fmul float %791, 0x3FE2C8B440000000 %796 = fadd float %795, %794 %797 = fmul float %793, 0x3FBD2F1AA0000000 %798 = fadd float %796, %797 %799 = fsub float 1.000000e+00, %32 %800 = fmul float %789, %32 %801 = fmul float %798, %799 %802 = fadd float %800, %801 %803 = fsub float 1.000000e+00, %33 %804 = fmul float %791, %33 %805 = fmul float %798, %803 %806 = fadd float %804, %805 %807 = fsub float 1.000000e+00, %34 %808 = fmul float %793, %34 %809 = fmul float %798, %807 %810 = fadd float %808, %809 %811 = fmul float %140, %698 %812 = fadd float %811, %802 %813 = fmul float %141, %699 %814 = fadd float %813, %806 %815 = fmul float %142, %700 %816 = fadd float %815, %810 %817 = fsub float %42, %131 %818 = fsub float %43, %132 %819 = fsub float %44, %133 %820 = fmul float %817, %817 %821 = fmul float %818, %818 %822 = fadd float %821, %820 %823 = fmul float %819, %819 %824 = fadd float %822, %823 %825 = call float @llvm.sqrt.f32(float %824) %826 = fmul float %825, %48 %827 = fadd float %826, %46 %828 = call float @llvm.AMDIL.clamp.(float %827, float 0.000000e+00, float 1.000000e+00) %829 = call float @llvm.minnum.f32(float %828, float %47) %830 = call float @llvm.fabs.f32(float %36) %831 = fmul float %812, %53 %832 = fmul float %814, %53 %833 = fmul float %816, %53 %834 = fmul float %52, %134 %835 = fcmp ole float %830, -0.000000e+00 %.240 = select i1 %835, float %705, float %834 %836 = fmul float %829, %829 %837 = fmul float %53, %812 %838 = fsub float %49, %837 %839 = fmul float %53, %814 %840 = fsub float %50, %839 %841 = fmul float %53, %816 %842 = fsub float %51, %841 %843 = fmul float %836, %838 %844 = fadd float %843, %831 %845 = fmul float %836, %840 %846 = fadd float %845, %832 %847 = fmul float %836, %842 %848 = fadd float %847, %833 %849 = call i32 @llvm.SI.packf16(float %844, float %846) %850 = bitcast i32 %849 to float %851 = call i32 @llvm.SI.packf16(float %848, float %.240) %852 = bitcast i32 %851 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %850, float %852, float %850, float %852) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} csgo_linux: /portage/portage/sys-devel/llvm-9999/work/llvm-9999/include/llvm/CodeGen/SlotIndexes.h:125: llvm::IndexListEntry* llvm::SlotIndex::listEntry() const: Assertion `isValid() && "Attempt to compare reserved index."' failed. crash_20151213112830_1.dmp[10017]: Uploading dump (out-of-process) /tmp/dumps/crash_20151213112830_1.dmp ./csgo.sh: line 57: 9921 Aborted ${DEBUGGER} "${GAMEROOT}"/${GAMEEXE} "$@" crash_20151213112830_1.dmp[10017]: Finished uploading minidump (out-of-process): success = yes crash_20151213112830_1.dmp[10017]: response: Discarded=1 crash_20151213112830_1.dmp[10017]: file ''/tmp/dumps/crash_20151213112830_1.dmp'', upload yes: ''Discarded=1''