SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = add i32 %5, %8 %15 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %13, i32 0, i32 %14) %16 = extractelement <4 x float> %15, i32 0 %17 = extractelement <4 x float> %15, i32 1 %18 = extractelement <4 x float> %15, i32 2 %19 = extractelement <4 x float> %15, i32 3 %20 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20, align 16, !tbaa !0 %22 = add i32 %5, %8 %23 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %21, i32 0, i32 %22) %24 = extractelement <4 x float> %23, i32 0 %25 = extractelement <4 x float> %23, i32 1 %26 = extractelement <4 x float> %23, i32 2 %27 = extractelement <4 x float> %23, i32 3 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %24, float %25, float %26, float %27) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %16, float %17, float %18, float %19) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[0:3], 0 idxen ; E00C2000 80000100 buffer_load_format_xyzw v[5:8], v0, s[4:7], 0 idxen ; E00C2000 80010500 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v5, v6, v7, v8 ; F800020F 08070605 exp 15, 12, 0, 1, 0, v1, v2, v3, v4 ; F80008CF 04030201 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 12 Code Size: 56 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %6) %24 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %6) %25 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %6) %26 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %6) %27 = call i32 @llvm.SI.packf16(float %23, float %24) %28 = bitcast i32 %27 to float %29 = call i32 @llvm.SI.packf16(float %25, float %26) %30 = bitcast i32 %29 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %28, float %30, float %28, float %30) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s10 ; BEFC030A v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002 v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_interp_mov_f32 v1, P0, 2, 0, [m0] ; C8060202 v_interp_mov_f32 v2, P0, 3, 0, [m0] ; C80A0302 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 4 Code Size: 40 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** Using breakpad crash handler Setting breakpad minidump AppID = 730 Forcing breakpad minidump interfaces to load Looking up breakpad interfaces from steamclient Calling BreakpadMiniDumpSystemInit Looking up breakpad interfaces from steamclient Calling BreakpadMiniDumpSystemInit Steam_SetMinidumpSteamID: Caching Steam ID: 76561197960306978 [API loaded yes] Steam_SetMinidumpSteamID: Setting Steam ID: 76561197960306978 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT 0: TEX OUT[0], IN[0], SAMP[0], 2D 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %12) %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %12) %29 = bitcast float %27 to i32 %30 = bitcast float %28 to i32 %31 = insertelement <2 x i32> undef, i32 %29, i32 0 %32 = insertelement <2 x i32> %31, i32 %30, i32 1 %33 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %32, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 %38 = call i32 @llvm.SI.packf16(float %34, float %35) %39 = bitcast i32 %38 to float %40 = call i32 @llvm.SI.packf16(float %36, float %37) %41 = bitcast i32 %40 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %39, float %41, float %39, float %41) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s10 ; BEFC030A v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030002 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 4 Code Size: 68 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..3] DCL TEMP[0], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MOV OUT[1], IMM[0].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 12) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 16) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 20) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 24) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 28) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 32) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 36) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 40) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 44) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 48) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 52) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 56) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 60) %30 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = add i32 %5, %8 %33 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %31, i32 0, i32 %32) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 %38 = fmul float %14, %34 %39 = fmul float %15, %34 %40 = fmul float %16, %34 %41 = fmul float %17, %34 %42 = fmul float %18, %35 %43 = fadd float %42, %38 %44 = fmul float %19, %35 %45 = fadd float %44, %39 %46 = fmul float %20, %35 %47 = fadd float %46, %40 %48 = fmul float %21, %35 %49 = fadd float %48, %41 %50 = fmul float %22, %36 %51 = fadd float %50, %43 %52 = fmul float %23, %36 %53 = fadd float %52, %45 %54 = fmul float %24, %36 %55 = fadd float %54, %47 %56 = fmul float %25, %36 %57 = fadd float %56, %49 %58 = fmul float %26, %37 %59 = fadd float %58, %51 %60 = fmul float %27, %37 %61 = fadd float %60, %53 %62 = fmul float %28, %37 %63 = fadd float %62, %55 %64 = fmul float %29, %37 %65 = fadd float %64, %57 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %59, float %61, float %63, float %65) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v4, s4, v0 ; 10080004 v_mul_f32_e32 v5, s5, v0 ; 100A0005 v_mul_f32_e32 v6, s6, v0 ; 100C0006 v_mul_f32_e32 v0, s7, v0 ; 10000007 v_mac_f32_e32 v4, s8, v1 ; 3E080208 v_mac_f32_e32 v5, s9, v1 ; 3E0A0209 v_mac_f32_e32 v6, s10, v1 ; 3E0C020A v_mac_f32_e32 v0, s11, v1 ; 3E00020B v_mac_f32_e32 v4, s12, v2 ; 3E08040C v_mac_f32_e32 v5, s13, v2 ; 3E0A040D v_mac_f32_e32 v6, s14, v2 ; 3E0C040E v_mac_f32_e32 v0, s15, v2 ; 3E00040F v_mac_f32_e32 v4, s16, v3 ; 3E080610 v_mac_f32_e32 v5, s17, v3 ; 3E0A0611 v_mac_f32_e32 v6, s18, v3 ; 3E0C0612 v_mac_f32_e32 v0, s0, v3 ; 3E000600 v_mov_b32_e32 v1, 0 ; 7E020280 exp 15, 32, 0, 0, 0, v1, v1, v1, v1 ; F800020F 01010101 exp 15, 12, 0, 1, 0, v4, v5, v6, v0 ; F80008CF 00060504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 184 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0], LOCAL 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV OUT[0], TEMP[0] 3: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %29 = bitcast float %27 to i32 %30 = bitcast float %28 to i32 %31 = insertelement <2 x i32> undef, i32 %29, i32 0 %32 = insertelement <2 x i32> %31, i32 %30, i32 1 %33 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %32, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 %38 = call i32 @llvm.SI.packf16(float %34, float %35) %39 = bitcast i32 %38 to float %40 = call i32 @llvm.SI.packf16(float %36, float %37) %41 = bitcast i32 %40 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %39, float %41, float %39, float %41) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s10 ; BEFC030A v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030002 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 4 Code Size: 68 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %6) %24 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %6) %25 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %6) %26 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %6) %27 = call i32 @llvm.SI.packf16(float %23, float %24) %28 = bitcast i32 %27 to float %29 = call i32 @llvm.SI.packf16(float %25, float %26) %30 = bitcast i32 %29 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %28, float %30, float %28, float %30) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s10 ; BEFC030A v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002 v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_interp_mov_f32 v1, P0, 2, 0, [m0] ; C8060202 v_interp_mov_f32 v2, P0, 3, 0, [m0] ; C80A0302 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 4 Code Size: 40 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x0 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, UINT 0: TEX OUT[0], IN[0], SAMP[0], 2D 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %12) %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %12) %29 = bitcast float %27 to i32 %30 = bitcast float %28 to i32 %31 = insertelement <2 x i32> undef, i32 %29, i32 0 %32 = insertelement <2 x i32> %31, i32 %30, i32 1 %33 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %32, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %34, float %35, float %36, float %37) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s10 ; BEFC030A v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030002 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 0, 0, 1, 1, v0, v1, v2, v3 ; F800180F 03020100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 4 Code Size: 60 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], CUBE, FLOAT 0: TEX OUT[0], IN[0], SAMP[0], CUBE 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %12) %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %12) %29 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %12) %30 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %12) %31 = insertelement <4 x float> undef, float %27, i32 0 %32 = insertelement <4 x float> %31, float %28, i32 1 %33 = insertelement <4 x float> %32, float %29, i32 2 %34 = insertelement <4 x float> %33, float %30, i32 3 %35 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %34) %36 = extractelement <4 x float> %35, i32 0 %37 = extractelement <4 x float> %35, i32 1 %38 = extractelement <4 x float> %35, i32 2 %39 = extractelement <4 x float> %35, i32 3 %40 = call float @llvm.fabs.f32(float %38) %41 = fdiv float 1.000000e+00, %40 %42 = fmul float %36, %41 %43 = fadd float %42, 1.500000e+00 %44 = fmul float %37, %41 %45 = fadd float %44, 1.500000e+00 %46 = bitcast float %45 to i32 %47 = bitcast float %43 to i32 %48 = bitcast float %39 to i32 %49 = insertelement <4 x i32> undef, i32 %46, i32 0 %50 = insertelement <4 x i32> %49, i32 %47, i32 1 %51 = insertelement <4 x i32> %50, i32 %48, i32 2 %52 = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %51, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %53 = extractelement <4 x float> %52, i32 0 %54 = extractelement <4 x float> %52, i32 1 %55 = extractelement <4 x float> %52, i32 2 %56 = extractelement <4 x float> %52, i32 3 %57 = call i32 @llvm.SI.packf16(float %53, float %54) %58 = bitcast i32 %57 to float %59 = call i32 @llvm.SI.packf16(float %55, float %56) %60 = bitcast i32 %59 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %58, float %60, float %58, float %60) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s10 ; BEFC030A s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v0, v0, 3, 0, [m0] ; C8000300 v_interp_p2_f32 v0, [v0], v1, 3, 0, [m0] ; C8010301 v_cubeid_f32 v7, v2, v3, v4 ; D2880007 04120702 v_cubema_f32 v0, v2, v3, v4 ; D28E0000 04120702 v_rcp_f32_e64 v0, |v0| ; D3540100 00000100 v_cubesc_f32 v1, v2, v3, v4 ; D28A0001 04120702 v_cubetc_f32 v2, v2, v3, v4 ; D28C0002 04120702 v_mov_b32_e32 v5, 0x3fc00000 ; 7E0A02FF 3FC00000 v_mad_f32 v6, v0, v2, v5 ; D2820006 04160500 v_mac_f32_e32 v5, v0, v1 ; 3E0A0300 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[5:8], s[8:15], s[0:3] ; F0800F00 00020005 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 12 Code Size: 144 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..3] DCL TEMP[0], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MOV OUT[1], IMM[0].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 12) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 16) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 20) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 24) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 28) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 32) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 36) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 40) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 44) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 48) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 52) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 56) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 60) %30 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = add i32 %5, %8 %33 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %31, i32 0, i32 %32) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 %38 = fmul float %14, %34 %39 = fmul float %15, %34 %40 = fmul float %16, %34 %41 = fmul float %17, %34 %42 = fmul float %18, %35 %43 = fadd float %42, %38 %44 = fmul float %19, %35 %45 = fadd float %44, %39 %46 = fmul float %20, %35 %47 = fadd float %46, %40 %48 = fmul float %21, %35 %49 = fadd float %48, %41 %50 = fmul float %22, %36 %51 = fadd float %50, %43 %52 = fmul float %23, %36 %53 = fadd float %52, %45 %54 = fmul float %24, %36 %55 = fadd float %54, %47 %56 = fmul float %25, %36 %57 = fadd float %56, %49 %58 = fmul float %26, %37 %59 = fadd float %58, %51 %60 = fmul float %27, %37 %61 = fadd float %60, %53 %62 = fmul float %28, %37 %63 = fadd float %62, %55 %64 = fmul float %29, %37 %65 = fadd float %64, %57 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %59, float %61, float %63, float %65) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v4, s4, v0 ; 10080004 v_mul_f32_e32 v5, s5, v0 ; 100A0005 v_mul_f32_e32 v6, s6, v0 ; 100C0006 v_mul_f32_e32 v0, s7, v0 ; 10000007 v_mac_f32_e32 v4, s8, v1 ; 3E080208 v_mac_f32_e32 v5, s9, v1 ; 3E0A0209 v_mac_f32_e32 v6, s10, v1 ; 3E0C020A v_mac_f32_e32 v0, s11, v1 ; 3E00020B v_mac_f32_e32 v4, s12, v2 ; 3E08040C v_mac_f32_e32 v5, s13, v2 ; 3E0A040D v_mac_f32_e32 v6, s14, v2 ; 3E0C040E v_mac_f32_e32 v0, s15, v2 ; 3E00040F v_mac_f32_e32 v4, s16, v3 ; 3E080610 v_mac_f32_e32 v5, s17, v3 ; 3E0A0611 v_mac_f32_e32 v6, s18, v3 ; 3E0C0612 v_mac_f32_e32 v0, s0, v3 ; 3E000600 v_mov_b32_e32 v1, 0 ; 7E020280 exp 15, 32, 0, 0, 0, v1, v1, v1, v1 ; F800020F 01010101 exp 15, 12, 0, 1, 0, v4, v5, v6, v0 ; F80008CF 00060504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 184 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], CUBE, FLOAT DCL TEMP[0], LOCAL 0: MOV TEMP[0].xyz, IN[0].xyzz 1: TEX TEMP[0], TEMP[0], SAMP[0], CUBE 2: MOV OUT[0], TEMP[0] 3: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %29 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %30 = insertelement <4 x float> undef, float %27, i32 0 %31 = insertelement <4 x float> %30, float %28, i32 1 %32 = insertelement <4 x float> %31, float %29, i32 2 %33 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %32) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 %38 = call float @llvm.fabs.f32(float %36) %39 = fdiv float 1.000000e+00, %38 %40 = fmul float %34, %39 %41 = fadd float %40, 1.500000e+00 %42 = fmul float %35, %39 %43 = fadd float %42, 1.500000e+00 %44 = bitcast float %43 to i32 %45 = bitcast float %41 to i32 %46 = bitcast float %37 to i32 %47 = insertelement <4 x i32> undef, i32 %44, i32 0 %48 = insertelement <4 x i32> %47, i32 %45, i32 1 %49 = insertelement <4 x i32> %48, i32 %46, i32 2 %50 = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %49, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %51 = extractelement <4 x float> %50, i32 0 %52 = extractelement <4 x float> %50, i32 1 %53 = extractelement <4 x float> %50, i32 2 %54 = extractelement <4 x float> %50, i32 3 %55 = call i32 @llvm.SI.packf16(float %51, float %52) %56 = bitcast i32 %55 to float %57 = call i32 @llvm.SI.packf16(float %53, float %54) %58 = bitcast i32 %57 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %56, float %58, float %56, float %58) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s10 ; BEFC030A s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v0, v0, 2, 0, [m0] ; C8000200 v_interp_p2_f32 v0, [v0], v1, 2, 0, [m0] ; C8010201 v_cubeid_f32 v6, v2, v3, v0 ; D2880006 04020702 v_cubema_f32 v1, v2, v3, v0 ; D28E0001 04020702 v_rcp_f32_e64 v1, |v1| ; D3540101 00000101 v_cubesc_f32 v7, v2, v3, v0 ; D28A0007 04020702 v_cubetc_f32 v0, v2, v3, v0 ; D28C0000 04020702 v_mov_b32_e32 v4, 0x3fc00000 ; 7E0802FF 3FC00000 v_mad_f32 v5, v1, v0, v4 ; D2820005 04120101 v_mac_f32_e32 v4, v1, v7 ; 3E080F01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[4:7], s[8:15], s[0:3] ; F0800F00 00020004 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 136 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], CLIPVERTEX DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL OUT[4], GENERIC[2] DCL CONST[0..57] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 1.0000, 2.2000, 0.0000, 0.0000} 0: ABS TEMP[0].x, CONST[50].xxxx 1: FSLT TEMP[1].x, -TEMP[0].xxxx, TEMP[0].xxxx 2: AND TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 3: ADD TEMP[0].xyz, IN[1].xyzz, IN[1].xyzz 4: LG2 TEMP[2].x, TEMP[0].xxxx 5: LG2 TEMP[3].x, TEMP[0].yyyy 6: MOV TEMP[2].y, TEMP[3].xxxx 7: LG2 TEMP[3].x, TEMP[0].zzzz 8: MOV TEMP[2].z, TEMP[3].xxxx 9: MUL TEMP[0].xyz, TEMP[2].xyzz, IMM[0].yyyy 10: EX2 TEMP[2].x, TEMP[0].xxxx 11: EX2 TEMP[3].x, TEMP[0].yyyy 12: MOV TEMP[2].y, TEMP[3].xxxx 13: EX2 TEMP[3].x, TEMP[0].zzzz 14: MOV TEMP[2].z, TEMP[3].xxxx 15: MUL TEMP[1].xyz, TEMP[1].xxxx, TEMP[2].xyzz 16: DP4 TEMP[3].x, IN[2], CONST[48] 17: DP4 TEMP[4].x, IN[2], CONST[49] 18: MOV TEMP[3].y, TEMP[4].xxxx 19: MOV TEMP[0].w, CONST[0].yyyy 20: MOV TEMP[2].w, IN[0].wwww 21: MAD TEMP[2].xyz, IN[3].xyzz, CONST[13].xxxx, IN[0].xyzz 22: DP4 TEMP[0].x, TEMP[2], CONST[54] 23: DP4 TEMP[4].x, TEMP[2], CONST[55] 24: MOV TEMP[0].y, TEMP[4].xxxx 25: DP4 TEMP[2].x, TEMP[2], CONST[56] 26: MOV TEMP[0].z, TEMP[2].xxxx 27: DP4 TEMP[2].x, TEMP[0], CONST[8] 28: DP4 TEMP[4].x, TEMP[0], CONST[9] 29: MOV TEMP[2].y, TEMP[4].xxxx 30: DP4 TEMP[5].x, TEMP[0], CONST[11] 31: MOV TEMP[2].w, TEMP[5].xxxx 32: DP4 TEMP[6].x, TEMP[0], CONST[10] 33: MOV TEMP[0].w, TEMP[6].xxxx 34: MOV TEMP[2].z, TEMP[6].xxxx 35: MOV TEMP[3].zw, CONST[0].xxxx 36: MOV TEMP[1].w, CONST[0].xxxx 37: MOV TEMP[7], TEMP[2] 38: MAD TEMP[6].x, TEMP[6].xxxx, CONST[0].zzzz, -TEMP[5].xxxx 39: MOV TEMP[2].z, TEMP[6].xxxx 40: MOV TEMP[2].y, -TEMP[4].xxxx 41: MAD TEMP[2].xy, CONST[57].xyyy, TEMP[5].xxxx, TEMP[2].xyyy 42: MOV OUT[2], TEMP[3] 43: MOV OUT[0], TEMP[2] 44: MOV OUT[1], TEMP[7] 45: MOV OUT[3], TEMP[1] 46: MOV OUT[4], TEMP[0] 47: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 128) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 132) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 136) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 140) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 144) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 148) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 152) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 156) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 160) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 164) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 168) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 172) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 176) %30 = call float @llvm.SI.load.const(<16 x i8> %13, i32 180) %31 = call float @llvm.SI.load.const(<16 x i8> %13, i32 184) %32 = call float @llvm.SI.load.const(<16 x i8> %13, i32 188) %33 = call float @llvm.SI.load.const(<16 x i8> %13, i32 208) %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 768) %35 = call float @llvm.SI.load.const(<16 x i8> %13, i32 772) %36 = call float @llvm.SI.load.const(<16 x i8> %13, i32 776) %37 = call float @llvm.SI.load.const(<16 x i8> %13, i32 780) %38 = call float @llvm.SI.load.const(<16 x i8> %13, i32 784) %39 = call float @llvm.SI.load.const(<16 x i8> %13, i32 788) %40 = call float @llvm.SI.load.const(<16 x i8> %13, i32 792) %41 = call float @llvm.SI.load.const(<16 x i8> %13, i32 796) %42 = call float @llvm.SI.load.const(<16 x i8> %13, i32 800) %43 = call float @llvm.SI.load.const(<16 x i8> %13, i32 864) %44 = call float @llvm.SI.load.const(<16 x i8> %13, i32 868) %45 = call float @llvm.SI.load.const(<16 x i8> %13, i32 872) %46 = call float @llvm.SI.load.const(<16 x i8> %13, i32 876) %47 = call float @llvm.SI.load.const(<16 x i8> %13, i32 880) %48 = call float @llvm.SI.load.const(<16 x i8> %13, i32 884) %49 = call float @llvm.SI.load.const(<16 x i8> %13, i32 888) %50 = call float @llvm.SI.load.const(<16 x i8> %13, i32 892) %51 = call float @llvm.SI.load.const(<16 x i8> %13, i32 896) %52 = call float @llvm.SI.load.const(<16 x i8> %13, i32 900) %53 = call float @llvm.SI.load.const(<16 x i8> %13, i32 904) %54 = call float @llvm.SI.load.const(<16 x i8> %13, i32 908) %55 = call float @llvm.SI.load.const(<16 x i8> %13, i32 912) %56 = call float @llvm.SI.load.const(<16 x i8> %13, i32 916) %57 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !tbaa !0 %59 = add i32 %5, %8 %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %58, i32 0, i32 %59) %61 = extractelement <4 x float> %60, i32 0 %62 = extractelement <4 x float> %60, i32 1 %63 = extractelement <4 x float> %60, i32 2 %64 = extractelement <4 x float> %60, i32 3 %65 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %66 = load <16 x i8>, <16 x i8> addrspace(2)* %65, align 16, !tbaa !0 %67 = add i32 %5, %8 %68 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %66, i32 0, i32 %67) %69 = extractelement <4 x float> %68, i32 0 %70 = extractelement <4 x float> %68, i32 1 %71 = extractelement <4 x float> %68, i32 2 %72 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %73 = load <16 x i8>, <16 x i8> addrspace(2)* %72, align 16, !tbaa !0 %74 = add i32 %5, %8 %75 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %73, i32 0, i32 %74) %76 = extractelement <4 x float> %75, i32 0 %77 = extractelement <4 x float> %75, i32 1 %78 = extractelement <4 x float> %75, i32 2 %79 = extractelement <4 x float> %75, i32 3 %80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0 %82 = add i32 %5, %8 %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %82) %84 = extractelement <4 x float> %83, i32 0 %85 = extractelement <4 x float> %83, i32 1 %86 = extractelement <4 x float> %83, i32 2 %87 = call float @llvm.fabs.f32(float %42) %88 = fsub float -0.000000e+00, %87 %89 = fcmp ogt float %87, %88 %90 = select i1 %89, float 1.000000e+00, float 0.000000e+00 %91 = fadd float %69, %69 %92 = fadd float %70, %70 %93 = fadd float %71, %71 %94 = call float @llvm.log2.f32(float %91) %95 = call float @llvm.log2.f32(float %92) %96 = call float @llvm.log2.f32(float %93) %97 = fmul float %94, 0x40019999A0000000 %98 = fmul float %95, 0x40019999A0000000 %99 = fmul float %96, 0x40019999A0000000 %100 = call float @llvm.AMDIL.exp.(float %97) %101 = call float @llvm.AMDIL.exp.(float %98) %102 = call float @llvm.AMDIL.exp.(float %99) %103 = fmul float %90, %100 %104 = fmul float %90, %101 %105 = fmul float %90, %102 %106 = fmul float %76, %34 %107 = fmul float %77, %35 %108 = fadd float %106, %107 %109 = fmul float %78, %36 %110 = fadd float %108, %109 %111 = fmul float %79, %37 %112 = fadd float %110, %111 %113 = fmul float %76, %38 %114 = fmul float %77, %39 %115 = fadd float %113, %114 %116 = fmul float %78, %40 %117 = fadd float %115, %116 %118 = fmul float %79, %41 %119 = fadd float %117, %118 %120 = fmul float %84, %33 %121 = fadd float %120, %61 %122 = fmul float %85, %33 %123 = fadd float %122, %62 %124 = fmul float %86, %33 %125 = fadd float %124, %63 %126 = fmul float %121, %43 %127 = fmul float %123, %44 %128 = fadd float %126, %127 %129 = fmul float %125, %45 %130 = fadd float %128, %129 %131 = fmul float %64, %46 %132 = fadd float %130, %131 %133 = fmul float %121, %47 %134 = fmul float %123, %48 %135 = fadd float %133, %134 %136 = fmul float %125, %49 %137 = fadd float %135, %136 %138 = fmul float %64, %50 %139 = fadd float %137, %138 %140 = fmul float %121, %51 %141 = fmul float %123, %52 %142 = fadd float %140, %141 %143 = fmul float %125, %53 %144 = fadd float %142, %143 %145 = fmul float %64, %54 %146 = fadd float %144, %145 %147 = fmul float %132, %17 %148 = fmul float %139, %18 %149 = fadd float %147, %148 %150 = fmul float %146, %19 %151 = fadd float %149, %150 %152 = fmul float %15, %20 %153 = fadd float %151, %152 %154 = fmul float %132, %21 %155 = fmul float %139, %22 %156 = fadd float %154, %155 %157 = fmul float %146, %23 %158 = fadd float %156, %157 %159 = fmul float %15, %24 %160 = fadd float %158, %159 %161 = fmul float %132, %29 %162 = fmul float %139, %30 %163 = fadd float %161, %162 %164 = fmul float %146, %31 %165 = fadd float %163, %164 %166 = fmul float %15, %32 %167 = fadd float %165, %166 %168 = fmul float %132, %25 %169 = fmul float %139, %26 %170 = fadd float %168, %169 %171 = fmul float %146, %27 %172 = fadd float %170, %171 %173 = fmul float %15, %28 %174 = fadd float %172, %173 %175 = fmul float %174, %16 %176 = fsub float %175, %167 %177 = fmul float %55, %167 %178 = fadd float %177, %153 %179 = fmul float %56, %167 %180 = fsub float %179, %160 %181 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 16 %182 = load <16 x i8>, <16 x i8> addrspace(2)* %181, align 16, !tbaa !0 %183 = call float @llvm.SI.load.const(<16 x i8> %182, i32 0) %184 = fmul float %183, %153 %185 = call float @llvm.SI.load.const(<16 x i8> %182, i32 4) %186 = fmul float %185, %160 %187 = fadd float %184, %186 %188 = call float @llvm.SI.load.const(<16 x i8> %182, i32 8) %189 = fmul float %188, %174 %190 = fadd float %187, %189 %191 = call float @llvm.SI.load.const(<16 x i8> %182, i32 12) %192 = fmul float %191, %167 %193 = fadd float %190, %192 %194 = call float @llvm.SI.load.const(<16 x i8> %182, i32 16) %195 = fmul float %194, %153 %196 = call float @llvm.SI.load.const(<16 x i8> %182, i32 20) %197 = fmul float %196, %160 %198 = fadd float %195, %197 %199 = call float @llvm.SI.load.const(<16 x i8> %182, i32 24) %200 = fmul float %199, %174 %201 = fadd float %198, %200 %202 = call float @llvm.SI.load.const(<16 x i8> %182, i32 28) %203 = fmul float %202, %167 %204 = fadd float %201, %203 %205 = call float @llvm.SI.load.const(<16 x i8> %182, i32 32) %206 = fmul float %205, %153 %207 = call float @llvm.SI.load.const(<16 x i8> %182, i32 36) %208 = fmul float %207, %160 %209 = fadd float %206, %208 %210 = call float @llvm.SI.load.const(<16 x i8> %182, i32 40) %211 = fmul float %210, %174 %212 = fadd float %209, %211 %213 = call float @llvm.SI.load.const(<16 x i8> %182, i32 44) %214 = fmul float %213, %167 %215 = fadd float %212, %214 %216 = call float @llvm.SI.load.const(<16 x i8> %182, i32 48) %217 = fmul float %216, %153 %218 = call float @llvm.SI.load.const(<16 x i8> %182, i32 52) %219 = fmul float %218, %160 %220 = fadd float %217, %219 %221 = call float @llvm.SI.load.const(<16 x i8> %182, i32 56) %222 = fmul float %221, %174 %223 = fadd float %220, %222 %224 = call float @llvm.SI.load.const(<16 x i8> %182, i32 60) %225 = fmul float %224, %167 %226 = fadd float %223, %225 %227 = call float @llvm.SI.load.const(<16 x i8> %182, i32 64) %228 = fmul float %227, %153 %229 = call float @llvm.SI.load.const(<16 x i8> %182, i32 68) %230 = fmul float %229, %160 %231 = fadd float %228, %230 %232 = call float @llvm.SI.load.const(<16 x i8> %182, i32 72) %233 = fmul float %232, %174 %234 = fadd float %231, %233 %235 = call float @llvm.SI.load.const(<16 x i8> %182, i32 76) %236 = fmul float %235, %167 %237 = fadd float %234, %236 %238 = call float @llvm.SI.load.const(<16 x i8> %182, i32 80) %239 = fmul float %238, %153 %240 = call float @llvm.SI.load.const(<16 x i8> %182, i32 84) %241 = fmul float %240, %160 %242 = fadd float %239, %241 %243 = call float @llvm.SI.load.const(<16 x i8> %182, i32 88) %244 = fmul float %243, %174 %245 = fadd float %242, %244 %246 = call float @llvm.SI.load.const(<16 x i8> %182, i32 92) %247 = fmul float %246, %167 %248 = fadd float %245, %247 %249 = call float @llvm.SI.load.const(<16 x i8> %182, i32 96) %250 = fmul float %249, %153 %251 = call float @llvm.SI.load.const(<16 x i8> %182, i32 100) %252 = fmul float %251, %160 %253 = fadd float %250, %252 %254 = call float @llvm.SI.load.const(<16 x i8> %182, i32 104) %255 = fmul float %254, %174 %256 = fadd float %253, %255 %257 = call float @llvm.SI.load.const(<16 x i8> %182, i32 108) %258 = fmul float %257, %167 %259 = fadd float %256, %258 %260 = call float @llvm.SI.load.const(<16 x i8> %182, i32 112) %261 = fmul float %260, %153 %262 = call float @llvm.SI.load.const(<16 x i8> %182, i32 116) %263 = fmul float %262, %160 %264 = fadd float %261, %263 %265 = call float @llvm.SI.load.const(<16 x i8> %182, i32 120) %266 = fmul float %265, %174 %267 = fadd float %264, %266 %268 = call float @llvm.SI.load.const(<16 x i8> %182, i32 124) %269 = fmul float %268, %167 %270 = fadd float %267, %269 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %112, float %119, float %14, float %14) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %103, float %104, float %105, float %14) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %132, float %139, float %146, float %174) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %178, float %180, float %176, float %167) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 13, i32 0, float %193, float %204, float %215, float %226) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 14, i32 0, float %237, float %248, float %259, float %270) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0x400ccccd ; 7E0202FF 400CCCCD v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C s_load_dwordx4 s[20:23], s[2:3], 0x0 ; C08A0300 s_load_dwordx4 s[40:43], s[2:3], 0x40 ; C0940340 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[9:12], v0, s[16:19], 0 idxen ; E00C2000 80040900 buffer_load_format_xyzw v[13:16], v0, s[8:11], 0 idxen ; E00C2000 80020D00 s_buffer_load_dword s37, s[20:23], 0x2c ; C212952C s_buffer_load_dword s38, s[20:23], 0x2d ; C213152D s_buffer_load_dword s39, s[20:23], 0x2e ; C213952E s_buffer_load_dword s0, s[20:23], 0x2f ; C200152F s_buffer_load_dword s44, s[20:23], 0x34 ; C2161534 s_buffer_load_dword s45, s[20:23], 0xd9 ; C21695D9 s_buffer_load_dword s46, s[20:23], 0xda ; C21715DA s_buffer_load_dword s47, s[20:23], 0xdb ; C21795DB s_buffer_load_dword s48, s[20:23], 0xdc ; C21815DC s_buffer_load_dword s49, s[20:23], 0xdd ; C21895DD s_buffer_load_dword s50, s[20:23], 0xc5 ; C21915C5 s_buffer_load_dword s51, s[20:23], 0xc6 ; C21995C6 s_buffer_load_dword s52, s[20:23], 0xc7 ; C21A15C7 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s0 ; 7E000200 s_buffer_load_dword s0, s[20:23], 0xc8 ; C20015C8 s_buffer_load_dword s53, s[20:23], 0xd8 ; C21A95D8 s_buffer_load_dword s54, s[20:23], 0xde ; C21B15DE s_buffer_load_dword s55, s[20:23], 0xdf ; C21B95DF s_buffer_load_dword s56, s[20:23], 0xe0 ; C21C15E0 s_buffer_load_dword s57, s[20:23], 0xe1 ; C21C95E1 s_buffer_load_dword s58, s[20:23], 0xe2 ; C21D15E2 s_buffer_load_dword s59, s[20:23], 0x22 ; C21D9522 s_buffer_load_dword s1, s[20:23], 0x23 ; C2009523 s_buffer_load_dword s60, s[20:23], 0x24 ; C21E1524 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_gt_f32_e64 s[2:3], |s0|, -|s0| ; D0080302 40000000 s_waitcnt vmcnt(0) ; BF8C0770 v_cndmask_b32_e64 v16, 0, 1.0, s[2:3] ; D2000010 0009E480 s_buffer_load_dword s61, s[20:23], 0x25 ; C21E9525 s_buffer_load_dword s62, s[20:23], 0x26 ; C21F1526 s_buffer_load_dword s63, s[20:23], 0xc0 ; C21F95C0 s_buffer_load_dword s64, s[20:23], 0xc1 ; C22015C1 s_buffer_load_dword s65, s[20:23], 0xc2 ; C22095C2 s_buffer_load_dword s66, s[20:23], 0xc3 ; C22115C3 v_mov_b32_e32 v17, s1 ; 7E220201 s_buffer_load_dword s67, s[20:23], 0xc4 ; C22195C4 s_buffer_load_dword s1, s[20:23], 0x0 ; C2009500 s_buffer_load_dword s6, s[20:23], 0x1 ; C2031501 s_buffer_load_dword s0, s[20:23], 0x2 ; C2001502 s_buffer_load_dword s68, s[20:23], 0x20 ; C2221520 s_buffer_load_dword s69, s[20:23], 0x21 ; C2229521 s_buffer_load_dword s70, s[20:23], 0xe3 ; C22315E3 s_buffer_load_dword s12, s[20:23], 0xe4 ; C20615E4 s_buffer_load_dword s10, s[20:23], 0xe5 ; C20515E5 s_buffer_load_dword s2, s[20:23], 0x27 ; C2011527 s_buffer_load_dword s71, s[20:23], 0x28 ; C2239528 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v18, s1 ; 7E240201 s_buffer_load_dword s72, s[20:23], 0x29 ; C2241529 s_buffer_load_dword s28, s[20:23], 0x2a ; C20E152A s_buffer_load_dword s3, s[20:23], 0x2b ; C201952B s_buffer_load_dword s13, s[40:43], 0x0 ; C206A900 s_buffer_load_dword s23, s[40:43], 0x1 ; C20BA901 s_buffer_load_dword s4, s[40:43], 0x2 ; C2022902 s_buffer_load_dword s1, s[40:43], 0x3 ; C200A903 s_buffer_load_dword s16, s[40:43], 0x4 ; C2082904 v_mov_b32_e32 v19, s2 ; 7E260202 s_buffer_load_dword s26, s[40:43], 0x5 ; C20D2905 s_buffer_load_dword s8, s[40:43], 0x6 ; C2042906 s_buffer_load_dword s2, s[40:43], 0x7 ; C2012907 s_buffer_load_dword s19, s[40:43], 0x8 ; C209A908 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v20, s3 ; 7E280203 s_buffer_load_dword s30, s[40:43], 0x9 ; C20F2909 s_buffer_load_dword s11, s[40:43], 0xa ; C205A90A s_buffer_load_dword s3, s[40:43], 0xb ; C201A90B s_buffer_load_dword s22, s[40:43], 0xc ; C20B290C s_buffer_load_dword s32, s[40:43], 0xd ; C210290D s_buffer_load_dword s15, s[40:43], 0xe ; C207A90E s_buffer_load_dword s5, s[40:43], 0xf ; C202A90F s_buffer_load_dword s25, s[40:43], 0x10 ; C20CA910 s_buffer_load_dword s33, s[40:43], 0x11 ; C210A911 s_buffer_load_dword s18, s[40:43], 0x12 ; C2092912 s_buffer_load_dword s7, s[40:43], 0x13 ; C203A913 s_buffer_load_dword s27, s[40:43], 0x14 ; C20DA914 s_buffer_load_dword s34, s[40:43], 0x15 ; C2112915 s_buffer_load_dword s20, s[40:43], 0x16 ; C20A2916 s_buffer_load_dword s9, s[40:43], 0x17 ; C204A917 s_buffer_load_dword s29, s[40:43], 0x18 ; C20EA918 s_buffer_load_dword s35, s[40:43], 0x19 ; C211A919 s_buffer_load_dword s21, s[40:43], 0x1a ; C20AA91A s_buffer_load_dword s14, s[40:43], 0x1b ; C207291B s_buffer_load_dword s31, s[40:43], 0x1c ; C20FA91C s_buffer_load_dword s36, s[40:43], 0x1d ; C212291D s_buffer_load_dword s24, s[40:43], 0x1e ; C20C291E s_buffer_load_dword s17, s[40:43], 0x1f ; C208A91F v_add_f32_e32 v6, v6, v6 ; 060C0D06 v_mul_f32_e32 v21, s64, v10 ; 102A1440 v_mul_f32_e32 v10, s50, v10 ; 10141432 v_mad_f32 v2, s44, v13, v2 ; D2820002 040A1A2C v_mad_f32 v3, s44, v14, v3 ; D2820003 040E1C2C v_mad_f32 v4, s44, v15, v4 ; D2820004 04121E2C v_mac_f32_e32 v21, s63, v9 ; 3E2A123F v_mac_f32_e32 v10, s67, v9 ; 3E141243 v_mul_f32_e32 v9, s45, v3 ; 1012062D v_mac_f32_e32 v21, s65, v11 ; 3E2A1641 v_mac_f32_e32 v10, s51, v11 ; 3E141633 v_mac_f32_e32 v21, s66, v12 ; 3E2A1842 v_mac_f32_e32 v10, s52, v12 ; 3E141834 v_mul_f32_e32 v11, s49, v3 ; 10160631 v_mul_f32_e32 v3, s57, v3 ; 10060639 v_mac_f32_e32 v9, s53, v2 ; 3E120435 v_mac_f32_e32 v11, s48, v2 ; 3E160430 v_mac_f32_e32 v3, s56, v2 ; 3E060438 v_mac_f32_e32 v9, s46, v4 ; 3E12082E v_mac_f32_e32 v11, s54, v4 ; 3E160836 v_mac_f32_e32 v3, s58, v4 ; 3E06083A v_mac_f32_e32 v9, s47, v5 ; 3E120A2F v_mac_f32_e32 v11, s55, v5 ; 3E160A37 v_mac_f32_e32 v3, s70, v5 ; 3E060A46 v_mul_f32_e32 v2, s69, v11 ; 10041645 v_mac_f32_e32 v2, s68, v9 ; 3E041244 v_mul_f32_e32 v4, s61, v11 ; 1008163D v_mac_f32_e32 v4, s60, v9 ; 3E08123C v_mul_f32_e32 v5, s38, v11 ; 100A1626 v_mul_f32_e32 v12, s72, v11 ; 10181648 v_mac_f32_e32 v5, s37, v9 ; 3E0A1225 v_mac_f32_e32 v12, s71, v9 ; 3E181247 v_mac_f32_e32 v2, s59, v3 ; 3E04063B v_mac_f32_e32 v4, s62, v3 ; 3E08063E v_mac_f32_e32 v5, s39, v3 ; 3E0A0627 v_add_f32_e32 v7, v7, v7 ; 060E0F07 v_add_f32_e32 v8, v8, v8 ; 06101108 v_log_f32_e32 v6, v6 ; 7E0C4F06 v_log_f32_e32 v7, v7 ; 7E0E4F07 v_log_f32_e32 v8, v8 ; 7E104F08 exp 15, 32, 0, 0, 0, v21, v10, v18, v18 ; F800020F 12120A15 v_mul_f32_e32 v6, v1, v6 ; 100C0D01 v_mul_f32_e32 v7, v1, v7 ; 100E0F01 v_mul_f32_e32 v1, v1, v8 ; 10021101 v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_exp_f32_e32 v7, v7 ; 7E0E4B07 v_exp_f32_e32 v1, v1 ; 7E024B01 v_mul_f32_e32 v6, v6, v16 ; 100C2106 v_mul_f32_e32 v7, v7, v16 ; 100E2107 v_mac_f32_e32 v12, s28, v3 ; 3E18061C v_mac_f32_e32 v2, s6, v17 ; 3E042206 v_mac_f32_e32 v4, s6, v19 ; 3E082606 v_mac_f32_e32 v5, s6, v0 ; 3E0A0006 v_mac_f32_e32 v12, s6, v20 ; 3E182806 v_mad_f32 v0, s12, v5, v2 ; D2820000 040A0A0C v_mad_f32 v8, s10, v5, -v4 ; D2820008 84120A0A s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v10, s23, v4 ; 10140817 v_mul_f32_e32 v13, s26, v4 ; 101A081A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v14, s30, v4 ; 101C081E v_mul_f32_e32 v15, s32, v4 ; 101E0820 v_mul_f32_e32 v17, s33, v4 ; 10220821 v_mul_f32_e32 v19, s34, v4 ; 10260822 v_mul_f32_e32 v20, s35, v4 ; 10280823 v_mul_f32_e32 v4, s36, v4 ; 10080824 v_mac_f32_e32 v10, s13, v2 ; 3E14040D v_mac_f32_e32 v13, s16, v2 ; 3E1A0410 v_mac_f32_e32 v14, s19, v2 ; 3E1C0413 v_mac_f32_e32 v15, s22, v2 ; 3E1E0416 v_mac_f32_e32 v17, s25, v2 ; 3E220419 v_mac_f32_e32 v19, s27, v2 ; 3E26041B v_mac_f32_e32 v20, s29, v2 ; 3E28041D v_mac_f32_e32 v4, s31, v2 ; 3E08041F v_mac_f32_e32 v10, s4, v12 ; 3E141804 v_mac_f32_e32 v13, s8, v12 ; 3E1A1808 v_mac_f32_e32 v14, s11, v12 ; 3E1C180B v_mac_f32_e32 v15, s15, v12 ; 3E1E180F v_mac_f32_e32 v17, s18, v12 ; 3E221812 v_mac_f32_e32 v19, s20, v12 ; 3E261814 v_mac_f32_e32 v20, s21, v12 ; 3E281815 v_mac_f32_e32 v4, s24, v12 ; 3E081818 v_mac_f32_e32 v10, s1, v5 ; 3E140A01 v_mac_f32_e32 v13, s2, v5 ; 3E1A0A02 v_mac_f32_e32 v14, s3, v5 ; 3E1C0A03 v_mac_f32_e32 v15, s5, v5 ; 3E1E0A05 v_mac_f32_e32 v17, s7, v5 ; 3E220A07 v_mac_f32_e32 v19, s9, v5 ; 3E260A09 v_mac_f32_e32 v20, s14, v5 ; 3E280A0E v_mac_f32_e32 v4, s17, v5 ; 3E080A11 v_mad_f32 v2, v12, s0, -v5 ; D2820002 8414010C v_mul_f32_e32 v1, v1, v16 ; 10022101 exp 15, 33, 0, 0, 0, v6, v7, v1, v18 ; F800021F 12010706 exp 15, 34, 0, 0, 0, v9, v11, v3, v12 ; F800022F 0C030B09 exp 15, 12, 0, 0, 0, v0, v8, v2, v5 ; F80000CF 05020800 exp 15, 13, 0, 0, 0, v10, v13, v14, v15 ; F80000DF 0F0E0D0A exp 15, 14, 0, 1, 0, v17, v19, v20, v4 ; F80008EF 04141311 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 80 VGPRS: 24 Code Size: 872 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[0..30] DCL TEMP[0..5], LOCAL IMM[0] FLT32 { -1.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: ADD TEMP[1].x, TEMP[0].wwww, IMM[0].xxxx 3: MAD TEMP[1].x, CONST[20].wwww, TEMP[1].xxxx, IMM[0].yyyy 4: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1].wwww 5: MAD TEMP[2].x, TEMP[1].xxxx, IN[1].wwww, -TEMP[1].xxxx 6: MAD TEMP[1].x, CONST[12].wwww, TEMP[2].xxxx, TEMP[1].xxxx 7: ABS TEMP[3].x, CONST[12].yyyy 8: MUL TEMP[4].x, CONST[29].wwww, IN[2].wwww 9: FSGE TEMP[5].x, -TEMP[3].xxxx, IMM[0].zzzz 10: UIF TEMP[5].xxxx :0 11: MOV TEMP[5].x, TEMP[1].xxxx 12: ELSE :0 13: MOV TEMP[5].x, TEMP[4].xxxx 14: ENDIF 15: MOV TEMP[3].w, TEMP[5].xxxx 16: ADD TEMP[4].x, TEMP[0].wwww, CONST[12].xxxx 17: MOV_SAT TEMP[4].x, TEMP[4].xxxx 18: ADD TEMP[1].xyz, IMM[0].xxxx, CONST[1].xyzz 19: MAD TEMP[1].xyz, TEMP[4].xxxx, TEMP[1].xyzz, IMM[0].yyyy 20: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xyzz 21: MUL TEMP[1].xyz, TEMP[0].xyzz, CONST[30].xxxx 22: MAD TEMP[0].xyz, TEMP[0].xyzz, -CONST[30].xxxx, CONST[29].xyzz 23: ADD TEMP[2].xyz, CONST[20].xyzz, -IN[2].xyzz 24: DP3 TEMP[2].x, TEMP[2].xyzz, TEMP[2].xyzz 25: SQRT TEMP[2].x, TEMP[2].xxxx 26: MAD TEMP[2].x, TEMP[2].xxxx, CONST[21].wwww, CONST[21].xxxx 27: MOV_SAT TEMP[2].x, TEMP[2].xxxx 28: MIN TEMP[2].x, TEMP[2].xxxx, CONST[21].zzzz 29: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx 30: MAD TEMP[3].xyz, TEMP[2].xxxx, TEMP[0].xyzz, TEMP[1].xyzz 31: MOV OUT[0], TEMP[3] 32: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 28) %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 192) %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 196) %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 204) %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 320) %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 324) %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 328) %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 332) %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 336) %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 344) %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 348) %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 464) %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 468) %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 472) %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 476) %43 = call float @llvm.SI.load.const(<16 x i8> %24, i32 480) %44 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %45 = load <8 x i32>, <8 x i32> addrspace(2)* %44, align 32, !tbaa !0 %46 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %47 = load <4 x i32>, <4 x i32> addrspace(2)* %46, align 16, !tbaa !0 %48 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %49 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %50 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %51 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) %52 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) %53 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) %54 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) %55 = bitcast float %48 to i32 %56 = bitcast float %49 to i32 %57 = insertelement <2 x i32> undef, i32 %55, i32 0 %58 = insertelement <2 x i32> %57, i32 %56, i32 1 %59 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %58, <8 x i32> %45, <4 x i32> %47, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %60 = extractelement <4 x float> %59, i32 0 %61 = extractelement <4 x float> %59, i32 1 %62 = extractelement <4 x float> %59, i32 2 %63 = extractelement <4 x float> %59, i32 3 %64 = fadd float %63, -1.000000e+00 %65 = fmul float %35, %64 %66 = fadd float %65, 1.000000e+00 %67 = fmul float %66, %28 %68 = fmul float %67, %50 %69 = fsub float %68, %67 %70 = fmul float %31, %69 %71 = fadd float %70, %67 %72 = call float @llvm.fabs.f32(float %30) %73 = fmul float %42, %54 %74 = fcmp ole float %72, -0.000000e+00 %. = select i1 %74, float %71, float %73 %75 = fadd float %63, %29 %76 = call float @llvm.AMDIL.clamp.(float %75, float 0.000000e+00, float 1.000000e+00) %77 = fadd float %25, -1.000000e+00 %78 = fadd float %26, -1.000000e+00 %79 = fadd float %27, -1.000000e+00 %80 = fmul float %76, %77 %81 = fadd float %80, 1.000000e+00 %82 = fmul float %76, %78 %83 = fadd float %82, 1.000000e+00 %84 = fmul float %76, %79 %85 = fadd float %84, 1.000000e+00 %86 = fmul float %60, %81 %87 = fmul float %61, %83 %88 = fmul float %62, %85 %89 = fmul float %86, %43 %90 = fmul float %87, %43 %91 = fmul float %88, %43 %92 = fmul float %43, %86 %93 = fsub float %39, %92 %94 = fmul float %43, %87 %95 = fsub float %40, %94 %96 = fmul float %43, %88 %97 = fsub float %41, %96 %98 = fsub float %32, %51 %99 = fsub float %33, %52 %100 = fsub float %34, %53 %101 = fmul float %98, %98 %102 = fmul float %99, %99 %103 = fadd float %102, %101 %104 = fmul float %100, %100 %105 = fadd float %103, %104 %106 = call float @llvm.sqrt.f32(float %105) %107 = fmul float %106, %38 %108 = fadd float %107, %36 %109 = call float @llvm.AMDIL.clamp.(float %108, float 0.000000e+00, float 1.000000e+00) %110 = call float @llvm.minnum.f32(float %109, float %37) %111 = fmul float %110, %110 %112 = fmul float %111, %93 %113 = fadd float %112, %89 %114 = fmul float %111, %95 %115 = fadd float %114, %90 %116 = fmul float %111, %97 %117 = fadd float %116, %91 %118 = call i32 @llvm.SI.packf16(float %113, float %115) %119 = bitcast i32 %118 to float %120 = call i32 @llvm.SI.packf16(float %117, float %.) %121 = bitcast i32 %120 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %119, float %121, float %119, float %121) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s11, s[0:3], 0x6 ; C2058106 s_buffer_load_dword s12, s[0:3], 0x7 ; C2060107 s_buffer_load_dword s13, s[0:3], 0x30 ; C2068130 s_buffer_load_dword s14, s[0:3], 0x31 ; C2070131 s_buffer_load_dword s15, s[0:3], 0x33 ; C2078133 s_buffer_load_dword s16, s[0:3], 0x50 ; C2080150 s_buffer_load_dword s17, s[0:3], 0x51 ; C2088151 s_buffer_load_dword s18, s[0:3], 0x52 ; C2090152 s_buffer_load_dword s19, s[0:3], 0x53 ; C2098153 s_buffer_load_dword s20, s[0:3], 0x54 ; C20A0154 s_buffer_load_dword s21, s[0:3], 0x56 ; C20A8156 s_buffer_load_dword s22, s[0:3], 0x57 ; C20B0157 s_buffer_load_dword s23, s[0:3], 0x74 ; C20B8174 s_buffer_load_dword s24, s[0:3], 0x75 ; C20C0175 s_buffer_load_dword s25, s[0:3], 0x76 ; C20C8176 s_buffer_load_dword s26, s[0:3], 0x77 ; C20D0177 s_buffer_load_dword s0, s[0:3], 0x78 ; C2000178 s_mov_b32 m0, s10 ; BEFC030A s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 3, 1, [m0] ; C8100700 s_load_dwordx4 s[4:7], s[4:5], 0x0 ; C0820500 v_interp_p2_f32 v4, [v4], v1, 3, 1, [m0] ; C8110701 v_interp_p1_f32 v5, v0, 0, 2, [m0] ; C8140800 v_interp_p2_f32 v5, [v5], v1, 0, 2, [m0] ; C8150801 v_interp_p1_f32 v6, v0, 1, 2, [m0] ; C8180900 v_interp_p2_f32 v6, [v6], v1, 1, 2, [m0] ; C8190901 v_interp_p1_f32 v7, v0, 2, 2, [m0] ; C81C0A00 v_interp_p2_f32 v7, [v7], v1, 2, 2, [m0] ; C81D0A01 v_interp_p1_f32 v0, v0, 3, 2, [m0] ; C8000B00 v_interp_p2_f32 v0, [v0], v1, 3, 2, [m0] ; C8010B01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[28:35], s[4:7] ; F0800F00 00270802 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v11, s19, -s19 ; D2820001 804C270B v_mad_f32 v1, v1, s12, s12 ; D2820001 00301901 v_mad_f32 v2, v1, v4, -v1 ; D2820002 84060901 v_mac_f32_e32 v1, s15, v2 ; 3E02040F v_mul_f32_e32 v0, s26, v0 ; 1000001A v_mov_b32_e32 v2, 0x80000000 ; 7E0402FF 80000000 v_cmp_le_f32_e64 vcc, |s14|, v2 ; D006016A 0002040E v_cndmask_b32_e32 v0, v0, v1 ; 00000300 v_add_f32_e32 v1, s13, v11 ; 0602160D v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_mad_f32 v2, s8, v1, -v1 ; D2820002 84060208 v_mad_f32 v3, s9, v1, -v1 ; D2820003 84060209 v_mad_f32 v1, s11, v1, -v1 ; D2820001 8406020B v_mad_f32 v2, v8, v2, v8 ; D2820002 04220508 v_mad_f32 v3, v9, v3, v9 ; D2820003 04260709 v_mad_f32 v1, v10, v1, v10 ; D2820001 042A030A v_mul_f32_e32 v4, s0, v2 ; 10080400 v_mul_f32_e32 v8, s0, v3 ; 10100600 v_mul_f32_e32 v9, s0, v1 ; 10120200 v_mov_b32_e32 v10, s23 ; 7E140217 v_mad_f32 v2, -v2, s0, v10 ; D2820002 24280102 v_mov_b32_e32 v10, s24 ; 7E140218 v_mad_f32 v3, -v3, s0, v10 ; D2820003 24280103 v_mov_b32_e32 v10, s25 ; 7E140219 v_mad_f32 v1, -v1, s0, v10 ; D2820001 24280101 v_sub_f32_e32 v5, s16, v5 ; 080A0A10 v_sub_f32_e32 v6, s17, v6 ; 080C0C11 v_sub_f32_e32 v7, s18, v7 ; 080E0E12 v_mul_f32_e32 v5, v5, v5 ; 100A0B05 v_mac_f32_e32 v5, v6, v6 ; 3E0A0D06 v_mac_f32_e32 v5, v7, v7 ; 3E0A0F07 v_sqrt_f32_e32 v5, v5 ; 7E0A6705 v_mov_b32_e32 v6, s20 ; 7E0C0214 v_mac_f32_e32 v6, s22, v5 ; 3E0C0A16 v_add_f32_e64 v5, 0, v6 clamp ; D2060805 00020C80 v_min_f32_e32 v5, s21, v5 ; 1E0A0A15 v_mul_f32_e32 v5, v5, v5 ; 100A0B05 v_mac_f32_e32 v4, v2, v5 ; 3E080B02 v_mac_f32_e32 v8, v3, v5 ; 3E100B03 v_mac_f32_e32 v9, v1, v5 ; 3E120B01 v_cvt_pkrtz_f16_f32_e32 v1, v4, v8 ; 5E021104 v_cvt_pkrtz_f16_f32_e32 v0, v9, v0 ; 5E000109 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 12 Code Size: 416 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x0 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %6) %24 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %6) %25 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %6) %26 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %6) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %23, float %24, float %25, float %26) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s10 ; BEFC030A v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002 v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 v_interp_mov_f32 v2, P0, 2, 0, [m0] ; C80A0202 v_interp_mov_f32 v3, P0, 3, 0, [m0] ; C80E0302 exp 15, 0, 0, 1, 1, v0, v1, v2, v3 ; F800180F 03020100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 4 Code Size: 32 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x0 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL OUT[1], POSITION DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV OUT[0], IMM[0].xxxy 1: TEX OUT[1].z, IN[0], SAMP[0], 2D 2: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %12) %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %12) %29 = bitcast float %27 to i32 %30 = bitcast float %28 to i32 %31 = insertelement <2 x i32> undef, i32 %29, i32 0 %32 = insertelement <2 x i32> %31, i32 %30, i32 1 %33 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %32, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %34 = extractelement <4 x float> %33, i32 2 call void @llvm.SI.export(i32 1, i32 0, i32 0, i32 8, i32 0, float %34, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s10 ; BEFC030A v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_mov_b32_e32 v0, 0 ; 7E000280 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v1, 4, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800400 00030102 s_waitcnt vmcnt(0) ; BF8C0770 exp 1, 8, 0, 0, 0, v1, v0, v0, v0 ; F8000081 00000001 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 0, 0, 1, 1, v0, v0, v0, v1 ; F800180F 01000000 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 4 Code Size: 80 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 3D, FLOAT 0: TEX OUT[0], IN[0], SAMP[0], 3D 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %12) %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %12) %29 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %12) %30 = bitcast float %27 to i32 %31 = bitcast float %28 to i32 %32 = bitcast float %29 to i32 %33 = insertelement <4 x i32> undef, i32 %30, i32 0 %34 = insertelement <4 x i32> %33, i32 %31, i32 1 %35 = insertelement <4 x i32> %34, i32 %32, i32 2 %36 = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %35, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %37 = extractelement <4 x float> %36, i32 0 %38 = extractelement <4 x float> %36, i32 1 %39 = extractelement <4 x float> %36, i32 2 %40 = extractelement <4 x float> %36, i32 3 %41 = call i32 @llvm.SI.packf16(float %37, float %38) %42 = bitcast i32 %41 to float %43 = call i32 @llvm.SI.packf16(float %39, float %40) %44 = bitcast i32 %43 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %42, float %44, float %42, float %44) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s10 ; BEFC030A v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:5], s[12:19], s[0:3] ; F0800F00 00030002 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 76 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..3] DCL TEMP[0], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MOV OUT[1], IMM[0].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 12) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 16) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 20) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 24) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 28) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 32) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 36) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 40) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 44) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 48) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 52) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 56) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 60) %30 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = add i32 %5, %8 %33 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %31, i32 0, i32 %32) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 %38 = fmul float %14, %34 %39 = fmul float %15, %34 %40 = fmul float %16, %34 %41 = fmul float %17, %34 %42 = fmul float %18, %35 %43 = fadd float %42, %38 %44 = fmul float %19, %35 %45 = fadd float %44, %39 %46 = fmul float %20, %35 %47 = fadd float %46, %40 %48 = fmul float %21, %35 %49 = fadd float %48, %41 %50 = fmul float %22, %36 %51 = fadd float %50, %43 %52 = fmul float %23, %36 %53 = fadd float %52, %45 %54 = fmul float %24, %36 %55 = fadd float %54, %47 %56 = fmul float %25, %36 %57 = fadd float %56, %49 %58 = fmul float %26, %37 %59 = fadd float %58, %51 %60 = fmul float %27, %37 %61 = fadd float %60, %53 %62 = fmul float %28, %37 %63 = fadd float %62, %55 %64 = fmul float %29, %37 %65 = fadd float %64, %57 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %59, float %61, float %63, float %65) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v4, s4, v0 ; 10080004 v_mul_f32_e32 v5, s5, v0 ; 100A0005 v_mul_f32_e32 v6, s6, v0 ; 100C0006 v_mul_f32_e32 v0, s7, v0 ; 10000007 v_mac_f32_e32 v4, s8, v1 ; 3E080208 v_mac_f32_e32 v5, s9, v1 ; 3E0A0209 v_mac_f32_e32 v6, s10, v1 ; 3E0C020A v_mac_f32_e32 v0, s11, v1 ; 3E00020B v_mac_f32_e32 v4, s12, v2 ; 3E08040C v_mac_f32_e32 v5, s13, v2 ; 3E0A040D v_mac_f32_e32 v6, s14, v2 ; 3E0C040E v_mac_f32_e32 v0, s15, v2 ; 3E00040F v_mac_f32_e32 v4, s16, v3 ; 3E080610 v_mac_f32_e32 v5, s17, v3 ; 3E0A0611 v_mac_f32_e32 v6, s18, v3 ; 3E0C0612 v_mac_f32_e32 v0, s0, v3 ; 3E000600 v_mov_b32_e32 v1, 0 ; 7E020280 exp 15, 32, 0, 0, 0, v1, v1, v1, v1 ; F800020F 01010101 exp 15, 12, 0, 1, 0, v4, v5, v6, v0 ; F80008CF 00060504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 184 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 3D, FLOAT DCL TEMP[0], LOCAL 0: MOV TEMP[0].xyz, IN[0].xyzz 1: TEX TEMP[0], TEMP[0], SAMP[0], 3D 2: MOV OUT[0], TEMP[0] 3: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %29 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %30 = bitcast float %27 to i32 %31 = bitcast float %28 to i32 %32 = bitcast float %29 to i32 %33 = insertelement <4 x i32> undef, i32 %30, i32 0 %34 = insertelement <4 x i32> %33, i32 %31, i32 1 %35 = insertelement <4 x i32> %34, i32 %32, i32 2 %36 = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %35, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %37 = extractelement <4 x float> %36, i32 0 %38 = extractelement <4 x float> %36, i32 1 %39 = extractelement <4 x float> %36, i32 2 %40 = extractelement <4 x float> %36, i32 3 %41 = call i32 @llvm.SI.packf16(float %37, float %38) %42 = bitcast i32 %41 to float %43 = call i32 @llvm.SI.packf16(float %39, float %40) %44 = bitcast i32 %43 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %42, float %44, float %42, float %44) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s10 ; BEFC030A v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:5], s[12:19], s[0:3] ; F0800F00 00030002 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 76 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** sh: ifconfig: Kommando nicht gefunden. SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..3] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[1], CONST[0] 2: DP4 TEMP[1].x, IN[1], CONST[1] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[1], CONST[2] 5: DP4 TEMP[2].x, IN[1], CONST[3] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[1], IN[0] 9: MOV OUT[0], TEMP[0] 10: MOV OUT[2], TEMP[1] 11: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 12) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 16) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 20) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 24) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 28) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 32) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 36) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 40) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 44) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 48) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 52) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 56) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 60) %30 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = add i32 %5, %8 %33 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %31, i32 0, i32 %32) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 %38 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = add i32 %5, %8 %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %40) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = fmul float %42, %14 %47 = fmul float %43, %15 %48 = fadd float %46, %47 %49 = fmul float %44, %16 %50 = fadd float %48, %49 %51 = fmul float %45, %17 %52 = fadd float %50, %51 %53 = fmul float %42, %18 %54 = fmul float %43, %19 %55 = fadd float %53, %54 %56 = fmul float %44, %20 %57 = fadd float %55, %56 %58 = fmul float %45, %21 %59 = fadd float %57, %58 %60 = fmul float %42, %22 %61 = fmul float %43, %23 %62 = fadd float %60, %61 %63 = fmul float %44, %24 %64 = fadd float %62, %63 %65 = fmul float %45, %25 %66 = fadd float %64, %65 %67 = fmul float %42, %26 %68 = fmul float %43, %27 %69 = fadd float %67, %68 %70 = fmul float %44, %28 %71 = fadd float %69, %70 %72 = fmul float %45, %29 %73 = fadd float %71, %72 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %34, float %35, float %36, float %37) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %66, float %73, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %52, float %59, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A v_mov_b32_e32 v1, 1.0 ; 7E0202F2 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105 s_buffer_load_dword s9, s[0:3], 0x6 ; C2048106 s_buffer_load_dword s10, s[0:3], 0x7 ; C2050107 s_buffer_load_dword s11, s[0:3], 0x8 ; C2058108 s_buffer_load_dword s16, s[0:3], 0x9 ; C2080109 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_buffer_load_dword s5, s[0:3], 0xb ; C202810B s_buffer_load_dword s6, s[0:3], 0xc ; C203010C s_buffer_load_dword s7, s[0:3], 0xd ; C203810D s_buffer_load_dword s12, s[0:3], 0xe ; C206010E s_buffer_load_dword s13, s[0:3], 0x0 ; C2068100 s_buffer_load_dword s14, s[0:3], 0x1 ; C2070101 s_buffer_load_dword s15, s[0:3], 0x2 ; C2078102 s_buffer_load_dword s17, s[0:3], 0x3 ; C2088103 s_buffer_load_dword s18, s[0:3], 0x4 ; C2090104 s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(1) ; BF8C0771 exp 15, 32, 0, 0, 0, v2, v3, v4, v5 ; F800020F 05040302 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s14, v7 ; 10000E0E s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v2, s8, v7 ; 10040E08 v_mul_f32_e32 v3, s16, v7 ; 10060E10 v_mul_f32_e32 v4, s7, v7 ; 10080E07 v_mac_f32_e32 v0, s13, v6 ; 3E000C0D v_mac_f32_e32 v2, s18, v6 ; 3E040C12 v_mac_f32_e32 v3, s11, v6 ; 3E060C0B v_mac_f32_e32 v4, s6, v6 ; 3E080C06 v_mac_f32_e32 v0, s15, v8 ; 3E00100F v_mac_f32_e32 v2, s9, v8 ; 3E041009 v_mac_f32_e32 v3, s4, v8 ; 3E061004 v_mac_f32_e32 v4, s12, v8 ; 3E08100C v_mac_f32_e32 v0, s17, v9 ; 3E001211 v_mac_f32_e32 v2, s10, v9 ; 3E04120A v_mac_f32_e32 v3, s5, v9 ; 3E061205 v_mac_f32_e32 v4, s0, v9 ; 3E081200 exp 15, 33, 0, 0, 0, v3, v4, v0, v0 ; F800021F 00000403 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v3, 0 ; 7E060280 exp 15, 12, 0, 1, 0, v0, v2, v3, v1 ; F80008CF 01030200 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 216 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].xyz, TEMP[0].xyzx 3: MUL TEMP[0].x, TEMP[0].wwww, IN[0].wwww 4: MOV TEMP[1].w, TEMP[0].xxxx 5: MOV OUT[0], TEMP[1] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %28 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %29 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %30 = bitcast float %28 to i32 %31 = bitcast float %29 to i32 %32 = insertelement <2 x i32> undef, i32 %30, i32 0 %33 = insertelement <2 x i32> %32, i32 %31, i32 1 %34 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %33, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = extractelement <4 x float> %34, i32 2 %38 = extractelement <4 x float> %34, i32 3 %39 = fmul float %38, %27 %40 = call i32 @llvm.SI.packf16(float %35, float %36) %41 = bitcast i32 %40 to float %42 = call i32 @llvm.SI.packf16(float %37, float %39) %43 = bitcast i32 %42 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %41, float %43, float %41, float %43) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s10 ; BEFC030A v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[3:6], 15, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[12:19], s[0:3] ; F0800F00 00030303 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v2, v6 ; 10000D02 v_cvt_pkrtz_f16_f32_e32 v1, v3, v4 ; 5E020903 v_cvt_pkrtz_f16_f32_e32 v0, v5, v0 ; 5E000105 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 80 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL CONST[0..5] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[1], CONST[2] 2: DP4 TEMP[1].x, IN[1], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[1], CONST[4] 5: DP4 TEMP[2].x, IN[1], CONST[5] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[1], IN[0] 9: MOV OUT[2], CONST[0] 10: MOV OUT[3], CONST[1] 11: MOV OUT[0], TEMP[0] 12: MOV OUT[4], TEMP[1] 13: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 12) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 16) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 20) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 24) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 28) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 32) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 36) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 40) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 44) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 48) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 52) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 56) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 60) %30 = call float @llvm.SI.load.const(<16 x i8> %13, i32 64) %31 = call float @llvm.SI.load.const(<16 x i8> %13, i32 68) %32 = call float @llvm.SI.load.const(<16 x i8> %13, i32 72) %33 = call float @llvm.SI.load.const(<16 x i8> %13, i32 76) %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 80) %35 = call float @llvm.SI.load.const(<16 x i8> %13, i32 84) %36 = call float @llvm.SI.load.const(<16 x i8> %13, i32 88) %37 = call float @llvm.SI.load.const(<16 x i8> %13, i32 92) %38 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = add i32 %5, %8 %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %40) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 %48 = add i32 %5, %8 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = extractelement <4 x float> %49, i32 2 %53 = extractelement <4 x float> %49, i32 3 %54 = fmul float %50, %22 %55 = fmul float %51, %23 %56 = fadd float %54, %55 %57 = fmul float %52, %24 %58 = fadd float %56, %57 %59 = fmul float %53, %25 %60 = fadd float %58, %59 %61 = fmul float %50, %26 %62 = fmul float %51, %27 %63 = fadd float %61, %62 %64 = fmul float %52, %28 %65 = fadd float %63, %64 %66 = fmul float %53, %29 %67 = fadd float %65, %66 %68 = fmul float %50, %30 %69 = fmul float %51, %31 %70 = fadd float %68, %69 %71 = fmul float %52, %32 %72 = fadd float %70, %71 %73 = fmul float %53, %33 %74 = fadd float %72, %73 %75 = fmul float %50, %34 %76 = fmul float %51, %35 %77 = fadd float %75, %76 %78 = fmul float %52, %36 %79 = fadd float %77, %78 %80 = fmul float %53, %37 %81 = fadd float %79, %80 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %42, float %43, float %44, float %45) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %14, float %15, float %16, float %17) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %18, float %19, float %20, float %21) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %74, float %81, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %60, float %67, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A v_mov_b32_e32 v1, 1.0 ; 7E0202F2 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0xf ; C204010F s_buffer_load_dword s9, s[0:3], 0x10 ; C2048110 s_buffer_load_dword s10, s[0:3], 0x11 ; C2050111 s_buffer_load_dword s11, s[0:3], 0x12 ; C2058112 s_buffer_load_dword s16, s[0:3], 0x13 ; C2080113 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_buffer_load_dword s6, s[0:3], 0x7 ; C2030107 s_buffer_load_dword s7, s[0:3], 0x8 ; C2038108 s_buffer_load_dword s12, s[0:3], 0x9 ; C2060109 s_buffer_load_dword s13, s[0:3], 0xa ; C206810A s_buffer_load_dword s14, s[0:3], 0xb ; C207010B s_buffer_load_dword s15, s[0:3], 0x0 ; C2078100 s_buffer_load_dword s17, s[0:3], 0x1 ; C2088101 s_buffer_load_dword s18, s[0:3], 0x2 ; C2090102 s_buffer_load_dword s19, s[0:3], 0x3 ; C2098103 s_buffer_load_dword s20, s[0:3], 0x4 ; C20A0104 s_buffer_load_dword s21, s[0:3], 0xd ; C20A810D s_buffer_load_dword s22, s[0:3], 0xc ; C20B010C s_buffer_load_dword s23, s[0:3], 0x14 ; C20B8114 s_buffer_load_dword s24, s[0:3], 0x15 ; C20C0115 s_buffer_load_dword s25, s[0:3], 0xe ; C20C810E s_buffer_load_dword s26, s[0:3], 0x16 ; C20D0116 s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s15 ; 7E00020F v_mov_b32_e32 v10, s17 ; 7E140211 v_mov_b32_e32 v11, s18 ; 7E160212 s_waitcnt vmcnt(1) ; BF8C0771 exp 15, 32, 0, 0, 0, v2, v3, v4, v5 ; F800020F 05040302 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v2, s19 ; 7E040213 v_mov_b32_e32 v3, s20 ; 7E060214 v_mov_b32_e32 v4, s4 ; 7E080204 v_mov_b32_e32 v5, s5 ; 7E0A0205 exp 15, 33, 0, 0, 0, v0, v10, v11, v2 ; F800021F 020B0A00 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, s6 ; 7E000206 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, s12, v7 ; 10040E0C v_mul_f32_e32 v10, s21, v7 ; 10140E15 v_mul_f32_e32 v11, s10, v7 ; 10160E0A exp 15, 34, 0, 0, 0, v3, v4, v5, v0 ; F800022F 00050403 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s24, v7 ; 10000E18 v_mac_f32_e32 v2, s7, v6 ; 3E040C07 v_mac_f32_e32 v10, s22, v6 ; 3E140C16 v_mac_f32_e32 v11, s9, v6 ; 3E160C09 v_mac_f32_e32 v0, s23, v6 ; 3E000C17 v_mac_f32_e32 v2, s13, v8 ; 3E04100D v_mac_f32_e32 v10, s25, v8 ; 3E141019 v_mac_f32_e32 v11, s11, v8 ; 3E16100B v_mac_f32_e32 v0, s26, v8 ; 3E00101A v_mac_f32_e32 v2, s14, v9 ; 3E04120E v_mac_f32_e32 v10, s8, v9 ; 3E141208 v_mac_f32_e32 v11, s16, v9 ; 3E161210 v_mac_f32_e32 v0, s0, v9 ; 3E001200 exp 15, 35, 0, 0, 0, v11, v0, v0, v0 ; F800023F 0000000B s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 12, 0, 1, 0, v2, v10, v0, v1 ; F80008CF 01000A02 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 12 Code Size: 308 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[3].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MAD TEMP[0], TEMP[0], IN[2], IN[1] 3: MUL TEMP[1].x, TEMP[0].wwww, IN[0].wwww 4: MOV TEMP[0].w, TEMP[1].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %28 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %29 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %30 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) %31 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %32 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) %33 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) %34 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) %35 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) %36 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) %37 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) %38 = bitcast float %36 to i32 %39 = bitcast float %37 to i32 %40 = insertelement <2 x i32> undef, i32 %38, i32 0 %41 = insertelement <2 x i32> %40, i32 %39, i32 1 %42 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %41, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = extractelement <4 x float> %42, i32 2 %46 = extractelement <4 x float> %42, i32 3 %47 = fmul float %43, %32 %48 = fadd float %47, %28 %49 = fmul float %44, %33 %50 = fadd float %49, %29 %51 = fmul float %45, %34 %52 = fadd float %51, %30 %53 = fmul float %46, %35 %54 = fadd float %53, %31 %55 = fmul float %54, %27 %56 = call i32 @llvm.SI.packf16(float %48, float %50) %57 = bitcast i32 %56 to float %58 = call i32 @llvm.SI.packf16(float %52, float %55) %59 = bitcast i32 %58 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %57, float %59, float %57, float %59) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s10 ; BEFC030A s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 v_interp_p1_f32 v5, v0, 2, 1, [m0] ; C8140600 v_interp_p2_f32 v5, [v5], v1, 2, 1, [m0] ; C8150601 v_interp_p1_f32 v6, v0, 3, 1, [m0] ; C8180700 v_interp_p2_f32 v6, [v6], v1, 3, 1, [m0] ; C8190701 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v10, v0, 3, 2, [m0] ; C8280B00 v_interp_p2_f32 v10, [v10], v1, 3, 2, [m0] ; C8290B01 v_interp_p1_f32 v11, v0, 0, 3, [m0] ; C82C0C00 v_interp_p2_f32 v11, [v11], v1, 0, 3, [m0] ; C82D0C01 v_interp_p1_f32 v12, v0, 1, 3, [m0] ; C8300D00 v_interp_p2_f32 v12, [v12], v1, 1, 3, [m0] ; C8310D01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[11:14], 15, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[8:15], s[0:3] ; F0800F00 00020B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v3, v7, v11 ; 3E061707 v_mac_f32_e32 v4, v8, v12 ; 3E081908 v_mac_f32_e32 v5, v9, v13 ; 3E0A1B09 v_mac_f32_e32 v6, v10, v14 ; 3E0C1D0A v_mul_f32_e32 v0, v2, v6 ; 10000D02 v_cvt_pkrtz_f16_f32_e32 v1, v3, v4 ; 5E020903 v_cvt_pkrtz_f16_f32_e32 v0, v5, v0 ; 5E000105 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Code Size: 160 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL CONST[0..3] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[2], CONST[2] 2: DP4 TEMP[1].x, IN[2], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: MOV OUT[2], IN[1] 5: MOV OUT[3], CONST[0] 6: MOV OUT[1], IN[0] 7: MOV OUT[4], CONST[1] 8: MOV OUT[0], TEMP[0] 9: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 12) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 16) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 20) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 24) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 28) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 32) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 36) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 40) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 44) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 48) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 52) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 56) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 60) %30 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = add i32 %5, %8 %33 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %31, i32 0, i32 %32) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 %38 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = add i32 %5, %8 %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %40) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 %48 = add i32 %5, %8 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = extractelement <4 x float> %49, i32 2 %53 = extractelement <4 x float> %49, i32 3 %54 = fmul float %50, %22 %55 = fmul float %51, %23 %56 = fadd float %54, %55 %57 = fmul float %52, %24 %58 = fadd float %56, %57 %59 = fmul float %53, %25 %60 = fadd float %58, %59 %61 = fmul float %50, %26 %62 = fmul float %51, %27 %63 = fadd float %61, %62 %64 = fmul float %52, %28 %65 = fadd float %63, %64 %66 = fmul float %53, %29 %67 = fadd float %65, %66 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %34, float %35, float %36, float %37) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %42, float %43, float %44, float %45) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %14, float %15, float %16, float %17) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %18, float %19, float %20, float %21) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %60, float %67, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A v_mov_b32_e32 v1, 1.0 ; 7E0202F2 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105 s_buffer_load_dword s9, s[0:3], 0x6 ; C2048106 s_buffer_load_dword s10, s[0:3], 0x7 ; C2050107 s_buffer_load_dword s11, s[0:3], 0x8 ; C2058108 s_buffer_load_dword s20, s[0:3], 0x9 ; C20A0109 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 buffer_load_format_xyzw v[10:13], v0, s[16:19], 0 idxen ; E00C2000 80040A00 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_buffer_load_dword s5, s[0:3], 0xb ; C202810B s_buffer_load_dword s6, s[0:3], 0xc ; C203010C s_buffer_load_dword s7, s[0:3], 0xd ; C203810D s_buffer_load_dword s12, s[0:3], 0xe ; C206010E s_buffer_load_dword s13, s[0:3], 0x0 ; C2068100 s_buffer_load_dword s14, s[0:3], 0x1 ; C2070101 s_buffer_load_dword s15, s[0:3], 0x2 ; C2078102 s_buffer_load_dword s16, s[0:3], 0x3 ; C2080103 s_buffer_load_dword s17, s[0:3], 0x4 ; C2088104 s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s8 ; 7E000208 v_mov_b32_e32 v14, s13 ; 7E1C020D v_mov_b32_e32 v15, s14 ; 7E1E020E v_mov_b32_e32 v16, s15 ; 7E20020F v_mov_b32_e32 v17, s16 ; 7E220210 v_mov_b32_e32 v18, s17 ; 7E240211 s_waitcnt vmcnt(2) ; BF8C0772 exp 15, 32, 0, 0, 0, v2, v3, v4, v5 ; F800020F 05040302 s_waitcnt vmcnt(1) ; BF8C0771 exp 15, 33, 0, 0, 0, v6, v7, v8, v9 ; F800021F 09080706 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v2, s9 ; 7E040209 v_mov_b32_e32 v3, s10 ; 7E06020A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v4, s20, v11 ; 10081614 exp 15, 34, 0, 0, 0, v14, v15, v16, v17 ; F800022F 11100F0E v_mul_f32_e32 v5, s7, v11 ; 100A1607 v_mac_f32_e32 v4, s11, v10 ; 3E08140B v_mac_f32_e32 v5, s6, v10 ; 3E0A1406 v_mac_f32_e32 v4, s4, v12 ; 3E081804 v_mac_f32_e32 v5, s12, v12 ; 3E0A180C v_mac_f32_e32 v4, s5, v13 ; 3E081A05 v_mac_f32_e32 v5, s0, v13 ; 3E0A1A00 exp 15, 35, 0, 0, 0, v18, v0, v2, v3 ; F800023F 03020012 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 12, 0, 1, 0, v4, v5, v0, v1 ; F80008CF 01000504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 20 Code Size: 252 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0..1], LOCAL 0: MAD TEMP[0], IN[0], IN[3], IN[2] 1: MUL TEMP[1].x, TEMP[0].wwww, IN[1].wwww 2: MOV TEMP[0].w, TEMP[1].xxxx 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %24 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %25 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %26 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %27 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %28 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) %29 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) %30 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) %31 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) %32 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) %33 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) %34 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) %35 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) %36 = fmul float %23, %32 %37 = fadd float %36, %28 %38 = fmul float %24, %33 %39 = fadd float %38, %29 %40 = fmul float %25, %34 %41 = fadd float %40, %30 %42 = fmul float %26, %35 %43 = fadd float %42, %31 %44 = fmul float %43, %27 %45 = call i32 @llvm.SI.packf16(float %37, float %39) %46 = bitcast i32 %45 to float %47 = call i32 @llvm.SI.packf16(float %41, float %44) %48 = bitcast i32 %47 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %46, float %48, float %46, float %48) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s10 ; BEFC030A v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 3, 1, [m0] ; C8180700 v_interp_p2_f32 v6, [v6], v1, 3, 1, [m0] ; C8190701 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v10, v0, 3, 2, [m0] ; C8280B00 v_interp_p2_f32 v10, [v10], v1, 3, 2, [m0] ; C8290B01 v_interp_p1_f32 v11, v0, 0, 3, [m0] ; C82C0C00 v_interp_p2_f32 v11, [v11], v1, 0, 3, [m0] ; C82D0C01 v_interp_p1_f32 v12, v0, 1, 3, [m0] ; C8300D00 v_interp_p2_f32 v12, [v12], v1, 1, 3, [m0] ; C8310D01 v_interp_p1_f32 v13, v0, 2, 3, [m0] ; C8340E00 v_interp_p2_f32 v13, [v13], v1, 2, 3, [m0] ; C8350E01 v_interp_p1_f32 v0, v0, 3, 3, [m0] ; C8000F00 v_interp_p2_f32 v0, [v0], v1, 3, 3, [m0] ; C8010F01 v_mac_f32_e32 v7, v11, v2 ; 3E0E050B v_mac_f32_e32 v8, v12, v3 ; 3E10070C v_mac_f32_e32 v9, v13, v4 ; 3E12090D v_mac_f32_e32 v10, v0, v5 ; 3E140B00 v_mul_f32_e32 v0, v6, v10 ; 10001506 v_cvt_pkrtz_f16_f32_e32 v1, v7, v8 ; 5E021107 v_cvt_pkrtz_f16_f32_e32 v0, v9, v0 ; 5E000109 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Code Size: 148 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], CLIPVERTEX DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL OUT[4], GENERIC[2] DCL OUT[5], GENERIC[3] DCL CONST[0..57] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 2.2000, 0.0000, 0.0000, 0.0000} 0: LG2 TEMP[0].x, IN[1].xxxx 1: LG2 TEMP[1].x, IN[1].yyyy 2: MOV TEMP[0].y, TEMP[1].xxxx 3: LG2 TEMP[1].x, IN[1].zzzz 4: MOV TEMP[0].z, TEMP[1].xxxx 5: MUL TEMP[0].xyz, TEMP[0].xyzz, IMM[0].xxxx 6: EX2 TEMP[1].x, TEMP[0].xxxx 7: EX2 TEMP[2].x, TEMP[0].yyyy 8: MOV TEMP[1].y, TEMP[2].xxxx 9: EX2 TEMP[2].x, TEMP[0].zzzz 10: MOV TEMP[1].z, TEMP[2].xxxx 11: DP4 TEMP[2].x, IN[2], CONST[48] 12: DP4 TEMP[3].x, IN[2], CONST[49] 13: MOV TEMP[2].y, TEMP[3].xxxx 14: MOV TEMP[0].w, CONST[0].yyyy 15: MOV TEMP[3].w, IN[0].wwww 16: MAD TEMP[3].xyz, IN[3].xyzz, CONST[13].xxxx, IN[0].xyzz 17: DP4 TEMP[0].x, TEMP[3], CONST[54] 18: DP4 TEMP[4].x, TEMP[3], CONST[55] 19: MOV TEMP[0].y, TEMP[4].xxxx 20: DP4 TEMP[3].x, TEMP[3], CONST[56] 21: MOV TEMP[0].z, TEMP[3].xxxx 22: DP4 TEMP[3].x, TEMP[0], CONST[8] 23: DP4 TEMP[4].x, TEMP[0], CONST[9] 24: MOV TEMP[3].y, TEMP[4].xxxx 25: DP4 TEMP[5].x, TEMP[0], CONST[11] 26: MOV TEMP[3].w, TEMP[5].xxxx 27: DP4 TEMP[6].x, TEMP[0], CONST[10] 28: MOV TEMP[0].w, TEMP[6].xxxx 29: MOV TEMP[3].z, TEMP[6].xxxx 30: MOV TEMP[2].zw, CONST[0].xxxx 31: MOV TEMP[1].w, IN[1].wwww 32: MOV TEMP[7], TEMP[3] 33: MAD TEMP[6].x, TEMP[6].xxxx, CONST[0].zzzz, -TEMP[5].xxxx 34: MOV TEMP[3].z, TEMP[6].xxxx 35: MOV TEMP[3].y, -TEMP[4].xxxx 36: MAD TEMP[3].xy, CONST[57].xyyy, TEMP[5].xxxx, TEMP[3].xyyy 37: MOV OUT[2], TEMP[2] 38: MOV OUT[3], CONST[0].xxxx 39: MOV OUT[0], TEMP[3] 40: MOV OUT[1], TEMP[7] 41: MOV OUT[4], TEMP[1] 42: MOV OUT[5], TEMP[0] 43: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 128) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 132) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 136) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 140) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 144) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 148) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 152) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 156) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 160) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 164) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 168) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 172) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 176) %30 = call float @llvm.SI.load.const(<16 x i8> %13, i32 180) %31 = call float @llvm.SI.load.const(<16 x i8> %13, i32 184) %32 = call float @llvm.SI.load.const(<16 x i8> %13, i32 188) %33 = call float @llvm.SI.load.const(<16 x i8> %13, i32 208) %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 768) %35 = call float @llvm.SI.load.const(<16 x i8> %13, i32 772) %36 = call float @llvm.SI.load.const(<16 x i8> %13, i32 776) %37 = call float @llvm.SI.load.const(<16 x i8> %13, i32 780) %38 = call float @llvm.SI.load.const(<16 x i8> %13, i32 784) %39 = call float @llvm.SI.load.const(<16 x i8> %13, i32 788) %40 = call float @llvm.SI.load.const(<16 x i8> %13, i32 792) %41 = call float @llvm.SI.load.const(<16 x i8> %13, i32 796) %42 = call float @llvm.SI.load.const(<16 x i8> %13, i32 864) %43 = call float @llvm.SI.load.const(<16 x i8> %13, i32 868) %44 = call float @llvm.SI.load.const(<16 x i8> %13, i32 872) %45 = call float @llvm.SI.load.const(<16 x i8> %13, i32 876) %46 = call float @llvm.SI.load.const(<16 x i8> %13, i32 880) %47 = call float @llvm.SI.load.const(<16 x i8> %13, i32 884) %48 = call float @llvm.SI.load.const(<16 x i8> %13, i32 888) %49 = call float @llvm.SI.load.const(<16 x i8> %13, i32 892) %50 = call float @llvm.SI.load.const(<16 x i8> %13, i32 896) %51 = call float @llvm.SI.load.const(<16 x i8> %13, i32 900) %52 = call float @llvm.SI.load.const(<16 x i8> %13, i32 904) %53 = call float @llvm.SI.load.const(<16 x i8> %13, i32 908) %54 = call float @llvm.SI.load.const(<16 x i8> %13, i32 912) %55 = call float @llvm.SI.load.const(<16 x i8> %13, i32 916) %56 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %57 = load <16 x i8>, <16 x i8> addrspace(2)* %56, align 16, !tbaa !0 %58 = add i32 %5, %8 %59 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %57, i32 0, i32 %58) %60 = extractelement <4 x float> %59, i32 0 %61 = extractelement <4 x float> %59, i32 1 %62 = extractelement <4 x float> %59, i32 2 %63 = extractelement <4 x float> %59, i32 3 %64 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %65 = load <16 x i8>, <16 x i8> addrspace(2)* %64, align 16, !tbaa !0 %66 = add i32 %5, %8 %67 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %65, i32 0, i32 %66) %68 = extractelement <4 x float> %67, i32 0 %69 = extractelement <4 x float> %67, i32 1 %70 = extractelement <4 x float> %67, i32 2 %71 = extractelement <4 x float> %67, i32 3 %72 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %73 = load <16 x i8>, <16 x i8> addrspace(2)* %72, align 16, !tbaa !0 %74 = add i32 %5, %8 %75 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %73, i32 0, i32 %74) %76 = extractelement <4 x float> %75, i32 0 %77 = extractelement <4 x float> %75, i32 1 %78 = extractelement <4 x float> %75, i32 2 %79 = extractelement <4 x float> %75, i32 3 %80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0 %82 = add i32 %5, %8 %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %82) %84 = extractelement <4 x float> %83, i32 0 %85 = extractelement <4 x float> %83, i32 1 %86 = extractelement <4 x float> %83, i32 2 %87 = call float @llvm.log2.f32(float %68) %88 = call float @llvm.log2.f32(float %69) %89 = call float @llvm.log2.f32(float %70) %90 = fmul float %87, 0x40019999A0000000 %91 = fmul float %88, 0x40019999A0000000 %92 = fmul float %89, 0x40019999A0000000 %93 = call float @llvm.AMDIL.exp.(float %90) %94 = call float @llvm.AMDIL.exp.(float %91) %95 = call float @llvm.AMDIL.exp.(float %92) %96 = fmul float %76, %34 %97 = fmul float %77, %35 %98 = fadd float %96, %97 %99 = fmul float %78, %36 %100 = fadd float %98, %99 %101 = fmul float %79, %37 %102 = fadd float %100, %101 %103 = fmul float %76, %38 %104 = fmul float %77, %39 %105 = fadd float %103, %104 %106 = fmul float %78, %40 %107 = fadd float %105, %106 %108 = fmul float %79, %41 %109 = fadd float %107, %108 %110 = fmul float %84, %33 %111 = fadd float %110, %60 %112 = fmul float %85, %33 %113 = fadd float %112, %61 %114 = fmul float %86, %33 %115 = fadd float %114, %62 %116 = fmul float %111, %42 %117 = fmul float %113, %43 %118 = fadd float %116, %117 %119 = fmul float %115, %44 %120 = fadd float %118, %119 %121 = fmul float %63, %45 %122 = fadd float %120, %121 %123 = fmul float %111, %46 %124 = fmul float %113, %47 %125 = fadd float %123, %124 %126 = fmul float %115, %48 %127 = fadd float %125, %126 %128 = fmul float %63, %49 %129 = fadd float %127, %128 %130 = fmul float %111, %50 %131 = fmul float %113, %51 %132 = fadd float %130, %131 %133 = fmul float %115, %52 %134 = fadd float %132, %133 %135 = fmul float %63, %53 %136 = fadd float %134, %135 %137 = fmul float %122, %17 %138 = fmul float %129, %18 %139 = fadd float %137, %138 %140 = fmul float %136, %19 %141 = fadd float %139, %140 %142 = fmul float %15, %20 %143 = fadd float %141, %142 %144 = fmul float %122, %21 %145 = fmul float %129, %22 %146 = fadd float %144, %145 %147 = fmul float %136, %23 %148 = fadd float %146, %147 %149 = fmul float %15, %24 %150 = fadd float %148, %149 %151 = fmul float %122, %29 %152 = fmul float %129, %30 %153 = fadd float %151, %152 %154 = fmul float %136, %31 %155 = fadd float %153, %154 %156 = fmul float %15, %32 %157 = fadd float %155, %156 %158 = fmul float %122, %25 %159 = fmul float %129, %26 %160 = fadd float %158, %159 %161 = fmul float %136, %27 %162 = fadd float %160, %161 %163 = fmul float %15, %28 %164 = fadd float %162, %163 %165 = fmul float %164, %16 %166 = fsub float %165, %157 %167 = fmul float %54, %157 %168 = fadd float %167, %143 %169 = fmul float %55, %157 %170 = fsub float %169, %150 %171 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 16 %172 = load <16 x i8>, <16 x i8> addrspace(2)* %171, align 16, !tbaa !0 %173 = call float @llvm.SI.load.const(<16 x i8> %172, i32 0) %174 = fmul float %173, %143 %175 = call float @llvm.SI.load.const(<16 x i8> %172, i32 4) %176 = fmul float %175, %150 %177 = fadd float %174, %176 %178 = call float @llvm.SI.load.const(<16 x i8> %172, i32 8) %179 = fmul float %178, %164 %180 = fadd float %177, %179 %181 = call float @llvm.SI.load.const(<16 x i8> %172, i32 12) %182 = fmul float %181, %157 %183 = fadd float %180, %182 %184 = call float @llvm.SI.load.const(<16 x i8> %172, i32 16) %185 = fmul float %184, %143 %186 = call float @llvm.SI.load.const(<16 x i8> %172, i32 20) %187 = fmul float %186, %150 %188 = fadd float %185, %187 %189 = call float @llvm.SI.load.const(<16 x i8> %172, i32 24) %190 = fmul float %189, %164 %191 = fadd float %188, %190 %192 = call float @llvm.SI.load.const(<16 x i8> %172, i32 28) %193 = fmul float %192, %157 %194 = fadd float %191, %193 %195 = call float @llvm.SI.load.const(<16 x i8> %172, i32 32) %196 = fmul float %195, %143 %197 = call float @llvm.SI.load.const(<16 x i8> %172, i32 36) %198 = fmul float %197, %150 %199 = fadd float %196, %198 %200 = call float @llvm.SI.load.const(<16 x i8> %172, i32 40) %201 = fmul float %200, %164 %202 = fadd float %199, %201 %203 = call float @llvm.SI.load.const(<16 x i8> %172, i32 44) %204 = fmul float %203, %157 %205 = fadd float %202, %204 %206 = call float @llvm.SI.load.const(<16 x i8> %172, i32 48) %207 = fmul float %206, %143 %208 = call float @llvm.SI.load.const(<16 x i8> %172, i32 52) %209 = fmul float %208, %150 %210 = fadd float %207, %209 %211 = call float @llvm.SI.load.const(<16 x i8> %172, i32 56) %212 = fmul float %211, %164 %213 = fadd float %210, %212 %214 = call float @llvm.SI.load.const(<16 x i8> %172, i32 60) %215 = fmul float %214, %157 %216 = fadd float %213, %215 %217 = call float @llvm.SI.load.const(<16 x i8> %172, i32 64) %218 = fmul float %217, %143 %219 = call float @llvm.SI.load.const(<16 x i8> %172, i32 68) %220 = fmul float %219, %150 %221 = fadd float %218, %220 %222 = call float @llvm.SI.load.const(<16 x i8> %172, i32 72) %223 = fmul float %222, %164 %224 = fadd float %221, %223 %225 = call float @llvm.SI.load.const(<16 x i8> %172, i32 76) %226 = fmul float %225, %157 %227 = fadd float %224, %226 %228 = call float @llvm.SI.load.const(<16 x i8> %172, i32 80) %229 = fmul float %228, %143 %230 = call float @llvm.SI.load.const(<16 x i8> %172, i32 84) %231 = fmul float %230, %150 %232 = fadd float %229, %231 %233 = call float @llvm.SI.load.const(<16 x i8> %172, i32 88) %234 = fmul float %233, %164 %235 = fadd float %232, %234 %236 = call float @llvm.SI.load.const(<16 x i8> %172, i32 92) %237 = fmul float %236, %157 %238 = fadd float %235, %237 %239 = call float @llvm.SI.load.const(<16 x i8> %172, i32 96) %240 = fmul float %239, %143 %241 = call float @llvm.SI.load.const(<16 x i8> %172, i32 100) %242 = fmul float %241, %150 %243 = fadd float %240, %242 %244 = call float @llvm.SI.load.const(<16 x i8> %172, i32 104) %245 = fmul float %244, %164 %246 = fadd float %243, %245 %247 = call float @llvm.SI.load.const(<16 x i8> %172, i32 108) %248 = fmul float %247, %157 %249 = fadd float %246, %248 %250 = call float @llvm.SI.load.const(<16 x i8> %172, i32 112) %251 = fmul float %250, %143 %252 = call float @llvm.SI.load.const(<16 x i8> %172, i32 116) %253 = fmul float %252, %150 %254 = fadd float %251, %253 %255 = call float @llvm.SI.load.const(<16 x i8> %172, i32 120) %256 = fmul float %255, %164 %257 = fadd float %254, %256 %258 = call float @llvm.SI.load.const(<16 x i8> %172, i32 124) %259 = fmul float %258, %157 %260 = fadd float %257, %259 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %102, float %109, float %14, float %14) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %14, float %14, float %14, float %14) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %93, float %94, float %95, float %71) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %122, float %129, float %136, float %164) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %168, float %170, float %166, float %157) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 13, i32 0, float %183, float %194, float %205, float %216) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 14, i32 0, float %227, float %238, float %249, float %260) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[52:55], s[2:3], 0x0 ; C09A0300 s_load_dwordx4 s[24:27], s[2:3], 0x40 ; C08C0340 s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[32:35], s[8:9], 0xc ; C090090C v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s3, s[52:55], 0x2c ; C201B52C s_buffer_load_dword s5, s[52:55], 0x2d ; C202B52D s_buffer_load_dword s1, s[52:55], 0x2e ; C200B52E s_buffer_load_dword s0, s[52:55], 0x2f ; C200352F s_buffer_load_dword s28, s[52:55], 0x34 ; C20E3534 buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 buffer_load_format_xyzw v[5:8], v0, s[16:19], 0 idxen ; E00C2000 80040500 buffer_load_format_xyzw v[9:12], v0, s[20:23], 0 idxen ; E00C2000 80050900 buffer_load_format_xyzw v[13:16], v0, s[32:35], 0 idxen ; E00C2000 80080D00 s_buffer_load_dword s14, s[52:55], 0xda ; C20735DA s_buffer_load_dword s11, s[52:55], 0xdb ; C205B5DB s_buffer_load_dword s17, s[52:55], 0xdc ; C208B5DC s_buffer_load_dword s33, s[52:55], 0xdd ; C210B5DD s_buffer_load_dword s15, s[52:55], 0xde ; C207B5DE s_buffer_load_dword s45, s[52:55], 0xc5 ; C216B5C5 s_buffer_load_dword s32, s[52:55], 0xc6 ; C21035C6 s_buffer_load_dword s29, s[52:55], 0xc7 ; C20EB5C7 s_buffer_load_dword s22, s[52:55], 0xd8 ; C20B35D8 s_buffer_load_dword s40, s[52:55], 0xd9 ; C21435D9 s_buffer_load_dword s18, s[52:55], 0xdf ; C20935DF s_buffer_load_dword s30, s[52:55], 0xe0 ; C20F35E0 s_buffer_load_dword s41, s[52:55], 0xe1 ; C214B5E1 s_buffer_load_dword s21, s[52:55], 0xe2 ; C20AB5E2 s_buffer_load_dword s20, s[52:55], 0xe3 ; C20A35E3 s_buffer_load_dword s48, s[52:55], 0xc0 ; C21835C0 s_buffer_load_dword s51, s[52:55], 0xc1 ; C219B5C1 s_buffer_load_dword s44, s[52:55], 0xc2 ; C21635C2 s_buffer_load_dword s42, s[52:55], 0xc3 ; C21535C3 s_buffer_load_dword s49, s[52:55], 0xc4 ; C218B5C4 s_buffer_load_dword s13, s[52:55], 0x22 ; C206B522 s_buffer_load_dword s2, s[52:55], 0x23 ; C2013523 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s0 ; 7E000200 s_buffer_load_dword s19, s[52:55], 0x24 ; C209B524 s_buffer_load_dword s31, s[52:55], 0x25 ; C20FB525 s_buffer_load_dword s16, s[52:55], 0x26 ; C2083526 s_buffer_load_dword s0, s[52:55], 0x0 ; C2003500 s_buffer_load_dword s12, s[52:55], 0x1 ; C2063501 s_buffer_load_dword s9, s[52:55], 0x2 ; C204B502 s_buffer_load_dword s34, s[52:55], 0x20 ; C2113520 s_buffer_load_dword s38, s[52:55], 0x21 ; C2133521 s_buffer_load_dword s6, s[52:55], 0x27 ; C2033527 s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v16, s2 ; 7E200202 s_buffer_load_dword s39, s[52:55], 0x28 ; C213B528 s_buffer_load_dword s47, s[52:55], 0x29 ; C217B529 s_buffer_load_dword s37, s[52:55], 0x2a ; C212B52A s_buffer_load_dword s50, s[52:55], 0x2b ; C219352B s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v17, s0 ; 7E220200 s_buffer_load_dword s7, s[24:27], 0x0 ; C2039900 s_buffer_load_dword s23, s[24:27], 0x1 ; C20B9901 s_buffer_load_dword s4, s[24:27], 0x2 ; C2021902 s_buffer_load_dword s0, s[24:27], 0x3 ; C2001903 s_buffer_load_dword s8, s[24:27], 0x4 ; C2041904 v_mov_b32_e32 v18, s6 ; 7E240206 s_buffer_load_dword s35, s[24:27], 0x5 ; C2119905 s_buffer_load_dword s6, s[24:27], 0x6 ; C2031906 s_buffer_load_dword s2, s[24:27], 0x7 ; C2011907 s_buffer_load_dword s10, s[24:27], 0x8 ; C2051908 s_buffer_load_dword s36, s[24:27], 0x9 ; C2121909 s_buffer_load_dword s43, s[52:55], 0xe4 ; C215B5E4 s_buffer_load_dword s46, s[52:55], 0xe5 ; C21735E5 s_buffer_load_dword s52, s[24:27], 0xa ; C21A190A s_buffer_load_dword s53, s[24:27], 0xb ; C21A990B s_buffer_load_dword s54, s[24:27], 0xc ; C21B190C s_buffer_load_dword s55, s[24:27], 0xd ; C21B990D s_buffer_load_dword s56, s[24:27], 0xe ; C21C190E s_buffer_load_dword s57, s[24:27], 0xf ; C21C990F s_buffer_load_dword s58, s[24:27], 0x10 ; C21D1910 s_buffer_load_dword s59, s[24:27], 0x11 ; C21D9911 s_buffer_load_dword s60, s[24:27], 0x12 ; C21E1912 s_buffer_load_dword s61, s[24:27], 0x13 ; C21E9913 s_buffer_load_dword s62, s[24:27], 0x14 ; C21F1914 s_buffer_load_dword s63, s[24:27], 0x15 ; C21F9915 s_buffer_load_dword s64, s[24:27], 0x16 ; C2201916 s_buffer_load_dword s65, s[24:27], 0x17 ; C2209917 s_buffer_load_dword s66, s[24:27], 0x18 ; C2211918 s_buffer_load_dword s67, s[24:27], 0x19 ; C2219919 s_buffer_load_dword s68, s[24:27], 0x1a ; C222191A s_buffer_load_dword s69, s[24:27], 0x1b ; C222991B s_buffer_load_dword s70, s[24:27], 0x1c ; C223191C s_buffer_load_dword s71, s[24:27], 0x1d ; C223991D s_buffer_load_dword s72, s[24:27], 0x1e ; C224191E s_buffer_load_dword s24, s[24:27], 0x1f ; C20C191F v_mul_f32_e32 v19, s51, v10 ; 10261433 v_mul_f32_e32 v10, s45, v10 ; 1014142D v_mad_f32 v1, s28, v13, v1 ; D2820001 04061A1C v_mad_f32 v2, s28, v14, v2 ; D2820002 040A1C1C v_mad_f32 v3, s28, v15, v3 ; D2820003 040E1E1C v_mac_f32_e32 v19, s48, v9 ; 3E261230 v_mac_f32_e32 v10, s49, v9 ; 3E141231 v_mul_f32_e32 v9, s40, v2 ; 10120428 v_mul_f32_e32 v13, s33, v2 ; 101A0421 v_mul_f32_e32 v2, s41, v2 ; 10040429 v_mac_f32_e32 v19, s44, v11 ; 3E26162C v_mac_f32_e32 v10, s32, v11 ; 3E141620 v_mac_f32_e32 v19, s42, v12 ; 3E26182A v_mac_f32_e32 v10, s29, v12 ; 3E14181D v_mac_f32_e32 v9, s22, v1 ; 3E120216 v_mac_f32_e32 v13, s17, v1 ; 3E1A0211 v_mac_f32_e32 v2, s30, v1 ; 3E04021E v_mac_f32_e32 v9, s14, v3 ; 3E12060E v_mac_f32_e32 v13, s15, v3 ; 3E1A060F v_mac_f32_e32 v2, s21, v3 ; 3E040615 v_mac_f32_e32 v9, s11, v4 ; 3E12080B v_mac_f32_e32 v13, s18, v4 ; 3E1A0812 v_mac_f32_e32 v2, s20, v4 ; 3E040814 v_mov_b32_e32 v1, s50 ; 7E020232 v_mul_f32_e32 v3, s38, v13 ; 10061A26 v_mul_f32_e32 v4, s31, v13 ; 10081A1F v_mul_f32_e32 v11, s5, v13 ; 10161A05 v_mul_f32_e32 v12, s47, v13 ; 10181A2F v_mac_f32_e32 v3, s34, v9 ; 3E061222 v_mac_f32_e32 v4, s19, v9 ; 3E081213 v_mac_f32_e32 v11, s3, v9 ; 3E161203 v_mac_f32_e32 v12, s39, v9 ; 3E181227 v_mac_f32_e32 v3, s13, v2 ; 3E06040D v_mac_f32_e32 v4, s16, v2 ; 3E080410 v_mac_f32_e32 v11, s1, v2 ; 3E160401 v_mac_f32_e32 v12, s37, v2 ; 3E180425 v_mac_f32_e32 v3, s12, v16 ; 3E06200C v_mac_f32_e32 v4, s12, v18 ; 3E08240C v_mac_f32_e32 v11, s12, v0 ; 3E16000C v_mac_f32_e32 v12, s12, v1 ; 3E18020C v_mad_f32 v0, v12, s9, -v11 ; D2820000 842C130C s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s23, v4 ; 10020817 v_mul_f32_e32 v14, s35, v4 ; 101C0823 v_mul_f32_e32 v15, s36, v4 ; 101E0824 v_mul_f32_e32 v16, s55, v4 ; 10200837 v_mul_f32_e32 v18, s59, v4 ; 1024083B v_mul_f32_e32 v20, s63, v4 ; 1028083F v_mul_f32_e32 v21, s67, v4 ; 102A0843 v_log_f32_e32 v5, v5 ; 7E0A4F05 v_log_f32_e32 v6, v6 ; 7E0C4F06 exp 15, 32, 0, 0, 0, v19, v10, v17, v17 ; F800020F 11110A13 v_log_f32_e32 v7, v7 ; 7E0E4F07 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v10, 0x400ccccd ; 7E1402FF 400CCCCD v_mul_f32_e32 v5, v10, v5 ; 100A0B0A v_mul_f32_e32 v6, v10, v6 ; 100C0D0A v_mul_f32_e32 v7, v10, v7 ; 100E0F0A v_mul_f32_e32 v10, s71, v4 ; 10140847 v_mac_f32_e32 v1, s7, v3 ; 3E020607 v_mac_f32_e32 v14, s8, v3 ; 3E1C0608 v_mac_f32_e32 v15, s10, v3 ; 3E1E060A v_mac_f32_e32 v16, s54, v3 ; 3E200636 v_mac_f32_e32 v18, s58, v3 ; 3E24063A v_mac_f32_e32 v20, s62, v3 ; 3E28063E v_mac_f32_e32 v21, s66, v3 ; 3E2A0642 v_mac_f32_e32 v10, s70, v3 ; 3E140646 v_mad_f32 v3, s43, v11, v3 ; D2820003 040E162B v_mad_f32 v4, s46, v11, -v4 ; D2820004 8412162E v_mac_f32_e32 v1, s4, v12 ; 3E021804 v_mac_f32_e32 v14, s6, v12 ; 3E1C1806 v_mac_f32_e32 v15, s52, v12 ; 3E1E1834 v_mac_f32_e32 v16, s56, v12 ; 3E201838 v_mac_f32_e32 v18, s60, v12 ; 3E24183C v_mac_f32_e32 v20, s64, v12 ; 3E281840 v_mac_f32_e32 v21, s68, v12 ; 3E2A1844 v_mac_f32_e32 v10, s72, v12 ; 3E141848 v_mac_f32_e32 v1, s0, v11 ; 3E021600 v_mac_f32_e32 v14, s2, v11 ; 3E1C1602 v_mac_f32_e32 v15, s53, v11 ; 3E1E1635 v_mac_f32_e32 v16, s57, v11 ; 3E201639 v_mac_f32_e32 v18, s61, v11 ; 3E24163D v_mac_f32_e32 v20, s65, v11 ; 3E281641 v_mac_f32_e32 v21, s69, v11 ; 3E2A1645 v_mac_f32_e32 v10, s24, v11 ; 3E141618 exp 15, 33, 0, 0, 0, v17, v17, v17, v17 ; F800021F 11111111 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_exp_f32_e32 v7, v7 ; 7E0E4B07 exp 15, 34, 0, 0, 0, v5, v6, v7, v8 ; F800022F 08070605 exp 15, 35, 0, 0, 0, v9, v13, v2, v12 ; F800023F 0C020D09 exp 15, 12, 0, 0, 0, v3, v4, v0, v11 ; F80000CF 0B000403 exp 15, 13, 0, 0, 0, v1, v14, v15, v16 ; F80000DF 100F0E01 exp 15, 14, 0, 1, 0, v18, v20, v21, v10 ; F80008EF 0A151412 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 80 VGPRS: 24 Code Size: 824 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], SHADOW2D, FLOAT DCL CONST[0..90] DCL TEMP[0..15], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 2.0000, -0.5000} IMM[1] FLT32 { -0.0000, -1.0000, -2.0000, 0.0625} IMM[2] FLT32 { 0.0005, 0.0000, -0.0005, 0.1250} IMM[3] FLT32 { 0.2500, 0.0000, -1.0000, -2.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: UIF CONST[90].xxxx :0 3: MAD TEMP[1], IN[3].xyzx, IMM[0].xxxy, IMM[0].yyyx 4: DP4 TEMP[2].x, TEMP[1], CONST[69] 5: DP4 TEMP[3].x, TEMP[1], CONST[70] 6: MOV TEMP[2].y, TEMP[3].xxxx 7: MOV_SAT TEMP[4].xy, TEMP[2].xyyy 8: ADD TEMP[4].xy, -TEMP[2].xyyy, TEMP[4].xyyy 9: DP2 TEMP[5].x, TEMP[4].xyyy, IMM[0].xxxx 10: DP4 TEMP[4].x, TEMP[1], CONST[73] 11: DP4 TEMP[6].x, TEMP[1], CONST[74] 12: MOV TEMP[4].y, TEMP[6].xxxx 13: MOV_SAT TEMP[7].xy, TEMP[4].xyyy 14: ADD TEMP[7].xy, -TEMP[4].xyyy, TEMP[7].xyyy 15: DP2 TEMP[8].x, TEMP[7].xyyy, IMM[0].xxxx 16: MOV TEMP[4].w, TEMP[8].xxxx 17: DP4 TEMP[7].x, TEMP[1], CONST[77] 18: DP4 TEMP[9].x, TEMP[1], CONST[78] 19: MOV TEMP[4].z, IMM[0].xxxx 20: MOV TEMP[10].w, TEMP[4] 21: ABS TEMP[11].x, TEMP[8].xxxx 22: FSGE TEMP[11].x, -TEMP[11].xxxx, IMM[0].yyyy 23: UIF TEMP[11].xxxx :0 24: MOV TEMP[11].x, TEMP[4].xxxx 25: ELSE :0 26: MOV TEMP[11].x, TEMP[7].xxxx 27: ENDIF 28: MOV TEMP[10].x, TEMP[11].xxxx 29: ABS TEMP[11].x, TEMP[8].xxxx 30: FSGE TEMP[11].x, -TEMP[11].xxxx, IMM[0].yyyy 31: UIF TEMP[11].xxxx :0 32: MOV TEMP[6].x, TEMP[6].xxxx 33: ELSE :0 34: MOV TEMP[6].x, TEMP[9].xxxx 35: ENDIF 36: MOV TEMP[10].y, TEMP[6].xxxx 37: ABS TEMP[6].x, TEMP[8].xxxx 38: FSGE TEMP[6].x, -TEMP[6].xxxx, IMM[0].yyyy 39: UIF TEMP[6].xxxx :0 40: MOV TEMP[6].x, IMM[0].xxxx 41: ELSE :0 42: MOV TEMP[6].x, IMM[0].zzzz 43: ENDIF 44: MOV TEMP[10].z, TEMP[6].xxxx 45: MOV TEMP[4].xyz, TEMP[10] 46: ABS TEMP[6].x, TEMP[5].xxxx 47: FSGE TEMP[6].x, -TEMP[6].xxxx, IMM[0].yyyy 48: UIF TEMP[6].xxxx :0 49: MOV TEMP[6].x, TEMP[2].xxxx 50: ELSE :0 51: MOV TEMP[6].x, TEMP[4].xxxx 52: ENDIF 53: MOV TEMP[10].x, TEMP[6].xxxx 54: ABS TEMP[6].x, TEMP[5].xxxx 55: FSGE TEMP[6].x, -TEMP[6].xxxx, IMM[0].yyyy 56: UIF TEMP[6].xxxx :0 57: MOV TEMP[3].x, TEMP[3].xxxx 58: ELSE :0 59: MOV TEMP[3].x, TEMP[4].yyyy 60: ENDIF 61: MOV TEMP[10].y, TEMP[3].xxxx 62: ABS TEMP[3].x, TEMP[5].xxxx 63: FSGE TEMP[3].x, -TEMP[3].xxxx, IMM[0].yyyy 64: UIF TEMP[3].xxxx :0 65: MOV TEMP[3].x, IMM[0].yyyy 66: ELSE :0 67: MOV TEMP[3].x, TEMP[4].zzzz 68: ENDIF 69: MOV TEMP[10].z, TEMP[3].xxxx 70: MOV TEMP[2].z, TEMP[10].xyzx 71: DP4 TEMP[5].x, TEMP[1], CONST[71] 72: MOV TEMP[4].z, TEMP[5].xxxx 73: ADD TEMP[7].xy, TEMP[10].xyyy, IMM[0].wwww 74: ABS TEMP[6].xy, TEMP[7].xyyy 75: ADD TEMP[7].xy, TEMP[6].xyyy, -CONST[67].zzzz 76: MUL TEMP[7].xy, TEMP[7].xyyy, CONST[67].wwww 77: MOV_SAT TEMP[6].xy, TEMP[7].xyyy 78: ADD TEMP[7].xy, -TEMP[6].xyyy, IMM[0].xxxx 79: MUL TEMP[6].x, TEMP[7].yyyy, TEMP[7].xxxx 80: MOV_SAT TEMP[8].xy, TEMP[10].xyyy 81: ADD TEMP[7].xyz, TEMP[3].xxxx, IMM[1].xyzz 82: MOV TEMP[3].y, IMM[0].yyyy 83: ABS TEMP[11].x, TEMP[7].xxxx 84: FSGE TEMP[11].x, -TEMP[11].xxxx, IMM[0].yyyy 85: UIF TEMP[11].xxxx :0 86: MOV TEMP[11].x, CONST[85].zzzz 87: ELSE :0 88: MOV TEMP[11].x, IMM[0].yyyy 89: ENDIF 90: ABS TEMP[12].x, TEMP[7].xxxx 91: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 92: UIF TEMP[12].xxxx :0 93: MOV TEMP[12].x, CONST[85].wwww 94: ELSE :0 95: MOV TEMP[12].x, IMM[0].yyyy 96: ENDIF 97: MOV TEMP[10].y, TEMP[12].xxxx 98: ABS TEMP[12].x, TEMP[7].xxxx 99: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 100: UIF TEMP[12].xxxx :0 101: MOV TEMP[12].x, CONST[85].xxxx 102: ELSE :0 103: MOV TEMP[12].x, IMM[0].yyyy 104: ENDIF 105: MOV TEMP[10].z, TEMP[12].xxxx 106: ABS TEMP[12].x, TEMP[7].xxxx 107: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 108: UIF TEMP[12].xxxx :0 109: MOV TEMP[12].x, CONST[85].yyyy 110: ELSE :0 111: MOV TEMP[12].x, IMM[0].yyyy 112: ENDIF 113: MOV TEMP[10].w, TEMP[12].xxxx 114: ABS TEMP[12].x, TEMP[7].yyyy 115: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 116: UIF TEMP[12].xxxx :0 117: MOV TEMP[12].x, CONST[86].zzzz 118: ELSE :0 119: MOV TEMP[12].x, TEMP[11].xxxx 120: ENDIF 121: MOV TEMP[10].x, TEMP[12].xxxx 122: ABS TEMP[11].x, TEMP[7].yyyy 123: FSGE TEMP[11].x, -TEMP[11].xxxx, IMM[0].yyyy 124: UIF TEMP[11].xxxx :0 125: MOV TEMP[11].x, CONST[86].wwww 126: ELSE :0 127: MOV TEMP[11].x, TEMP[10].yyyy 128: ENDIF 129: MOV TEMP[10].y, TEMP[11].xxxx 130: ABS TEMP[11].x, TEMP[7].yyyy 131: FSGE TEMP[11].x, -TEMP[11].xxxx, IMM[0].yyyy 132: UIF TEMP[11].xxxx :0 133: MOV TEMP[11].x, CONST[86].xxxx 134: ELSE :0 135: MOV TEMP[11].x, TEMP[10].zzzz 136: ENDIF 137: MOV TEMP[10].z, TEMP[11].xxxx 138: ABS TEMP[11].x, TEMP[7].yyyy 139: FSGE TEMP[11].x, -TEMP[11].xxxx, IMM[0].yyyy 140: UIF TEMP[11].xxxx :0 141: MOV TEMP[11].x, CONST[86].yyyy 142: ELSE :0 143: MOV TEMP[11].x, TEMP[10].wwww 144: ENDIF 145: MOV TEMP[10].w, TEMP[11].xxxx 146: MOV TEMP[9], TEMP[10] 147: ABS TEMP[11].x, TEMP[7].zzzz 148: FSGE TEMP[11].x, -TEMP[11].xxxx, IMM[0].yyyy 149: UIF TEMP[11].xxxx :0 150: MOV TEMP[11].x, CONST[87].zzzz 151: ELSE :0 152: MOV TEMP[11].x, TEMP[9].xxxx 153: ENDIF 154: MOV TEMP[10].x, TEMP[11].xxxx 155: ABS TEMP[11].x, TEMP[7].zzzz 156: FSGE TEMP[11].x, -TEMP[11].xxxx, IMM[0].yyyy 157: UIF TEMP[11].xxxx :0 158: MOV TEMP[11].x, CONST[87].wwww 159: ELSE :0 160: MOV TEMP[11].x, TEMP[9].yyyy 161: ENDIF 162: MOV TEMP[10].y, TEMP[11].xxxx 163: ABS TEMP[11].x, TEMP[7].zzzz 164: FSGE TEMP[11].x, -TEMP[11].xxxx, IMM[0].yyyy 165: UIF TEMP[11].xxxx :0 166: MOV TEMP[11].x, CONST[87].xxxx 167: ELSE :0 168: MOV TEMP[11].x, TEMP[9].zzzz 169: ENDIF 170: MOV TEMP[10].z, TEMP[11].xxxx 171: ABS TEMP[11].x, TEMP[7].zzzz 172: FSGE TEMP[11].x, -TEMP[11].xxxx, IMM[0].yyyy 173: UIF TEMP[11].xxxx :0 174: MOV TEMP[11].x, CONST[87].yyyy 175: ELSE :0 176: MOV TEMP[11].x, TEMP[9].wwww 177: ENDIF 178: MOV TEMP[10].w, TEMP[11].xxxx 179: MAD TEMP[4].xy, TEMP[8].xyyy, TEMP[10].xyyy, TEMP[10].zwww 180: MOV TEMP[4].w, IMM[0].yyyy 181: ADD TEMP[7], TEMP[4], IMM[2].xxyy 182: TXL TEMP[8].x, TEMP[7], SAMP[1], SHADOW2D 183: MOV TEMP[7].x, TEMP[8].xxxx 184: ADD TEMP[9], TEMP[4], IMM[2].zxyy 185: ADD TEMP[8], TEMP[4], IMM[2].xzyy 186: ADD TEMP[11], TEMP[4], IMM[2].zzyy 187: TXL TEMP[12].x, TEMP[9], SAMP[1], SHADOW2D 188: MOV TEMP[7].y, TEMP[12].xxxx 189: TXL TEMP[12].x, TEMP[8], SAMP[1], SHADOW2D 190: MOV TEMP[7].z, TEMP[12].xxxx 191: TXL TEMP[12].x, TEMP[11], SAMP[1], SHADOW2D 192: MOV TEMP[7].w, TEMP[12].xxxx 193: DP4 TEMP[12].x, TEMP[7], IMM[1].wwww 194: ADD TEMP[7], TEMP[4], IMM[2].xyyy 195: TXL TEMP[13].x, TEMP[7], SAMP[1], SHADOW2D 196: MOV TEMP[7].x, TEMP[13].xxxx 197: ADD TEMP[9], TEMP[4], IMM[2].zyyy 198: TXL TEMP[13], TEMP[9], SAMP[1], SHADOW2D 199: MOV TEMP[9], TEMP[13] 200: ADD TEMP[8], TEMP[4], IMM[2].yzyy 201: TXL TEMP[14], TEMP[8], SAMP[1], SHADOW2D 202: MOV TEMP[8], TEMP[14] 203: ADD TEMP[11], TEMP[4], IMM[2].yxyy 204: TXL TEMP[15], TEMP[11], SAMP[1], SHADOW2D 205: MOV TEMP[11], TEMP[15] 206: MOV TEMP[7].y, TEMP[13].xxxx 207: MOV TEMP[7].z, TEMP[14].xxxx 208: MOV TEMP[7].w, TEMP[15].xxxx 209: DP4 TEMP[13].x, TEMP[7], IMM[2].wwww 210: MOV TEMP[14].xy, TEMP[4].xyyy 211: MOV TEMP[14].z, TEMP[5].xxxx 212: MOV TEMP[14].w, IMM[0].yyyy 213: TXL TEMP[14], TEMP[14], SAMP[1], SHADOW2D 214: MOV TEMP[7].xyz, TEMP[14] 215: ADD TEMP[2].x, TEMP[13].xxxx, TEMP[12].xxxx 216: MAD TEMP[2].x, TEMP[14].xxxx, IMM[3].xxxx, TEMP[2].xxxx 217: FSLT TEMP[12].x, TEMP[6].xxxx, IMM[0].xxxx 218: UIF TEMP[12].xxxx :0 219: ADD TEMP[7].xyz, TEMP[2].zzzz, IMM[3].yzww 220: ABS TEMP[12].x, TEMP[7].xxxx 221: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 222: UIF TEMP[12].xxxx :0 223: MOV TEMP[12].x, CONST[73].xxxx 224: ELSE :0 225: MOV TEMP[12].x, IMM[0].yyyy 226: ENDIF 227: MOV TEMP[10].x, TEMP[12].xxxx 228: ABS TEMP[12].x, TEMP[7].xxxx 229: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 230: UIF TEMP[12].xxxx :0 231: MOV TEMP[12].x, CONST[73].yyyy 232: ELSE :0 233: MOV TEMP[12].x, IMM[0].yyyy 234: ENDIF 235: MOV TEMP[10].y, TEMP[12].xxxx 236: ABS TEMP[12].x, TEMP[7].xxxx 237: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 238: UIF TEMP[12].xxxx :0 239: MOV TEMP[12].x, CONST[73].zzzz 240: ELSE :0 241: MOV TEMP[12].x, IMM[0].yyyy 242: ENDIF 243: MOV TEMP[10].z, TEMP[12].xxxx 244: ABS TEMP[12].x, TEMP[7].xxxx 245: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 246: UIF TEMP[12].xxxx :0 247: MOV TEMP[12].x, CONST[73].wwww 248: ELSE :0 249: MOV TEMP[12].x, IMM[0].yyyy 250: ENDIF 251: MOV TEMP[10].w, TEMP[12].xxxx 252: MOV TEMP[9], TEMP[10] 253: ABS TEMP[12].x, TEMP[7].xxxx 254: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 255: UIF TEMP[12].xxxx :0 256: MOV TEMP[12].x, CONST[74].xxxx 257: ELSE :0 258: MOV TEMP[12].x, IMM[0].yyyy 259: ENDIF 260: MOV TEMP[10].x, TEMP[12].xxxx 261: ABS TEMP[12].x, TEMP[7].xxxx 262: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 263: UIF TEMP[12].xxxx :0 264: MOV TEMP[12].x, CONST[74].yyyy 265: ELSE :0 266: MOV TEMP[12].x, IMM[0].yyyy 267: ENDIF 268: MOV TEMP[10].y, TEMP[12].xxxx 269: ABS TEMP[12].x, TEMP[7].xxxx 270: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 271: UIF TEMP[12].xxxx :0 272: MOV TEMP[12].x, CONST[74].zzzz 273: ELSE :0 274: MOV TEMP[12].x, IMM[0].yyyy 275: ENDIF 276: MOV TEMP[10].z, TEMP[12].xxxx 277: ABS TEMP[12].x, TEMP[7].xxxx 278: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 279: UIF TEMP[12].xxxx :0 280: MOV TEMP[12].x, CONST[74].wwww 281: ELSE :0 282: MOV TEMP[12].x, IMM[0].yyyy 283: ENDIF 284: MOV TEMP[10].w, TEMP[12].xxxx 285: MOV TEMP[8], TEMP[10] 286: ABS TEMP[12].x, TEMP[7].yyyy 287: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 288: UIF TEMP[12].xxxx :0 289: MOV TEMP[12].x, CONST[77].xxxx 290: ELSE :0 291: MOV TEMP[12].x, TEMP[9].xxxx 292: ENDIF 293: MOV TEMP[10].x, TEMP[12].xxxx 294: ABS TEMP[12].x, TEMP[7].yyyy 295: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 296: UIF TEMP[12].xxxx :0 297: MOV TEMP[12].x, CONST[77].yyyy 298: ELSE :0 299: MOV TEMP[12].x, TEMP[9].yyyy 300: ENDIF 301: MOV TEMP[10].y, TEMP[12].xxxx 302: ABS TEMP[12].x, TEMP[7].yyyy 303: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 304: UIF TEMP[12].xxxx :0 305: MOV TEMP[12].x, CONST[77].zzzz 306: ELSE :0 307: MOV TEMP[12].x, TEMP[9].zzzz 308: ENDIF 309: MOV TEMP[10].z, TEMP[12].xxxx 310: ABS TEMP[12].x, TEMP[7].yyyy 311: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 312: UIF TEMP[12].xxxx :0 313: MOV TEMP[12].x, CONST[77].wwww 314: ELSE :0 315: MOV TEMP[12].x, TEMP[9].wwww 316: ENDIF 317: MOV TEMP[10].w, TEMP[12].xxxx 318: MOV TEMP[9], TEMP[10] 319: ABS TEMP[12].x, TEMP[7].yyyy 320: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 321: UIF TEMP[12].xxxx :0 322: MOV TEMP[12].x, CONST[78].xxxx 323: ELSE :0 324: MOV TEMP[12].x, TEMP[8].xxxx 325: ENDIF 326: MOV TEMP[10].x, TEMP[12].xxxx 327: ABS TEMP[12].x, TEMP[7].yyyy 328: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 329: UIF TEMP[12].xxxx :0 330: MOV TEMP[12].x, CONST[78].yyyy 331: ELSE :0 332: MOV TEMP[12].x, TEMP[8].yyyy 333: ENDIF 334: MOV TEMP[10].y, TEMP[12].xxxx 335: ABS TEMP[12].x, TEMP[7].yyyy 336: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 337: UIF TEMP[12].xxxx :0 338: MOV TEMP[12].x, CONST[78].zzzz 339: ELSE :0 340: MOV TEMP[12].x, TEMP[8].zzzz 341: ENDIF 342: MOV TEMP[10].z, TEMP[12].xxxx 343: ABS TEMP[12].x, TEMP[7].yyyy 344: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 345: UIF TEMP[12].xxxx :0 346: MOV TEMP[12].x, CONST[78].wwww 347: ELSE :0 348: MOV TEMP[12].x, TEMP[8].wwww 349: ENDIF 350: MOV TEMP[10].w, TEMP[12].xxxx 351: MOV TEMP[8], TEMP[10] 352: ABS TEMP[12].x, TEMP[7].zzzz 353: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 354: UIF TEMP[12].xxxx :0 355: MOV TEMP[12].x, CONST[81].xxxx 356: ELSE :0 357: MOV TEMP[12].x, TEMP[9].xxxx 358: ENDIF 359: MOV TEMP[10].x, TEMP[12].xxxx 360: ABS TEMP[12].x, TEMP[7].zzzz 361: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 362: UIF TEMP[12].xxxx :0 363: MOV TEMP[12].x, CONST[81].yyyy 364: ELSE :0 365: MOV TEMP[12].x, TEMP[9].yyyy 366: ENDIF 367: MOV TEMP[10].y, TEMP[12].xxxx 368: ABS TEMP[12].x, TEMP[7].zzzz 369: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 370: UIF TEMP[12].xxxx :0 371: MOV TEMP[12].x, CONST[81].zzzz 372: ELSE :0 373: MOV TEMP[12].x, TEMP[9].zzzz 374: ENDIF 375: MOV TEMP[10].z, TEMP[12].xxxx 376: ABS TEMP[12].x, TEMP[7].zzzz 377: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 378: UIF TEMP[12].xxxx :0 379: MOV TEMP[12].x, CONST[81].wwww 380: ELSE :0 381: MOV TEMP[12].x, TEMP[9].wwww 382: ENDIF 383: MOV TEMP[10].w, TEMP[12].xxxx 384: MOV TEMP[9], TEMP[10] 385: ABS TEMP[12].x, TEMP[7].zzzz 386: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 387: UIF TEMP[12].xxxx :0 388: MOV TEMP[12].x, CONST[82].xxxx 389: ELSE :0 390: MOV TEMP[12].x, TEMP[8].xxxx 391: ENDIF 392: MOV TEMP[10].x, TEMP[12].xxxx 393: ABS TEMP[12].x, TEMP[7].zzzz 394: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 395: UIF TEMP[12].xxxx :0 396: MOV TEMP[12].x, CONST[82].yyyy 397: ELSE :0 398: MOV TEMP[12].x, TEMP[8].yyyy 399: ENDIF 400: MOV TEMP[10].y, TEMP[12].xxxx 401: ABS TEMP[12].x, TEMP[7].zzzz 402: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 403: UIF TEMP[12].xxxx :0 404: MOV TEMP[12].x, CONST[82].zzzz 405: ELSE :0 406: MOV TEMP[12].x, TEMP[8].zzzz 407: ENDIF 408: MOV TEMP[10].z, TEMP[12].xxxx 409: ABS TEMP[12].x, TEMP[7].zzzz 410: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 411: UIF TEMP[12].xxxx :0 412: MOV TEMP[12].x, CONST[82].wwww 413: ELSE :0 414: MOV TEMP[12].x, TEMP[8].wwww 415: ENDIF 416: MOV TEMP[10].w, TEMP[12].xxxx 417: DP4 TEMP[9].x, TEMP[1], TEMP[9] 418: MOV_SAT TEMP[9].x, TEMP[9].xxxx 419: DP4 TEMP[12].x, TEMP[1], TEMP[10] 420: MOV_SAT TEMP[12].x, TEMP[12].xxxx 421: MOV TEMP[9].y, TEMP[12].xxxx 422: ABS TEMP[12].x, TEMP[7].xxxx 423: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 424: UIF TEMP[12].xxxx :0 425: MOV TEMP[12].x, CONST[86].zzzz 426: ELSE :0 427: MOV TEMP[12].x, IMM[0].yyyy 428: ENDIF 429: ABS TEMP[13].x, TEMP[7].xxxx 430: FSGE TEMP[13].x, -TEMP[13].xxxx, IMM[0].yyyy 431: UIF TEMP[13].xxxx :0 432: MOV TEMP[13].x, CONST[86].wwww 433: ELSE :0 434: MOV TEMP[13].x, IMM[0].yyyy 435: ENDIF 436: MOV TEMP[10].y, TEMP[13].xxxx 437: ABS TEMP[13].x, TEMP[7].xxxx 438: FSGE TEMP[13].x, -TEMP[13].xxxx, IMM[0].yyyy 439: UIF TEMP[13].xxxx :0 440: MOV TEMP[13].x, CONST[86].xxxx 441: ELSE :0 442: MOV TEMP[13].x, IMM[0].yyyy 443: ENDIF 444: MOV TEMP[10].z, TEMP[13].xxxx 445: ABS TEMP[13].x, TEMP[7].xxxx 446: FSGE TEMP[13].x, -TEMP[13].xxxx, IMM[0].yyyy 447: UIF TEMP[13].xxxx :0 448: MOV TEMP[13].x, CONST[86].yyyy 449: ELSE :0 450: MOV TEMP[13].x, IMM[0].yyyy 451: ENDIF 452: MOV TEMP[10].w, TEMP[13].xxxx 453: ABS TEMP[13].x, TEMP[7].yyyy 454: FSGE TEMP[13].x, -TEMP[13].xxxx, IMM[0].yyyy 455: UIF TEMP[13].xxxx :0 456: MOV TEMP[13].x, CONST[87].zzzz 457: ELSE :0 458: MOV TEMP[13].x, TEMP[12].xxxx 459: ENDIF 460: ABS TEMP[12].x, TEMP[7].yyyy 461: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 462: UIF TEMP[12].xxxx :0 463: MOV TEMP[12].x, CONST[87].wwww 464: ELSE :0 465: MOV TEMP[12].x, TEMP[10].yyyy 466: ENDIF 467: MOV TEMP[10].y, TEMP[12].xxxx 468: ABS TEMP[12].x, TEMP[7].yyyy 469: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 470: UIF TEMP[12].xxxx :0 471: MOV TEMP[12].x, CONST[87].xxxx 472: ELSE :0 473: MOV TEMP[12].x, TEMP[10].zzzz 474: ENDIF 475: MOV TEMP[10].z, TEMP[12].xxxx 476: ABS TEMP[12].x, TEMP[7].yyyy 477: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 478: UIF TEMP[12].xxxx :0 479: MOV TEMP[12].x, CONST[87].yyyy 480: ELSE :0 481: MOV TEMP[12].x, TEMP[10].wwww 482: ENDIF 483: MOV TEMP[10].w, TEMP[12].xxxx 484: ABS TEMP[12].x, TEMP[7].zzzz 485: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 486: UIF TEMP[12].xxxx :0 487: MOV TEMP[12].x, CONST[88].zzzz 488: ELSE :0 489: MOV TEMP[12].x, TEMP[13].xxxx 490: ENDIF 491: MOV TEMP[10].x, TEMP[12].xxxx 492: ABS TEMP[12].x, TEMP[7].zzzz 493: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 494: UIF TEMP[12].xxxx :0 495: MOV TEMP[12].x, CONST[88].wwww 496: ELSE :0 497: MOV TEMP[12].x, TEMP[10].yyyy 498: ENDIF 499: MOV TEMP[10].y, TEMP[12].xxxx 500: ABS TEMP[12].x, TEMP[7].zzzz 501: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 502: UIF TEMP[12].xxxx :0 503: MOV TEMP[12].x, CONST[88].xxxx 504: ELSE :0 505: MOV TEMP[12].x, TEMP[10].zzzz 506: ENDIF 507: MOV TEMP[10].z, TEMP[12].xxxx 508: ABS TEMP[12].x, TEMP[7].zzzz 509: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 510: UIF TEMP[12].xxxx :0 511: MOV TEMP[12].x, CONST[88].yyyy 512: ELSE :0 513: MOV TEMP[12].x, TEMP[10].wwww 514: ENDIF 515: MOV TEMP[10].w, TEMP[12].xxxx 516: MAD TEMP[4].xy, TEMP[9].xyyy, TEMP[10].xyyy, TEMP[10].zwww 517: ADD TEMP[1], TEMP[4], IMM[2].xxyy 518: TXL TEMP[10].x, TEMP[1], SAMP[1], SHADOW2D 519: MOV TEMP[1].x, TEMP[10].xxxx 520: ADD TEMP[3], TEMP[4], IMM[2].zxyy 521: ADD TEMP[9], TEMP[4], IMM[2].xzyy 522: ADD TEMP[8], TEMP[4], IMM[2].zzyy 523: TXL TEMP[10].x, TEMP[3], SAMP[1], SHADOW2D 524: MOV TEMP[1].y, TEMP[10].xxxx 525: TXL TEMP[10].x, TEMP[9], SAMP[1], SHADOW2D 526: MOV TEMP[1].z, TEMP[10].xxxx 527: TXL TEMP[10].x, TEMP[8], SAMP[1], SHADOW2D 528: MOV TEMP[1].w, TEMP[10].xxxx 529: DP4 TEMP[10].x, TEMP[1], IMM[1].wwww 530: ADD TEMP[3], TEMP[4], IMM[2].xyyy 531: TXL TEMP[12].x, TEMP[3], SAMP[1], SHADOW2D 532: MOV TEMP[3].x, TEMP[12].xxxx 533: ADD TEMP[9], TEMP[4], IMM[2].zyyy 534: TXL TEMP[9].x, TEMP[9], SAMP[1], SHADOW2D 535: ADD TEMP[8], TEMP[4], IMM[2].yzyy 536: TXL TEMP[8].x, TEMP[8], SAMP[1], SHADOW2D 537: ADD TEMP[11], TEMP[4], IMM[2].yxyy 538: TXL TEMP[11].x, TEMP[11], SAMP[1], SHADOW2D 539: MOV TEMP[3].y, TEMP[9].xxxx 540: MOV TEMP[3].z, TEMP[8].xxxx 541: MOV TEMP[3].w, TEMP[11].xxxx 542: DP4 TEMP[3].x, TEMP[3], IMM[2].wwww 543: MOV TEMP[8].xy, TEMP[4].xyyy 544: MOV TEMP[8].z, TEMP[5].xxxx 545: MOV TEMP[8].w, IMM[0].yyyy 546: TXL TEMP[5].x, TEMP[8], SAMP[1], SHADOW2D 547: ADD TEMP[1].x, TEMP[3].xxxx, TEMP[10].xxxx 548: MAD TEMP[1].x, TEMP[5].xxxx, IMM[3].xxxx, TEMP[1].xxxx 549: FSGE TEMP[3].x, TEMP[7].zzzz, IMM[0].yyyy 550: UIF TEMP[3].xxxx :0 551: MOV TEMP[3].x, IMM[0].xxxx 552: ELSE :0 553: MOV TEMP[3].x, TEMP[1].xxxx 554: ENDIF 555: LRP TEMP[4].x, TEMP[6].xxxx, TEMP[2].xxxx, TEMP[3].xxxx 556: MOV TEMP[2].x, TEMP[4].xxxx 557: ENDIF 558: ADD TEMP[1].xyz, -CONST[89].xyzz, IN[3].xyzz 559: DP3 TEMP[3].x, TEMP[1].xyzz, TEMP[1].xyzz 560: MAD TEMP[1].x, TEMP[3].xxxx, CONST[68].yyyy, CONST[68].xxxx 561: MOV_SAT TEMP[3].x, TEMP[1].xxxx 562: LRP TEMP[4].x, TEMP[3].xxxx, IMM[0].xxxx, TEMP[2].xxxx 563: ELSE :0 564: MOV TEMP[4].x, IMM[0].xxxx 565: ENDIF 566: MAD TEMP[1].xyz, IN[1].xyzz, TEMP[4].xxxx, IN[2].xyzz 567: ADD TEMP[3].x, TEMP[0].wwww, IMM[1].yyyy 568: MAD TEMP[3].x, CONST[20].wwww, TEMP[3].xxxx, IMM[0].xxxx 569: ADD TEMP[4].x, TEMP[0].wwww, CONST[12].xxxx 570: ADD TEMP[2].xyz, IMM[1].yyyy, CONST[1].xyzz 571: MOV_SAT TEMP[4].x, TEMP[4].xxxx 572: MAD TEMP[2].xyz, TEMP[4].xxxx, TEMP[2].xyzz, IMM[0].xxxx 573: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xyzz 574: MUL TEMP[2].x, TEMP[3].xxxx, CONST[1].wwww 575: MAD TEMP[3].x, TEMP[2].xxxx, IN[2].wwww, -TEMP[2].xxxx 576: MAD TEMP[2].x, CONST[12].wwww, TEMP[3].xxxx, TEMP[2].xxxx 577: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xyzz 578: ADD TEMP[1].xyz, CONST[20].xyzz, -IN[3].xyzz 579: DP3 TEMP[1].x, TEMP[1].xyzz, TEMP[1].xyzz 580: SQRT TEMP[1].x, TEMP[1].xxxx 581: MAD TEMP[1].x, TEMP[1].xxxx, CONST[21].wwww, CONST[21].xxxx 582: MOV_SAT TEMP[1].x, TEMP[1].xxxx 583: MIN TEMP[1].x, TEMP[1].xxxx, CONST[21].zzzz 584: ABS TEMP[3].x, CONST[12].yyyy 585: MUL TEMP[4].xyz, TEMP[0].xyzz, CONST[30].xxxx 586: MUL TEMP[5].x, CONST[29].wwww, IN[3].wwww 587: FSGE TEMP[3].x, -TEMP[3].xxxx, IMM[0].yyyy 588: UIF TEMP[3].xxxx :0 589: MOV TEMP[3].x, TEMP[2].xxxx 590: ELSE :0 591: MOV TEMP[3].x, TEMP[5].xxxx 592: ENDIF 593: MOV TEMP[2].w, TEMP[3].xxxx 594: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[1].xxxx 595: MAD TEMP[0].xyz, TEMP[0].xyzz, -CONST[30].xxxx, CONST[29].xyzz 596: MAD TEMP[2].xyz, TEMP[1].xxxx, TEMP[0].xyzz, TEMP[4].xyzz 597: MOV OUT[0], TEMP[2] 598: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 28) %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 192) %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 196) %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 204) %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 320) %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 324) %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 328) %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 332) %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 336) %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 344) %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 348) %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 464) %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 468) %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 472) %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 476) %43 = call float @llvm.SI.load.const(<16 x i8> %24, i32 480) %44 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1080) %45 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1084) %46 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1088) %47 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1092) %48 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1168) %49 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1172) %50 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1176) %51 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1180) %52 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1184) %53 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1188) %54 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1192) %55 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1196) %56 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1232) %57 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1236) %58 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1240) %59 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1244) %60 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1248) %61 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1252) %62 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1256) %63 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1260) %64 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1296) %65 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1300) %66 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1304) %67 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1308) %68 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1312) %69 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1316) %70 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1320) %71 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1324) %72 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1376) %73 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1380) %74 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1384) %75 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1388) %76 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1392) %77 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1396) %78 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1400) %79 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1404) %80 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1408) %81 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1412) %82 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1416) %83 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1420) %84 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1424) %85 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1428) %86 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1432) %87 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1440) %88 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %89 = load <8 x i32>, <8 x i32> addrspace(2)* %88, align 32, !tbaa !0 %90 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %91 = load <4 x i32>, <4 x i32> addrspace(2)* %90, align 16, !tbaa !0 %92 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %93 = load <8 x i32>, <8 x i32> addrspace(2)* %92, align 32, !tbaa !0 %94 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %95 = load <4 x i32>, <4 x i32> addrspace(2)* %94, align 16, !tbaa !0 %96 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %97 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %98 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %99 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %100 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) %101 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) %102 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) %103 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) %104 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) %105 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) %106 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) %107 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) %108 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) %109 = bitcast float %96 to i32 %110 = bitcast float %97 to i32 %111 = insertelement <2 x i32> undef, i32 %109, i32 0 %112 = insertelement <2 x i32> %111, i32 %110, i32 1 %113 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %112, <8 x i32> %89, <4 x i32> %91, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %114 = extractelement <4 x float> %113, i32 0 %115 = extractelement <4 x float> %113, i32 1 %116 = extractelement <4 x float> %113, i32 2 %117 = extractelement <4 x float> %113, i32 3 %118 = bitcast float %87 to i32 %119 = icmp eq i32 %118, 0 br i1 %119, label %ENDIF, label %IF IF: ; preds = %main_body %120 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1372) %121 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1368) %122 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1364) %123 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1360) %124 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1148) %125 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1144) %126 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1140) %127 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1136) %128 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1132) %129 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1128) %130 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1124) %131 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1120) %132 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1116) %133 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1112) %134 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1108) %135 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1104) %136 = fadd float %105, 0.000000e+00 %137 = fadd float %106, 0.000000e+00 %138 = fadd float %107, 0.000000e+00 %139 = fmul float %105, 0.000000e+00 %140 = fadd float %139, 1.000000e+00 %141 = fmul float %136, %135 %142 = fmul float %137, %134 %143 = fadd float %141, %142 %144 = fmul float %138, %133 %145 = fadd float %143, %144 %146 = fmul float %140, %132 %147 = fadd float %145, %146 %148 = fmul float %136, %131 %149 = fmul float %137, %130 %150 = fadd float %148, %149 %151 = fmul float %138, %129 %152 = fadd float %150, %151 %153 = fmul float %140, %128 %154 = fadd float %152, %153 %155 = call float @llvm.AMDIL.clamp.(float %147, float 0.000000e+00, float 1.000000e+00) %156 = call float @llvm.AMDIL.clamp.(float %154, float 0.000000e+00, float 1.000000e+00) %157 = fsub float %155, %147 %158 = fsub float %156, %154 %159 = fadd float %157, %158 %160 = fmul float %136, %48 %161 = fmul float %137, %49 %162 = fadd float %160, %161 %163 = fmul float %138, %50 %164 = fadd float %162, %163 %165 = fmul float %140, %51 %166 = fadd float %164, %165 %167 = fmul float %136, %52 %168 = fmul float %137, %53 %169 = fadd float %167, %168 %170 = fmul float %138, %54 %171 = fadd float %169, %170 %172 = fmul float %140, %55 %173 = fadd float %171, %172 %174 = call float @llvm.AMDIL.clamp.(float %166, float 0.000000e+00, float 1.000000e+00) %175 = call float @llvm.AMDIL.clamp.(float %173, float 0.000000e+00, float 1.000000e+00) %176 = fsub float %174, %166 %177 = fsub float %175, %173 %178 = fadd float %176, %177 %179 = fmul float %136, %56 %180 = fmul float %137, %57 %181 = fadd float %179, %180 %182 = fmul float %138, %58 %183 = fadd float %181, %182 %184 = fmul float %140, %59 %185 = fadd float %183, %184 %186 = fmul float %136, %60 %187 = fmul float %137, %61 %188 = fadd float %186, %187 %189 = fmul float %138, %62 %190 = fadd float %188, %189 %191 = fmul float %140, %63 %192 = fadd float %190, %191 %193 = call float @llvm.fabs.f32(float %178) %194 = fcmp ole float %193, -0.000000e+00 %. = select i1 %194, float %166, float %185 %195 = call float @llvm.fabs.f32(float %178) %196 = fcmp ole float %195, -0.000000e+00 %temp24.0 = select i1 %196, float %173, float %192 %197 = call float @llvm.fabs.f32(float %178) %198 = fcmp ole float %197, -0.000000e+00 %.236 = select i1 %198, float 1.000000e+00, float 2.000000e+00 %199 = call float @llvm.fabs.f32(float %159) %200 = fcmp ole float %199, -0.000000e+00 %temp24.2 = select i1 %200, float %147, float %. %201 = call float @llvm.fabs.f32(float %159) %202 = fcmp ole float %201, -0.000000e+00 %.temp24.0 = select i1 %202, float %154, float %temp24.0 %203 = call float @llvm.fabs.f32(float %159) %204 = fcmp ole float %203, -0.000000e+00 %temp12.1 = select i1 %204, float 0.000000e+00, float %.236 %205 = fmul float %136, %127 %206 = fmul float %137, %126 %207 = fadd float %205, %206 %208 = fmul float %138, %125 %209 = fadd float %207, %208 %210 = fmul float %140, %124 %211 = fadd float %209, %210 %212 = fadd float %temp24.2, -5.000000e-01 %213 = fadd float %.temp24.0, -5.000000e-01 %214 = call float @llvm.fabs.f32(float %212) %215 = call float @llvm.fabs.f32(float %213) %216 = fsub float %214, %44 %217 = fsub float %215, %44 %218 = fmul float %216, %45 %219 = fmul float %217, %45 %220 = call float @llvm.AMDIL.clamp.(float %218, float 0.000000e+00, float 1.000000e+00) %221 = call float @llvm.AMDIL.clamp.(float %219, float 0.000000e+00, float 1.000000e+00) %222 = fsub float 1.000000e+00, %220 %223 = fsub float 1.000000e+00, %221 %224 = fmul float %223, %222 %225 = call float @llvm.AMDIL.clamp.(float %temp24.2, float 0.000000e+00, float 1.000000e+00) %226 = call float @llvm.AMDIL.clamp.(float %.temp24.0, float 0.000000e+00, float 1.000000e+00) %227 = fadd float %temp12.1, -1.000000e+00 %228 = fadd float %temp12.1, -2.000000e+00 %229 = call float @llvm.fabs.f32(float %temp12.1) %230 = fcmp ole float %229, -0.000000e+00 %.237 = select i1 %230, float %121, float 0.000000e+00 %231 = call float @llvm.fabs.f32(float %temp12.1) %232 = fcmp ole float %231, -0.000000e+00 %temp48.0 = select i1 %232, float %120, float 0.000000e+00 %233 = call float @llvm.fabs.f32(float %temp12.1) %234 = fcmp ole float %233, -0.000000e+00 %.238 = select i1 %234, float %123, float 0.000000e+00 %235 = call float @llvm.fabs.f32(float %temp12.1) %236 = fcmp ole float %235, -0.000000e+00 %temp48.2 = select i1 %236, float %122, float 0.000000e+00 %237 = call float @llvm.fabs.f32(float %227) %238 = fcmp ole float %237, -0.000000e+00 %..237 = select i1 %238, float %74, float %.237 %239 = call float @llvm.fabs.f32(float %227) %240 = fcmp ole float %239, -0.000000e+00 %temp44.2 = select i1 %240, float %75, float %temp48.0 %241 = call float @llvm.fabs.f32(float %227) %242 = fcmp ole float %241, -0.000000e+00 %..238 = select i1 %242, float %72, float %.238 %243 = call float @llvm.fabs.f32(float %227) %244 = fcmp ole float %243, -0.000000e+00 %temp44.4 = select i1 %244, float %73, float %temp48.2 %245 = call float @llvm.fabs.f32(float %228) %246 = fcmp ole float %245, -0.000000e+00 %...237 = select i1 %246, float %78, float %..237 %247 = call float @llvm.fabs.f32(float %228) %248 = fcmp ole float %247, -0.000000e+00 %temp44.6 = select i1 %248, float %79, float %temp44.2 %249 = call float @llvm.fabs.f32(float %228) %250 = fcmp ole float %249, -0.000000e+00 %...238 = select i1 %250, float %76, float %..238 %251 = call float @llvm.fabs.f32(float %228) %252 = fcmp ole float %251, -0.000000e+00 %temp44.8 = select i1 %252, float %77, float %temp44.4 %253 = fmul float %225, %...237 %254 = fadd float %253, %...238 %255 = fmul float %226, %temp44.6 %256 = fadd float %255, %temp44.8 %257 = fadd float %254, 0x3F40000000000000 %258 = fadd float %256, 0x3F40000000000000 %259 = fadd float %211, 0.000000e+00 %260 = bitcast float %259 to i32 %261 = bitcast float %257 to i32 %262 = bitcast float %258 to i32 %263 = insertelement <4 x i32> undef, i32 %260, i32 0 %264 = insertelement <4 x i32> %263, i32 %261, i32 1 %265 = insertelement <4 x i32> %264, i32 %262, i32 2 %266 = insertelement <4 x i32> %265, i32 0, i32 3 %267 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %266, <8 x i32> %93, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %268 = extractelement <4 x float> %267, i32 0 %269 = fadd float %254, 0xBF40000000000000 %270 = fadd float %256, 0x3F40000000000000 %271 = fadd float %211, 0.000000e+00 %272 = fadd float %254, 0x3F40000000000000 %273 = fadd float %256, 0xBF40000000000000 %274 = fadd float %211, 0.000000e+00 %275 = fadd float %254, 0xBF40000000000000 %276 = fadd float %256, 0xBF40000000000000 %277 = fadd float %211, 0.000000e+00 %278 = bitcast float %271 to i32 %279 = bitcast float %269 to i32 %280 = bitcast float %270 to i32 %281 = insertelement <4 x i32> undef, i32 %278, i32 0 %282 = insertelement <4 x i32> %281, i32 %279, i32 1 %283 = insertelement <4 x i32> %282, i32 %280, i32 2 %284 = insertelement <4 x i32> %283, i32 0, i32 3 %285 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %284, <8 x i32> %93, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %286 = extractelement <4 x float> %285, i32 0 %287 = bitcast float %274 to i32 %288 = bitcast float %272 to i32 %289 = bitcast float %273 to i32 %290 = insertelement <4 x i32> undef, i32 %287, i32 0 %291 = insertelement <4 x i32> %290, i32 %288, i32 1 %292 = insertelement <4 x i32> %291, i32 %289, i32 2 %293 = insertelement <4 x i32> %292, i32 0, i32 3 %294 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %293, <8 x i32> %93, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %295 = extractelement <4 x float> %294, i32 0 %296 = bitcast float %277 to i32 %297 = bitcast float %275 to i32 %298 = bitcast float %276 to i32 %299 = insertelement <4 x i32> undef, i32 %296, i32 0 %300 = insertelement <4 x i32> %299, i32 %297, i32 1 %301 = insertelement <4 x i32> %300, i32 %298, i32 2 %302 = insertelement <4 x i32> %301, i32 0, i32 3 %303 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %302, <8 x i32> %93, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %304 = extractelement <4 x float> %303, i32 0 %305 = fmul float %268, 6.250000e-02 %306 = fmul float %286, 6.250000e-02 %307 = fadd float %305, %306 %308 = fmul float %295, 6.250000e-02 %309 = fadd float %307, %308 %310 = fmul float %304, 6.250000e-02 %311 = fadd float %309, %310 %312 = fadd float %254, 0x3F40000000000000 %313 = fadd float %256, 0.000000e+00 %314 = fadd float %211, 0.000000e+00 %315 = bitcast float %314 to i32 %316 = bitcast float %312 to i32 %317 = bitcast float %313 to i32 %318 = insertelement <4 x i32> undef, i32 %315, i32 0 %319 = insertelement <4 x i32> %318, i32 %316, i32 1 %320 = insertelement <4 x i32> %319, i32 %317, i32 2 %321 = insertelement <4 x i32> %320, i32 0, i32 3 %322 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %321, <8 x i32> %93, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %323 = extractelement <4 x float> %322, i32 0 %324 = fadd float %254, 0xBF40000000000000 %325 = fadd float %256, 0.000000e+00 %326 = fadd float %211, 0.000000e+00 %327 = bitcast float %326 to i32 %328 = bitcast float %324 to i32 %329 = bitcast float %325 to i32 %330 = insertelement <4 x i32> undef, i32 %327, i32 0 %331 = insertelement <4 x i32> %330, i32 %328, i32 1 %332 = insertelement <4 x i32> %331, i32 %329, i32 2 %333 = insertelement <4 x i32> %332, i32 0, i32 3 %334 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %333, <8 x i32> %93, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %335 = extractelement <4 x float> %334, i32 0 %336 = fadd float %254, 0.000000e+00 %337 = fadd float %256, 0xBF40000000000000 %338 = fadd float %211, 0.000000e+00 %339 = bitcast float %338 to i32 %340 = bitcast float %336 to i32 %341 = bitcast float %337 to i32 %342 = insertelement <4 x i32> undef, i32 %339, i32 0 %343 = insertelement <4 x i32> %342, i32 %340, i32 1 %344 = insertelement <4 x i32> %343, i32 %341, i32 2 %345 = insertelement <4 x i32> %344, i32 0, i32 3 %346 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %345, <8 x i32> %93, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %347 = extractelement <4 x float> %346, i32 0 %348 = fadd float %254, 0.000000e+00 %349 = fadd float %256, 0x3F40000000000000 %350 = fadd float %211, 0.000000e+00 %351 = bitcast float %350 to i32 %352 = bitcast float %348 to i32 %353 = bitcast float %349 to i32 %354 = insertelement <4 x i32> undef, i32 %351, i32 0 %355 = insertelement <4 x i32> %354, i32 %352, i32 1 %356 = insertelement <4 x i32> %355, i32 %353, i32 2 %357 = insertelement <4 x i32> %356, i32 0, i32 3 %358 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %357, <8 x i32> %93, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %359 = extractelement <4 x float> %358, i32 0 %360 = fmul float %323, 1.250000e-01 %361 = fmul float %335, 1.250000e-01 %362 = fadd float %360, %361 %363 = fmul float %347, 1.250000e-01 %364 = fadd float %362, %363 %365 = fmul float %359, 1.250000e-01 %366 = fadd float %364, %365 %367 = bitcast float %211 to i32 %368 = bitcast float %254 to i32 %369 = bitcast float %256 to i32 %370 = insertelement <4 x i32> undef, i32 %367, i32 0 %371 = insertelement <4 x i32> %370, i32 %368, i32 1 %372 = insertelement <4 x i32> %371, i32 %369, i32 2 %373 = insertelement <4 x i32> %372, i32 0, i32 3 %374 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %373, <8 x i32> %93, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %375 = extractelement <4 x float> %374, i32 0 %376 = fadd float %366, %311 %377 = fmul float %375, 2.500000e-01 %378 = fadd float %377, %376 %379 = fcmp olt float %224, 1.000000e+00 br i1 %379, label %IF119, label %ENDIF118 ENDIF: ; preds = %main_body, %ENDIF118 %temp16.0 = phi float [ %682, %ENDIF118 ], [ 1.000000e+00, %main_body ] %380 = fmul float %98, %temp16.0 %381 = fadd float %380, %101 %382 = fmul float %99, %temp16.0 %383 = fadd float %382, %102 %384 = fmul float %100, %temp16.0 %385 = fadd float %384, %103 %386 = fadd float %117, -1.000000e+00 %387 = fmul float %35, %386 %388 = fadd float %387, 1.000000e+00 %389 = fadd float %117, %29 %390 = fadd float %25, -1.000000e+00 %391 = fadd float %26, -1.000000e+00 %392 = fadd float %27, -1.000000e+00 %393 = call float @llvm.AMDIL.clamp.(float %389, float 0.000000e+00, float 1.000000e+00) %394 = fmul float %393, %390 %395 = fadd float %394, 1.000000e+00 %396 = fmul float %393, %391 %397 = fadd float %396, 1.000000e+00 %398 = fmul float %393, %392 %399 = fadd float %398, 1.000000e+00 %400 = fmul float %381, %395 %401 = fmul float %383, %397 %402 = fmul float %385, %399 %403 = fmul float %388, %28 %404 = fmul float %403, %104 %405 = fsub float %404, %403 %406 = fmul float %31, %405 %407 = fadd float %406, %403 %408 = fmul float %114, %400 %409 = fmul float %115, %401 %410 = fmul float %116, %402 %411 = fsub float %32, %105 %412 = fsub float %33, %106 %413 = fsub float %34, %107 %414 = fmul float %411, %411 %415 = fmul float %412, %412 %416 = fadd float %415, %414 %417 = fmul float %413, %413 %418 = fadd float %416, %417 %419 = call float @llvm.sqrt.f32(float %418) %420 = fmul float %419, %38 %421 = fadd float %420, %36 %422 = call float @llvm.AMDIL.clamp.(float %421, float 0.000000e+00, float 1.000000e+00) %423 = call float @llvm.minnum.f32(float %422, float %37) %424 = call float @llvm.fabs.f32(float %30) %425 = fmul float %408, %43 %426 = fmul float %409, %43 %427 = fmul float %410, %43 %428 = fmul float %42, %108 %429 = fcmp ole float %424, -0.000000e+00 %.235 = select i1 %429, float %407, float %428 %430 = fmul float %423, %423 %431 = fmul float %43, %408 %432 = fsub float %39, %431 %433 = fmul float %43, %409 %434 = fsub float %40, %433 %435 = fmul float %43, %410 %436 = fsub float %41, %435 %437 = fmul float %430, %432 %438 = fadd float %437, %425 %439 = fmul float %430, %434 %440 = fadd float %439, %426 %441 = fmul float %430, %436 %442 = fadd float %441, %427 %443 = call i32 @llvm.SI.packf16(float %438, float %440) %444 = bitcast i32 %443 to float %445 = call i32 @llvm.SI.packf16(float %442, float %.235) %446 = bitcast i32 %445 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %444, float %446, float %444, float %446) ret void IF119: ; preds = %IF %447 = fadd float %temp12.1, 0.000000e+00 %448 = fadd float %temp12.1, -1.000000e+00 %449 = fadd float %temp12.1, -2.000000e+00 %450 = call float @llvm.fabs.f32(float %447) %451 = fcmp ole float %450, -0.000000e+00 %.239 = select i1 %451, float %48, float 0.000000e+00 %452 = call float @llvm.fabs.f32(float %447) %453 = fcmp ole float %452, -0.000000e+00 %temp48.5 = select i1 %453, float %49, float 0.000000e+00 %454 = call float @llvm.fabs.f32(float %447) %455 = fcmp ole float %454, -0.000000e+00 %.240 = select i1 %455, float %50, float 0.000000e+00 %456 = call float @llvm.fabs.f32(float %447) %457 = fcmp ole float %456, -0.000000e+00 %temp48.7 = select i1 %457, float %51, float 0.000000e+00 %458 = call float @llvm.fabs.f32(float %447) %459 = fcmp ole float %458, -0.000000e+00 %.241 = select i1 %459, float %52, float 0.000000e+00 %460 = call float @llvm.fabs.f32(float %447) %461 = fcmp ole float %460, -0.000000e+00 %temp48.9 = select i1 %461, float %53, float 0.000000e+00 %462 = call float @llvm.fabs.f32(float %447) %463 = fcmp ole float %462, -0.000000e+00 %.242 = select i1 %463, float %54, float 0.000000e+00 %464 = call float @llvm.fabs.f32(float %447) %465 = fcmp ole float %464, -0.000000e+00 %temp48.11 = select i1 %465, float %55, float 0.000000e+00 %466 = call float @llvm.fabs.f32(float %448) %467 = fcmp ole float %466, -0.000000e+00 %..239 = select i1 %467, float %56, float %.239 %468 = call float @llvm.fabs.f32(float %448) %469 = fcmp ole float %468, -0.000000e+00 %temp48.13 = select i1 %469, float %57, float %temp48.5 %470 = call float @llvm.fabs.f32(float %448) %471 = fcmp ole float %470, -0.000000e+00 %..240 = select i1 %471, float %58, float %.240 %472 = call float @llvm.fabs.f32(float %448) %473 = fcmp ole float %472, -0.000000e+00 %temp48.15 = select i1 %473, float %59, float %temp48.7 %474 = call float @llvm.fabs.f32(float %448) %475 = fcmp ole float %474, -0.000000e+00 %..241 = select i1 %475, float %60, float %.241 %476 = call float @llvm.fabs.f32(float %448) %477 = fcmp ole float %476, -0.000000e+00 %temp48.17 = select i1 %477, float %61, float %temp48.9 %478 = call float @llvm.fabs.f32(float %448) %479 = fcmp ole float %478, -0.000000e+00 %..242 = select i1 %479, float %62, float %.242 %480 = call float @llvm.fabs.f32(float %448) %481 = fcmp ole float %480, -0.000000e+00 %temp48.19 = select i1 %481, float %63, float %temp48.11 %482 = call float @llvm.fabs.f32(float %449) %483 = fcmp ole float %482, -0.000000e+00 %...239 = select i1 %483, float %64, float %..239 %484 = call float @llvm.fabs.f32(float %449) %485 = fcmp ole float %484, -0.000000e+00 %temp48.21 = select i1 %485, float %65, float %temp48.13 %486 = call float @llvm.fabs.f32(float %449) %487 = fcmp ole float %486, -0.000000e+00 %...240 = select i1 %487, float %66, float %..240 %488 = call float @llvm.fabs.f32(float %449) %489 = fcmp ole float %488, -0.000000e+00 %temp48.23 = select i1 %489, float %67, float %temp48.15 %490 = call float @llvm.fabs.f32(float %449) %491 = fcmp ole float %490, -0.000000e+00 %...241 = select i1 %491, float %68, float %..241 %492 = call float @llvm.fabs.f32(float %449) %493 = fcmp ole float %492, -0.000000e+00 %temp48.25 = select i1 %493, float %69, float %temp48.17 %494 = call float @llvm.fabs.f32(float %449) %495 = fcmp ole float %494, -0.000000e+00 %...242 = select i1 %495, float %70, float %..242 %496 = call float @llvm.fabs.f32(float %449) %497 = fcmp ole float %496, -0.000000e+00 %temp48.27 = select i1 %497, float %71, float %temp48.19 %498 = fmul float %136, %...239 %499 = fmul float %137, %temp48.21 %500 = fadd float %498, %499 %501 = fmul float %138, %...240 %502 = fadd float %500, %501 %503 = fmul float %140, %temp48.23 %504 = fadd float %502, %503 %505 = call float @llvm.AMDIL.clamp.(float %504, float 0.000000e+00, float 1.000000e+00) %506 = fmul float %136, %...241 %507 = fmul float %137, %temp48.25 %508 = fadd float %506, %507 %509 = fmul float %138, %...242 %510 = fadd float %508, %509 %511 = fmul float %140, %temp48.27 %512 = fadd float %510, %511 %513 = call float @llvm.AMDIL.clamp.(float %512, float 0.000000e+00, float 1.000000e+00) %514 = call float @llvm.fabs.f32(float %447) %515 = fcmp ole float %514, -0.000000e+00 %.243 = select i1 %515, float %74, float 0.000000e+00 %516 = call float @llvm.fabs.f32(float %447) %517 = fcmp ole float %516, -0.000000e+00 %temp52.0 = select i1 %517, float %75, float 0.000000e+00 %518 = call float @llvm.fabs.f32(float %447) %519 = fcmp ole float %518, -0.000000e+00 %.244 = select i1 %519, float %72, float 0.000000e+00 %520 = call float @llvm.fabs.f32(float %447) %521 = fcmp ole float %520, -0.000000e+00 %temp52.2 = select i1 %521, float %73, float 0.000000e+00 %522 = call float @llvm.fabs.f32(float %448) %523 = fcmp ole float %522, -0.000000e+00 %..243 = select i1 %523, float %78, float %.243 %524 = call float @llvm.fabs.f32(float %448) %525 = fcmp ole float %524, -0.000000e+00 %temp48.29 = select i1 %525, float %79, float %temp52.0 %526 = call float @llvm.fabs.f32(float %448) %527 = fcmp ole float %526, -0.000000e+00 %..244 = select i1 %527, float %76, float %.244 %528 = call float @llvm.fabs.f32(float %448) %529 = fcmp ole float %528, -0.000000e+00 %temp48.31 = select i1 %529, float %77, float %temp52.2 %530 = call float @llvm.fabs.f32(float %449) %531 = fcmp ole float %530, -0.000000e+00 %...243 = select i1 %531, float %82, float %..243 %532 = call float @llvm.fabs.f32(float %449) %533 = fcmp ole float %532, -0.000000e+00 %temp48.33 = select i1 %533, float %83, float %temp48.29 %534 = call float @llvm.fabs.f32(float %449) %535 = fcmp ole float %534, -0.000000e+00 %...244 = select i1 %535, float %80, float %..244 %536 = call float @llvm.fabs.f32(float %449) %537 = fcmp ole float %536, -0.000000e+00 %temp48.35 = select i1 %537, float %81, float %temp48.31 %538 = fmul float %505, %...243 %539 = fadd float %538, %...244 %540 = fmul float %513, %temp48.33 %541 = fadd float %540, %temp48.35 %542 = fadd float %539, 0x3F40000000000000 %543 = fadd float %541, 0x3F40000000000000 %544 = fadd float %211, 0.000000e+00 %545 = bitcast float %544 to i32 %546 = bitcast float %542 to i32 %547 = bitcast float %543 to i32 %548 = insertelement <4 x i32> undef, i32 %545, i32 0 %549 = insertelement <4 x i32> %548, i32 %546, i32 1 %550 = insertelement <4 x i32> %549, i32 %547, i32 2 %551 = insertelement <4 x i32> %550, i32 0, i32 3 %552 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %551, <8 x i32> %93, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %553 = extractelement <4 x float> %552, i32 0 %554 = fadd float %539, 0xBF40000000000000 %555 = fadd float %541, 0x3F40000000000000 %556 = fadd float %211, 0.000000e+00 %557 = fadd float %539, 0x3F40000000000000 %558 = fadd float %541, 0xBF40000000000000 %559 = fadd float %211, 0.000000e+00 %560 = fadd float %539, 0xBF40000000000000 %561 = fadd float %541, 0xBF40000000000000 %562 = fadd float %211, 0.000000e+00 %563 = bitcast float %556 to i32 %564 = bitcast float %554 to i32 %565 = bitcast float %555 to i32 %566 = insertelement <4 x i32> undef, i32 %563, i32 0 %567 = insertelement <4 x i32> %566, i32 %564, i32 1 %568 = insertelement <4 x i32> %567, i32 %565, i32 2 %569 = insertelement <4 x i32> %568, i32 0, i32 3 %570 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %569, <8 x i32> %93, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %571 = extractelement <4 x float> %570, i32 0 %572 = bitcast float %559 to i32 %573 = bitcast float %557 to i32 %574 = bitcast float %558 to i32 %575 = insertelement <4 x i32> undef, i32 %572, i32 0 %576 = insertelement <4 x i32> %575, i32 %573, i32 1 %577 = insertelement <4 x i32> %576, i32 %574, i32 2 %578 = insertelement <4 x i32> %577, i32 0, i32 3 %579 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %578, <8 x i32> %93, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %580 = extractelement <4 x float> %579, i32 0 %581 = bitcast float %562 to i32 %582 = bitcast float %560 to i32 %583 = bitcast float %561 to i32 %584 = insertelement <4 x i32> undef, i32 %581, i32 0 %585 = insertelement <4 x i32> %584, i32 %582, i32 1 %586 = insertelement <4 x i32> %585, i32 %583, i32 2 %587 = insertelement <4 x i32> %586, i32 0, i32 3 %588 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %587, <8 x i32> %93, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %589 = extractelement <4 x float> %588, i32 0 %590 = fmul float %553, 6.250000e-02 %591 = fmul float %571, 6.250000e-02 %592 = fadd float %590, %591 %593 = fmul float %580, 6.250000e-02 %594 = fadd float %592, %593 %595 = fmul float %589, 6.250000e-02 %596 = fadd float %594, %595 %597 = fadd float %539, 0x3F40000000000000 %598 = fadd float %541, 0.000000e+00 %599 = fadd float %211, 0.000000e+00 %600 = bitcast float %599 to i32 %601 = bitcast float %597 to i32 %602 = bitcast float %598 to i32 %603 = insertelement <4 x i32> undef, i32 %600, i32 0 %604 = insertelement <4 x i32> %603, i32 %601, i32 1 %605 = insertelement <4 x i32> %604, i32 %602, i32 2 %606 = insertelement <4 x i32> %605, i32 0, i32 3 %607 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %606, <8 x i32> %93, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %608 = extractelement <4 x float> %607, i32 0 %609 = fadd float %539, 0xBF40000000000000 %610 = fadd float %541, 0.000000e+00 %611 = fadd float %211, 0.000000e+00 %612 = bitcast float %611 to i32 %613 = bitcast float %609 to i32 %614 = bitcast float %610 to i32 %615 = insertelement <4 x i32> undef, i32 %612, i32 0 %616 = insertelement <4 x i32> %615, i32 %613, i32 1 %617 = insertelement <4 x i32> %616, i32 %614, i32 2 %618 = insertelement <4 x i32> %617, i32 0, i32 3 %619 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %618, <8 x i32> %93, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %620 = extractelement <4 x float> %619, i32 0 %621 = fadd float %539, 0.000000e+00 %622 = fadd float %541, 0xBF40000000000000 %623 = fadd float %211, 0.000000e+00 %624 = bitcast float %623 to i32 %625 = bitcast float %621 to i32 %626 = bitcast float %622 to i32 %627 = insertelement <4 x i32> undef, i32 %624, i32 0 %628 = insertelement <4 x i32> %627, i32 %625, i32 1 %629 = insertelement <4 x i32> %628, i32 %626, i32 2 %630 = insertelement <4 x i32> %629, i32 0, i32 3 %631 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %630, <8 x i32> %93, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %632 = extractelement <4 x float> %631, i32 0 %633 = fadd float %539, 0.000000e+00 %634 = fadd float %541, 0x3F40000000000000 %635 = fadd float %211, 0.000000e+00 %636 = bitcast float %635 to i32 %637 = bitcast float %633 to i32 %638 = bitcast float %634 to i32 %639 = insertelement <4 x i32> undef, i32 %636, i32 0 %640 = insertelement <4 x i32> %639, i32 %637, i32 1 %641 = insertelement <4 x i32> %640, i32 %638, i32 2 %642 = insertelement <4 x i32> %641, i32 0, i32 3 %643 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %642, <8 x i32> %93, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %644 = extractelement <4 x float> %643, i32 0 %645 = fmul float %608, 1.250000e-01 %646 = fmul float %620, 1.250000e-01 %647 = fadd float %645, %646 %648 = fmul float %632, 1.250000e-01 %649 = fadd float %647, %648 %650 = fmul float %644, 1.250000e-01 %651 = fadd float %649, %650 %652 = bitcast float %211 to i32 %653 = bitcast float %539 to i32 %654 = bitcast float %541 to i32 %655 = insertelement <4 x i32> undef, i32 %652, i32 0 %656 = insertelement <4 x i32> %655, i32 %653, i32 1 %657 = insertelement <4 x i32> %656, i32 %654, i32 2 %658 = insertelement <4 x i32> %657, i32 0, i32 3 %659 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %658, <8 x i32> %93, <4 x i32> %95, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %660 = extractelement <4 x float> %659, i32 0 %661 = fadd float %651, %596 %662 = fmul float %660, 2.500000e-01 %663 = fadd float %662, %661 %664 = fcmp oge float %449, 0.000000e+00 %.245 = select i1 %664, float 1.000000e+00, float %663 %665 = fsub float 1.000000e+00, %224 %666 = fmul float %378, %224 %667 = fmul float %.245, %665 %668 = fadd float %666, %667 br label %ENDIF118 ENDIF118: ; preds = %IF, %IF119 %temp8.0 = phi float [ %668, %IF119 ], [ %378, %IF ] %669 = fsub float %105, %84 %670 = fsub float %106, %85 %671 = fsub float %107, %86 %672 = fmul float %669, %669 %673 = fmul float %670, %670 %674 = fadd float %673, %672 %675 = fmul float %671, %671 %676 = fadd float %674, %675 %677 = fmul float %676, %47 %678 = fadd float %677, %46 %679 = call float @llvm.AMDIL.clamp.(float %678, float 0.000000e+00, float 1.000000e+00) %680 = fsub float 1.000000e+00, %679 %681 = fmul float %temp8.0, %680 %682 = fadd float %679, %681 br label %ENDIF } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300 s_mov_b32 m0, s10 ; BEFC030A s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 v_interp_p1_f32 v12, v0, 0, 0, [m0] ; C8300000 v_interp_p2_f32 v12, [v12], v1, 0, 0, [m0] ; C8310001 v_interp_p1_f32 v13, v0, 1, 0, [m0] ; C8340100 v_interp_p2_f32 v13, [v13], v1, 1, 0, [m0] ; C8350101 v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s9, s[12:15], 0x57 ; C2048D57 s_buffer_load_dword s8, s[12:15], 0x78 ; C2040D78 s_buffer_load_dword s0, s[12:15], 0x168 ; C2000CFF 00000168 v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401 v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500 v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501 v_interp_p1_f32 v7, v0, 2, 1, [m0] ; C81C0600 v_interp_p2_f32 v7, [v7], v1, 2, 1, [m0] ; C81D0601 v_interp_p1_f32 v2, v0, 0, 2, [m0] ; C8080800 v_interp_p2_f32 v2, [v2], v1, 0, 2, [m0] ; C8090801 v_interp_p1_f32 v3, v0, 1, 2, [m0] ; C80C0900 v_interp_p2_f32 v3, [v3], v1, 1, 2, [m0] ; C80D0901 v_interp_p1_f32 v4, v0, 2, 2, [m0] ; C8100A00 v_interp_p2_f32 v4, [v4], v1, 2, 2, [m0] ; C8110A01 v_interp_p1_f32 v9, v0, 3, 2, [m0] ; C8240B00 s_load_dwordx4 s[24:27], s[4:5], 0x0 ; C08C0500 v_interp_p2_f32 v9, [v9], v1, 3, 2, [m0] ; C8250B01 v_interp_p1_f32 v8, v0, 0, 3, [m0] ; C8200C00 v_interp_p2_f32 v8, [v8], v1, 0, 3, [m0] ; C8210C01 v_interp_p1_f32 v10, v0, 1, 3, [m0] ; C8280D00 v_interp_p2_f32 v10, [v10], v1, 1, 3, [m0] ; C8290D01 v_interp_p1_f32 v11, v0, 2, 3, [m0] ; C82C0E00 v_interp_p2_f32 v11, [v11], v1, 2, 3, [m0] ; C82D0E01 v_interp_p1_f32 v0, v0, 3, 3, [m0] ; C8000F00 v_interp_p2_f32 v0, [v0], v1, 3, 3, [m0] ; C8010F01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[12:15], 15, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[16:23], s[24:27] ; F0800F00 00C40C0C v_cmp_ne_i32_e64 s[0:1], 0, s0 ; D10A0000 00000080 v_mov_b32_e32 v16, 1.0 ; 7E2002F2 s_waitcnt vmcnt(0) ; BF8C0770 s_and_saveexec_b64 s[10:11], s[0:1] ; BE8A2400 s_xor_b64 s[10:11], exec, s[10:11] ; 898A0A7E s_cbranch_execz BB0_4 ; BF880000 v_add_f32_e32 v16, 0, v8 ; 06201080 s_buffer_load_dword s1, s[12:15], 0x10e ; C2008CFF 0000010E s_buffer_load_dword s0, s[12:15], 0x10f ; C2000CFF 0000010F s_buffer_load_dword s16, s[12:15], 0x110 ; C2080CFF 00000110 s_buffer_load_dword s2, s[12:15], 0x111 ; C2010CFF 00000111 s_buffer_load_dword s3, s[12:15], 0x114 ; C2018CFF 00000114 s_buffer_load_dword s17, s[12:15], 0x128 ; C2088CFF 00000128 s_buffer_load_dword s18, s[12:15], 0x129 ; C2090CFF 00000129 s_buffer_load_dword s20, s[12:15], 0x12a ; C20A0CFF 0000012A s_buffer_load_dword s21, s[12:15], 0x12b ; C20A8CFF 0000012B s_buffer_load_dword s22, s[12:15], 0x134 ; C20B0CFF 00000134 s_buffer_load_dword s23, s[12:15], 0x135 ; C20B8CFF 00000135 s_buffer_load_dword s24, s[12:15], 0x136 ; C20C0CFF 00000136 s_buffer_load_dword s25, s[12:15], 0x137 ; C20C8CFF 00000137 s_buffer_load_dword s26, s[12:15], 0x138 ; C20D0CFF 00000138 s_buffer_load_dword s27, s[12:15], 0x139 ; C20D8CFF 00000139 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s2 ; 7E020202 s_buffer_load_dword s28, s[12:15], 0x13a ; C20E0CFF 0000013A s_buffer_load_dword s30, s[12:15], 0x13b ; C20F0CFF 0000013B s_buffer_load_dword s2, s[12:15], 0x154 ; C2010CFF 00000154 s_buffer_load_dword s50, s[12:15], 0x155 ; C2190CFF 00000155 s_buffer_load_dword s29, s[12:15], 0x156 ; C20E8CFF 00000156 s_buffer_load_dword s33, s[12:15], 0x15c ; C2108CFF 0000015C s_buffer_load_dword s35, s[12:15], 0x15d ; C2118CFF 0000015D s_buffer_load_dword s37, s[12:15], 0x15e ; C2128CFF 0000015E s_buffer_load_dword s38, s[12:15], 0x15f ; C2130CFF 0000015F s_buffer_load_dword s19, s[12:15], 0x164 ; C2098CFF 00000164 s_load_dwordx8 s[40:47], s[6:7], 0x8 ; C0D40708 s_buffer_load_dword s51, s[12:15], 0x157 ; C2198CFF 00000157 s_buffer_load_dword s31, s[12:15], 0x158 ; C20F8CFF 00000158 s_buffer_load_dword s32, s[12:15], 0x159 ; C2100CFF 00000159 s_buffer_load_dword s34, s[12:15], 0x15a ; C2110CFF 0000015A s_buffer_load_dword s36, s[12:15], 0x15b ; C2120CFF 0000015B v_add_f32_e32 v19, 0, v10 ; 06261480 v_add_f32_e32 v18, 0, v11 ; 06241680 v_mad_f32 v17, 0, v8, 1.0 ; D2820011 03CA1080 v_mul_f32_e32 v20, s18, v19 ; 10282612 v_mac_f32_e32 v20, s17, v16 ; 3E282011 v_mac_f32_e32 v20, s20, v18 ; 3E282414 v_mac_f32_e32 v20, s21, v17 ; 3E282215 v_add_f32_e64 v21, 0, v20 clamp ; D2060815 00022880 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v22, s29 ; 7E2C021D s_buffer_load_dword s52, s[12:15], 0x11a ; C21A0CFF 0000011A s_buffer_load_dword s53, s[12:15], 0x11b ; C21A8CFF 0000011B s_buffer_load_dword s54, s[12:15], 0x11c ; C21B0CFF 0000011C s_buffer_load_dword s55, s[12:15], 0x11d ; C21B8CFF 0000011D s_buffer_load_dword s56, s[12:15], 0x11e ; C21C0CFF 0000011E s_buffer_load_dword s39, s[12:15], 0x115 ; C2138CFF 00000115 s_buffer_load_dword s57, s[12:15], 0x116 ; C21C8CFF 00000116 s_buffer_load_dword s58, s[12:15], 0x117 ; C21D0CFF 00000117 s_buffer_load_dword s59, s[12:15], 0x118 ; C21D8CFF 00000118 s_buffer_load_dword s60, s[12:15], 0x119 ; C21E0CFF 00000119 s_buffer_load_dword s29, s[12:15], 0x125 ; C20E8CFF 00000125 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v23, s39, v19 ; 102E2627 v_mac_f32_e32 v23, s3, v16 ; 3E2E2003 s_buffer_load_dword s39, s[12:15], 0x124 ; C2138CFF 00000124 s_buffer_load_dword s48, s[12:15], 0x126 ; C2180CFF 00000126 s_buffer_load_dword s49, s[12:15], 0x127 ; C2188CFF 00000127 v_mac_f32_e32 v23, s57, v18 ; 3E2E2439 v_mac_f32_e32 v23, s58, v17 ; 3E2E223A v_mul_f32_e32 v24, s60, v19 ; 1030263C v_mac_f32_e32 v24, s59, v16 ; 3E30203B v_mac_f32_e32 v24, s52, v18 ; 3E302434 v_mac_f32_e32 v24, s53, v17 ; 3E302235 v_mov_b32_e32 v25, s51 ; 7E320233 v_mul_f32_e32 v26, s29, v19 ; 1034261D s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v26, s39, v16 ; 3E342027 v_mac_f32_e32 v26, s48, v18 ; 3E342430 v_mac_f32_e32 v26, s49, v17 ; 3E342231 v_add_f32_e64 v27, 0, v26 clamp ; D206081B 00023480 v_subrev_f32_e32 v27, v26, v27 ; 0A36371A v_subrev_f32_e32 v21, v20, v21 ; 0A2A2B14 v_add_f32_e32 v21, v21, v27 ; 062A3715 v_mul_f32_e32 v27, s23, v19 ; 10362617 v_mac_f32_e32 v27, s22, v16 ; 3E362016 v_mac_f32_e32 v27, s24, v18 ; 3E362418 v_mac_f32_e32 v27, s25, v17 ; 3E362219 v_mul_f32_e32 v28, s27, v19 ; 1038261B v_mac_f32_e32 v28, s26, v16 ; 3E38201A v_mac_f32_e32 v28, s28, v18 ; 3E38241C v_mac_f32_e32 v28, s30, v17 ; 3E38221E v_mov_b32_e32 v29, 0x80000000 ; 7E3A02FF 80000000 v_cmp_le_f32_e64 vcc, |v21|, v29 ; D006016A 00023B15 v_cndmask_b32_e32 v21, v27, v26 ; 002A351B v_cndmask_b32_e32 v20, v28, v20 ; 0028291C v_cndmask_b32_e64 v26, 2.0, 1.0, vcc ; D200001A 01A9E4F4 v_add_f32_e64 v27, 0, v23 clamp ; D206081B 00022E80 v_add_f32_e64 v28, 0, v24 clamp ; D206081C 00023080 v_subrev_f32_e32 v27, v23, v27 ; 0A363717 v_subrev_f32_e32 v28, v24, v28 ; 0A383918 v_add_f32_e32 v27, v28, v27 ; 0636371C v_cmp_le_f32_e64 vcc, |v27|, v29 ; D006016A 00023B1B v_cndmask_b32_e32 v23, v21, v23 ; 002E2F15 v_cndmask_b32_e32 v24, v20, v24 ; 00303114 v_cndmask_b32_e64 v26, v26, 0, vcc ; D200001A 01A9011A v_mov_b32_e32 v20, s2 ; 7E280202 v_mov_b32_e32 v21, s50 ; 7E2A0232 v_cmp_le_f32_e64 vcc, |v26|, v29 ; D006016A 00023B1A v_cndmask_b32_e32 v22, 0, v22 ; 002C2C80 v_cndmask_b32_e32 v25, 0, v25 ; 00323280 v_cndmask_b32_e32 v20, 0, v20 ; 00282880 v_cndmask_b32_e32 v21, 0, v21 ; 002A2A80 s_buffer_load_dword s2, s[12:15], 0x11f ; C2010CFF 0000011F v_add_f32_e32 v27, -1.0, v26 ; 063634F3 v_mov_b32_e32 v28, s34 ; 7E380222 v_cmp_le_f32_e64 vcc, |v27|, v29 ; D006016A 00023B1B v_cndmask_b32_e32 v22, v22, v28 ; 002C3916 v_mov_b32_e32 v27, s36 ; 7E360224 v_cndmask_b32_e32 v25, v25, v27 ; 00323719 v_mov_b32_e32 v27, s31 ; 7E36021F v_cndmask_b32_e32 v27, v20, v27 ; 00363714 v_mov_b32_e32 v20, s32 ; 7E280220 v_cndmask_b32_e32 v28, v21, v20 ; 00382915 v_mul_f32_e32 v20, s55, v19 ; 10282637 v_mac_f32_e32 v20, s54, v16 ; 3E282036 v_mac_f32_e32 v20, s56, v18 ; 3E282438 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v20, s2, v17 ; 3E282202 v_add_f32_e32 v21, -2.0, v26 ; 062A34F5 v_mov_b32_e32 v30, s37 ; 7E3C0225 v_cmp_le_f32_e64 vcc, |v21|, v29 ; D006016A 00023B15 v_cndmask_b32_e32 v29, v22, v30 ; 003A3D16 v_mov_b32_e32 v21, s38 ; 7E2A0226 v_cndmask_b32_e32 v25, v25, v21 ; 00322B19 v_mov_b32_e32 v21, s33 ; 7E2A0221 v_cndmask_b32_e32 v21, v27, v21 ; 002A2B1B v_mov_b32_e32 v22, s35 ; 7E2C0223 v_cndmask_b32_e32 v22, v28, v22 ; 002C2D1C v_add_f32_e64 v27, 0, v23 clamp ; D206081B 00022E80 v_add_f32_e64 v28, 0, v24 clamp ; D206081C 00023080 v_mac_f32_e32 v21, v29, v27 ; 3E2A371D v_mac_f32_e32 v22, v25, v28 ; 3E2C3919 v_mov_b32_e32 v25, 0x3a000000 ; 7E3202FF 3A000000 v_add_f32_e32 v28, v25, v21 ; 06382B19 v_add_f32_e32 v29, v25, v22 ; 063A2D19 v_add_f32_e32 v27, 0, v20 ; 06362880 s_mov_b32 s50, 0 ; BEB20380 s_load_dwordx4 s[52:55], s[4:5], 0x4 ; C09A0504 v_mov_b32_e32 v30, s50 ; 7E3C0232 v_mov_b32_e32 v25, 0xba000000 ; 7E3202FF BA000000 v_add_f32_e32 v31, v25, v21 ; 063E2B19 v_mov_b32_e32 v32, v27 ; 7E40031B v_mov_b32_e32 v33, v28 ; 7E42031C v_mov_b32_e32 v34, v29 ; 7E44031D v_mov_b32_e32 v35, v30 ; 7E46031E v_mov_b32_e32 v33, v31 ; 7E42031F v_add_f32_e32 v23, -0.5, v23 ; 062E2EF1 v_add_f32_e32 v24, -0.5, v24 ; 063030F1 v_add_f32_e32 v25, v25, v22 ; 06322D19 v_mov_b32_e32 v36, v27 ; 7E48031B v_mov_b32_e32 v37, v28 ; 7E4A031C v_mov_b32_e32 v38, v29 ; 7E4C031D v_mov_b32_e32 v39, v30 ; 7E4E031E v_sub_f32_e64 v23, |v23|, s1 ; D2080117 00000317 v_mov_b32_e32 v34, v29 ; 7E44031D v_mov_b32_e32 v38, v25 ; 7E4C0319 v_sub_f32_e64 v24, |v24|, s1 ; D2080118 00000318 v_mov_b32_e32 v35, s50 ; 7E460232 v_mov_b32_e32 v39, s50 ; 7E4E0232 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_c_l v31, 1, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[40:47], s[52:55] ; F0B00100 01AA1F1B image_sample_c_l v40, 1, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[40:47], s[52:55] ; F0B00100 01AA2820 v_mov_b32_e32 v34, v25 ; 7E440319 image_sample_c_l v36, 1, 0, 0, 0, 0, 0, 0, 0, v[36:39], s[40:47], s[52:55] ; F0B00100 01AA2424 v_mov_b32_e32 v35, s50 ; 7E460232 image_sample_c_l v37, 1, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[40:47], s[52:55] ; F0B00100 01AA2520 v_add_f32_e32 v34, 0, v22 ; 06442C80 v_mov_b32_e32 v41, v27 ; 7E52031B v_mov_b32_e32 v42, v28 ; 7E54031C v_mov_b32_e32 v43, v29 ; 7E56031D v_mov_b32_e32 v44, v30 ; 7E58031E v_mul_f32_e32 v23, s0, v23 ; 102E2E00 v_mov_b32_e32 v43, v34 ; 7E560322 v_add_f32_e32 v28, 0, v21 ; 06382A80 v_mul_f32_e32 v24, s0, v24 ; 10303000 v_mov_b32_e32 v44, s50 ; 7E580232 v_mov_b32_e32 v45, v27 ; 7E5A031B v_mov_b32_e32 v46, v28 ; 7E5C031C v_mov_b32_e32 v47, v29 ; 7E5E031D v_mov_b32_e32 v48, v30 ; 7E60031E image_sample_c_l v38, 1, 0, 0, 0, 0, 0, 0, 0, v[41:44], s[40:47], s[52:55] ; F0B00100 01AA2629 v_mov_b32_e32 v47, v25 ; 7E5E0319 v_mov_b32_e32 v35, s50 ; 7E460232 image_sample_c_l v25, 1, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[40:47], s[52:55] ; F0B00100 01AA1920 v_mov_b32_e32 v48, s50 ; 7E600232 image_sample_c_l v32, 1, 0, 0, 0, 0, 0, 0, 0, v[45:48], s[40:47], s[52:55] ; F0B00100 01AA202D v_mov_b32_e32 v30, s50 ; 7E3C0232 image_sample_c_l v27, 1, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[40:47], s[52:55] ; F0B00100 01AA1B1B v_mov_b32_e32 v28, 0x3d800000 ; 7E3802FF 3D800000 s_waitcnt vmcnt(6) ; BF8C0776 v_mul_f32_e32 v29, v28, v40 ; 103A511C v_mac_f32_e32 v29, v28, v31 ; 3E3A3F1C s_waitcnt vmcnt(5) ; BF8C0775 v_mac_f32_e32 v29, v28, v36 ; 3E3A491C s_waitcnt vmcnt(4) ; BF8C0774 v_mac_f32_e32 v29, v28, v37 ; 3E3A4B1C v_mov_b32_e32 v28, 0x3e000000 ; 7E3802FF 3E000000 s_waitcnt vmcnt(2) ; BF8C0772 v_mul_f32_e32 v30, v28, v25 ; 103C331C v_mac_f32_e32 v30, v28, v38 ; 3E3C4D1C s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v30, v28, v32 ; 3E3C411C s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v30, v28, v27 ; 3E3C371C s_buffer_load_dword s56, s[12:15], 0x165 ; C21C0CFF 00000165 s_buffer_load_dword s51, s[12:15], 0x166 ; C2198CFF 00000166 v_add_f32_e64 v23, 0, v23 clamp ; D2060817 00022E80 v_add_f32_e64 v24, 0, v24 clamp ; D2060818 00023080 v_sub_f32_e32 v23, 1.0, v23 ; 082E2EF2 v_mad_f32 v25, -v24, v23, v23 ; D2820019 245E2F18 v_mov_b32_e32 v23, 0 ; 7E2E0280 v_add_f32_e32 v24, v29, v30 ; 06303D1D image_sample_c_l v21, 1, 0, 0, 0, 0, 0, 0, 0, v[20:23], s[40:47], s[52:55] ; F0B00100 01AA1514 s_waitcnt vmcnt(0) ; BF8C0770 v_madmk_f32_e32 v24, v21, v24, 0x3e800000 ; 40303115 3E800000 v_cmp_gt_f32_e32 vcc, 1.0, v25 ; 7C0832F2 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[58:59], vcc ; BEBA246A s_xor_b64 s[58:59], exec, s[58:59] ; 89BA3A7E s_cbranch_execz BB0_5 ; BF880000 v_mov_b32_e32 v21, s39 ; 7E2A0227 v_mov_b32_e32 v22, s29 ; 7E2C021D v_mov_b32_e32 v23, s48 ; 7E2E0230 v_mov_b32_e32 v27, s49 ; 7E360231 v_mov_b32_e32 v28, s17 ; 7E380211 v_mov_b32_e32 v29, s18 ; 7E3A0212 v_mov_b32_e32 v30, s20 ; 7E3C0214 v_mov_b32_e32 v31, s21 ; 7E3E0215 v_mov_b32_e32 v32, s22 ; 7E400216 v_mov_b32_e32 v33, s23 ; 7E420217 v_mov_b32_e32 v34, s24 ; 7E440218 v_mov_b32_e32 v35, s25 ; 7E460219 v_mov_b32_e32 v36, s26 ; 7E48021A s_buffer_load_dword s0, s[12:15], 0x144 ; C2000CFF 00000144 s_buffer_load_dword s2, s[12:15], 0x145 ; C2010CFF 00000145 s_buffer_load_dword s3, s[12:15], 0x146 ; C2018CFF 00000146 s_buffer_load_dword s57, s[12:15], 0x147 ; C21C8CFF 00000147 s_buffer_load_dword s60, s[12:15], 0x148 ; C21E0CFF 00000148 s_buffer_load_dword s61, s[12:15], 0x149 ; C21E8CFF 00000149 s_buffer_load_dword s62, s[12:15], 0x14a ; C21F0CFF 0000014A s_buffer_load_dword s63, s[12:15], 0x14b ; C21F8CFF 0000014B s_buffer_load_dword s64, s[12:15], 0x160 ; C2200CFF 00000160 s_buffer_load_dword s65, s[12:15], 0x161 ; C2208CFF 00000161 s_buffer_load_dword s66, s[12:15], 0x162 ; C2210CFF 00000162 s_buffer_load_dword s67, s[12:15], 0x163 ; C2218CFF 00000163 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v37, s0 ; 7E4A0200 v_add_f32_e32 v38, 0, v26 ; 064C3480 v_mov_b32_e32 v39, 0x80000000 ; 7E4E02FF 80000000 v_cmp_le_f32_e64 vcc, |v38|, v39 ; D006016A 00024F26 v_add_f32_e32 v38, -1.0, v26 ; 064C34F3 v_cmp_le_f32_e64 s[0:1], |v38|, v39 ; D0060100 00024F26 v_cndmask_b32_e32 v21, 0, v21 ; 002A2A80 v_cndmask_b32_e64 v21, v21, v32, s[0:1] ; D2000015 00024115 v_cndmask_b32_e32 v22, 0, v22 ; 002C2C80 v_cndmask_b32_e64 v22, v22, v33, s[0:1] ; D2000016 00024316 v_cndmask_b32_e32 v23, 0, v23 ; 002E2E80 v_cndmask_b32_e64 v23, v23, v34, s[0:1] ; D2000017 00024517 v_mov_b32_e32 v32, s2 ; 7E400202 v_cndmask_b32_e32 v27, 0, v27 ; 00363680 v_cndmask_b32_e64 v27, v27, v35, s[0:1] ; D200001B 0002471B v_cndmask_b32_e32 v28, 0, v28 ; 00383880 v_cndmask_b32_e64 v28, v28, v36, s[0:1] ; D200001C 0002491C v_mov_b32_e32 v33, s27 ; 7E42021B v_cndmask_b32_e32 v29, 0, v29 ; 003A3A80 v_cndmask_b32_e64 v29, v29, v33, s[0:1] ; D200001D 0002431D v_mov_b32_e32 v33, s28 ; 7E42021C v_cndmask_b32_e32 v30, 0, v30 ; 003C3C80 v_cndmask_b32_e64 v30, v30, v33, s[0:1] ; D200001E 0002431E v_mov_b32_e32 v33, s30 ; 7E42021E v_add_f32_e32 v26, -2.0, v26 ; 063434F5 v_cndmask_b32_e32 v31, 0, v31 ; 003E3E80 v_cndmask_b32_e64 v31, v31, v33, s[0:1] ; D200001F 0002431F v_mov_b32_e32 v33, s3 ; 7E420203 v_mov_b32_e32 v34, s57 ; 7E440239 v_cmp_le_f32_e64 s[2:3], |v26|, v39 ; D0060102 00024F1A v_mov_b32_e32 v35, s60 ; 7E46023C v_cndmask_b32_e64 v21, v21, v37, s[2:3] ; D2000015 000A4B15 v_mov_b32_e32 v36, s61 ; 7E48023D v_cndmask_b32_e64 v22, v22, v32, s[2:3] ; D2000016 000A4116 v_mov_b32_e32 v32, s31 ; 7E40021F v_cndmask_b32_e64 v23, v23, v33, s[2:3] ; D2000017 000A4317 v_mov_b32_e32 v33, s32 ; 7E420220 v_cndmask_b32_e64 v27, v27, v34, s[2:3] ; D200001B 000A451B v_mov_b32_e32 v34, s34 ; 7E440222 v_cndmask_b32_e64 v28, v28, v35, s[2:3] ; D200001C 000A471C v_mov_b32_e32 v35, s36 ; 7E460224 v_cndmask_b32_e64 v29, v29, v36, s[2:3] ; D200001D 000A491D v_mov_b32_e32 v36, s62 ; 7E48023E v_cndmask_b32_e64 v30, v30, v36, s[2:3] ; D200001E 000A491E v_mov_b32_e32 v36, s63 ; 7E48023F v_cndmask_b32_e64 v31, v31, v36, s[2:3] ; D200001F 000A491F v_mov_b32_e32 v36, s33 ; 7E480221 v_mul_f32_e32 v22, v22, v19 ; 102C2716 v_mac_f32_e32 v22, v21, v16 ; 3E2C2115 v_mov_b32_e32 v21, s35 ; 7E2A0223 v_mac_f32_e32 v22, v23, v18 ; 3E2C2517 v_mov_b32_e32 v23, s37 ; 7E2E0225 v_mac_f32_e32 v22, v27, v17 ; 3E2C231B v_mov_b32_e32 v27, s38 ; 7E360226 v_add_f32_e64 v37, 0, v22 clamp ; D2060825 00022C80 v_mul_f32_e32 v19, v29, v19 ; 1026271D v_mac_f32_e32 v19, v28, v16 ; 3E26211C v_mac_f32_e32 v19, v30, v18 ; 3E26251E v_mac_f32_e32 v19, v31, v17 ; 3E26231F v_add_f32_e64 v16, 0, v19 clamp ; D2060810 00022680 v_cndmask_b32_e32 v17, 0, v34 ; 00224480 v_cndmask_b32_e32 v18, 0, v35 ; 00244680 v_cndmask_b32_e32 v19, 0, v32 ; 00264080 v_cndmask_b32_e32 v22, 0, v33 ; 002C4280 v_cndmask_b32_e64 v17, v17, v23, s[0:1] ; D2000011 00022F11 v_cndmask_b32_e64 v18, v18, v27, s[0:1] ; D2000012 00023712 v_cndmask_b32_e64 v19, v19, v36, s[0:1] ; D2000013 00024913 v_cndmask_b32_e64 v22, v22, v21, s[0:1] ; D2000016 00022B16 v_mov_b32_e32 v21, s66 ; 7E2A0242 v_cndmask_b32_e64 v17, v17, v21, s[2:3] ; D2000011 000A2B11 v_mov_b32_e32 v21, s67 ; 7E2A0243 v_cndmask_b32_e64 v18, v18, v21, s[2:3] ; D2000012 000A2B12 v_mov_b32_e32 v21, s64 ; 7E2A0240 v_cndmask_b32_e64 v21, v19, v21, s[2:3] ; D2000015 000A2B13 v_mov_b32_e32 v19, s65 ; 7E260241 v_cndmask_b32_e64 v22, v22, v19, s[2:3] ; D2000016 000A2716 v_mac_f32_e32 v21, v17, v37 ; 3E2A4B11 v_mac_f32_e32 v22, v18, v16 ; 3E2C2112 v_mov_b32_e32 v16, 0x3a000000 ; 7E2002FF 3A000000 v_add_f32_e32 v28, v16, v21 ; 06382B10 v_add_f32_e32 v29, v16, v22 ; 063A2D10 v_add_f32_e32 v27, 0, v20 ; 06362880 v_mov_b32_e32 v30, s50 ; 7E3C0232 v_mov_b32_e32 v16, 0xba000000 ; 7E2002FF BA000000 v_add_f32_e32 v17, v16, v21 ; 06222B10 v_mov_b32_e32 v31, v27 ; 7E3E031B v_mov_b32_e32 v32, v28 ; 7E40031C v_mov_b32_e32 v33, v29 ; 7E42031D v_mov_b32_e32 v34, v30 ; 7E44031E v_mov_b32_e32 v32, v17 ; 7E400311 v_add_f32_e32 v16, v16, v22 ; 06202D10 v_mov_b32_e32 v33, v29 ; 7E42031D v_mov_b32_e32 v35, v27 ; 7E46031B v_mov_b32_e32 v36, v28 ; 7E48031C v_mov_b32_e32 v37, v29 ; 7E4A031D v_mov_b32_e32 v38, v30 ; 7E4C031E image_sample_c_l v17, 1, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[40:47], s[52:55] ; F0B00100 01AA111B v_mov_b32_e32 v34, s50 ; 7E440232 v_mov_b32_e32 v37, v16 ; 7E4A0310 image_sample_c_l v18, 1, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[40:47], s[52:55] ; F0B00100 01AA121F v_mov_b32_e32 v38, s50 ; 7E4C0232 v_mov_b32_e32 v33, v16 ; 7E420310 image_sample_c_l v19, 1, 0, 0, 0, 0, 0, 0, 0, v[35:38], s[40:47], s[52:55] ; F0B00100 01AA1323 v_mov_b32_e32 v34, s50 ; 7E440232 image_sample_c_l v23, 1, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[40:47], s[52:55] ; F0B00100 01AA171F v_mov_b32_e32 v30, 0x3d800000 ; 7E3C02FF 3D800000 s_waitcnt vmcnt(2) ; BF8C0772 v_mul_f32_e32 v18, v30, v18 ; 1024251E v_add_f32_e32 v33, 0, v22 ; 06422C80 v_mov_b32_e32 v34, v27 ; 7E44031B v_mov_b32_e32 v35, v28 ; 7E46031C v_mov_b32_e32 v36, v29 ; 7E48031D v_mov_b32_e32 v37, v30 ; 7E4A031E v_mac_f32_e32 v18, v30, v17 ; 3E24231E v_mov_b32_e32 v36, v33 ; 7E480321 s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v18, v30, v19 ; 3E24271E v_mov_b32_e32 v37, s50 ; 7E4A0232 v_add_f32_e32 v28, 0, v21 ; 06382A80 image_sample_c_l v17, 1, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[40:47], s[52:55] ; F0B00100 01AA1122 v_mov_b32_e32 v35, v27 ; 7E46031B v_mov_b32_e32 v36, v28 ; 7E48031C v_mov_b32_e32 v37, v29 ; 7E4A031D v_mov_b32_e32 v38, v30 ; 7E4C031E s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v18, v30, v23 ; 3E242F1E v_mov_b32_e32 v34, s50 ; 7E440232 v_mov_b32_e32 v37, v16 ; 7E4A0310 image_sample_c_l v16, 1, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[40:47], s[52:55] ; F0B00100 01AA101F v_mov_b32_e32 v38, s50 ; 7E4C0232 image_sample_c_l v19, 1, 0, 0, 0, 0, 0, 0, 0, v[35:38], s[40:47], s[52:55] ; F0B00100 01AA1323 v_mov_b32_e32 v30, s50 ; 7E3C0232 image_sample_c_l v23, 1, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[40:47], s[52:55] ; F0B00100 01AA171B v_mov_b32_e32 v27, 0x3e000000 ; 7E3602FF 3E000000 s_waitcnt vmcnt(2) ; BF8C0772 v_mul_f32_e32 v16, v27, v16 ; 1020211B v_mac_f32_e32 v16, v27, v17 ; 3E20231B s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v16, v27, v19 ; 3E20271B s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v16, v27, v23 ; 3E202F1B v_mov_b32_e32 v23, 0 ; 7E2E0280 image_sample_c_l v17, 1, 0, 0, 0, 0, 0, 0, 0, v[20:23], s[40:47], s[52:55] ; F0B00100 01AA1114 v_add_f32_e32 v16, v18, v16 ; 06202112 s_waitcnt vmcnt(0) ; BF8C0770 v_madmk_f32_e32 v16, v17, v16, 0x3e800000 ; 40202111 3E800000 v_cmp_le_f32_e32 vcc, 0, v26 ; 7C063480 v_cndmask_b32_e64 v16, v16, 1.0, vcc ; D2000010 01A9E510 v_mad_f32 v16, -v25, v16, v16 ; D2820010 24422119 v_mac_f32_e32 v16, v25, v24 ; 3E203119 v_mov_b32_e32 v24, v16 ; 7E300310 s_or_b64 exec, exec, s[58:59] ; 88FE3A7E v_subrev_f32_e32 v16, s19, v8 ; 0A201013 v_subrev_f32_e32 v17, s56, v10 ; 0A221438 v_subrev_f32_e32 v18, s51, v11 ; 0A241633 v_mul_f32_e32 v16, v16, v16 ; 10202110 v_mac_f32_e32 v16, v17, v17 ; 3E202311 v_mac_f32_e32 v16, v18, v18 ; 3E202512 v_mad_f32 v1, v1, v16, s16 ; D2820001 00422101 v_add_f32_e64 v16, 0, v1 clamp ; D2060810 00020280 v_sub_f32_e32 v1, 1.0, v16 ; 080220F2 v_mac_f32_e32 v16, v1, v24 ; 3E203101 s_or_b64 exec, exec, s[10:11] ; 88FE0A7E s_buffer_load_dword s20, s[12:15], 0x4 ; C20A0D04 s_buffer_load_dword s21, s[12:15], 0x5 ; C20A8D05 s_buffer_load_dword s22, s[12:15], 0x6 ; C20B0D06 s_buffer_load_dword s18, s[12:15], 0x7 ; C2090D07 s_buffer_load_dword s23, s[12:15], 0x30 ; C20B8D30 s_buffer_load_dword s0, s[12:15], 0x31 ; C2000D31 s_buffer_load_dword s19, s[12:15], 0x33 ; C2098D33 s_buffer_load_dword s7, s[12:15], 0x50 ; C2038D50 s_buffer_load_dword s16, s[12:15], 0x51 ; C2080D51 s_buffer_load_dword s17, s[12:15], 0x52 ; C2088D52 s_buffer_load_dword s24, s[12:15], 0x53 ; C20C0D53 s_buffer_load_dword s5, s[12:15], 0x54 ; C2028D54 s_buffer_load_dword s4, s[12:15], 0x56 ; C2020D56 s_buffer_load_dword s1, s[12:15], 0x74 ; C2008D74 s_buffer_load_dword s2, s[12:15], 0x75 ; C2010D75 s_buffer_load_dword s3, s[12:15], 0x76 ; C2018D76 s_buffer_load_dword s6, s[12:15], 0x77 ; C2030D77 v_mov_b32_e32 v17, s9 ; 7E220209 v_mov_b32_e32 v1, s8 ; 7E020208 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v2, v16, v5 ; 3E040B10 v_mac_f32_e32 v3, v16, v6 ; 3E060D10 v_mac_f32_e32 v4, v16, v7 ; 3E080F10 v_mad_f32 v5, v15, s24, -s24 ; D2820005 8060310F v_add_f32_e32 v6, s23, v15 ; 060C1E17 v_add_f32_e64 v6, 0, v6 clamp ; D2060806 00020C80 v_mad_f32 v7, s20, v6, -v6 ; D2820007 841A0C14 v_mad_f32 v15, s21, v6, -v6 ; D282000F 841A0C15 v_mad_f32 v6, s22, v6, -v6 ; D2820006 841A0C16 v_mac_f32_e32 v2, v2, v7 ; 3E040F02 v_mac_f32_e32 v3, v3, v15 ; 3E061F03 v_mac_f32_e32 v4, v4, v6 ; 3E080D04 v_mad_f32 v5, v5, s18, s18 ; D2820005 00482505 v_mad_f32 v6, v5, v9, -v5 ; D2820006 84161305 v_mac_f32_e32 v5, s19, v6 ; 3E0A0C13 v_mul_f32_e32 v2, v2, v12 ; 10041902 v_mul_f32_e32 v3, v3, v13 ; 10061B03 v_mul_f32_e32 v4, v4, v14 ; 10081D04 v_sub_f32_e32 v6, s7, v8 ; 080C1007 v_sub_f32_e32 v7, s16, v10 ; 080E1410 v_sub_f32_e32 v8, s17, v11 ; 08101611 v_mul_f32_e32 v6, v6, v6 ; 100C0D06 v_mac_f32_e32 v6, v7, v7 ; 3E0C0F07 v_mac_f32_e32 v6, v8, v8 ; 3E0C1108 v_sqrt_f32_e32 v6, v6 ; 7E0C6706 v_mad_f32 v6, v17, v6, s5 ; D2820006 00160D11 v_add_f32_e64 v6, 0, v6 clamp ; D2060806 00020C80 v_min_f32_e32 v6, s4, v6 ; 1E0C0C04 v_mul_f32_e32 v7, s8, v2 ; 100E0408 v_mul_f32_e32 v8, s8, v3 ; 10100608 v_mul_f32_e32 v9, s8, v4 ; 10120808 v_mul_f32_e32 v0, s6, v0 ; 10000006 v_mov_b32_e32 v10, 0x80000000 ; 7E1402FF 80000000 v_cmp_le_f32_e64 vcc, |s0|, v10 ; D006016A 00021400 v_cndmask_b32_e32 v0, v0, v5 ; 00000B00 v_mul_f32_e32 v5, v6, v6 ; 100A0D06 v_mad_f32 v2, -v2, v1, s1 ; D2820002 20060302 v_mad_f32 v3, -v3, v1, s2 ; D2820003 200A0303 v_mad_f32 v1, -v4, v1, s3 ; D2820001 200E0304 v_mac_f32_e32 v7, v2, v5 ; 3E0E0B02 v_mac_f32_e32 v8, v3, v5 ; 3E100B03 v_mac_f32_e32 v9, v1, v5 ; 3E120B01 v_cvt_pkrtz_f16_f32_e32 v1, v7, v8 ; 5E021107 v_cvt_pkrtz_f16_f32_e32 v0, v9, v0 ; 5E000109 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 52 Code Size: 2724 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL CONST[0..95] DCL TEMP[0..2], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 1020.0400, 2.1000} IMM[1] FLT32 { 3.1000, 0.1000, 1.1000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: MAD TEMP[1].x, IMM[0].zzzz, IN[1].zzzz, IMM[0].wwww 2: F2I TEMP[1].x, TEMP[1].xxxx 3: UARL ADDR[0].x, TEMP[1].xxxx 4: UARL ADDR[0].x, TEMP[1].xxxx 5: DP4 TEMP[0].x, IN[2], CONST[ADDR[0].x] 6: MAD TEMP[1].x, IMM[0].zzzz, IN[1].zzzz, IMM[1].xxxx 7: F2I TEMP[1].x, TEMP[1].xxxx 8: UARL ADDR[0].x, TEMP[1].xxxx 9: DP4 TEMP[1].x, IN[2], CONST[ADDR[0].x] 10: MOV TEMP[0].y, TEMP[1].xxxx 11: MAD TEMP[1].x, IMM[0].zzzz, IN[1].zzzz, IMM[1].yyyy 12: F2I TEMP[1].x, TEMP[1].xxxx 13: UARL ADDR[0].x, TEMP[1].xxxx 14: MOV TEMP[1], CONST[ADDR[0].x] 15: MAD TEMP[2].x, IMM[0].zzzz, IN[1].zzzz, IMM[1].zzzz 16: F2I TEMP[2].x, TEMP[2].xxxx 17: UARL ADDR[0].x, TEMP[2].xxxx 18: MOV TEMP[2], CONST[ADDR[0].x] 19: MOV OUT[2], IN[1] 20: MOV OUT[3], TEMP[1] 21: MOV OUT[1], IN[0] 22: MOV OUT[4], TEMP[2] 23: MOV OUT[0], TEMP[0] 24: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 %16 = add i32 %5, %8 %17 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %15, i32 0, i32 %16) %18 = extractelement <4 x float> %17, i32 0 %19 = extractelement <4 x float> %17, i32 1 %20 = extractelement <4 x float> %17, i32 2 %21 = extractelement <4 x float> %17, i32 3 %22 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = add i32 %5, %8 %25 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %23, i32 0, i32 %24) %26 = extractelement <4 x float> %25, i32 0 %27 = extractelement <4 x float> %25, i32 1 %28 = extractelement <4 x float> %25, i32 2 %29 = extractelement <4 x float> %25, i32 3 %30 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = add i32 %5, %8 %33 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %31, i32 0, i32 %32) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 %38 = fmul float %28, 0x408FE051E0000000 %39 = fadd float %38, 0x4000CCCCC0000000 %40 = fptosi float %39 to i32 %41 = shl i32 %40, 4 %42 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %41) %43 = shl i32 %40, 4 %44 = or i32 %43, 4 %45 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %44) %46 = shl i32 %40, 4 %47 = or i32 %46, 8 %48 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %47) %49 = shl i32 %40, 4 %50 = or i32 %49, 12 %51 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %50) %52 = fmul float %34, %42 %53 = fmul float %35, %45 %54 = fadd float %52, %53 %55 = fmul float %36, %48 %56 = fadd float %54, %55 %57 = fmul float %37, %51 %58 = fadd float %56, %57 %59 = fmul float %28, 0x408FE051E0000000 %60 = fadd float %59, 0x4008CCCCC0000000 %61 = fptosi float %60 to i32 %62 = shl i32 %61, 4 %63 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %62) %64 = shl i32 %61, 4 %65 = or i32 %64, 4 %66 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %65) %67 = shl i32 %61, 4 %68 = or i32 %67, 8 %69 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %68) %70 = shl i32 %61, 4 %71 = or i32 %70, 12 %72 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %71) %73 = fmul float %34, %63 %74 = fmul float %35, %66 %75 = fadd float %73, %74 %76 = fmul float %36, %69 %77 = fadd float %75, %76 %78 = fmul float %37, %72 %79 = fadd float %77, %78 %80 = fmul float %28, 0x408FE051E0000000 %81 = fadd float %80, 0x3FB99999A0000000 %82 = fptosi float %81 to i32 %83 = shl i32 %82, 4 %84 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %83) %85 = shl i32 %82, 4 %86 = or i32 %85, 4 %87 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %86) %88 = shl i32 %82, 4 %89 = or i32 %88, 8 %90 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %89) %91 = shl i32 %82, 4 %92 = or i32 %91, 12 %93 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %92) %94 = fmul float %28, 0x408FE051E0000000 %95 = fadd float %94, 0x3FF19999A0000000 %96 = fptosi float %95 to i32 %97 = shl i32 %96, 4 %98 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %97) %99 = shl i32 %96, 4 %100 = or i32 %99, 4 %101 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %100) %102 = shl i32 %96, 4 %103 = or i32 %102, 8 %104 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %103) %105 = shl i32 %96, 4 %106 = or i32 %105, 12 %107 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %106) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %18, float %19, float %20, float %21) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %26, float %27, float %28, float %29) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %84, float %87, float %90, float %93) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %98, float %101, float %104, float %107) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %58, float %79, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 v_mov_b32_e32 v1, 0x447f028f ; 7E0202FF 447F028F v_mov_b32_e32 v2, 1.0 ; 7E0402F2 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[3:6], v0, s[4:7], 0 idxen ; E00C2000 80010300 buffer_load_format_xyzw v[7:10], v0, s[12:15], 0 idxen ; E00C2000 80030700 buffer_load_format_xyzw v[11:14], v0, s[16:19], 0 idxen ; E00C2000 80040B00 s_waitcnt vmcnt(2) ; BF8C0772 exp 15, 32, 0, 0, 0, v3, v4, v5, v6 ; F800020F 06050403 s_waitcnt vmcnt(1) ; BF8C0771 v_madak_f32_e32 v0, v9, v1, 0x40066666 ; 42000309 40066666 s_waitcnt expcnt(0) ; BF8C070F v_madak_f32_e32 v3, v9, v1, 0x40466666 ; 42060309 40466666 v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_cvt_i32_f32_e32 v3, v3 ; 7E061103 v_madak_f32_e32 v4, v9, v1, 0x3dcccccd ; 42080309 3DCCCCCD v_madak_f32_e32 v1, v9, v1, 0x3f8ccccd ; 42020309 3F8CCCCD v_cvt_i32_f32_e32 v4, v4 ; 7E081104 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_lshlrev_b32_e32 v0, 4, v0 ; 34000084 v_lshlrev_b32_e32 v3, 4, v3 ; 34060684 v_lshlrev_b32_e32 v4, 4, v4 ; 34080884 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 buffer_load_dword v5, v0, s[0:3], 0 offen ; E0301000 80000500 v_or_b32_e32 v6, 4, v0 ; 380C0084 v_or_b32_e32 v15, 8, v0 ; 381E0088 v_or_b32_e32 v0, 12, v0 ; 3800008C buffer_load_dword v16, v3, s[0:3], 0 offen ; E0301000 80001003 v_or_b32_e32 v17, 4, v3 ; 38220684 v_or_b32_e32 v18, 8, v3 ; 38240688 v_or_b32_e32 v3, 12, v3 ; 3806068C buffer_load_dword v19, v4, s[0:3], 0 offen ; E0301000 80001304 v_or_b32_e32 v20, 4, v4 ; 38280884 v_or_b32_e32 v21, 8, v4 ; 382A0888 v_or_b32_e32 v4, 12, v4 ; 3808088C buffer_load_dword v22, v1, s[0:3], 0 offen ; E0301000 80001601 v_or_b32_e32 v23, 4, v1 ; 382E0284 v_or_b32_e32 v24, 8, v1 ; 38300288 v_or_b32_e32 v1, 12, v1 ; 3802028C buffer_load_dword v20, v20, s[0:3], 0 offen ; E0301000 80001414 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 buffer_load_dword v4, v4, s[0:3], 0 offen ; E0301000 80000404 buffer_load_dword v6, v6, s[0:3], 0 offen ; E0301000 80000606 buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 buffer_load_dword v23, v23, s[0:3], 0 offen ; E0301000 80001717 buffer_load_dword v24, v24, s[0:3], 0 offen ; E0301000 80001818 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 buffer_load_dword v15, v15, s[0:3], 0 offen ; E0301000 80000F0F buffer_load_dword v18, v18, s[0:3], 0 offen ; E0301000 80001212 buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 exp 15, 33, 0, 0, 0, v7, v8, v9, v10 ; F800021F 0A090807 s_waitcnt vmcnt(9) ; BF8C0779 exp 15, 34, 0, 0, 0, v19, v20, v21, v4 ; F800022F 04151413 s_waitcnt vmcnt(8) expcnt(0) ; BF8C0708 v_mul_f32_e32 v4, v6, v12 ; 10081906 s_waitcnt vmcnt(7) ; BF8C0777 v_mul_f32_e32 v6, v17, v12 ; 100C1911 v_mac_f32_e32 v4, v5, v11 ; 3E081705 v_mac_f32_e32 v6, v16, v11 ; 3E0C1710 s_waitcnt vmcnt(4) ; BF8C0774 exp 15, 35, 0, 0, 0, v22, v23, v24, v1 ; F800023F 01181716 s_waitcnt vmcnt(3) ; BF8C0773 v_mac_f32_e32 v4, v15, v13 ; 3E081B0F s_waitcnt vmcnt(2) ; BF8C0772 v_mac_f32_e32 v6, v18, v13 ; 3E0C1B12 s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v4, v0, v14 ; 3E081D00 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v6, v3, v14 ; 3E0C1D03 v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 12, 0, 1, 0, v4, v6, v0, v2 ; F80008CF 02000604 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 28 Code Size: 424 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0..1], LOCAL 0: MAD TEMP[0], IN[0], IN[3], IN[2] 1: MUL TEMP[1].x, TEMP[0].wwww, IN[1].wwww 2: MOV TEMP[0].w, TEMP[1].xxxx 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %24 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %25 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %26 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %27 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %28 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) %29 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) %30 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) %31 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) %32 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) %33 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) %34 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) %35 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) %36 = fmul float %23, %32 %37 = fadd float %36, %28 %38 = fmul float %24, %33 %39 = fadd float %38, %29 %40 = fmul float %25, %34 %41 = fadd float %40, %30 %42 = fmul float %26, %35 %43 = fadd float %42, %31 %44 = fmul float %43, %27 %45 = call i32 @llvm.SI.packf16(float %37, float %39) %46 = bitcast i32 %45 to float %47 = call i32 @llvm.SI.packf16(float %41, float %44) %48 = bitcast i32 %47 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %46, float %48, float %46, float %48) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s10 ; BEFC030A v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 3, 1, [m0] ; C8180700 v_interp_p2_f32 v6, [v6], v1, 3, 1, [m0] ; C8190701 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v10, v0, 3, 2, [m0] ; C8280B00 v_interp_p2_f32 v10, [v10], v1, 3, 2, [m0] ; C8290B01 v_interp_p1_f32 v11, v0, 0, 3, [m0] ; C82C0C00 v_interp_p2_f32 v11, [v11], v1, 0, 3, [m0] ; C82D0C01 v_interp_p1_f32 v12, v0, 1, 3, [m0] ; C8300D00 v_interp_p2_f32 v12, [v12], v1, 1, 3, [m0] ; C8310D01 v_interp_p1_f32 v13, v0, 2, 3, [m0] ; C8340E00 v_interp_p2_f32 v13, [v13], v1, 2, 3, [m0] ; C8350E01 v_interp_p1_f32 v0, v0, 3, 3, [m0] ; C8000F00 v_interp_p2_f32 v0, [v0], v1, 3, 3, [m0] ; C8010F01 v_mac_f32_e32 v7, v11, v2 ; 3E0E050B v_mac_f32_e32 v8, v12, v3 ; 3E10070C v_mac_f32_e32 v9, v13, v4 ; 3E12090D v_mac_f32_e32 v10, v0, v5 ; 3E140B00 v_mul_f32_e32 v0, v6, v10 ; 10001506 v_cvt_pkrtz_f16_f32_e32 v1, v7, v8 ; 5E021107 v_cvt_pkrtz_f16_f32_e32 v0, v9, v0 ; 5E000109 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Code Size: 148 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..3] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[2], CONST[2] 2: DP4 TEMP[1].x, IN[2], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: MAD TEMP[1], IN[0], CONST[1], CONST[0] 5: MOV TEMP[2].xy, IN[1].xyxx 6: MOV OUT[1], TEMP[1] 7: MOV OUT[0], TEMP[0] 8: MOV OUT[2], TEMP[2] 9: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 12) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 16) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 20) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 24) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 28) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 32) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 36) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 40) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 44) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 48) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 52) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 56) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 60) %30 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = add i32 %5, %8 %33 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %31, i32 0, i32 %32) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 %38 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = add i32 %5, %8 %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %40) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %45 = load <16 x i8>, <16 x i8> addrspace(2)* %44, align 16, !tbaa !0 %46 = add i32 %5, %8 %47 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %45, i32 0, i32 %46) %48 = extractelement <4 x float> %47, i32 0 %49 = extractelement <4 x float> %47, i32 1 %50 = extractelement <4 x float> %47, i32 2 %51 = extractelement <4 x float> %47, i32 3 %52 = fmul float %48, %22 %53 = fmul float %49, %23 %54 = fadd float %52, %53 %55 = fmul float %50, %24 %56 = fadd float %54, %55 %57 = fmul float %51, %25 %58 = fadd float %56, %57 %59 = fmul float %48, %26 %60 = fmul float %49, %27 %61 = fadd float %59, %60 %62 = fmul float %50, %28 %63 = fadd float %61, %62 %64 = fmul float %51, %29 %65 = fadd float %63, %64 %66 = fmul float %34, %18 %67 = fadd float %66, %14 %68 = fmul float %35, %19 %69 = fadd float %68, %15 %70 = fmul float %36, %20 %71 = fadd float %70, %16 %72 = fmul float %37, %21 %73 = fadd float %72, %17 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %67, float %69, float %71, float %73) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %42, float %43, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %58, float %65, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A v_mov_b32_e32 v1, 1.0 ; 7E0202F2 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_buffer_load_dword s9, s[0:3], 0x1 ; C2048101 s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102 s_buffer_load_dword s11, s[0:3], 0x3 ; C2058103 s_buffer_load_dword s20, s[0:3], 0x4 ; C20A0104 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[8:11], v0, s[16:19], 0 idxen ; E00C2000 80040800 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_buffer_load_dword s6, s[0:3], 0x7 ; C2030107 s_buffer_load_dword s7, s[0:3], 0x8 ; C2038108 s_buffer_load_dword s12, s[0:3], 0x9 ; C2060109 s_buffer_load_dword s13, s[0:3], 0xa ; C206810A s_buffer_load_dword s14, s[0:3], 0xb ; C207010B s_buffer_load_dword s15, s[0:3], 0xc ; C207810C s_buffer_load_dword s16, s[0:3], 0xd ; C208010D s_buffer_load_dword s17, s[0:3], 0xe ; C208810E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s8 ; 7E000208 v_mov_b32_e32 v12, s9 ; 7E180209 v_mov_b32_e32 v13, s10 ; 7E1A020A v_mov_b32_e32 v14, s11 ; 7E1C020B v_mac_f32_e32 v0, s20, v2 ; 3E000414 v_mac_f32_e32 v12, s4, v3 ; 3E180604 v_mac_f32_e32 v13, s5, v4 ; 3E1A0805 v_mac_f32_e32 v14, s6, v5 ; 3E1C0A06 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, s12, v9 ; 1004120C exp 15, 32, 0, 0, 0, v0, v12, v13, v14 ; F800020F 0E0D0C00 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s16, v9 ; 10001210 v_mac_f32_e32 v2, s7, v8 ; 3E041007 v_mac_f32_e32 v0, s15, v8 ; 3E00100F v_mac_f32_e32 v2, s13, v10 ; 3E04140D v_mac_f32_e32 v0, s17, v10 ; 3E001411 exp 15, 33, 0, 0, 0, v6, v7, v0, v0 ; F800021F 00000706 v_mac_f32_e32 v2, s14, v11 ; 3E04160E s_waitcnt expcnt(0) ; BF8C070F v_mac_f32_e32 v0, s0, v11 ; 3E001600 v_mov_b32_e32 v3, 0 ; 7E060280 exp 15, 12, 0, 1, 0, v2, v0, v3, v1 ; F80008CF 01030002 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Code Size: 232 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xyz, IN[0].xyzx 1: MOV TEMP[1].xy, IN[1].xyyy 2: TEX TEMP[1].w, TEMP[1], SAMP[0], 2D 3: MUL TEMP[1].x, IN[0].wwww, TEMP[1].wwww 4: MOV TEMP[0].w, TEMP[1].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %29 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %30 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %31 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %32 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %33 = bitcast float %31 to i32 %34 = bitcast float %32 to i32 %35 = insertelement <2 x i32> undef, i32 %33, i32 0 %36 = insertelement <2 x i32> %35, i32 %34, i32 1 %37 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %36, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %38 = extractelement <4 x float> %37, i32 3 %39 = fmul float %30, %38 %40 = call i32 @llvm.SI.packf16(float %27, float %28) %41 = bitcast i32 %40 to float %42 = call i32 @llvm.SI.packf16(float %29, float %39) %43 = bitcast i32 %42 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %41, float %43, float %41, float %43) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s10 ; BEFC030A s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v0, 8, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[8:15], s[0:3] ; F0800800 00020006 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 104 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..47] DCL TEMP[0..1], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 510.0200, 0.1000} IMM[1] FLT32 { 1.1000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: MAD TEMP[1].x, IMM[0].zzzz, IN[1].zzzz, IMM[0].wwww 2: F2I TEMP[1].x, TEMP[1].xxxx 3: UARL ADDR[0].x, TEMP[1].xxxx 4: UARL ADDR[0].x, TEMP[1].xxxx 5: DP4 TEMP[0].x, IN[2], CONST[ADDR[0].x] 6: MAD TEMP[1].x, IMM[0].zzzz, IN[1].zzzz, IMM[1].xxxx 7: F2I TEMP[1].x, TEMP[1].xxxx 8: UARL ADDR[0].x, TEMP[1].xxxx 9: DP4 TEMP[1].x, IN[2], CONST[ADDR[0].x] 10: MOV TEMP[0].y, TEMP[1].xxxx 11: MOV OUT[2], IN[1] 12: MOV OUT[1], IN[0] 13: MOV OUT[0], TEMP[0] 14: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 %16 = add i32 %5, %8 %17 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %15, i32 0, i32 %16) %18 = extractelement <4 x float> %17, i32 0 %19 = extractelement <4 x float> %17, i32 1 %20 = extractelement <4 x float> %17, i32 2 %21 = extractelement <4 x float> %17, i32 3 %22 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = add i32 %5, %8 %25 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %23, i32 0, i32 %24) %26 = extractelement <4 x float> %25, i32 0 %27 = extractelement <4 x float> %25, i32 1 %28 = extractelement <4 x float> %25, i32 2 %29 = extractelement <4 x float> %25, i32 3 %30 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = add i32 %5, %8 %33 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %31, i32 0, i32 %32) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 %38 = fmul float %28, 0x407FE051E0000000 %39 = fadd float %38, 0x3FB99999A0000000 %40 = fptosi float %39 to i32 %41 = shl i32 %40, 4 %42 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %41) %43 = shl i32 %40, 4 %44 = or i32 %43, 4 %45 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %44) %46 = shl i32 %40, 4 %47 = or i32 %46, 8 %48 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %47) %49 = shl i32 %40, 4 %50 = or i32 %49, 12 %51 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %50) %52 = fmul float %34, %42 %53 = fmul float %35, %45 %54 = fadd float %52, %53 %55 = fmul float %36, %48 %56 = fadd float %54, %55 %57 = fmul float %37, %51 %58 = fadd float %56, %57 %59 = fmul float %28, 0x407FE051E0000000 %60 = fadd float %59, 0x3FF19999A0000000 %61 = fptosi float %60 to i32 %62 = shl i32 %61, 4 %63 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %62) %64 = shl i32 %61, 4 %65 = or i32 %64, 4 %66 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %65) %67 = shl i32 %61, 4 %68 = or i32 %67, 8 %69 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %68) %70 = shl i32 %61, 4 %71 = or i32 %70, 12 %72 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %71) %73 = fmul float %34, %63 %74 = fmul float %35, %66 %75 = fadd float %73, %74 %76 = fmul float %36, %69 %77 = fadd float %75, %76 %78 = fmul float %37, %72 %79 = fadd float %77, %78 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %18, float %19, float %20, float %21) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %26, float %27, float %28, float %29) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %58, float %79, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 v_mov_b32_e32 v1, 0x43ff028f ; 7E0202FF 43FF028F v_mov_b32_e32 v2, 1.0 ; 7E0402F2 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[3:6], v0, s[4:7], 0 idxen ; E00C2000 80010300 buffer_load_format_xyzw v[7:10], v0, s[12:15], 0 idxen ; E00C2000 80030700 buffer_load_format_xyzw v[11:14], v0, s[16:19], 0 idxen ; E00C2000 80040B00 s_waitcnt vmcnt(2) ; BF8C0772 exp 15, 32, 0, 0, 0, v3, v4, v5, v6 ; F800020F 06050403 s_waitcnt vmcnt(1) ; BF8C0771 v_madak_f32_e32 v0, v9, v1, 0x3dcccccd ; 42000309 3DCCCCCD v_madak_f32_e32 v1, v9, v1, 0x3f8ccccd ; 42020309 3F8CCCCD v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_lshlrev_b32_e32 v0, 4, v0 ; 34000084 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 s_waitcnt expcnt(0) ; BF8C070F buffer_load_dword v3, v0, s[0:3], 0 offen ; E0301000 80000300 v_or_b32_e32 v4, 4, v0 ; 38080084 v_or_b32_e32 v5, 8, v0 ; 380A0088 v_or_b32_e32 v0, 12, v0 ; 3800008C buffer_load_dword v6, v1, s[0:3], 0 offen ; E0301000 80000601 v_or_b32_e32 v15, 4, v1 ; 381E0284 buffer_load_dword v4, v4, s[0:3], 0 offen ; E0301000 80000404 buffer_load_dword v15, v15, s[0:3], 0 offen ; E0301000 80000F0F v_or_b32_e32 v16, 8, v1 ; 38200288 v_or_b32_e32 v1, 12, v1 ; 3802028C buffer_load_dword v5, v5, s[0:3], 0 offen ; E0301000 80000505 buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 exp 15, 33, 0, 0, 0, v7, v8, v9, v10 ; F800021F 0A090807 s_waitcnt vmcnt(5) ; BF8C0775 v_mul_f32_e32 v4, v4, v12 ; 10081904 s_waitcnt vmcnt(4) expcnt(0) ; BF8C0704 v_mul_f32_e32 v7, v15, v12 ; 100E190F v_mac_f32_e32 v4, v3, v11 ; 3E081703 v_mac_f32_e32 v7, v6, v11 ; 3E0E1706 s_waitcnt vmcnt(3) ; BF8C0773 v_mac_f32_e32 v4, v5, v13 ; 3E081B05 s_waitcnt vmcnt(2) ; BF8C0772 v_mac_f32_e32 v7, v16, v13 ; 3E0E1B10 s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v4, v0, v14 ; 3E081D00 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v7, v1, v14 ; 3E0E1D01 v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 12, 0, 1, 0, v4, v7, v0, v2 ; F80008CF 02000704 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 20 Code Size: 280 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xyz, IN[0].xyzx 1: MUL TEMP[1].x, IN[0].wwww, IN[1].wwww 2: MOV TEMP[0].w, TEMP[1].xxxx 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %24 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %25 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %26 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %27 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %28 = fmul float %26, %27 %29 = call i32 @llvm.SI.packf16(float %23, float %24) %30 = bitcast i32 %29 to float %31 = call i32 @llvm.SI.packf16(float %25, float %28) %32 = bitcast i32 %31 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %30, float %32, float %30, float %32) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s10 ; BEFC030A v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v0, v0, 3, 1, [m0] ; C8000700 v_interp_p2_f32 v0, [v0], v1, 3, 1, [m0] ; C8010701 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 68 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], CLIPVERTEX DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL CONST[0..54] DCL TEMP[0..3], LOCAL 0: MUL TEMP[0].xy, CONST[48].xyyy, IN[1].xyyy 1: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 2: ADD TEMP[1].x, TEMP[0].xxxx, CONST[48].wwww 3: MOV TEMP[1].z, TEMP[1].xxxx 4: MUL TEMP[0].xy, CONST[49].xyyy, IN[1].xyyy 5: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 6: ADD TEMP[2].x, TEMP[0].xxxx, CONST[49].wwww 7: MOV TEMP[1].w, TEMP[2].xxxx 8: MUL TEMP[0].xy, CONST[50].xyyy, IN[1].xyyy 9: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 10: ADD TEMP[2].x, TEMP[0].xxxx, CONST[50].wwww 11: MUL TEMP[0].xy, CONST[51].xyyy, IN[1].xyyy 12: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 13: ADD TEMP[3].x, TEMP[0].xxxx, CONST[51].wwww 14: MOV TEMP[2].y, TEMP[3].xxxx 15: MUL TEMP[0].xy, CONST[52].xyyy, IN[1].xyyy 16: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 17: ADD TEMP[3].x, TEMP[0].xxxx, CONST[52].wwww 18: MOV TEMP[2].z, TEMP[3].xxxx 19: MUL TEMP[0].xy, CONST[53].xyyy, IN[1].xyyy 20: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 21: ADD TEMP[0].x, TEMP[0].xxxx, CONST[53].wwww 22: MOV TEMP[2].w, TEMP[0].xxxx 23: MOV TEMP[0].xw, IN[0].xxxw 24: MOV TEMP[1].xy, IN[1].xyxx 25: MAD TEMP[3].x, IN[0].zzzz, CONST[0].zzzz, -IN[0].wwww 26: MOV TEMP[0].z, TEMP[3].xxxx 27: MOV TEMP[0].y, -IN[0].yyyy 28: MAD TEMP[0].xy, CONST[54].xyyy, IN[0].wwww, TEMP[0].xyyy 29: MOV OUT[2], TEMP[1] 30: MOV OUT[3], TEMP[2] 31: MOV OUT[0], TEMP[0] 32: MOV OUT[1], IN[0] 33: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 768) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 772) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 780) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 784) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 788) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 796) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 800) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 804) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 812) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 816) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 820) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 828) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 832) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 836) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 844) %30 = call float @llvm.SI.load.const(<16 x i8> %13, i32 848) %31 = call float @llvm.SI.load.const(<16 x i8> %13, i32 852) %32 = call float @llvm.SI.load.const(<16 x i8> %13, i32 860) %33 = call float @llvm.SI.load.const(<16 x i8> %13, i32 864) %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 868) %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 %37 = add i32 %5, %8 %38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = add i32 %5, %8 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = fmul float %15, %47 %50 = fmul float %16, %48 %51 = fadd float %50, %49 %52 = fadd float %51, %17 %53 = fmul float %18, %47 %54 = fmul float %19, %48 %55 = fadd float %54, %53 %56 = fadd float %55, %20 %57 = fmul float %21, %47 %58 = fmul float %22, %48 %59 = fadd float %58, %57 %60 = fadd float %59, %23 %61 = fmul float %24, %47 %62 = fmul float %25, %48 %63 = fadd float %62, %61 %64 = fadd float %63, %26 %65 = fmul float %27, %47 %66 = fmul float %28, %48 %67 = fadd float %66, %65 %68 = fadd float %67, %29 %69 = fmul float %30, %47 %70 = fmul float %31, %48 %71 = fadd float %70, %69 %72 = fadd float %71, %32 %73 = fmul float %41, %14 %74 = fsub float %73, %42 %75 = fmul float %33, %42 %76 = fadd float %75, %39 %77 = fmul float %34, %42 %78 = fsub float %77, %40 %79 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 16 %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 %81 = call float @llvm.SI.load.const(<16 x i8> %80, i32 0) %82 = fmul float %81, %39 %83 = call float @llvm.SI.load.const(<16 x i8> %80, i32 4) %84 = fmul float %83, %40 %85 = fadd float %82, %84 %86 = call float @llvm.SI.load.const(<16 x i8> %80, i32 8) %87 = fmul float %86, %41 %88 = fadd float %85, %87 %89 = call float @llvm.SI.load.const(<16 x i8> %80, i32 12) %90 = fmul float %89, %42 %91 = fadd float %88, %90 %92 = call float @llvm.SI.load.const(<16 x i8> %80, i32 16) %93 = fmul float %92, %39 %94 = call float @llvm.SI.load.const(<16 x i8> %80, i32 20) %95 = fmul float %94, %40 %96 = fadd float %93, %95 %97 = call float @llvm.SI.load.const(<16 x i8> %80, i32 24) %98 = fmul float %97, %41 %99 = fadd float %96, %98 %100 = call float @llvm.SI.load.const(<16 x i8> %80, i32 28) %101 = fmul float %100, %42 %102 = fadd float %99, %101 %103 = call float @llvm.SI.load.const(<16 x i8> %80, i32 32) %104 = fmul float %103, %39 %105 = call float @llvm.SI.load.const(<16 x i8> %80, i32 36) %106 = fmul float %105, %40 %107 = fadd float %104, %106 %108 = call float @llvm.SI.load.const(<16 x i8> %80, i32 40) %109 = fmul float %108, %41 %110 = fadd float %107, %109 %111 = call float @llvm.SI.load.const(<16 x i8> %80, i32 44) %112 = fmul float %111, %42 %113 = fadd float %110, %112 %114 = call float @llvm.SI.load.const(<16 x i8> %80, i32 48) %115 = fmul float %114, %39 %116 = call float @llvm.SI.load.const(<16 x i8> %80, i32 52) %117 = fmul float %116, %40 %118 = fadd float %115, %117 %119 = call float @llvm.SI.load.const(<16 x i8> %80, i32 56) %120 = fmul float %119, %41 %121 = fadd float %118, %120 %122 = call float @llvm.SI.load.const(<16 x i8> %80, i32 60) %123 = fmul float %122, %42 %124 = fadd float %121, %123 %125 = call float @llvm.SI.load.const(<16 x i8> %80, i32 64) %126 = fmul float %125, %39 %127 = call float @llvm.SI.load.const(<16 x i8> %80, i32 68) %128 = fmul float %127, %40 %129 = fadd float %126, %128 %130 = call float @llvm.SI.load.const(<16 x i8> %80, i32 72) %131 = fmul float %130, %41 %132 = fadd float %129, %131 %133 = call float @llvm.SI.load.const(<16 x i8> %80, i32 76) %134 = fmul float %133, %42 %135 = fadd float %132, %134 %136 = call float @llvm.SI.load.const(<16 x i8> %80, i32 80) %137 = fmul float %136, %39 %138 = call float @llvm.SI.load.const(<16 x i8> %80, i32 84) %139 = fmul float %138, %40 %140 = fadd float %137, %139 %141 = call float @llvm.SI.load.const(<16 x i8> %80, i32 88) %142 = fmul float %141, %41 %143 = fadd float %140, %142 %144 = call float @llvm.SI.load.const(<16 x i8> %80, i32 92) %145 = fmul float %144, %42 %146 = fadd float %143, %145 %147 = call float @llvm.SI.load.const(<16 x i8> %80, i32 96) %148 = fmul float %147, %39 %149 = call float @llvm.SI.load.const(<16 x i8> %80, i32 100) %150 = fmul float %149, %40 %151 = fadd float %148, %150 %152 = call float @llvm.SI.load.const(<16 x i8> %80, i32 104) %153 = fmul float %152, %41 %154 = fadd float %151, %153 %155 = call float @llvm.SI.load.const(<16 x i8> %80, i32 108) %156 = fmul float %155, %42 %157 = fadd float %154, %156 %158 = call float @llvm.SI.load.const(<16 x i8> %80, i32 112) %159 = fmul float %158, %39 %160 = call float @llvm.SI.load.const(<16 x i8> %80, i32 116) %161 = fmul float %160, %40 %162 = fadd float %159, %161 %163 = call float @llvm.SI.load.const(<16 x i8> %80, i32 120) %164 = fmul float %163, %41 %165 = fadd float %162, %164 %166 = call float @llvm.SI.load.const(<16 x i8> %80, i32 124) %167 = fmul float %166, %42 %168 = fadd float %165, %167 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %47, float %48, float %52, float %56) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %60, float %64, float %68, float %72) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %76, float %78, float %74, float %42) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 13, i32 0, float %91, float %102, float %113, float %124) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 14, i32 0, float %135, float %146, float %157, float %168) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_load_dwordx4 s[4:7], s[2:3], 0x0 ; C0820300 s_load_dwordx4 s[0:3], s[2:3], 0x40 ; C0800340 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[4:7], 0x2 ; C2040502 s_buffer_load_dword s9, s[4:7], 0xc0 ; C20485C0 s_buffer_load_dword s10, s[4:7], 0xc1 ; C20505C1 s_buffer_load_dword s11, s[4:7], 0xc3 ; C20585C3 s_buffer_load_dword s12, s[4:7], 0xc4 ; C20605C4 s_buffer_load_dword s13, s[4:7], 0xc5 ; C20685C5 s_buffer_load_dword s14, s[4:7], 0xc7 ; C20705C7 s_buffer_load_dword s15, s[4:7], 0xc8 ; C20785C8 s_buffer_load_dword s16, s[4:7], 0xc9 ; C20805C9 s_buffer_load_dword s17, s[4:7], 0xcb ; C20885CB s_buffer_load_dword s18, s[4:7], 0xcc ; C20905CC s_buffer_load_dword s19, s[4:7], 0xcd ; C20985CD s_buffer_load_dword s20, s[4:7], 0xcf ; C20A05CF s_buffer_load_dword s21, s[4:7], 0xd0 ; C20A85D0 s_buffer_load_dword s22, s[4:7], 0xd1 ; C20B05D1 s_buffer_load_dword s23, s[4:7], 0xd3 ; C20B85D3 s_buffer_load_dword s24, s[4:7], 0xd4 ; C20C05D4 s_buffer_load_dword s25, s[4:7], 0xd5 ; C20C85D5 s_buffer_load_dword s26, s[4:7], 0xd7 ; C20D05D7 s_buffer_load_dword s27, s[4:7], 0xd8 ; C20D85D8 s_buffer_load_dword s4, s[4:7], 0xd9 ; C20205D9 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x2 ; C2038102 s_buffer_load_dword s28, s[0:3], 0x3 ; C20E0103 s_buffer_load_dword s29, s[0:3], 0x4 ; C20E8104 s_buffer_load_dword s30, s[0:3], 0x5 ; C20F0105 s_buffer_load_dword s31, s[0:3], 0x6 ; C20F8106 s_buffer_load_dword s32, s[0:3], 0x7 ; C2100107 s_buffer_load_dword s33, s[0:3], 0x8 ; C2108108 s_buffer_load_dword s34, s[0:3], 0x9 ; C2110109 s_buffer_load_dword s35, s[0:3], 0xa ; C211810A s_buffer_load_dword s36, s[0:3], 0xb ; C212010B s_buffer_load_dword s37, s[0:3], 0xc ; C212810C s_buffer_load_dword s38, s[0:3], 0xd ; C213010D s_buffer_load_dword s39, s[0:3], 0xe ; C213810E s_buffer_load_dword s40, s[0:3], 0xf ; C214010F s_buffer_load_dword s41, s[0:3], 0x10 ; C2148110 s_buffer_load_dword s42, s[0:3], 0x11 ; C2150111 s_buffer_load_dword s43, s[0:3], 0x12 ; C2158112 s_buffer_load_dword s44, s[0:3], 0x13 ; C2160113 s_buffer_load_dword s45, s[0:3], 0x14 ; C2168114 s_buffer_load_dword s46, s[0:3], 0x15 ; C2170115 s_buffer_load_dword s47, s[0:3], 0x16 ; C2178116 s_buffer_load_dword s48, s[0:3], 0x17 ; C2180117 s_buffer_load_dword s49, s[0:3], 0x18 ; C2188118 s_buffer_load_dword s50, s[0:3], 0x19 ; C2190119 s_buffer_load_dword s51, s[0:3], 0x1a ; C219811A s_buffer_load_dword s52, s[0:3], 0x1b ; C21A011B s_buffer_load_dword s53, s[0:3], 0x1c ; C21A811C s_buffer_load_dword s54, s[0:3], 0x1d ; C21B011D s_buffer_load_dword s55, s[0:3], 0x1e ; C21B811E s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s11 ; 7E00020B s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v7, s14 ; 7E0E020E v_mov_b32_e32 v8, s17 ; 7E100211 v_mov_b32_e32 v9, s20 ; 7E120214 v_mov_b32_e32 v10, s23 ; 7E140217 v_mov_b32_e32 v11, s26 ; 7E16021A v_mac_f32_e32 v0, s9, v5 ; 3E000A09 v_mac_f32_e32 v7, s12, v5 ; 3E0E0A0C v_mac_f32_e32 v8, s15, v5 ; 3E100A0F v_mac_f32_e32 v9, s18, v5 ; 3E120A12 v_mac_f32_e32 v10, s21, v5 ; 3E140A15 v_mac_f32_e32 v11, s24, v5 ; 3E160A18 v_mac_f32_e32 v0, s10, v6 ; 3E000C0A v_mac_f32_e32 v7, s13, v6 ; 3E0E0C0D v_mac_f32_e32 v8, s16, v6 ; 3E100C10 v_mac_f32_e32 v9, s19, v6 ; 3E120C13 exp 15, 32, 0, 0, 0, v5, v6, v0, v7 ; F800020F 07000605 v_mac_f32_e32 v10, s22, v6 ; 3E140C16 v_mac_f32_e32 v11, s25, v6 ; 3E160C19 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v0, v3, s8, -v4 ; D2820000 84101103 v_mad_f32 v5, s27, v4, v1 ; D2820005 0406081B v_mad_f32 v6, s4, v4, -v2 ; D2820006 840A0804 v_mul_f32_e32 v7, s6, v2 ; 100E0406 exp 15, 33, 0, 0, 0, v8, v9, v10, v11 ; F800021F 0B0A0908 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v8, s30, v2 ; 1010041E v_mac_f32_e32 v7, s5, v1 ; 3E0E0205 exp 15, 12, 0, 0, 0, v5, v6, v0, v4 ; F80000CF 04000605 v_mac_f32_e32 v8, s29, v1 ; 3E10021D s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s34, v2 ; 10000422 v_mac_f32_e32 v0, s33, v1 ; 3E000221 v_mul_f32_e32 v5, s38, v2 ; 100A0426 v_mac_f32_e32 v5, s37, v1 ; 3E0A0225 v_mul_f32_e32 v6, s42, v2 ; 100C042A v_mul_f32_e32 v9, s46, v2 ; 1012042E v_mul_f32_e32 v10, s50, v2 ; 10140432 v_mul_f32_e32 v2, s54, v2 ; 10040436 v_mac_f32_e32 v6, s41, v1 ; 3E0C0229 v_mac_f32_e32 v9, s45, v1 ; 3E12022D v_mac_f32_e32 v10, s49, v1 ; 3E140231 v_mac_f32_e32 v2, s53, v1 ; 3E040235 v_mac_f32_e32 v7, s7, v3 ; 3E0E0607 v_mac_f32_e32 v8, s31, v3 ; 3E10061F v_mac_f32_e32 v0, s35, v3 ; 3E000623 v_mac_f32_e32 v5, s39, v3 ; 3E0A0627 v_mac_f32_e32 v6, s43, v3 ; 3E0C062B v_mac_f32_e32 v9, s47, v3 ; 3E12062F v_mac_f32_e32 v10, s51, v3 ; 3E140633 v_mac_f32_e32 v2, s55, v3 ; 3E040637 v_mac_f32_e32 v7, s28, v4 ; 3E0E081C v_mac_f32_e32 v8, s32, v4 ; 3E100820 v_mac_f32_e32 v0, s36, v4 ; 3E000824 v_mac_f32_e32 v5, s40, v4 ; 3E0A0828 v_mac_f32_e32 v6, s44, v4 ; 3E0C082C v_mac_f32_e32 v9, s48, v4 ; 3E120830 v_mac_f32_e32 v10, s52, v4 ; 3E140834 v_mac_f32_e32 v2, s0, v4 ; 3E040800 exp 15, 13, 0, 0, 0, v7, v8, v0, v5 ; F80000DF 05000807 exp 15, 14, 0, 1, 0, v6, v9, v10, v2 ; F80008EF 020A0906 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 12 Code Size: 544 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL CONST[0..3] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 6.0000, 1.0000, -0.5800, 10.0000} IMM[1] FLT32 { -2.0000, 3.0000, -1.0000, -0.0000} IMM[2] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0].xw, TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].xy, IN[1].xyyy 3: TEX TEMP[1].y, TEMP[1], SAMP[1], 2D 4: MAD TEMP[0].x, TEMP[1].yyyy, TEMP[0].xxxx, TEMP[0].wwww 5: MAD TEMP[2].x, CONST[3].wwww, IMM[0].xxxx, IMM[0].yyyy 6: MAD TEMP[0].x, TEMP[0].xxxx, TEMP[2].xxxx, IMM[0].zzzz 7: MUL TEMP[0].x, TEMP[0].xxxx, IMM[0].wwww 8: MOV_SAT TEMP[2].x, TEMP[0].xxxx 9: MAD TEMP[3].x, TEMP[2].xxxx, IMM[1].xxxx, IMM[1].yyyy 10: MUL TEMP[0].x, TEMP[2].xxxx, TEMP[2].xxxx 11: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[3].xxxx 12: MOV TEMP[2].xy, IN[0].xyyy 13: TEX TEMP[2].xyz, TEMP[2], SAMP[2], 2D 14: MAD TEMP[2].xyz, CONST[3].yyyy, IMM[1].zwww, TEMP[2].xyzz 15: MUL TEMP[1].xyz, IMM[2].xyyy, CONST[3].yyyy 16: MAD TEMP[0].xyz, TEMP[0].xxxx, TEMP[2].xyzz, TEMP[1].xyzz 17: MOV TEMP[0].w, IMM[0].yyyy 18: MOV OUT[0], TEMP[0] 19: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 52) %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 60) %27 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %28 = load <8 x i32>, <8 x i32> addrspace(2)* %27, align 32, !tbaa !0 %29 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %30 = load <4 x i32>, <4 x i32> addrspace(2)* %29, align 16, !tbaa !0 %31 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %32 = load <8 x i32>, <8 x i32> addrspace(2)* %31, align 32, !tbaa !0 %33 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %34 = load <4 x i32>, <4 x i32> addrspace(2)* %33, align 16, !tbaa !0 %35 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %36 = load <8 x i32>, <8 x i32> addrspace(2)* %35, align 32, !tbaa !0 %37 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %38 = load <4 x i32>, <4 x i32> addrspace(2)* %37, align 16, !tbaa !0 %39 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %40 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %41 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %42 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %43 = bitcast float %39 to i32 %44 = bitcast float %40 to i32 %45 = insertelement <2 x i32> undef, i32 %43, i32 0 %46 = insertelement <2 x i32> %45, i32 %44, i32 1 %47 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %46, <8 x i32> %28, <4 x i32> %30, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %48 = extractelement <4 x float> %47, i32 0 %49 = extractelement <4 x float> %47, i32 3 %50 = bitcast float %41 to i32 %51 = bitcast float %42 to i32 %52 = insertelement <2 x i32> undef, i32 %50, i32 0 %53 = insertelement <2 x i32> %52, i32 %51, i32 1 %54 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %53, <8 x i32> %32, <4 x i32> %34, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %55 = extractelement <4 x float> %54, i32 1 %56 = fmul float %55, %48 %57 = fadd float %56, %49 %58 = fmul float %26, 6.000000e+00 %59 = fadd float %58, 1.000000e+00 %60 = fmul float %57, %59 %61 = fadd float %60, 0xBFE28F5C20000000 %62 = fmul float %61, 1.000000e+01 %63 = call float @llvm.AMDIL.clamp.(float %62, float 0.000000e+00, float 1.000000e+00) %64 = fmul float %63, -2.000000e+00 %65 = fadd float %64, 3.000000e+00 %66 = fmul float %63, %63 %67 = fmul float %66, %65 %68 = bitcast float %39 to i32 %69 = bitcast float %40 to i32 %70 = insertelement <2 x i32> undef, i32 %68, i32 0 %71 = insertelement <2 x i32> %70, i32 %69, i32 1 %72 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %71, <8 x i32> %36, <4 x i32> %38, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %73 = extractelement <4 x float> %72, i32 0 %74 = extractelement <4 x float> %72, i32 1 %75 = extractelement <4 x float> %72, i32 2 %76 = fsub float %73, %25 %77 = fmul float %25, -0.000000e+00 %78 = fadd float %77, %74 %79 = fmul float %25, -0.000000e+00 %80 = fadd float %79, %75 %81 = fmul float %25, 0.000000e+00 %82 = fmul float %25, 0.000000e+00 %83 = fmul float %67, %76 %84 = fadd float %83, %25 %85 = fmul float %67, %78 %86 = fadd float %85, %81 %87 = fmul float %67, %80 %88 = fadd float %87, %82 %89 = call i32 @llvm.SI.packf16(float %84, float %86) %90 = bitcast i32 %89 to float %91 = call i32 @llvm.SI.packf16(float %88, float 1.000000e+00) %92 = bitcast i32 %91 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %90, float %92, float %90, float %92) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s10 ; BEFC030A v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx4 s[16:19], s[4:5], 0x8 ; C0880508 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx8 s[20:27], s[6:7], 0x0 ; C0CA0700 s_load_dwordx8 s[28:35], s[6:7], 0x8 ; C0CE0708 s_load_dwordx8 s[36:43], s[6:7], 0x10 ; C0D20710 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0xf ; C202010F v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 image_sample v[0:1], 9, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[20:27], s[8:11] ; F0800900 00450002 image_sample v4, 2, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[28:35], s[12:15] ; F0800200 00670404 image_sample v[5:7], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[36:43], s[16:19] ; F0800700 00890502 s_buffer_load_dword s0, s[0:3], 0xd ; C200010D s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v1, v0, v4 ; 3E020900 v_mov_b32_e32 v0, 0x40c00000 ; 7E0002FF 40C00000 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, s4, 1.0 ; D2820000 03C80900 v_madak_f32_e32 v0, v1, v0, 0xbf147ae1 ; 42000101 BF147AE1 v_mul_f32_e32 v0, 0x41200000, v0 ; 100000FF 41200000 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_madak_f32_e32 v1, -2.0, v0, 0x40400000 ; 420200F5 40400000 v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mul_f32_e32 v0, v1, v0 ; 10000101 s_waitcnt vmcnt(0) ; BF8C0770 v_subrev_f32_e32 v1, s0, v5 ; 0A020A00 v_mov_b32_e32 v2, 0x80000000 ; 7E0402FF 80000000 v_mad_f32 v3, s0, v2, v6 ; D2820003 041A0400 v_mac_f32_e32 v7, s0, v2 ; 3E0E0400 v_mul_f32_e64 v2, 0, s0 ; D2100002 00000080 v_mad_f32 v1, v0, v1, s0 ; D2820001 00020300 v_mad_f32 v3, v3, v0, v2 ; D2820003 040A0103 v_mac_f32_e32 v2, v7, v0 ; 3E040107 v_cvt_pkrtz_f16_f32_e32 v0, v1, v3 ; 5E000701 v_cvt_pkrtz_f16_f32_e64 v1, v2, 1.0 ; D25E0001 0001E502 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 8 Code Size: 252 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], CLIPVERTEX DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL CONST[0..54] DCL TEMP[0..3], LOCAL 0: MUL TEMP[0].xy, CONST[48].xyyy, IN[1].xyyy 1: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 2: ADD TEMP[1].x, TEMP[0].xxxx, CONST[48].wwww 3: MOV TEMP[1].z, TEMP[1].xxxx 4: MUL TEMP[0].xy, CONST[49].xyyy, IN[1].xyyy 5: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 6: ADD TEMP[2].x, TEMP[0].xxxx, CONST[49].wwww 7: MOV TEMP[1].w, TEMP[2].xxxx 8: MUL TEMP[0].xy, CONST[50].xyyy, IN[1].xyyy 9: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 10: ADD TEMP[2].x, TEMP[0].xxxx, CONST[50].wwww 11: MUL TEMP[0].xy, CONST[51].xyyy, IN[1].xyyy 12: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 13: ADD TEMP[3].x, TEMP[0].xxxx, CONST[51].wwww 14: MOV TEMP[2].y, TEMP[3].xxxx 15: MUL TEMP[0].xy, CONST[52].xyyy, IN[1].xyyy 16: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 17: ADD TEMP[3].x, TEMP[0].xxxx, CONST[52].wwww 18: MOV TEMP[2].z, TEMP[3].xxxx 19: MUL TEMP[0].xy, CONST[53].xyyy, IN[1].xyyy 20: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 21: ADD TEMP[0].x, TEMP[0].xxxx, CONST[53].wwww 22: MOV TEMP[2].w, TEMP[0].xxxx 23: MOV TEMP[0].xw, IN[0].xxxw 24: MOV TEMP[1].xy, IN[1].xyxx 25: MAD TEMP[3].x, IN[0].zzzz, CONST[0].zzzz, -IN[0].wwww 26: MOV TEMP[0].z, TEMP[3].xxxx 27: MOV TEMP[0].y, -IN[0].yyyy 28: MAD TEMP[0].xy, CONST[54].xyyy, IN[0].wwww, TEMP[0].xyyy 29: MOV OUT[2], TEMP[1] 30: MOV OUT[3], TEMP[2] 31: MOV OUT[0], TEMP[0] 32: MOV OUT[1], IN[0] 33: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 768) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 772) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 780) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 784) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 788) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 796) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 800) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 804) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 812) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 816) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 820) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 828) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 832) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 836) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 844) %30 = call float @llvm.SI.load.const(<16 x i8> %13, i32 848) %31 = call float @llvm.SI.load.const(<16 x i8> %13, i32 852) %32 = call float @llvm.SI.load.const(<16 x i8> %13, i32 860) %33 = call float @llvm.SI.load.const(<16 x i8> %13, i32 864) %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 868) %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 %37 = add i32 %5, %8 %38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = add i32 %5, %8 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = fmul float %15, %47 %50 = fmul float %16, %48 %51 = fadd float %50, %49 %52 = fadd float %51, %17 %53 = fmul float %18, %47 %54 = fmul float %19, %48 %55 = fadd float %54, %53 %56 = fadd float %55, %20 %57 = fmul float %21, %47 %58 = fmul float %22, %48 %59 = fadd float %58, %57 %60 = fadd float %59, %23 %61 = fmul float %24, %47 %62 = fmul float %25, %48 %63 = fadd float %62, %61 %64 = fadd float %63, %26 %65 = fmul float %27, %47 %66 = fmul float %28, %48 %67 = fadd float %66, %65 %68 = fadd float %67, %29 %69 = fmul float %30, %47 %70 = fmul float %31, %48 %71 = fadd float %70, %69 %72 = fadd float %71, %32 %73 = fmul float %41, %14 %74 = fsub float %73, %42 %75 = fmul float %33, %42 %76 = fadd float %75, %39 %77 = fmul float %34, %42 %78 = fsub float %77, %40 %79 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 16 %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 %81 = call float @llvm.SI.load.const(<16 x i8> %80, i32 0) %82 = fmul float %81, %39 %83 = call float @llvm.SI.load.const(<16 x i8> %80, i32 4) %84 = fmul float %83, %40 %85 = fadd float %82, %84 %86 = call float @llvm.SI.load.const(<16 x i8> %80, i32 8) %87 = fmul float %86, %41 %88 = fadd float %85, %87 %89 = call float @llvm.SI.load.const(<16 x i8> %80, i32 12) %90 = fmul float %89, %42 %91 = fadd float %88, %90 %92 = call float @llvm.SI.load.const(<16 x i8> %80, i32 16) %93 = fmul float %92, %39 %94 = call float @llvm.SI.load.const(<16 x i8> %80, i32 20) %95 = fmul float %94, %40 %96 = fadd float %93, %95 %97 = call float @llvm.SI.load.const(<16 x i8> %80, i32 24) %98 = fmul float %97, %41 %99 = fadd float %96, %98 %100 = call float @llvm.SI.load.const(<16 x i8> %80, i32 28) %101 = fmul float %100, %42 %102 = fadd float %99, %101 %103 = call float @llvm.SI.load.const(<16 x i8> %80, i32 32) %104 = fmul float %103, %39 %105 = call float @llvm.SI.load.const(<16 x i8> %80, i32 36) %106 = fmul float %105, %40 %107 = fadd float %104, %106 %108 = call float @llvm.SI.load.const(<16 x i8> %80, i32 40) %109 = fmul float %108, %41 %110 = fadd float %107, %109 %111 = call float @llvm.SI.load.const(<16 x i8> %80, i32 44) %112 = fmul float %111, %42 %113 = fadd float %110, %112 %114 = call float @llvm.SI.load.const(<16 x i8> %80, i32 48) %115 = fmul float %114, %39 %116 = call float @llvm.SI.load.const(<16 x i8> %80, i32 52) %117 = fmul float %116, %40 %118 = fadd float %115, %117 %119 = call float @llvm.SI.load.const(<16 x i8> %80, i32 56) %120 = fmul float %119, %41 %121 = fadd float %118, %120 %122 = call float @llvm.SI.load.const(<16 x i8> %80, i32 60) %123 = fmul float %122, %42 %124 = fadd float %121, %123 %125 = call float @llvm.SI.load.const(<16 x i8> %80, i32 64) %126 = fmul float %125, %39 %127 = call float @llvm.SI.load.const(<16 x i8> %80, i32 68) %128 = fmul float %127, %40 %129 = fadd float %126, %128 %130 = call float @llvm.SI.load.const(<16 x i8> %80, i32 72) %131 = fmul float %130, %41 %132 = fadd float %129, %131 %133 = call float @llvm.SI.load.const(<16 x i8> %80, i32 76) %134 = fmul float %133, %42 %135 = fadd float %132, %134 %136 = call float @llvm.SI.load.const(<16 x i8> %80, i32 80) %137 = fmul float %136, %39 %138 = call float @llvm.SI.load.const(<16 x i8> %80, i32 84) %139 = fmul float %138, %40 %140 = fadd float %137, %139 %141 = call float @llvm.SI.load.const(<16 x i8> %80, i32 88) %142 = fmul float %141, %41 %143 = fadd float %140, %142 %144 = call float @llvm.SI.load.const(<16 x i8> %80, i32 92) %145 = fmul float %144, %42 %146 = fadd float %143, %145 %147 = call float @llvm.SI.load.const(<16 x i8> %80, i32 96) %148 = fmul float %147, %39 %149 = call float @llvm.SI.load.const(<16 x i8> %80, i32 100) %150 = fmul float %149, %40 %151 = fadd float %148, %150 %152 = call float @llvm.SI.load.const(<16 x i8> %80, i32 104) %153 = fmul float %152, %41 %154 = fadd float %151, %153 %155 = call float @llvm.SI.load.const(<16 x i8> %80, i32 108) %156 = fmul float %155, %42 %157 = fadd float %154, %156 %158 = call float @llvm.SI.load.const(<16 x i8> %80, i32 112) %159 = fmul float %158, %39 %160 = call float @llvm.SI.load.const(<16 x i8> %80, i32 116) %161 = fmul float %160, %40 %162 = fadd float %159, %161 %163 = call float @llvm.SI.load.const(<16 x i8> %80, i32 120) %164 = fmul float %163, %41 %165 = fadd float %162, %164 %166 = call float @llvm.SI.load.const(<16 x i8> %80, i32 124) %167 = fmul float %166, %42 %168 = fadd float %165, %167 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %47, float %48, float %52, float %56) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %60, float %64, float %68, float %72) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %76, float %78, float %74, float %42) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 13, i32 0, float %91, float %102, float %113, float %124) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 14, i32 0, float %135, float %146, float %157, float %168) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_load_dwordx4 s[4:7], s[2:3], 0x0 ; C0820300 s_load_dwordx4 s[0:3], s[2:3], 0x40 ; C0800340 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[4:7], 0x2 ; C2040502 s_buffer_load_dword s9, s[4:7], 0xc0 ; C20485C0 s_buffer_load_dword s10, s[4:7], 0xc1 ; C20505C1 s_buffer_load_dword s11, s[4:7], 0xc3 ; C20585C3 s_buffer_load_dword s12, s[4:7], 0xc4 ; C20605C4 s_buffer_load_dword s13, s[4:7], 0xc5 ; C20685C5 s_buffer_load_dword s14, s[4:7], 0xc7 ; C20705C7 s_buffer_load_dword s15, s[4:7], 0xc8 ; C20785C8 s_buffer_load_dword s16, s[4:7], 0xc9 ; C20805C9 s_buffer_load_dword s17, s[4:7], 0xcb ; C20885CB s_buffer_load_dword s18, s[4:7], 0xcc ; C20905CC s_buffer_load_dword s19, s[4:7], 0xcd ; C20985CD s_buffer_load_dword s20, s[4:7], 0xcf ; C20A05CF s_buffer_load_dword s21, s[4:7], 0xd0 ; C20A85D0 s_buffer_load_dword s22, s[4:7], 0xd1 ; C20B05D1 s_buffer_load_dword s23, s[4:7], 0xd3 ; C20B85D3 s_buffer_load_dword s24, s[4:7], 0xd4 ; C20C05D4 s_buffer_load_dword s25, s[4:7], 0xd5 ; C20C85D5 s_buffer_load_dword s26, s[4:7], 0xd7 ; C20D05D7 s_buffer_load_dword s27, s[4:7], 0xd8 ; C20D85D8 s_buffer_load_dword s4, s[4:7], 0xd9 ; C20205D9 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x2 ; C2038102 s_buffer_load_dword s28, s[0:3], 0x3 ; C20E0103 s_buffer_load_dword s29, s[0:3], 0x4 ; C20E8104 s_buffer_load_dword s30, s[0:3], 0x5 ; C20F0105 s_buffer_load_dword s31, s[0:3], 0x6 ; C20F8106 s_buffer_load_dword s32, s[0:3], 0x7 ; C2100107 s_buffer_load_dword s33, s[0:3], 0x8 ; C2108108 s_buffer_load_dword s34, s[0:3], 0x9 ; C2110109 s_buffer_load_dword s35, s[0:3], 0xa ; C211810A s_buffer_load_dword s36, s[0:3], 0xb ; C212010B s_buffer_load_dword s37, s[0:3], 0xc ; C212810C s_buffer_load_dword s38, s[0:3], 0xd ; C213010D s_buffer_load_dword s39, s[0:3], 0xe ; C213810E s_buffer_load_dword s40, s[0:3], 0xf ; C214010F s_buffer_load_dword s41, s[0:3], 0x10 ; C2148110 s_buffer_load_dword s42, s[0:3], 0x11 ; C2150111 s_buffer_load_dword s43, s[0:3], 0x12 ; C2158112 s_buffer_load_dword s44, s[0:3], 0x13 ; C2160113 s_buffer_load_dword s45, s[0:3], 0x14 ; C2168114 s_buffer_load_dword s46, s[0:3], 0x15 ; C2170115 s_buffer_load_dword s47, s[0:3], 0x16 ; C2178116 s_buffer_load_dword s48, s[0:3], 0x17 ; C2180117 s_buffer_load_dword s49, s[0:3], 0x18 ; C2188118 s_buffer_load_dword s50, s[0:3], 0x19 ; C2190119 s_buffer_load_dword s51, s[0:3], 0x1a ; C219811A s_buffer_load_dword s52, s[0:3], 0x1b ; C21A011B s_buffer_load_dword s53, s[0:3], 0x1c ; C21A811C s_buffer_load_dword s54, s[0:3], 0x1d ; C21B011D s_buffer_load_dword s55, s[0:3], 0x1e ; C21B811E s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s11 ; 7E00020B s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v7, s14 ; 7E0E020E v_mov_b32_e32 v8, s17 ; 7E100211 v_mov_b32_e32 v9, s20 ; 7E120214 v_mov_b32_e32 v10, s23 ; 7E140217 v_mov_b32_e32 v11, s26 ; 7E16021A v_mac_f32_e32 v0, s9, v5 ; 3E000A09 v_mac_f32_e32 v7, s12, v5 ; 3E0E0A0C v_mac_f32_e32 v8, s15, v5 ; 3E100A0F v_mac_f32_e32 v9, s18, v5 ; 3E120A12 v_mac_f32_e32 v10, s21, v5 ; 3E140A15 v_mac_f32_e32 v11, s24, v5 ; 3E160A18 v_mac_f32_e32 v0, s10, v6 ; 3E000C0A v_mac_f32_e32 v7, s13, v6 ; 3E0E0C0D v_mac_f32_e32 v8, s16, v6 ; 3E100C10 v_mac_f32_e32 v9, s19, v6 ; 3E120C13 exp 15, 32, 0, 0, 0, v5, v6, v0, v7 ; F800020F 07000605 v_mac_f32_e32 v10, s22, v6 ; 3E140C16 v_mac_f32_e32 v11, s25, v6 ; 3E160C19 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v0, v3, s8, -v4 ; D2820000 84101103 v_mad_f32 v5, s27, v4, v1 ; D2820005 0406081B v_mad_f32 v6, s4, v4, -v2 ; D2820006 840A0804 v_mul_f32_e32 v7, s6, v2 ; 100E0406 exp 15, 33, 0, 0, 0, v8, v9, v10, v11 ; F800021F 0B0A0908 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v8, s30, v2 ; 1010041E v_mac_f32_e32 v7, s5, v1 ; 3E0E0205 exp 15, 12, 0, 0, 0, v5, v6, v0, v4 ; F80000CF 04000605 v_mac_f32_e32 v8, s29, v1 ; 3E10021D s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s34, v2 ; 10000422 v_mac_f32_e32 v0, s33, v1 ; 3E000221 v_mul_f32_e32 v5, s38, v2 ; 100A0426 v_mac_f32_e32 v5, s37, v1 ; 3E0A0225 v_mul_f32_e32 v6, s42, v2 ; 100C042A v_mul_f32_e32 v9, s46, v2 ; 1012042E v_mul_f32_e32 v10, s50, v2 ; 10140432 v_mul_f32_e32 v2, s54, v2 ; 10040436 v_mac_f32_e32 v6, s41, v1 ; 3E0C0229 v_mac_f32_e32 v9, s45, v1 ; 3E12022D v_mac_f32_e32 v10, s49, v1 ; 3E140231 v_mac_f32_e32 v2, s53, v1 ; 3E040235 v_mac_f32_e32 v7, s7, v3 ; 3E0E0607 v_mac_f32_e32 v8, s31, v3 ; 3E10061F v_mac_f32_e32 v0, s35, v3 ; 3E000623 v_mac_f32_e32 v5, s39, v3 ; 3E0A0627 v_mac_f32_e32 v6, s43, v3 ; 3E0C062B v_mac_f32_e32 v9, s47, v3 ; 3E12062F v_mac_f32_e32 v10, s51, v3 ; 3E140633 v_mac_f32_e32 v2, s55, v3 ; 3E040637 v_mac_f32_e32 v7, s28, v4 ; 3E0E081C v_mac_f32_e32 v8, s32, v4 ; 3E100820 v_mac_f32_e32 v0, s36, v4 ; 3E000824 v_mac_f32_e32 v5, s40, v4 ; 3E0A0828 v_mac_f32_e32 v6, s44, v4 ; 3E0C082C v_mac_f32_e32 v9, s48, v4 ; 3E120830 v_mac_f32_e32 v10, s52, v4 ; 3E140834 v_mac_f32_e32 v2, s0, v4 ; 3E040800 exp 15, 13, 0, 0, 0, v7, v8, v0, v5 ; F80000DF 05000807 exp 15, 14, 0, 1, 0, v6, v9, v10, v2 ; F80008EF 020A0906 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 12 Code Size: 544 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL CONST[0..3] DCL TEMP[0..4], LOCAL IMM[0] FLT32 { -0.5000, -1.0000, 10.0000, -10.0000} IMM[1] FLT32 { -2.0000, 3.0000, 6.0000, 1.0000} IMM[2] FLT32 { -0.9900, 100.0000, 1.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0].xw, TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].xy, IN[1].xyyy 3: TEX TEMP[1].y, TEMP[1], SAMP[1], 2D 4: MAD TEMP[0].x, TEMP[1].yyyy, TEMP[0].xxxx, TEMP[0].wwww 5: MOV TEMP[1].xy, IN[0].zwww 6: TEX TEMP[1].w, TEMP[1], SAMP[4], 2D 7: ADD TEMP[2].xy, TEMP[1].wwww, IMM[0].xyyy 8: ADD TEMP[3].x, TEMP[1].wwww, TEMP[1].wwww 9: MOV_SAT TEMP[3].x, TEMP[3].xxxx 10: MUL TEMP[2].xy, TEMP[2].xyyy, IMM[0].zwww 11: MOV_SAT TEMP[2].xy, TEMP[2].xyyy 12: MAD TEMP[1].xy, TEMP[2].xyyy, IMM[1].xxxx, IMM[1].yyyy 13: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[2].xyyy 14: MUL TEMP[1].xy, TEMP[2].xyyy, TEMP[1].xyyy 15: MUL TEMP[1].x, TEMP[1].yyyy, TEMP[1].xxxx 16: MAD TEMP[2].x, CONST[3].wwww, IMM[1].zzzz, IMM[1].wwww 17: MAD TEMP[0].x, TEMP[0].xxxx, TEMP[2].xxxx, TEMP[1].xxxx 18: ADD TEMP[0].x, TEMP[0].xxxx, IMM[2].xxxx 19: MUL TEMP[0].x, TEMP[0].xxxx, IMM[2].yyyy 20: MOV_SAT TEMP[1].x, TEMP[0].xxxx 21: MAD TEMP[2].x, TEMP[1].xxxx, IMM[1].xxxx, IMM[1].yyyy 22: MUL TEMP[0].x, TEMP[1].xxxx, TEMP[1].xxxx 23: MAD TEMP[0].x, TEMP[2].xxxx, TEMP[0].xxxx, IMM[1].wwww 24: MOV TEMP[1].xy, IN[0].xyyy 25: TEX TEMP[1].xyz, TEMP[1], SAMP[3], 2D 26: ADD TEMP[0].x, TEMP[0].xxxx, -TEMP[1].xxxx 27: MOV_SAT TEMP[0].x, TEMP[0].xxxx 28: MAX TEMP[1].x, TEMP[1].yyyy, TEMP[1].zzzz 29: MAX TEMP[1].x, TEMP[1].xxxx, TEMP[0].xxxx 30: MOV TEMP[2].xy, IN[0].xyyy 31: TEX TEMP[2].xyz, TEMP[2], SAMP[2], 2D 32: ADD TEMP[4].x, -TEMP[3].xxxx, TEMP[2].xxxx 33: MAD TEMP[1].x, TEMP[1].xxxx, TEMP[4].xxxx, TEMP[3].xxxx 34: ADD TEMP[3].x, TEMP[2].yyyy, IMM[0].yyyy 35: MAD TEMP[2].xy, TEMP[2].zzzz, IMM[2].zwww, IMM[2].wzzz 36: MOV TEMP[1].zw, TEMP[2].yyxy 37: MAD TEMP[0].x, TEMP[0].xxxx, TEMP[3].xxxx, IMM[1].wwww 38: MOV TEMP[1].y, TEMP[0].xxxx 39: MOV OUT[0], TEMP[1] 40: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 60) %26 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %27 = load <8 x i32>, <8 x i32> addrspace(2)* %26, align 32, !tbaa !0 %28 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %29 = load <4 x i32>, <4 x i32> addrspace(2)* %28, align 16, !tbaa !0 %30 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %31 = load <8 x i32>, <8 x i32> addrspace(2)* %30, align 32, !tbaa !0 %32 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %33 = load <4 x i32>, <4 x i32> addrspace(2)* %32, align 16, !tbaa !0 %34 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %35 = load <8 x i32>, <8 x i32> addrspace(2)* %34, align 32, !tbaa !0 %36 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %37 = load <4 x i32>, <4 x i32> addrspace(2)* %36, align 16, !tbaa !0 %38 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %39 = load <8 x i32>, <8 x i32> addrspace(2)* %38, align 32, !tbaa !0 %40 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %41 = load <4 x i32>, <4 x i32> addrspace(2)* %40, align 16, !tbaa !0 %42 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %43 = load <8 x i32>, <8 x i32> addrspace(2)* %42, align 32, !tbaa !0 %44 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %45 = load <4 x i32>, <4 x i32> addrspace(2)* %44, align 16, !tbaa !0 %46 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %47 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %48 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %49 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %50 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %51 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %52 = bitcast float %46 to i32 %53 = bitcast float %47 to i32 %54 = insertelement <2 x i32> undef, i32 %52, i32 0 %55 = insertelement <2 x i32> %54, i32 %53, i32 1 %56 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %55, <8 x i32> %27, <4 x i32> %29, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %57 = extractelement <4 x float> %56, i32 0 %58 = extractelement <4 x float> %56, i32 3 %59 = bitcast float %50 to i32 %60 = bitcast float %51 to i32 %61 = insertelement <2 x i32> undef, i32 %59, i32 0 %62 = insertelement <2 x i32> %61, i32 %60, i32 1 %63 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %62, <8 x i32> %31, <4 x i32> %33, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %64 = extractelement <4 x float> %63, i32 1 %65 = fmul float %64, %57 %66 = fadd float %65, %58 %67 = bitcast float %48 to i32 %68 = bitcast float %49 to i32 %69 = insertelement <2 x i32> undef, i32 %67, i32 0 %70 = insertelement <2 x i32> %69, i32 %68, i32 1 %71 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %70, <8 x i32> %43, <4 x i32> %45, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %72 = extractelement <4 x float> %71, i32 3 %73 = fadd float %72, -5.000000e-01 %74 = fadd float %72, -1.000000e+00 %75 = fadd float %72, %72 %76 = call float @llvm.AMDIL.clamp.(float %75, float 0.000000e+00, float 1.000000e+00) %77 = fmul float %73, 1.000000e+01 %78 = fmul float %74, -1.000000e+01 %79 = call float @llvm.AMDIL.clamp.(float %77, float 0.000000e+00, float 1.000000e+00) %80 = call float @llvm.AMDIL.clamp.(float %78, float 0.000000e+00, float 1.000000e+00) %81 = fmul float %79, -2.000000e+00 %82 = fadd float %81, 3.000000e+00 %83 = fmul float %80, -2.000000e+00 %84 = fadd float %83, 3.000000e+00 %85 = fmul float %79, %79 %86 = fmul float %80, %80 %87 = fmul float %85, %82 %88 = fmul float %86, %84 %89 = fmul float %88, %87 %90 = fmul float %25, 6.000000e+00 %91 = fadd float %90, 1.000000e+00 %92 = fmul float %66, %91 %93 = fadd float %92, %89 %94 = fadd float %93, 0xBFEFAE1480000000 %95 = fmul float %94, 1.000000e+02 %96 = call float @llvm.AMDIL.clamp.(float %95, float 0.000000e+00, float 1.000000e+00) %97 = fmul float %96, -2.000000e+00 %98 = fadd float %97, 3.000000e+00 %99 = fmul float %96, %96 %100 = fmul float %98, %99 %101 = fadd float %100, 1.000000e+00 %102 = bitcast float %46 to i32 %103 = bitcast float %47 to i32 %104 = insertelement <2 x i32> undef, i32 %102, i32 0 %105 = insertelement <2 x i32> %104, i32 %103, i32 1 %106 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %105, <8 x i32> %39, <4 x i32> %41, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %107 = extractelement <4 x float> %106, i32 0 %108 = extractelement <4 x float> %106, i32 1 %109 = extractelement <4 x float> %106, i32 2 %110 = fsub float %101, %107 %111 = call float @llvm.AMDIL.clamp.(float %110, float 0.000000e+00, float 1.000000e+00) %112 = call float @llvm.maxnum.f32(float %108, float %109) %113 = call float @llvm.maxnum.f32(float %112, float %111) %114 = bitcast float %46 to i32 %115 = bitcast float %47 to i32 %116 = insertelement <2 x i32> undef, i32 %114, i32 0 %117 = insertelement <2 x i32> %116, i32 %115, i32 1 %118 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %117, <8 x i32> %35, <4 x i32> %37, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %119 = extractelement <4 x float> %118, i32 0 %120 = extractelement <4 x float> %118, i32 1 %121 = extractelement <4 x float> %118, i32 2 %122 = fsub float %119, %76 %123 = fmul float %113, %122 %124 = fadd float %123, %76 %125 = fadd float %120, -1.000000e+00 %126 = fadd float %121, 0.000000e+00 %127 = fmul float %121, 0.000000e+00 %128 = fadd float %127, 1.000000e+00 %129 = fmul float %111, %125 %130 = fadd float %129, 1.000000e+00 %131 = call i32 @llvm.SI.packf16(float %124, float %130) %132 = bitcast i32 %131 to float %133 = call i32 @llvm.SI.packf16(float %126, float %128) %134 = bitcast i32 %133 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %132, float %134, float %132, float %134) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s10 ; BEFC030A v_mov_b32_e32 v2, 0x41200000 ; 7E0402FF 41200000 v_mov_b32_e32 v3, 0x40400000 ; 7E0602FF 40400000 v_mov_b32_e32 v4, 0x40c00000 ; 7E0802FF 40C00000 v_interp_p1_f32 v5, v0, 0, 0, [m0] ; C8140000 v_interp_p2_f32 v5, [v5], v1, 0, 0, [m0] ; C8150001 v_interp_p1_f32 v6, v0, 1, 0, [m0] ; C8180100 v_interp_p2_f32 v6, [v6], v1, 1, 0, [m0] ; C8190101 v_interp_p1_f32 v7, v0, 2, 0, [m0] ; C81C0200 v_interp_p2_f32 v7, [v7], v1, 2, 0, [m0] ; C81D0201 v_interp_p1_f32 v8, v0, 3, 0, [m0] ; C8200300 v_interp_p2_f32 v8, [v8], v1, 3, 0, [m0] ; C8210301 v_interp_p1_f32 v9, v0, 0, 1, [m0] ; C8240400 v_interp_p2_f32 v9, [v9], v1, 0, 1, [m0] ; C8250401 v_interp_p1_f32 v10, v0, 1, 1, [m0] ; C8280500 v_interp_p2_f32 v10, [v10], v1, 1, 1, [m0] ; C8290501 s_load_dwordx4 s[28:31], s[2:3], 0x0 ; C08E0300 s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 s_load_dwordx4 s[36:39], s[4:5], 0x4 ; C0920504 s_load_dwordx4 s[0:3], s[4:5], 0x8 ; C0800508 s_load_dwordx4 s[16:19], s[4:5], 0xc ; C088050C s_load_dwordx4 s[40:43], s[4:5], 0x10 ; C0940510 s_load_dwordx8 s[44:51], s[6:7], 0x0 ; C0D60700 s_load_dwordx8 s[52:59], s[6:7], 0x8 ; C0DA0708 s_load_dwordx8 s[8:15], s[6:7], 0x10 ; C0C40710 s_load_dwordx8 s[20:27], s[6:7], 0x18 ; C0CA0718 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[28:31], 0xf ; C2021D0F s_load_dwordx8 s[60:67], s[6:7], 0x20 ; C0DE0720 image_sample v[0:1], 9, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[44:51], s[32:35] ; F0800900 010B0005 image_sample v9, 2, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[52:59], s[36:39] ; F0800200 012D0909 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v1, v0, v9 ; 3E021300 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v4, s4, 1.0 ; D2820000 03C80904 image_sample v4, 8, 0, 0, 0, 0, 0, 0, 0, v[7:8], s[60:67], s[40:43] ; F0800800 014F0407 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v7, -0.5, v4 ; 060E08F1 v_add_f32_e32 v8, v4, v4 ; 06100904 v_add_f32_e64 v8, 0, v8 clamp ; D2060808 00021080 v_mul_f32_e32 v7, v2, v7 ; 100E0F02 v_madmk_f32_e32 v2, v4, v2, 0xc1200000 ; 40040504 C1200000 v_add_f32_e64 v4, 0, v7 clamp ; D2060804 00020E80 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_mad_f32 v7, -2.0, v4, v3 ; D2820007 040E08F5 v_mul_f32_e32 v4, v4, v4 ; 10080904 v_mul_f32_e32 v4, v7, v4 ; 10080907 v_mad_f32 v7, -2.0, v2, v3 ; D2820007 040E04F5 v_mul_f32_e32 v2, v2, v2 ; 10040502 v_mul_f32_e32 v2, v7, v2 ; 10040507 v_madak_f32_e32 v2, v2, v4, 0xbf7d70a4 ; 42040902 BF7D70A4 v_mac_f32_e32 v2, v0, v1 ; 3E040300 v_mul_f32_e32 v0, 0x42c80000, v2 ; 100004FF 42C80000 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mac_f32_e32 v3, -2.0, v0 ; 3E0600F5 v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mad_f32 v0, v3, v0, 1.0 ; D2820000 03CA0103 image_sample v[1:3], 7, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[20:27], s[16:19] ; F0800700 00850105 s_waitcnt vmcnt(0) ; BF8C0770 v_subrev_f32_e32 v0, v1, v0 ; 0A000101 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_max3_f32 v1, v2, v3, v0 ; D2A80001 04020702 image_sample v[2:4], 7, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[8:15], s[0:3] ; F0800700 00020205 s_waitcnt vmcnt(0) ; BF8C0770 v_subrev_f32_e32 v2, v8, v2 ; 0A040508 v_mac_f32_e32 v8, v2, v1 ; 3E100302 v_add_f32_e32 v1, -1.0, v3 ; 060206F3 v_add_f32_e32 v2, 0, v4 ; 06040880 v_mad_f32 v3, 0, v4, 1.0 ; D2820003 03CA0880 v_mad_f32 v0, v0, v1, 1.0 ; D2820000 03CA0300 v_cvt_pkrtz_f16_f32_e32 v0, v8, v0 ; 5E000108 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 12 Code Size: 396 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], CLIPVERTEX DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL CONST[0..54] DCL TEMP[0..3], LOCAL 0: MUL TEMP[0].xy, CONST[48].xyyy, IN[1].xyyy 1: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 2: ADD TEMP[1].x, TEMP[0].xxxx, CONST[48].wwww 3: MOV TEMP[1].z, TEMP[1].xxxx 4: MUL TEMP[0].xy, CONST[49].xyyy, IN[1].xyyy 5: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 6: ADD TEMP[2].x, TEMP[0].xxxx, CONST[49].wwww 7: MOV TEMP[1].w, TEMP[2].xxxx 8: MUL TEMP[0].xy, CONST[50].xyyy, IN[1].xyyy 9: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 10: ADD TEMP[2].x, TEMP[0].xxxx, CONST[50].wwww 11: MUL TEMP[0].xy, CONST[51].xyyy, IN[1].xyyy 12: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 13: ADD TEMP[3].x, TEMP[0].xxxx, CONST[51].wwww 14: MOV TEMP[2].y, TEMP[3].xxxx 15: MUL TEMP[0].xy, CONST[52].xyyy, IN[1].xyyy 16: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 17: ADD TEMP[3].x, TEMP[0].xxxx, CONST[52].wwww 18: MOV TEMP[2].z, TEMP[3].xxxx 19: MUL TEMP[0].xy, CONST[53].xyyy, IN[1].xyyy 20: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 21: ADD TEMP[0].x, TEMP[0].xxxx, CONST[53].wwww 22: MOV TEMP[2].w, TEMP[0].xxxx 23: MOV TEMP[0].xw, IN[0].xxxw 24: MOV TEMP[1].xy, IN[1].xyxx 25: MAD TEMP[3].x, IN[0].zzzz, CONST[0].zzzz, -IN[0].wwww 26: MOV TEMP[0].z, TEMP[3].xxxx 27: MOV TEMP[0].y, -IN[0].yyyy 28: MAD TEMP[0].xy, CONST[54].xyyy, IN[0].wwww, TEMP[0].xyyy 29: MOV OUT[2], TEMP[1] 30: MOV OUT[3], TEMP[2] 31: MOV OUT[0], TEMP[0] 32: MOV OUT[1], IN[0] 33: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 768) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 772) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 780) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 784) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 788) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 796) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 800) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 804) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 812) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 816) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 820) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 828) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 832) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 836) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 844) %30 = call float @llvm.SI.load.const(<16 x i8> %13, i32 848) %31 = call float @llvm.SI.load.const(<16 x i8> %13, i32 852) %32 = call float @llvm.SI.load.const(<16 x i8> %13, i32 860) %33 = call float @llvm.SI.load.const(<16 x i8> %13, i32 864) %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 868) %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 %37 = add i32 %5, %8 %38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = add i32 %5, %8 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = fmul float %15, %47 %50 = fmul float %16, %48 %51 = fadd float %50, %49 %52 = fadd float %51, %17 %53 = fmul float %18, %47 %54 = fmul float %19, %48 %55 = fadd float %54, %53 %56 = fadd float %55, %20 %57 = fmul float %21, %47 %58 = fmul float %22, %48 %59 = fadd float %58, %57 %60 = fadd float %59, %23 %61 = fmul float %24, %47 %62 = fmul float %25, %48 %63 = fadd float %62, %61 %64 = fadd float %63, %26 %65 = fmul float %27, %47 %66 = fmul float %28, %48 %67 = fadd float %66, %65 %68 = fadd float %67, %29 %69 = fmul float %30, %47 %70 = fmul float %31, %48 %71 = fadd float %70, %69 %72 = fadd float %71, %32 %73 = fmul float %41, %14 %74 = fsub float %73, %42 %75 = fmul float %33, %42 %76 = fadd float %75, %39 %77 = fmul float %34, %42 %78 = fsub float %77, %40 %79 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 16 %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 %81 = call float @llvm.SI.load.const(<16 x i8> %80, i32 0) %82 = fmul float %81, %39 %83 = call float @llvm.SI.load.const(<16 x i8> %80, i32 4) %84 = fmul float %83, %40 %85 = fadd float %82, %84 %86 = call float @llvm.SI.load.const(<16 x i8> %80, i32 8) %87 = fmul float %86, %41 %88 = fadd float %85, %87 %89 = call float @llvm.SI.load.const(<16 x i8> %80, i32 12) %90 = fmul float %89, %42 %91 = fadd float %88, %90 %92 = call float @llvm.SI.load.const(<16 x i8> %80, i32 16) %93 = fmul float %92, %39 %94 = call float @llvm.SI.load.const(<16 x i8> %80, i32 20) %95 = fmul float %94, %40 %96 = fadd float %93, %95 %97 = call float @llvm.SI.load.const(<16 x i8> %80, i32 24) %98 = fmul float %97, %41 %99 = fadd float %96, %98 %100 = call float @llvm.SI.load.const(<16 x i8> %80, i32 28) %101 = fmul float %100, %42 %102 = fadd float %99, %101 %103 = call float @llvm.SI.load.const(<16 x i8> %80, i32 32) %104 = fmul float %103, %39 %105 = call float @llvm.SI.load.const(<16 x i8> %80, i32 36) %106 = fmul float %105, %40 %107 = fadd float %104, %106 %108 = call float @llvm.SI.load.const(<16 x i8> %80, i32 40) %109 = fmul float %108, %41 %110 = fadd float %107, %109 %111 = call float @llvm.SI.load.const(<16 x i8> %80, i32 44) %112 = fmul float %111, %42 %113 = fadd float %110, %112 %114 = call float @llvm.SI.load.const(<16 x i8> %80, i32 48) %115 = fmul float %114, %39 %116 = call float @llvm.SI.load.const(<16 x i8> %80, i32 52) %117 = fmul float %116, %40 %118 = fadd float %115, %117 %119 = call float @llvm.SI.load.const(<16 x i8> %80, i32 56) %120 = fmul float %119, %41 %121 = fadd float %118, %120 %122 = call float @llvm.SI.load.const(<16 x i8> %80, i32 60) %123 = fmul float %122, %42 %124 = fadd float %121, %123 %125 = call float @llvm.SI.load.const(<16 x i8> %80, i32 64) %126 = fmul float %125, %39 %127 = call float @llvm.SI.load.const(<16 x i8> %80, i32 68) %128 = fmul float %127, %40 %129 = fadd float %126, %128 %130 = call float @llvm.SI.load.const(<16 x i8> %80, i32 72) %131 = fmul float %130, %41 %132 = fadd float %129, %131 %133 = call float @llvm.SI.load.const(<16 x i8> %80, i32 76) %134 = fmul float %133, %42 %135 = fadd float %132, %134 %136 = call float @llvm.SI.load.const(<16 x i8> %80, i32 80) %137 = fmul float %136, %39 %138 = call float @llvm.SI.load.const(<16 x i8> %80, i32 84) %139 = fmul float %138, %40 %140 = fadd float %137, %139 %141 = call float @llvm.SI.load.const(<16 x i8> %80, i32 88) %142 = fmul float %141, %41 %143 = fadd float %140, %142 %144 = call float @llvm.SI.load.const(<16 x i8> %80, i32 92) %145 = fmul float %144, %42 %146 = fadd float %143, %145 %147 = call float @llvm.SI.load.const(<16 x i8> %80, i32 96) %148 = fmul float %147, %39 %149 = call float @llvm.SI.load.const(<16 x i8> %80, i32 100) %150 = fmul float %149, %40 %151 = fadd float %148, %150 %152 = call float @llvm.SI.load.const(<16 x i8> %80, i32 104) %153 = fmul float %152, %41 %154 = fadd float %151, %153 %155 = call float @llvm.SI.load.const(<16 x i8> %80, i32 108) %156 = fmul float %155, %42 %157 = fadd float %154, %156 %158 = call float @llvm.SI.load.const(<16 x i8> %80, i32 112) %159 = fmul float %158, %39 %160 = call float @llvm.SI.load.const(<16 x i8> %80, i32 116) %161 = fmul float %160, %40 %162 = fadd float %159, %161 %163 = call float @llvm.SI.load.const(<16 x i8> %80, i32 120) %164 = fmul float %163, %41 %165 = fadd float %162, %164 %166 = call float @llvm.SI.load.const(<16 x i8> %80, i32 124) %167 = fmul float %166, %42 %168 = fadd float %165, %167 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %47, float %48, float %52, float %56) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %60, float %64, float %68, float %72) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %76, float %78, float %74, float %42) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 13, i32 0, float %91, float %102, float %113, float %124) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 14, i32 0, float %135, float %146, float %157, float %168) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_load_dwordx4 s[4:7], s[2:3], 0x0 ; C0820300 s_load_dwordx4 s[0:3], s[2:3], 0x40 ; C0800340 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[4:7], 0x2 ; C2040502 s_buffer_load_dword s9, s[4:7], 0xc0 ; C20485C0 s_buffer_load_dword s10, s[4:7], 0xc1 ; C20505C1 s_buffer_load_dword s11, s[4:7], 0xc3 ; C20585C3 s_buffer_load_dword s12, s[4:7], 0xc4 ; C20605C4 s_buffer_load_dword s13, s[4:7], 0xc5 ; C20685C5 s_buffer_load_dword s14, s[4:7], 0xc7 ; C20705C7 s_buffer_load_dword s15, s[4:7], 0xc8 ; C20785C8 s_buffer_load_dword s16, s[4:7], 0xc9 ; C20805C9 s_buffer_load_dword s17, s[4:7], 0xcb ; C20885CB s_buffer_load_dword s18, s[4:7], 0xcc ; C20905CC s_buffer_load_dword s19, s[4:7], 0xcd ; C20985CD s_buffer_load_dword s20, s[4:7], 0xcf ; C20A05CF s_buffer_load_dword s21, s[4:7], 0xd0 ; C20A85D0 s_buffer_load_dword s22, s[4:7], 0xd1 ; C20B05D1 s_buffer_load_dword s23, s[4:7], 0xd3 ; C20B85D3 s_buffer_load_dword s24, s[4:7], 0xd4 ; C20C05D4 s_buffer_load_dword s25, s[4:7], 0xd5 ; C20C85D5 s_buffer_load_dword s26, s[4:7], 0xd7 ; C20D05D7 s_buffer_load_dword s27, s[4:7], 0xd8 ; C20D85D8 s_buffer_load_dword s4, s[4:7], 0xd9 ; C20205D9 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x2 ; C2038102 s_buffer_load_dword s28, s[0:3], 0x3 ; C20E0103 s_buffer_load_dword s29, s[0:3], 0x4 ; C20E8104 s_buffer_load_dword s30, s[0:3], 0x5 ; C20F0105 s_buffer_load_dword s31, s[0:3], 0x6 ; C20F8106 s_buffer_load_dword s32, s[0:3], 0x7 ; C2100107 s_buffer_load_dword s33, s[0:3], 0x8 ; C2108108 s_buffer_load_dword s34, s[0:3], 0x9 ; C2110109 s_buffer_load_dword s35, s[0:3], 0xa ; C211810A s_buffer_load_dword s36, s[0:3], 0xb ; C212010B s_buffer_load_dword s37, s[0:3], 0xc ; C212810C s_buffer_load_dword s38, s[0:3], 0xd ; C213010D s_buffer_load_dword s39, s[0:3], 0xe ; C213810E s_buffer_load_dword s40, s[0:3], 0xf ; C214010F s_buffer_load_dword s41, s[0:3], 0x10 ; C2148110 s_buffer_load_dword s42, s[0:3], 0x11 ; C2150111 s_buffer_load_dword s43, s[0:3], 0x12 ; C2158112 s_buffer_load_dword s44, s[0:3], 0x13 ; C2160113 s_buffer_load_dword s45, s[0:3], 0x14 ; C2168114 s_buffer_load_dword s46, s[0:3], 0x15 ; C2170115 s_buffer_load_dword s47, s[0:3], 0x16 ; C2178116 s_buffer_load_dword s48, s[0:3], 0x17 ; C2180117 s_buffer_load_dword s49, s[0:3], 0x18 ; C2188118 s_buffer_load_dword s50, s[0:3], 0x19 ; C2190119 s_buffer_load_dword s51, s[0:3], 0x1a ; C219811A s_buffer_load_dword s52, s[0:3], 0x1b ; C21A011B s_buffer_load_dword s53, s[0:3], 0x1c ; C21A811C s_buffer_load_dword s54, s[0:3], 0x1d ; C21B011D s_buffer_load_dword s55, s[0:3], 0x1e ; C21B811E s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s11 ; 7E00020B s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v7, s14 ; 7E0E020E v_mov_b32_e32 v8, s17 ; 7E100211 v_mov_b32_e32 v9, s20 ; 7E120214 v_mov_b32_e32 v10, s23 ; 7E140217 v_mov_b32_e32 v11, s26 ; 7E16021A v_mac_f32_e32 v0, s9, v5 ; 3E000A09 v_mac_f32_e32 v7, s12, v5 ; 3E0E0A0C v_mac_f32_e32 v8, s15, v5 ; 3E100A0F v_mac_f32_e32 v9, s18, v5 ; 3E120A12 v_mac_f32_e32 v10, s21, v5 ; 3E140A15 v_mac_f32_e32 v11, s24, v5 ; 3E160A18 v_mac_f32_e32 v0, s10, v6 ; 3E000C0A v_mac_f32_e32 v7, s13, v6 ; 3E0E0C0D v_mac_f32_e32 v8, s16, v6 ; 3E100C10 v_mac_f32_e32 v9, s19, v6 ; 3E120C13 exp 15, 32, 0, 0, 0, v5, v6, v0, v7 ; F800020F 07000605 v_mac_f32_e32 v10, s22, v6 ; 3E140C16 v_mac_f32_e32 v11, s25, v6 ; 3E160C19 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v0, v3, s8, -v4 ; D2820000 84101103 v_mad_f32 v5, s27, v4, v1 ; D2820005 0406081B v_mad_f32 v6, s4, v4, -v2 ; D2820006 840A0804 v_mul_f32_e32 v7, s6, v2 ; 100E0406 exp 15, 33, 0, 0, 0, v8, v9, v10, v11 ; F800021F 0B0A0908 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v8, s30, v2 ; 1010041E v_mac_f32_e32 v7, s5, v1 ; 3E0E0205 exp 15, 12, 0, 0, 0, v5, v6, v0, v4 ; F80000CF 04000605 v_mac_f32_e32 v8, s29, v1 ; 3E10021D s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s34, v2 ; 10000422 v_mac_f32_e32 v0, s33, v1 ; 3E000221 v_mul_f32_e32 v5, s38, v2 ; 100A0426 v_mac_f32_e32 v5, s37, v1 ; 3E0A0225 v_mul_f32_e32 v6, s42, v2 ; 100C042A v_mul_f32_e32 v9, s46, v2 ; 1012042E v_mul_f32_e32 v10, s50, v2 ; 10140432 v_mul_f32_e32 v2, s54, v2 ; 10040436 v_mac_f32_e32 v6, s41, v1 ; 3E0C0229 v_mac_f32_e32 v9, s45, v1 ; 3E12022D v_mac_f32_e32 v10, s49, v1 ; 3E140231 v_mac_f32_e32 v2, s53, v1 ; 3E040235 v_mac_f32_e32 v7, s7, v3 ; 3E0E0607 v_mac_f32_e32 v8, s31, v3 ; 3E10061F v_mac_f32_e32 v0, s35, v3 ; 3E000623 v_mac_f32_e32 v5, s39, v3 ; 3E0A0627 v_mac_f32_e32 v6, s43, v3 ; 3E0C062B v_mac_f32_e32 v9, s47, v3 ; 3E12062F v_mac_f32_e32 v10, s51, v3 ; 3E140633 v_mac_f32_e32 v2, s55, v3 ; 3E040637 v_mac_f32_e32 v7, s28, v4 ; 3E0E081C v_mac_f32_e32 v8, s32, v4 ; 3E100820 v_mac_f32_e32 v0, s36, v4 ; 3E000824 v_mac_f32_e32 v5, s40, v4 ; 3E0A0828 v_mac_f32_e32 v6, s44, v4 ; 3E0C082C v_mac_f32_e32 v9, s48, v4 ; 3E120830 v_mac_f32_e32 v10, s52, v4 ; 3E140834 v_mac_f32_e32 v2, s0, v4 ; 3E040800 exp 15, 13, 0, 0, 0, v7, v8, v0, v5 ; F80000DF 05000807 exp 15, 14, 0, 1, 0, v6, v9, v10, v2 ; F80008EF 020A0906 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 12 Code Size: 544 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL SVIEW[5], 2D, FLOAT DCL SVIEW[6], 2D, FLOAT DCL CONST[0..3] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { -0.5000, -1.0000, 10.0000, -10.0000} IMM[1] FLT32 { -2.0000, 3.0000, 1.0000, 6.0000} IMM[2] FLT32 { -0.5800, 100.0000, 0.0500, 0.7500} IMM[3] FLT32 { 0.2500, 0.3000, 0.5900, 0.1100} IMM[4] FLT32 { -0.0800, 14.2857, 0.0050, 2.0000} 0: ADD TEMP[0].xyz, -CONST[0].xyzz, CONST[1].xyzz 1: MOV TEMP[1].xy, IN[0].zwww 2: TEX TEMP[1], TEMP[1], SAMP[6], 2D 3: MAD TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[0].xyzz 4: LRP TEMP[2].xyz, TEMP[1].yyyy, CONST[2].xyzz, TEMP[0].xyzz 5: MOV TEMP[0].x, CONST[0].wwww 6: MOV TEMP[0].y, CONST[1].wwww 7: MOV TEMP[0].z, CONST[2].wwww 8: LRP TEMP[3].xyz, TEMP[1].zzzz, TEMP[0].xyzz, TEMP[2].xyzz 9: ADD TEMP[1].xy, TEMP[1].wwww, IMM[0].xyyy 10: MUL TEMP[1].xy, TEMP[1].xyyy, IMM[0].zwww 11: MOV_SAT TEMP[1].xy, TEMP[1].xyyy 12: MOV TEMP[4].xy, IN[0].xyyy 13: TEX TEMP[4].xyz, TEMP[4], SAMP[4], 2D 14: LRP TEMP[5].xyz, TEMP[4].yyyy, CONST[2].xyzz, TEMP[3].xyzz 15: LRP TEMP[3].xyz, TEMP[4].zzzz, TEMP[0].xyzz, TEMP[5].xyzz 16: MAD TEMP[0].xy, TEMP[1].xyyy, IMM[1].xxxx, IMM[1].yyyy 17: MUL TEMP[1].xy, TEMP[1].xyyy, TEMP[1].xyyy 18: MUL TEMP[0].xy, TEMP[1].xyyy, TEMP[0].xyyy 19: MUL TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 20: MAD TEMP[1].x, CONST[3].wwww, IMM[1].wwww, IMM[1].zzzz 21: MOV TEMP[6].xy, IN[0].xyyy 22: TEX TEMP[6], TEMP[6], SAMP[0], 2D 23: MOV TEMP[7].xy, IN[1].xyyy 24: TEX TEMP[7].y, TEMP[7], SAMP[1], 2D 25: MAD TEMP[7].x, TEMP[7].yyyy, TEMP[6].xxxx, TEMP[6].wwww 26: MAD TEMP[0].x, TEMP[7].xxxx, TEMP[1].xxxx, TEMP[0].xxxx 27: ADD TEMP[0].x, TEMP[0].xxxx, IMM[2].xxxx 28: MUL TEMP[0].x, TEMP[0].xxxx, IMM[0].zzzz 29: MOV_SAT TEMP[1].x, TEMP[0].xxxx 30: MAD TEMP[7].x, TEMP[1].xxxx, IMM[1].xxxx, IMM[1].yyyy 31: MUL TEMP[0].x, TEMP[1].xxxx, TEMP[1].xxxx 32: MUL TEMP[1].x, TEMP[0].xxxx, TEMP[7].xxxx 33: MAD TEMP[0].x, TEMP[7].xxxx, TEMP[0].xxxx, IMM[1].zzzz 34: ADD TEMP[0].x, -TEMP[4].xxxx, TEMP[0].xxxx 35: MOV_SAT TEMP[0].x, TEMP[0].xxxx 36: MUL TEMP[1].x, TEMP[1].xxxx, IMM[2].yyyy 37: MOV_SAT TEMP[1].x, TEMP[1].xxxx 38: MAD TEMP[4].x, TEMP[1].xxxx, IMM[1].xxxx, IMM[1].yyyy 39: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[1].xxxx 40: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[4].xxxx 41: LRP TEMP[2].xyz, TEMP[1].xxxx, IMM[2].zzzz, TEMP[3].xyzz 42: MUL TEMP[3].x, IMM[2].wwww, CONST[3].wwww 43: ADD TEMP[4].x, -TEMP[6].xxxx, IMM[1].zzzz 44: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[4].xxxx 45: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[4].xxxx 46: MAD TEMP[3].x, TEMP[4].xxxx, IMM[3].xxxx, TEMP[3].xxxx 47: MOV TEMP[4].xy, IN[1].zwww 48: TEX TEMP[4], TEMP[4], SAMP[5], 2D 49: ADD TEMP[4], TEMP[4], IMM[0].yyyy 50: MAD TEMP[5], TEMP[3].xxxx, TEMP[4], IMM[1].zzzz 51: LRP TEMP[3].xyz, TEMP[1].xxxx, IMM[1].zzzz, TEMP[5].xyzz 52: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[2].xyzz 53: DP3 TEMP[4].x, TEMP[3].xyzz, IMM[3].yzww 54: ADD TEMP[4].x, TEMP[4].xxxx, IMM[4].xxxx 55: MUL TEMP[4].x, TEMP[4].xxxx, IMM[4].yyyy 56: MOV_SAT TEMP[4].x, TEMP[4].xxxx 57: MAD TEMP[7].x, TEMP[4].xxxx, IMM[1].xxxx, IMM[1].yyyy 58: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[4].xxxx 59: MAD TEMP[4].x, TEMP[7].xxxx, -TEMP[4].xxxx, IMM[1].zzzz 60: MAD TEMP[2].xyz, TEMP[4].xxxx, IMM[4].zzzz, TEMP[3].xyzz 61: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[6].zzzz 62: MAD TEMP[3].xyz, TEMP[2].xyzz, IMM[4].wwww, TEMP[3].xyzz 63: MOV_SAT TEMP[3].xyz, TEMP[3].xyzz 64: MUL TEMP[2].xyz, TEMP[6].yyyy, TEMP[3].xyzz 65: MOV TEMP[4].xy, IN[0].xyyy 66: TEX TEMP[4], TEMP[4], SAMP[3], 2D 67: MAD TEMP[3].xyz, TEMP[3].xyzz, -TEMP[6].yyyy, TEMP[4].xyzz 68: MUL TEMP[6].x, TEMP[6].yyyy, CONST[3].zzzz 69: MUL TEMP[5].x, TEMP[5].wwww, TEMP[6].xxxx 70: MAD TEMP[3].xyz, TEMP[0].xxxx, TEMP[3].xyzz, TEMP[2].xyzz 71: LRP TEMP[2].x, TEMP[1].xxxx, IMM[1].zzzz, CONST[3].zzzz 72: MUL TEMP[1].x, TEMP[5].xxxx, TEMP[2].xxxx 73: MOV TEMP[2].xy, IN[0].xyyy 74: TEX TEMP[2].y, TEMP[2], SAMP[2], 2D 75: MUL TEMP[2].x, TEMP[0].xxxx, TEMP[2].yyyy 76: ADD TEMP[5].x, IMM[0].yyyy, CONST[3].xxxx 77: MAD TEMP[2].x, TEMP[2].xxxx, TEMP[5].xxxx, IMM[1].zzzz 78: MAD TEMP[2].x, TEMP[4].wwww, TEMP[2].xxxx, -TEMP[1].xxxx 79: MAD TEMP[0].x, TEMP[0].xxxx, TEMP[2].xxxx, TEMP[1].xxxx 80: MOV TEMP[3].w, TEMP[0].xxxx 81: MOV OUT[0], TEMP[3] 82: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0) %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 4) %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 8) %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 12) %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 16) %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 20) %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 24) %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 28) %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 32) %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 36) %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 40) %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 44) %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 48) %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 56) %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 60) %40 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %41 = load <8 x i32>, <8 x i32> addrspace(2)* %40, align 32, !tbaa !0 %42 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %43 = load <4 x i32>, <4 x i32> addrspace(2)* %42, align 16, !tbaa !0 %44 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %45 = load <8 x i32>, <8 x i32> addrspace(2)* %44, align 32, !tbaa !0 %46 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %47 = load <4 x i32>, <4 x i32> addrspace(2)* %46, align 16, !tbaa !0 %48 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %49 = load <8 x i32>, <8 x i32> addrspace(2)* %48, align 32, !tbaa !0 %50 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %51 = load <4 x i32>, <4 x i32> addrspace(2)* %50, align 16, !tbaa !0 %52 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %53 = load <8 x i32>, <8 x i32> addrspace(2)* %52, align 32, !tbaa !0 %54 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %55 = load <4 x i32>, <4 x i32> addrspace(2)* %54, align 16, !tbaa !0 %56 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %57 = load <8 x i32>, <8 x i32> addrspace(2)* %56, align 32, !tbaa !0 %58 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %59 = load <4 x i32>, <4 x i32> addrspace(2)* %58, align 16, !tbaa !0 %60 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %61 = load <8 x i32>, <8 x i32> addrspace(2)* %60, align 32, !tbaa !0 %62 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %63 = load <4 x i32>, <4 x i32> addrspace(2)* %62, align 16, !tbaa !0 %64 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 6 %65 = load <8 x i32>, <8 x i32> addrspace(2)* %64, align 32, !tbaa !0 %66 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 6 %67 = load <4 x i32>, <4 x i32> addrspace(2)* %66, align 16, !tbaa !0 %68 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %69 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %70 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %71 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %72 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %73 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %74 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) %75 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %76 = fsub float %29, %25 %77 = fsub float %30, %26 %78 = fsub float %31, %27 %79 = bitcast float %70 to i32 %80 = bitcast float %71 to i32 %81 = insertelement <2 x i32> undef, i32 %79, i32 0 %82 = insertelement <2 x i32> %81, i32 %80, i32 1 %83 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %82, <8 x i32> %65, <4 x i32> %67, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %84 = extractelement <4 x float> %83, i32 0 %85 = extractelement <4 x float> %83, i32 1 %86 = extractelement <4 x float> %83, i32 2 %87 = extractelement <4 x float> %83, i32 3 %88 = fmul float %84, %76 %89 = fadd float %88, %25 %90 = fmul float %84, %77 %91 = fadd float %90, %26 %92 = fmul float %84, %78 %93 = fadd float %92, %27 %94 = fsub float 1.000000e+00, %85 %95 = fmul float %33, %85 %96 = fmul float %89, %94 %97 = fadd float %95, %96 %98 = fsub float 1.000000e+00, %85 %99 = fmul float %34, %85 %100 = fmul float %91, %98 %101 = fadd float %99, %100 %102 = fsub float 1.000000e+00, %85 %103 = fmul float %35, %85 %104 = fmul float %93, %102 %105 = fadd float %103, %104 %106 = fsub float 1.000000e+00, %86 %107 = fmul float %28, %86 %108 = fmul float %97, %106 %109 = fadd float %107, %108 %110 = fsub float 1.000000e+00, %86 %111 = fmul float %32, %86 %112 = fmul float %101, %110 %113 = fadd float %111, %112 %114 = fsub float 1.000000e+00, %86 %115 = fmul float %36, %86 %116 = fmul float %105, %114 %117 = fadd float %115, %116 %118 = fadd float %87, -5.000000e-01 %119 = fadd float %87, -1.000000e+00 %120 = fmul float %118, 1.000000e+01 %121 = fmul float %119, -1.000000e+01 %122 = call float @llvm.AMDIL.clamp.(float %120, float 0.000000e+00, float 1.000000e+00) %123 = call float @llvm.AMDIL.clamp.(float %121, float 0.000000e+00, float 1.000000e+00) %124 = bitcast float %68 to i32 %125 = bitcast float %69 to i32 %126 = insertelement <2 x i32> undef, i32 %124, i32 0 %127 = insertelement <2 x i32> %126, i32 %125, i32 1 %128 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %127, <8 x i32> %57, <4 x i32> %59, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %129 = extractelement <4 x float> %128, i32 0 %130 = extractelement <4 x float> %128, i32 1 %131 = extractelement <4 x float> %128, i32 2 %132 = fsub float 1.000000e+00, %130 %133 = fmul float %33, %130 %134 = fmul float %109, %132 %135 = fadd float %133, %134 %136 = fsub float 1.000000e+00, %130 %137 = fmul float %34, %130 %138 = fmul float %113, %136 %139 = fadd float %137, %138 %140 = fsub float 1.000000e+00, %130 %141 = fmul float %35, %130 %142 = fmul float %117, %140 %143 = fadd float %141, %142 %144 = fsub float 1.000000e+00, %131 %145 = fmul float %28, %131 %146 = fmul float %135, %144 %147 = fadd float %145, %146 %148 = fsub float 1.000000e+00, %131 %149 = fmul float %32, %131 %150 = fmul float %139, %148 %151 = fadd float %149, %150 %152 = fsub float 1.000000e+00, %131 %153 = fmul float %36, %131 %154 = fmul float %143, %152 %155 = fadd float %153, %154 %156 = fmul float %122, -2.000000e+00 %157 = fadd float %156, 3.000000e+00 %158 = fmul float %123, -2.000000e+00 %159 = fadd float %158, 3.000000e+00 %160 = fmul float %122, %122 %161 = fmul float %123, %123 %162 = fmul float %160, %157 %163 = fmul float %161, %159 %164 = fmul float %163, %162 %165 = fmul float %39, 6.000000e+00 %166 = fadd float %165, 1.000000e+00 %167 = bitcast float %68 to i32 %168 = bitcast float %69 to i32 %169 = insertelement <2 x i32> undef, i32 %167, i32 0 %170 = insertelement <2 x i32> %169, i32 %168, i32 1 %171 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %170, <8 x i32> %41, <4 x i32> %43, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %172 = extractelement <4 x float> %171, i32 0 %173 = extractelement <4 x float> %171, i32 1 %174 = extractelement <4 x float> %171, i32 2 %175 = extractelement <4 x float> %171, i32 3 %176 = bitcast float %72 to i32 %177 = bitcast float %73 to i32 %178 = insertelement <2 x i32> undef, i32 %176, i32 0 %179 = insertelement <2 x i32> %178, i32 %177, i32 1 %180 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %179, <8 x i32> %45, <4 x i32> %47, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %181 = extractelement <4 x float> %180, i32 1 %182 = fmul float %181, %172 %183 = fadd float %182, %175 %184 = fmul float %183, %166 %185 = fadd float %184, %164 %186 = fadd float %185, 0xBFE28F5C20000000 %187 = fmul float %186, 1.000000e+01 %188 = call float @llvm.AMDIL.clamp.(float %187, float 0.000000e+00, float 1.000000e+00) %189 = fmul float %188, -2.000000e+00 %190 = fadd float %189, 3.000000e+00 %191 = fmul float %188, %188 %192 = fmul float %191, %190 %193 = fmul float %190, %191 %194 = fadd float %193, 1.000000e+00 %195 = fsub float %194, %129 %196 = call float @llvm.AMDIL.clamp.(float %195, float 0.000000e+00, float 1.000000e+00) %197 = fmul float %192, 1.000000e+02 %198 = call float @llvm.AMDIL.clamp.(float %197, float 0.000000e+00, float 1.000000e+00) %199 = fmul float %198, -2.000000e+00 %200 = fadd float %199, 3.000000e+00 %201 = fmul float %198, %198 %202 = fmul float %201, %200 %203 = fsub float 1.000000e+00, %202 %204 = fmul float %202, 0x3FA99999A0000000 %205 = fmul float %147, %203 %206 = fadd float %204, %205 %207 = fsub float 1.000000e+00, %202 %208 = fmul float %202, 0x3FA99999A0000000 %209 = fmul float %151, %207 %210 = fadd float %208, %209 %211 = fsub float 1.000000e+00, %202 %212 = fmul float %202, 0x3FA99999A0000000 %213 = fmul float %155, %211 %214 = fadd float %212, %213 %215 = fmul float %39, 7.500000e-01 %216 = fsub float 1.000000e+00, %172 %217 = fmul float %216, %216 %218 = fmul float %217, %217 %219 = fmul float %218, 2.500000e-01 %220 = fadd float %219, %215 %221 = bitcast float %74 to i32 %222 = bitcast float %75 to i32 %223 = insertelement <2 x i32> undef, i32 %221, i32 0 %224 = insertelement <2 x i32> %223, i32 %222, i32 1 %225 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %224, <8 x i32> %61, <4 x i32> %63, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %226 = extractelement <4 x float> %225, i32 0 %227 = extractelement <4 x float> %225, i32 1 %228 = extractelement <4 x float> %225, i32 2 %229 = extractelement <4 x float> %225, i32 3 %230 = fadd float %226, -1.000000e+00 %231 = fadd float %227, -1.000000e+00 %232 = fadd float %228, -1.000000e+00 %233 = fadd float %229, -1.000000e+00 %234 = fmul float %220, %230 %235 = fadd float %234, 1.000000e+00 %236 = fmul float %220, %231 %237 = fadd float %236, 1.000000e+00 %238 = fmul float %220, %232 %239 = fadd float %238, 1.000000e+00 %240 = fmul float %220, %233 %241 = fadd float %240, 1.000000e+00 %242 = fsub float 1.000000e+00, %202 %243 = fmul float %235, %242 %244 = fadd float %202, %243 %245 = fsub float 1.000000e+00, %202 %246 = fmul float %237, %245 %247 = fadd float %202, %246 %248 = fsub float 1.000000e+00, %202 %249 = fmul float %239, %248 %250 = fadd float %202, %249 %251 = fmul float %244, %206 %252 = fmul float %247, %210 %253 = fmul float %250, %214 %254 = fmul float %251, 0x3FD3333340000000 %255 = fmul float %252, 0x3FE2E147A0000000 %256 = fadd float %255, %254 %257 = fmul float %253, 0x3FBC28F5C0000000 %258 = fadd float %256, %257 %259 = fadd float %258, 0xBFB47AE140000000 %260 = fmul float %259, 0x402C924920000000 %261 = call float @llvm.AMDIL.clamp.(float %260, float 0.000000e+00, float 1.000000e+00) %262 = fmul float %261, -2.000000e+00 %263 = fadd float %262, 3.000000e+00 %264 = fmul float %261, %261 %265 = fmul float %264, %263 %266 = fsub float 1.000000e+00, %265 %267 = fmul float %266, 0x3F747AE140000000 %268 = fadd float %267, %251 %269 = fmul float %266, 0x3F747AE140000000 %270 = fadd float %269, %252 %271 = fmul float %266, 0x3F747AE140000000 %272 = fadd float %271, %253 %273 = fmul float %268, %174 %274 = fmul float %270, %174 %275 = fmul float %272, %174 %276 = fmul float %273, 2.000000e+00 %277 = fadd float %276, %251 %278 = fmul float %274, 2.000000e+00 %279 = fadd float %278, %252 %280 = fmul float %275, 2.000000e+00 %281 = fadd float %280, %253 %282 = call float @llvm.AMDIL.clamp.(float %277, float 0.000000e+00, float 1.000000e+00) %283 = call float @llvm.AMDIL.clamp.(float %279, float 0.000000e+00, float 1.000000e+00) %284 = call float @llvm.AMDIL.clamp.(float %281, float 0.000000e+00, float 1.000000e+00) %285 = fmul float %173, %282 %286 = fmul float %173, %283 %287 = fmul float %173, %284 %288 = bitcast float %68 to i32 %289 = bitcast float %69 to i32 %290 = insertelement <2 x i32> undef, i32 %288, i32 0 %291 = insertelement <2 x i32> %290, i32 %289, i32 1 %292 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %291, <8 x i32> %53, <4 x i32> %55, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %293 = extractelement <4 x float> %292, i32 0 %294 = extractelement <4 x float> %292, i32 1 %295 = extractelement <4 x float> %292, i32 2 %296 = extractelement <4 x float> %292, i32 3 %297 = fmul float %173, %282 %298 = fsub float %293, %297 %299 = fmul float %173, %283 %300 = fsub float %294, %299 %301 = fmul float %173, %284 %302 = fsub float %295, %301 %303 = fmul float %173, %38 %304 = fmul float %241, %303 %305 = fmul float %196, %298 %306 = fadd float %305, %285 %307 = fmul float %196, %300 %308 = fadd float %307, %286 %309 = fmul float %196, %302 %310 = fadd float %309, %287 %311 = fsub float 1.000000e+00, %202 %312 = fmul float %38, %311 %313 = fadd float %202, %312 %314 = fmul float %304, %313 %315 = bitcast float %68 to i32 %316 = bitcast float %69 to i32 %317 = insertelement <2 x i32> undef, i32 %315, i32 0 %318 = insertelement <2 x i32> %317, i32 %316, i32 1 %319 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %318, <8 x i32> %49, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %320 = extractelement <4 x float> %319, i32 1 %321 = fmul float %196, %320 %322 = fadd float %37, -1.000000e+00 %323 = fmul float %321, %322 %324 = fadd float %323, 1.000000e+00 %325 = fmul float %296, %324 %326 = fsub float %325, %314 %327 = fmul float %196, %326 %328 = fadd float %327, %314 %329 = call i32 @llvm.SI.packf16(float %306, float %308) %330 = bitcast i32 %329 to float %331 = call i32 @llvm.SI.packf16(float %310, float %328) %332 = bitcast i32 %331 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %330, float %332, float %330, float %332) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s10 ; BEFC030A v_mov_b32_e32 v2, 0x41200000 ; 7E0402FF 41200000 v_mov_b32_e32 v3, 0x40400000 ; 7E0602FF 40400000 v_mov_b32_e32 v4, 0x40c00000 ; 7E0802FF 40C00000 v_mov_b32_e32 v5, 0x3f400000 ; 7E0A02FF 3F400000 v_mov_b32_e32 v6, 0xbda3d70a ; 7E0C02FF BDA3D70A v_mov_b32_e32 v7, 0x3ba3d70a ; 7E0E02FF 3BA3D70A v_interp_p1_f32 v8, v0, 0, 0, [m0] ; C8200000 v_interp_p2_f32 v8, [v8], v1, 0, 0, [m0] ; C8210001 v_interp_p1_f32 v9, v0, 1, 0, [m0] ; C8240100 v_interp_p2_f32 v9, [v9], v1, 1, 0, [m0] ; C8250101 v_interp_p1_f32 v10, v0, 2, 0, [m0] ; C8280200 v_interp_p2_f32 v10, [v10], v1, 2, 0, [m0] ; C8290201 v_interp_p1_f32 v11, v0, 3, 0, [m0] ; C82C0300 v_interp_p2_f32 v11, [v11], v1, 3, 0, [m0] ; C82D0301 v_interp_p1_f32 v12, v0, 0, 1, [m0] ; C8300400 v_interp_p2_f32 v12, [v12], v1, 0, 1, [m0] ; C8310401 v_interp_p1_f32 v13, v0, 1, 1, [m0] ; C8340500 v_interp_p2_f32 v13, [v13], v1, 1, 1, [m0] ; C8350501 v_interp_p1_f32 v14, v0, 2, 1, [m0] ; C8380600 v_interp_p2_f32 v14, [v14], v1, 2, 1, [m0] ; C8390601 v_interp_p1_f32 v15, v0, 3, 1, [m0] ; C83C0700 v_interp_p2_f32 v15, [v15], v1, 3, 1, [m0] ; C83D0701 s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300 s_load_dwordx4 s[40:43], s[4:5], 0x0 ; C0940500 s_load_dwordx4 s[44:47], s[4:5], 0x4 ; C0960504 s_load_dwordx4 s[0:3], s[4:5], 0x8 ; C0800508 s_load_dwordx4 s[16:19], s[4:5], 0xc ; C088050C s_load_dwordx4 s[48:51], s[4:5], 0x10 ; C0980510 s_load_dwordx4 s[28:31], s[4:5], 0x14 ; C08E0514 s_load_dwordx4 s[68:71], s[4:5], 0x18 ; C0A20518 s_load_dwordx8 s[60:67], s[6:7], 0x0 ; C0DE0700 s_load_dwordx8 s[52:59], s[6:7], 0x8 ; C0DA0708 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s5, s[8:11], 0x0 ; C2028900 s_buffer_load_dword s72, s[8:11], 0x1 ; C2240901 s_buffer_load_dword s73, s[8:11], 0x2 ; C2248902 s_buffer_load_dword s74, s[8:11], 0x3 ; C2250903 s_buffer_load_dword s12, s[8:11], 0x4 ; C2060904 s_buffer_load_dword s13, s[8:11], 0x5 ; C2068905 s_buffer_load_dword s14, s[8:11], 0x6 ; C2070906 s_buffer_load_dword s75, s[8:11], 0x7 ; C2258907 s_buffer_load_dword s76, s[8:11], 0x8 ; C2260908 s_buffer_load_dword s77, s[8:11], 0x9 ; C2268909 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s5 ; 7E000205 s_buffer_load_dword s78, s[8:11], 0xa ; C227090A v_mov_b32_e32 v1, s72 ; 7E020248 s_buffer_load_dword s79, s[8:11], 0xb ; C227890B v_mov_b32_e32 v16, s73 ; 7E200249 s_buffer_load_dword s80, s[8:11], 0xc ; C228090C s_buffer_load_dword s4, s[8:11], 0xe ; C202090E s_buffer_load_dword s81, s[8:11], 0xf ; C228890F v_sub_f32_e32 v0, s12, v0 ; 0800000C v_sub_f32_e32 v1, s13, v1 ; 0802020D v_sub_f32_e32 v16, s14, v16 ; 0820200E s_load_dwordx8 s[8:15], s[6:7], 0x10 ; C0C40710 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v29, s8, 0 ; 043B0008 v_writelane_b32 v29, s9, 1 ; 043B0209 v_writelane_b32 v29, s10, 2 ; 043B040A v_writelane_b32 v29, s11, 3 ; 043B060B v_writelane_b32 v29, s12, 4 ; 043B080C v_writelane_b32 v29, s13, 5 ; 043B0A0D v_writelane_b32 v29, s14, 6 ; 043B0C0E v_writelane_b32 v29, s15, 7 ; 043B0E0F s_load_dwordx8 s[20:27], s[6:7], 0x18 ; C0CA0718 s_load_dwordx8 s[84:91], s[6:7], 0x20 ; C0EA0720 s_load_dwordx8 s[32:39], s[6:7], 0x28 ; C0D00728 v_add_f32_e64 v17, -1.0, s80 ; D2060011 0000A0F3 s_load_dwordx8 s[8:15], s[6:7], 0x30 ; C0C40730 v_mad_f32 v4, v4, s81, 1.0 ; D2820004 03C8A304 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[18:21], 15, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[8:15], s[68:71] ; F0800F00 0222120A image_sample v[22:24], 7, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[84:91], s[48:51] ; F0800700 01951608 image_sample v[25:28], 15, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[60:67], s[40:43] ; F0800F00 014F1908 image_sample v10, 2, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[52:59], s[44:47] ; F0800200 016D0A0C s_waitcnt vmcnt(3) ; BF8C0773 v_mad_f32 v0, v18, v0, s5 ; D2820000 00160112 v_mad_f32 v1, v18, v1, s72 ; D2820001 01220312 v_mad_f32 v11, v18, v16, s73 ; D282000B 01262112 v_mad_f32 v0, -v19, v0, v0 ; D2820000 24020113 v_mac_f32_e32 v0, s76, v19 ; 3E00264C v_mad_f32 v0, -v20, v0, v0 ; D2820000 24020114 v_mac_f32_e32 v0, s74, v20 ; 3E00284A s_waitcnt vmcnt(2) ; BF8C0772 v_mad_f32 v0, -v23, v0, v0 ; D2820000 24020117 v_mac_f32_e32 v0, s76, v23 ; 3E002E4C v_mad_f32 v1, -v19, v1, v1 ; D2820001 24060313 v_mac_f32_e32 v1, s77, v19 ; 3E02264D v_mad_f32 v1, -v20, v1, v1 ; D2820001 24060314 v_mac_f32_e32 v1, s75, v20 ; 3E02284B v_mad_f32 v1, -v23, v1, v1 ; D2820001 24060317 v_mac_f32_e32 v1, s77, v23 ; 3E022E4D v_mad_f32 v11, -v19, v11, v11 ; D282000B 242E1713 v_mac_f32_e32 v11, s78, v19 ; 3E16264E v_mad_f32 v11, -v20, v11, v11 ; D282000B 242E1714 v_mac_f32_e32 v11, s79, v20 ; 3E16284F v_mad_f32 v11, -v23, v11, v11 ; D282000B 242E1717 v_mac_f32_e32 v11, s78, v23 ; 3E162E4E v_mad_f32 v0, -v24, v0, v0 ; D2820000 24020118 v_mac_f32_e32 v0, s74, v24 ; 3E00304A v_mad_f32 v1, -v24, v1, v1 ; D2820001 24060318 v_mac_f32_e32 v1, s75, v24 ; 3E02304B v_mad_f32 v11, -v24, v11, v11 ; D282000B 242E1718 v_mac_f32_e32 v11, s79, v24 ; 3E16304F v_mul_f32_e32 v5, s81, v5 ; 100A0A51 v_add_f32_e32 v12, -0.5, v21 ; 06182AF1 v_madmk_f32_e32 v13, v21, v2, 0xc1200000 ; 401A0515 C1200000 v_mul_f32_e32 v12, v2, v12 ; 10181902 v_add_f32_e64 v12, 0, v12 clamp ; D206080C 00021880 v_mad_f32 v16, -2.0, v12, v3 ; D2820010 040E18F5 v_mul_f32_e32 v12, v12, v12 ; 1018190C v_mul_f32_e32 v12, v16, v12 ; 10181910 v_add_f32_e64 v13, 0, v13 clamp ; D206080D 00021A80 v_mad_f32 v16, -2.0, v13, v3 ; D2820010 040E1AF5 v_mul_f32_e32 v13, v13, v13 ; 101A1B0D v_mul_f32_e32 v13, v16, v13 ; 101A1B10 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v28, v25, v10 ; 3E381519 v_madak_f32_e32 v10, v13, v12, 0xbf147ae1 ; 4214190D BF147AE1 v_mac_f32_e32 v10, v4, v28 ; 3E143904 v_mul_f32_e32 v2, v2, v10 ; 10041502 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_mad_f32 v4, -2.0, v2, v3 ; D2820004 040E04F5 v_mul_f32_e32 v2, v2, v2 ; 10040502 v_mad_f32 v10, v2, v4, 1.0 ; D282000A 03CA0902 v_subrev_f32_e32 v10, v22, v10 ; 0A141516 v_sub_f32_e32 v12, 1.0, v25 ; 081832F2 v_mad_f32 v12, -v25, v12, v12 ; D282000C 24321919 v_mul_f32_e32 v12, v12, v12 ; 1018190C v_madmk_f32_e32 v5, v12, v5, 0x3e800000 ; 400A0B0C 3E800000 image_sample v[12:15], 15, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[32:39], s[28:31] ; F0800F00 00E80C0E s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v12, -1.0, v12 ; 061818F3 v_add_f32_e32 v13, -1.0, v13 ; 061A1AF3 v_add_f32_e32 v14, -1.0, v14 ; 061C1CF3 v_mad_f32 v15, v15, v5, -v5 ; D282000F 84160B0F v_mul_f32_e32 v2, v4, v2 ; 10040504 v_add_f32_e64 v4, 0, v10 clamp ; D2060804 00021480 v_mul_f32_e32 v2, 0x42c80000, v2 ; 100404FF 42C80000 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_mad_f32 v10, -2.0, v2, v3 ; D282000A 040E04F5 v_mul_f32_e32 v2, v2, v2 ; 10040502 v_mul_f32_e32 v16, v10, v2 ; 1020050A v_mad_f32 v2, -v2, v10, 1.0 ; D2820002 23CA1502 v_mul_f32_e32 v10, 0x3d4ccccd, v16 ; 101420FF 3D4CCCCD v_mad_f32 v0, v2, v0, v10 ; D2820000 042A0102 v_mad_f32 v1, v2, v1, v10 ; D2820001 042A0302 v_mad_f32 v10, v2, v11, v10 ; D282000A 042A1702 v_mad_f32 v11, v5, v12, 1.0 ; D282000B 03CA1905 v_mad_f32 v12, v5, v13, 1.0 ; D282000C 03CA1B05 v_mad_f32 v5, v5, v14, 1.0 ; D2820005 03CA1D05 v_mad_f32 v11, v2, v11, v16 ; D282000B 04421702 v_mad_f32 v12, v2, v12, v16 ; D282000C 04421902 v_mad_f32 v5, v2, v5, v16 ; D2820005 04420B02 v_mul_f32_e32 v13, v0, v11 ; 101A1700 v_mul_f32_e32 v14, v1, v12 ; 101C1901 v_mul_f32_e32 v13, 0x3e99999a, v13 ; 101A1AFF 3E99999A v_madmk_f32_e32 v13, v14, v13, 0x3f170a3d ; 401A1B0E 3F170A3D v_mul_f32_e32 v14, v10, v5 ; 101C0B0A v_madmk_f32_e32 v13, v14, v13, 0x3de147ae ; 401A1B0E 3DE147AE v_add_f32_e32 v6, v13, v6 ; 060C0D0D v_mul_f32_e32 v6, 0x41649249, v6 ; 100C0CFF 41649249 v_add_f32_e64 v6, 0, v6 clamp ; D2060806 00020C80 v_mac_f32_e32 v3, -2.0, v6 ; 3E060CF5 v_mul_f32_e32 v6, v6, v6 ; 100C0D06 v_mul_f32_e32 v3, v3, v6 ; 10060D03 v_mad_f32 v3, -v3, v7, v7 ; D2820003 241E0F03 v_mad_f32 v6, v0, v11, v3 ; D2820006 040E1700 v_mad_f32 v7, v1, v12, v3 ; D2820007 040E1901 v_mad_f32 v3, v10, v5, v3 ; D2820003 040E0B0A v_mul_f32_e32 v13, v27, v6 ; 101A0D1B v_mul_f32_e32 v14, v27, v7 ; 101C0F1B v_mac_f32_e32 v13, v27, v6 ; 3E1A0D1B v_mul_f32_e32 v6, v27, v3 ; 100C071B v_mac_f32_e32 v13, v0, v11 ; 3E1A1700 v_mac_f32_e32 v14, v27, v7 ; 3E1C0F1B v_mac_f32_e32 v14, v1, v12 ; 3E1C1901 v_mac_f32_e32 v6, v27, v3 ; 3E0C071B v_mac_f32_e32 v6, v10, v5 ; 3E0C0B0A v_add_f32_e64 v0, 0, v13 clamp ; D2060800 00021A80 v_add_f32_e64 v1, 0, v14 clamp ; D2060801 00021C80 v_add_f32_e64 v3, 0, v6 clamp ; D2060803 00020C80 v_mul_f32_e32 v5, v0, v26 ; 100A3500 v_mul_f32_e32 v6, v1, v26 ; 100C3501 v_mul_f32_e32 v7, v3, v26 ; 100E3503 image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[20:27], s[16:19] ; F0800F00 00850A08 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, -v26, v0, v10 ; D2820000 242A011A v_mad_f32 v1, -v26, v1, v11 ; D2820001 242E031A v_mad_f32 v3, -v26, v3, v12 ; D2820003 2432071A v_mul_f32_e32 v10, s4, v26 ; 10143404 v_mac_f32_e32 v10, v10, v15 ; 3E141F0A v_mac_f32_e32 v5, v0, v4 ; 3E0A0900 v_mac_f32_e32 v6, v1, v4 ; 3E0C0901 v_mac_f32_e32 v7, v3, v4 ; 3E0E0903 v_mac_f32_e32 v16, s4, v2 ; 3E200404 v_mul_f32_e32 v0, v16, v10 ; 10001510 v_readlane_b32 s4, v29, 0 ; 0209011D v_readlane_b32 s5, v29, 1 ; 020B031D v_readlane_b32 s6, v29, 2 ; 020D051D v_readlane_b32 s7, v29, 3 ; 020F071D v_readlane_b32 s8, v29, 4 ; 0211091D v_readlane_b32 s9, v29, 5 ; 02130B1D v_readlane_b32 s10, v29, 6 ; 02150D1D v_readlane_b32 s11, v29, 7 ; 02170F1D image_sample v1, 2, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[4:11], s[0:3] ; F0800200 00010108 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, v1, v4 ; 10020901 v_mad_f32 v1, v1, v17, 1.0 ; D2820001 03CA2301 v_mad_f32 v1, v13, v1, -v0 ; D2820001 8402030D v_mac_f32_e32 v0, v1, v4 ; 3E000901 v_cvt_pkrtz_f16_f32_e32 v1, v5, v6 ; 5E020D05 v_cvt_pkrtz_f16_f32_e32 v0, v7, v0 ; 5E000107 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 96 VGPRS: 32 Code Size: 1172 bytes LDS: 0 blocks Scratch: 3072 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], CLIPVERTEX DCL OUT[2], GENERIC[0] DCL CONST[0..54] DCL TEMP[0..2], LOCAL 0: MUL TEMP[0].xy, CONST[48].xyyy, IN[1].xyyy 1: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 2: ADD TEMP[1].x, TEMP[0].xxxx, CONST[48].wwww 3: MOV TEMP[1].z, TEMP[1].xxxx 4: MUL TEMP[0].xy, CONST[49].xyyy, IN[1].xyyy 5: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 6: ADD TEMP[0].x, TEMP[0].xxxx, CONST[49].wwww 7: MOV TEMP[1].w, TEMP[0].xxxx 8: MOV TEMP[0].xw, IN[0].xxxw 9: MOV TEMP[1].xy, IN[1].xyxx 10: MAD TEMP[2].x, IN[0].zzzz, CONST[0].zzzz, -IN[0].wwww 11: MOV TEMP[0].z, TEMP[2].xxxx 12: MOV TEMP[0].y, -IN[0].yyyy 13: MAD TEMP[0].xy, CONST[54].xyyy, IN[0].wwww, TEMP[0].xyyy 14: MOV OUT[2], TEMP[1] 15: MOV OUT[0], TEMP[0] 16: MOV OUT[1], IN[0] 17: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 768) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 772) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 780) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 784) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 788) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 796) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 864) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 868) %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 %25 = add i32 %5, %8 %26 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %24, i32 0, i32 %25) %27 = extractelement <4 x float> %26, i32 0 %28 = extractelement <4 x float> %26, i32 1 %29 = extractelement <4 x float> %26, i32 2 %30 = extractelement <4 x float> %26, i32 3 %31 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !tbaa !0 %33 = add i32 %5, %8 %34 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %32, i32 0, i32 %33) %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = fmul float %15, %35 %38 = fmul float %16, %36 %39 = fadd float %38, %37 %40 = fadd float %39, %17 %41 = fmul float %18, %35 %42 = fmul float %19, %36 %43 = fadd float %42, %41 %44 = fadd float %43, %20 %45 = fmul float %29, %14 %46 = fsub float %45, %30 %47 = fmul float %21, %30 %48 = fadd float %47, %27 %49 = fmul float %22, %30 %50 = fsub float %49, %28 %51 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 16 %52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0 %53 = call float @llvm.SI.load.const(<16 x i8> %52, i32 0) %54 = fmul float %53, %27 %55 = call float @llvm.SI.load.const(<16 x i8> %52, i32 4) %56 = fmul float %55, %28 %57 = fadd float %54, %56 %58 = call float @llvm.SI.load.const(<16 x i8> %52, i32 8) %59 = fmul float %58, %29 %60 = fadd float %57, %59 %61 = call float @llvm.SI.load.const(<16 x i8> %52, i32 12) %62 = fmul float %61, %30 %63 = fadd float %60, %62 %64 = call float @llvm.SI.load.const(<16 x i8> %52, i32 16) %65 = fmul float %64, %27 %66 = call float @llvm.SI.load.const(<16 x i8> %52, i32 20) %67 = fmul float %66, %28 %68 = fadd float %65, %67 %69 = call float @llvm.SI.load.const(<16 x i8> %52, i32 24) %70 = fmul float %69, %29 %71 = fadd float %68, %70 %72 = call float @llvm.SI.load.const(<16 x i8> %52, i32 28) %73 = fmul float %72, %30 %74 = fadd float %71, %73 %75 = call float @llvm.SI.load.const(<16 x i8> %52, i32 32) %76 = fmul float %75, %27 %77 = call float @llvm.SI.load.const(<16 x i8> %52, i32 36) %78 = fmul float %77, %28 %79 = fadd float %76, %78 %80 = call float @llvm.SI.load.const(<16 x i8> %52, i32 40) %81 = fmul float %80, %29 %82 = fadd float %79, %81 %83 = call float @llvm.SI.load.const(<16 x i8> %52, i32 44) %84 = fmul float %83, %30 %85 = fadd float %82, %84 %86 = call float @llvm.SI.load.const(<16 x i8> %52, i32 48) %87 = fmul float %86, %27 %88 = call float @llvm.SI.load.const(<16 x i8> %52, i32 52) %89 = fmul float %88, %28 %90 = fadd float %87, %89 %91 = call float @llvm.SI.load.const(<16 x i8> %52, i32 56) %92 = fmul float %91, %29 %93 = fadd float %90, %92 %94 = call float @llvm.SI.load.const(<16 x i8> %52, i32 60) %95 = fmul float %94, %30 %96 = fadd float %93, %95 %97 = call float @llvm.SI.load.const(<16 x i8> %52, i32 64) %98 = fmul float %97, %27 %99 = call float @llvm.SI.load.const(<16 x i8> %52, i32 68) %100 = fmul float %99, %28 %101 = fadd float %98, %100 %102 = call float @llvm.SI.load.const(<16 x i8> %52, i32 72) %103 = fmul float %102, %29 %104 = fadd float %101, %103 %105 = call float @llvm.SI.load.const(<16 x i8> %52, i32 76) %106 = fmul float %105, %30 %107 = fadd float %104, %106 %108 = call float @llvm.SI.load.const(<16 x i8> %52, i32 80) %109 = fmul float %108, %27 %110 = call float @llvm.SI.load.const(<16 x i8> %52, i32 84) %111 = fmul float %110, %28 %112 = fadd float %109, %111 %113 = call float @llvm.SI.load.const(<16 x i8> %52, i32 88) %114 = fmul float %113, %29 %115 = fadd float %112, %114 %116 = call float @llvm.SI.load.const(<16 x i8> %52, i32 92) %117 = fmul float %116, %30 %118 = fadd float %115, %117 %119 = call float @llvm.SI.load.const(<16 x i8> %52, i32 96) %120 = fmul float %119, %27 %121 = call float @llvm.SI.load.const(<16 x i8> %52, i32 100) %122 = fmul float %121, %28 %123 = fadd float %120, %122 %124 = call float @llvm.SI.load.const(<16 x i8> %52, i32 104) %125 = fmul float %124, %29 %126 = fadd float %123, %125 %127 = call float @llvm.SI.load.const(<16 x i8> %52, i32 108) %128 = fmul float %127, %30 %129 = fadd float %126, %128 %130 = call float @llvm.SI.load.const(<16 x i8> %52, i32 112) %131 = fmul float %130, %27 %132 = call float @llvm.SI.load.const(<16 x i8> %52, i32 116) %133 = fmul float %132, %28 %134 = fadd float %131, %133 %135 = call float @llvm.SI.load.const(<16 x i8> %52, i32 120) %136 = fmul float %135, %29 %137 = fadd float %134, %136 %138 = call float @llvm.SI.load.const(<16 x i8> %52, i32 124) %139 = fmul float %138, %30 %140 = fadd float %137, %139 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %35, float %36, float %40, float %44) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %48, float %50, float %46, float %30) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 13, i32 0, float %63, float %74, float %85, float %96) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 14, i32 0, float %107, float %118, float %129, float %140) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_load_dwordx4 s[4:7], s[2:3], 0x0 ; C0820300 s_load_dwordx4 s[0:3], s[2:3], 0x40 ; C0800340 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[4:7], 0x2 ; C2040502 s_buffer_load_dword s9, s[4:7], 0xc0 ; C20485C0 s_buffer_load_dword s10, s[4:7], 0xc1 ; C20505C1 s_buffer_load_dword s11, s[4:7], 0xc3 ; C20585C3 s_buffer_load_dword s12, s[4:7], 0xc4 ; C20605C4 s_buffer_load_dword s13, s[4:7], 0xc5 ; C20685C5 s_buffer_load_dword s14, s[4:7], 0xc7 ; C20705C7 s_buffer_load_dword s15, s[4:7], 0xd8 ; C20785D8 s_buffer_load_dword s4, s[4:7], 0xd9 ; C20205D9 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x2 ; C2038102 s_buffer_load_dword s16, s[0:3], 0x3 ; C2080103 s_buffer_load_dword s17, s[0:3], 0x4 ; C2088104 s_buffer_load_dword s18, s[0:3], 0x5 ; C2090105 s_buffer_load_dword s19, s[0:3], 0x6 ; C2098106 s_buffer_load_dword s20, s[0:3], 0x7 ; C20A0107 s_buffer_load_dword s21, s[0:3], 0x8 ; C20A8108 s_buffer_load_dword s22, s[0:3], 0x9 ; C20B0109 s_buffer_load_dword s23, s[0:3], 0xa ; C20B810A s_buffer_load_dword s24, s[0:3], 0xb ; C20C010B s_buffer_load_dword s25, s[0:3], 0xc ; C20C810C s_buffer_load_dword s26, s[0:3], 0xd ; C20D010D s_buffer_load_dword s27, s[0:3], 0xe ; C20D810E s_buffer_load_dword s28, s[0:3], 0xf ; C20E010F s_buffer_load_dword s29, s[0:3], 0x10 ; C20E8110 s_buffer_load_dword s30, s[0:3], 0x11 ; C20F0111 s_buffer_load_dword s31, s[0:3], 0x12 ; C20F8112 s_buffer_load_dword s32, s[0:3], 0x13 ; C2100113 s_buffer_load_dword s33, s[0:3], 0x14 ; C2108114 s_buffer_load_dword s34, s[0:3], 0x15 ; C2110115 s_buffer_load_dword s35, s[0:3], 0x16 ; C2118116 s_buffer_load_dword s36, s[0:3], 0x17 ; C2120117 s_buffer_load_dword s37, s[0:3], 0x18 ; C2128118 s_buffer_load_dword s38, s[0:3], 0x19 ; C2130119 s_buffer_load_dword s39, s[0:3], 0x1a ; C213811A s_buffer_load_dword s40, s[0:3], 0x1b ; C214011B s_buffer_load_dword s41, s[0:3], 0x1c ; C214811C s_buffer_load_dword s42, s[0:3], 0x1d ; C215011D s_buffer_load_dword s43, s[0:3], 0x1e ; C215811E s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s11 ; 7E00020B s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v7, s14 ; 7E0E020E v_mac_f32_e32 v0, s9, v5 ; 3E000A09 v_mac_f32_e32 v7, s12, v5 ; 3E0E0A0C v_mul_f32_e32 v8, s6, v2 ; 10100406 v_mul_f32_e32 v9, s18, v2 ; 10120412 v_mul_f32_e32 v10, s22, v2 ; 10140416 v_mul_f32_e32 v11, s26, v2 ; 1016041A v_mul_f32_e32 v12, s30, v2 ; 1018041E v_mul_f32_e32 v13, s34, v2 ; 101A0422 v_mul_f32_e32 v14, s38, v2 ; 101C0426 v_mul_f32_e32 v15, s42, v2 ; 101E042A v_mac_f32_e32 v0, s10, v6 ; 3E000C0A v_mac_f32_e32 v7, s13, v6 ; 3E0E0C0D v_mac_f32_e32 v8, s5, v1 ; 3E100205 v_mac_f32_e32 v9, s17, v1 ; 3E120211 v_mac_f32_e32 v10, s21, v1 ; 3E140215 v_mac_f32_e32 v11, s25, v1 ; 3E160219 v_mac_f32_e32 v12, s29, v1 ; 3E18021D v_mac_f32_e32 v13, s33, v1 ; 3E1A0221 v_mac_f32_e32 v14, s37, v1 ; 3E1C0225 v_mac_f32_e32 v15, s41, v1 ; 3E1E0229 v_mac_f32_e32 v8, s7, v3 ; 3E100607 v_mac_f32_e32 v9, s19, v3 ; 3E120613 exp 15, 32, 0, 0, 0, v5, v6, v0, v7 ; F800020F 07000605 v_mac_f32_e32 v10, s23, v3 ; 3E140617 v_mac_f32_e32 v11, s27, v3 ; 3E16061B v_mac_f32_e32 v12, s31, v3 ; 3E18061F s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v0, v3, s8, -v4 ; D2820000 84101103 v_mad_f32 v1, s15, v4, v1 ; D2820001 0406080F v_mad_f32 v2, s4, v4, -v2 ; D2820002 840A0804 v_mac_f32_e32 v13, s35, v3 ; 3E1A0623 v_mac_f32_e32 v14, s39, v3 ; 3E1C0627 v_mac_f32_e32 v15, s43, v3 ; 3E1E062B v_mac_f32_e32 v8, s16, v4 ; 3E100810 v_mac_f32_e32 v9, s20, v4 ; 3E120814 v_mac_f32_e32 v10, s24, v4 ; 3E140818 v_mac_f32_e32 v11, s28, v4 ; 3E16081C v_mac_f32_e32 v12, s32, v4 ; 3E180820 v_mac_f32_e32 v13, s36, v4 ; 3E1A0824 v_mac_f32_e32 v14, s40, v4 ; 3E1C0828 v_mac_f32_e32 v15, s0, v4 ; 3E1E0800 exp 15, 12, 0, 0, 0, v1, v2, v0, v4 ; F80000CF 04000201 exp 15, 13, 0, 0, 0, v8, v9, v10, v11 ; F80000DF 0B0A0908 exp 15, 14, 0, 1, 0, v12, v13, v14, v15 ; F80008EF 0F0E0D0C s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 16 Code Size: 432 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL CONST[0..3] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.9960, 1.0000, 0.0000, -1.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0].w, TEMP[0], SAMP[0], 2D 2: ADD TEMP[0].x, -TEMP[0].wwww, IMM[0].xxxx 3: MOV TEMP[1].xy, IN[0].xyyy 4: TEX TEMP[1].x, TEMP[1], SAMP[2], 2D 5: ADD TEMP[1].x, -TEMP[1].xxxx, IMM[0].yyyy 6: FSGE TEMP[0].x, TEMP[0].xxxx, IMM[0].zzzz 7: UIF TEMP[0].xxxx :0 8: MOV TEMP[0].x, TEMP[1].xxxx 9: ELSE :0 10: MOV TEMP[0].x, IMM[0].yyyy 11: ENDIF 12: MOV TEMP[1].xy, IN[0].xyyy 13: TEX TEMP[1].xyz, TEMP[1], SAMP[1], 2D 14: ADD TEMP[2].x, TEMP[1].xxxx, -CONST[3].yyyy 15: MAD TEMP[2].x, TEMP[0].xxxx, TEMP[2].xxxx, CONST[3].yyyy 16: ADD TEMP[3].x, TEMP[1].yyyy, IMM[0].wwww 17: MAD TEMP[1].xy, TEMP[1].zzzz, IMM[0].yzzz, IMM[0].zyyy 18: MOV TEMP[2].zw, TEMP[1].yyxy 19: MAD TEMP[0].x, TEMP[0].xxxx, TEMP[3].xxxx, IMM[0].yyyy 20: MOV TEMP[2].y, TEMP[0].xxxx 21: MOV OUT[0], TEMP[2] 22: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 52) %26 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %27 = load <8 x i32>, <8 x i32> addrspace(2)* %26, align 32, !tbaa !0 %28 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %29 = load <4 x i32>, <4 x i32> addrspace(2)* %28, align 16, !tbaa !0 %30 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %31 = load <8 x i32>, <8 x i32> addrspace(2)* %30, align 32, !tbaa !0 %32 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %33 = load <4 x i32>, <4 x i32> addrspace(2)* %32, align 16, !tbaa !0 %34 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %35 = load <8 x i32>, <8 x i32> addrspace(2)* %34, align 32, !tbaa !0 %36 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %37 = load <4 x i32>, <4 x i32> addrspace(2)* %36, align 16, !tbaa !0 %38 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %39 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %40 = bitcast float %38 to i32 %41 = bitcast float %39 to i32 %42 = insertelement <2 x i32> undef, i32 %40, i32 0 %43 = insertelement <2 x i32> %42, i32 %41, i32 1 %44 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %43, <8 x i32> %27, <4 x i32> %29, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %45 = extractelement <4 x float> %44, i32 3 %46 = fsub float 0x3FEFDF3B60000000, %45 %47 = bitcast float %38 to i32 %48 = bitcast float %39 to i32 %49 = insertelement <2 x i32> undef, i32 %47, i32 0 %50 = insertelement <2 x i32> %49, i32 %48, i32 1 %51 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %50, <8 x i32> %35, <4 x i32> %37, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %52 = extractelement <4 x float> %51, i32 0 %53 = fsub float 1.000000e+00, %52 %54 = fcmp oge float %46, 0.000000e+00 %. = select i1 %54, float %53, float 1.000000e+00 %55 = bitcast float %38 to i32 %56 = bitcast float %39 to i32 %57 = insertelement <2 x i32> undef, i32 %55, i32 0 %58 = insertelement <2 x i32> %57, i32 %56, i32 1 %59 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %58, <8 x i32> %31, <4 x i32> %33, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %60 = extractelement <4 x float> %59, i32 0 %61 = extractelement <4 x float> %59, i32 1 %62 = extractelement <4 x float> %59, i32 2 %63 = fsub float %60, %25 %64 = fmul float %., %63 %65 = fadd float %64, %25 %66 = fadd float %61, -1.000000e+00 %67 = fadd float %62, 0.000000e+00 %68 = fmul float %62, 0.000000e+00 %69 = fadd float %68, 1.000000e+00 %70 = fmul float %., %66 %71 = fadd float %70, 1.000000e+00 %72 = call i32 @llvm.SI.packf16(float %65, float %71) %73 = bitcast i32 %72 to float %74 = call i32 @llvm.SI.packf16(float %67, float %69) %75 = bitcast i32 %74 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %73, float %75, float %73, float %75) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx8 s[20:27], s[6:7], 0x10 ; C0CA0710 s_load_dwordx4 s[28:31], s[4:5], 0x0 ; C08E0500 s_load_dwordx4 s[32:35], s[4:5], 0x4 ; C0900504 s_load_dwordx4 s[36:39], s[4:5], 0x8 ; C0920508 s_load_dwordx8 s[40:47], s[6:7], 0x8 ; C0D40708 s_mov_b32 m0, s10 ; BEFC030A s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[0:3], 0xd ; C200010D v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 image_sample v0, 8, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[28:31] ; F0800800 00E30002 image_sample v1, 1, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[20:27], s[36:39] ; F0800100 01250102 image_sample v[2:4], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[40:47], s[32:35] ; F0800700 010A0202 s_waitcnt vmcnt(2) ; BF8C0772 v_sub_f32_e32 v0, 0x3f7ef9db, v0 ; 080000FF 3F7EF9DB s_waitcnt vmcnt(1) ; BF8C0771 v_sub_f32_e32 v1, 1.0, v1 ; 080202F2 v_cmp_le_f32_e32 vcc, 0, v0 ; 7C060080 v_cndmask_b32_e32 v0, 1.0, v1 ; 000002F2 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_subrev_f32_e32 v1, s0, v2 ; 0A020400 v_mad_f32 v1, v0, v1, s0 ; D2820001 00020300 v_add_f32_e32 v2, -1.0, v3 ; 060406F3 v_add_f32_e32 v3, 0, v4 ; 06060880 v_mad_f32 v4, 0, v4, 1.0 ; D2820004 03CA0880 v_mad_f32 v0, v0, v2, 1.0 ; D2820000 03CA0500 v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101 v_cvt_pkrtz_f16_f32_e32 v1, v3, v4 ; 5E020903 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 8 Code Size: 172 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], CLIPVERTEX DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL CONST[0..54] DCL TEMP[0..3], LOCAL 0: MUL TEMP[0].xy, CONST[48].xyyy, IN[1].xyyy 1: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 2: ADD TEMP[1].x, TEMP[0].xxxx, CONST[48].wwww 3: MOV TEMP[1].z, TEMP[1].xxxx 4: MUL TEMP[0].xy, CONST[49].xyyy, IN[1].xyyy 5: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 6: ADD TEMP[2].x, TEMP[0].xxxx, CONST[49].wwww 7: MOV TEMP[1].w, TEMP[2].xxxx 8: MUL TEMP[0].xy, CONST[50].xyyy, IN[1].xyyy 9: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 10: ADD TEMP[2].x, TEMP[0].xxxx, CONST[50].wwww 11: MUL TEMP[0].xy, CONST[51].xyyy, IN[1].xyyy 12: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 13: ADD TEMP[3].x, TEMP[0].xxxx, CONST[51].wwww 14: MOV TEMP[2].y, TEMP[3].xxxx 15: MUL TEMP[0].xy, CONST[52].xyyy, IN[1].xyyy 16: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 17: ADD TEMP[3].x, TEMP[0].xxxx, CONST[52].wwww 18: MOV TEMP[2].z, TEMP[3].xxxx 19: MUL TEMP[0].xy, CONST[53].xyyy, IN[1].xyyy 20: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 21: ADD TEMP[0].x, TEMP[0].xxxx, CONST[53].wwww 22: MOV TEMP[2].w, TEMP[0].xxxx 23: MOV TEMP[0].xw, IN[0].xxxw 24: MOV TEMP[1].xy, IN[1].xyxx 25: MAD TEMP[3].x, IN[0].zzzz, CONST[0].zzzz, -IN[0].wwww 26: MOV TEMP[0].z, TEMP[3].xxxx 27: MOV TEMP[0].y, -IN[0].yyyy 28: MAD TEMP[0].xy, CONST[54].xyyy, IN[0].wwww, TEMP[0].xyyy 29: MOV OUT[2], TEMP[1] 30: MOV OUT[3], TEMP[2] 31: MOV OUT[0], TEMP[0] 32: MOV OUT[1], IN[0] 33: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 768) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 772) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 780) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 784) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 788) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 796) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 800) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 804) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 812) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 816) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 820) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 828) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 832) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 836) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 844) %30 = call float @llvm.SI.load.const(<16 x i8> %13, i32 848) %31 = call float @llvm.SI.load.const(<16 x i8> %13, i32 852) %32 = call float @llvm.SI.load.const(<16 x i8> %13, i32 860) %33 = call float @llvm.SI.load.const(<16 x i8> %13, i32 864) %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 868) %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 %37 = add i32 %5, %8 %38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = add i32 %5, %8 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = fmul float %15, %47 %50 = fmul float %16, %48 %51 = fadd float %50, %49 %52 = fadd float %51, %17 %53 = fmul float %18, %47 %54 = fmul float %19, %48 %55 = fadd float %54, %53 %56 = fadd float %55, %20 %57 = fmul float %21, %47 %58 = fmul float %22, %48 %59 = fadd float %58, %57 %60 = fadd float %59, %23 %61 = fmul float %24, %47 %62 = fmul float %25, %48 %63 = fadd float %62, %61 %64 = fadd float %63, %26 %65 = fmul float %27, %47 %66 = fmul float %28, %48 %67 = fadd float %66, %65 %68 = fadd float %67, %29 %69 = fmul float %30, %47 %70 = fmul float %31, %48 %71 = fadd float %70, %69 %72 = fadd float %71, %32 %73 = fmul float %41, %14 %74 = fsub float %73, %42 %75 = fmul float %33, %42 %76 = fadd float %75, %39 %77 = fmul float %34, %42 %78 = fsub float %77, %40 %79 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 16 %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 %81 = call float @llvm.SI.load.const(<16 x i8> %80, i32 0) %82 = fmul float %81, %39 %83 = call float @llvm.SI.load.const(<16 x i8> %80, i32 4) %84 = fmul float %83, %40 %85 = fadd float %82, %84 %86 = call float @llvm.SI.load.const(<16 x i8> %80, i32 8) %87 = fmul float %86, %41 %88 = fadd float %85, %87 %89 = call float @llvm.SI.load.const(<16 x i8> %80, i32 12) %90 = fmul float %89, %42 %91 = fadd float %88, %90 %92 = call float @llvm.SI.load.const(<16 x i8> %80, i32 16) %93 = fmul float %92, %39 %94 = call float @llvm.SI.load.const(<16 x i8> %80, i32 20) %95 = fmul float %94, %40 %96 = fadd float %93, %95 %97 = call float @llvm.SI.load.const(<16 x i8> %80, i32 24) %98 = fmul float %97, %41 %99 = fadd float %96, %98 %100 = call float @llvm.SI.load.const(<16 x i8> %80, i32 28) %101 = fmul float %100, %42 %102 = fadd float %99, %101 %103 = call float @llvm.SI.load.const(<16 x i8> %80, i32 32) %104 = fmul float %103, %39 %105 = call float @llvm.SI.load.const(<16 x i8> %80, i32 36) %106 = fmul float %105, %40 %107 = fadd float %104, %106 %108 = call float @llvm.SI.load.const(<16 x i8> %80, i32 40) %109 = fmul float %108, %41 %110 = fadd float %107, %109 %111 = call float @llvm.SI.load.const(<16 x i8> %80, i32 44) %112 = fmul float %111, %42 %113 = fadd float %110, %112 %114 = call float @llvm.SI.load.const(<16 x i8> %80, i32 48) %115 = fmul float %114, %39 %116 = call float @llvm.SI.load.const(<16 x i8> %80, i32 52) %117 = fmul float %116, %40 %118 = fadd float %115, %117 %119 = call float @llvm.SI.load.const(<16 x i8> %80, i32 56) %120 = fmul float %119, %41 %121 = fadd float %118, %120 %122 = call float @llvm.SI.load.const(<16 x i8> %80, i32 60) %123 = fmul float %122, %42 %124 = fadd float %121, %123 %125 = call float @llvm.SI.load.const(<16 x i8> %80, i32 64) %126 = fmul float %125, %39 %127 = call float @llvm.SI.load.const(<16 x i8> %80, i32 68) %128 = fmul float %127, %40 %129 = fadd float %126, %128 %130 = call float @llvm.SI.load.const(<16 x i8> %80, i32 72) %131 = fmul float %130, %41 %132 = fadd float %129, %131 %133 = call float @llvm.SI.load.const(<16 x i8> %80, i32 76) %134 = fmul float %133, %42 %135 = fadd float %132, %134 %136 = call float @llvm.SI.load.const(<16 x i8> %80, i32 80) %137 = fmul float %136, %39 %138 = call float @llvm.SI.load.const(<16 x i8> %80, i32 84) %139 = fmul float %138, %40 %140 = fadd float %137, %139 %141 = call float @llvm.SI.load.const(<16 x i8> %80, i32 88) %142 = fmul float %141, %41 %143 = fadd float %140, %142 %144 = call float @llvm.SI.load.const(<16 x i8> %80, i32 92) %145 = fmul float %144, %42 %146 = fadd float %143, %145 %147 = call float @llvm.SI.load.const(<16 x i8> %80, i32 96) %148 = fmul float %147, %39 %149 = call float @llvm.SI.load.const(<16 x i8> %80, i32 100) %150 = fmul float %149, %40 %151 = fadd float %148, %150 %152 = call float @llvm.SI.load.const(<16 x i8> %80, i32 104) %153 = fmul float %152, %41 %154 = fadd float %151, %153 %155 = call float @llvm.SI.load.const(<16 x i8> %80, i32 108) %156 = fmul float %155, %42 %157 = fadd float %154, %156 %158 = call float @llvm.SI.load.const(<16 x i8> %80, i32 112) %159 = fmul float %158, %39 %160 = call float @llvm.SI.load.const(<16 x i8> %80, i32 116) %161 = fmul float %160, %40 %162 = fadd float %159, %161 %163 = call float @llvm.SI.load.const(<16 x i8> %80, i32 120) %164 = fmul float %163, %41 %165 = fadd float %162, %164 %166 = call float @llvm.SI.load.const(<16 x i8> %80, i32 124) %167 = fmul float %166, %42 %168 = fadd float %165, %167 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %47, float %48, float %52, float %56) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %60, float %64, float %68, float %72) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %76, float %78, float %74, float %42) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 13, i32 0, float %91, float %102, float %113, float %124) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 14, i32 0, float %135, float %146, float %157, float %168) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_load_dwordx4 s[4:7], s[2:3], 0x0 ; C0820300 s_load_dwordx4 s[0:3], s[2:3], 0x40 ; C0800340 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[4:7], 0x2 ; C2040502 s_buffer_load_dword s9, s[4:7], 0xc0 ; C20485C0 s_buffer_load_dword s10, s[4:7], 0xc1 ; C20505C1 s_buffer_load_dword s11, s[4:7], 0xc3 ; C20585C3 s_buffer_load_dword s12, s[4:7], 0xc4 ; C20605C4 s_buffer_load_dword s13, s[4:7], 0xc5 ; C20685C5 s_buffer_load_dword s14, s[4:7], 0xc7 ; C20705C7 s_buffer_load_dword s15, s[4:7], 0xc8 ; C20785C8 s_buffer_load_dword s16, s[4:7], 0xc9 ; C20805C9 s_buffer_load_dword s17, s[4:7], 0xcb ; C20885CB s_buffer_load_dword s18, s[4:7], 0xcc ; C20905CC s_buffer_load_dword s19, s[4:7], 0xcd ; C20985CD s_buffer_load_dword s20, s[4:7], 0xcf ; C20A05CF s_buffer_load_dword s21, s[4:7], 0xd0 ; C20A85D0 s_buffer_load_dword s22, s[4:7], 0xd1 ; C20B05D1 s_buffer_load_dword s23, s[4:7], 0xd3 ; C20B85D3 s_buffer_load_dword s24, s[4:7], 0xd4 ; C20C05D4 s_buffer_load_dword s25, s[4:7], 0xd5 ; C20C85D5 s_buffer_load_dword s26, s[4:7], 0xd7 ; C20D05D7 s_buffer_load_dword s27, s[4:7], 0xd8 ; C20D85D8 s_buffer_load_dword s4, s[4:7], 0xd9 ; C20205D9 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x2 ; C2038102 s_buffer_load_dword s28, s[0:3], 0x3 ; C20E0103 s_buffer_load_dword s29, s[0:3], 0x4 ; C20E8104 s_buffer_load_dword s30, s[0:3], 0x5 ; C20F0105 s_buffer_load_dword s31, s[0:3], 0x6 ; C20F8106 s_buffer_load_dword s32, s[0:3], 0x7 ; C2100107 s_buffer_load_dword s33, s[0:3], 0x8 ; C2108108 s_buffer_load_dword s34, s[0:3], 0x9 ; C2110109 s_buffer_load_dword s35, s[0:3], 0xa ; C211810A s_buffer_load_dword s36, s[0:3], 0xb ; C212010B s_buffer_load_dword s37, s[0:3], 0xc ; C212810C s_buffer_load_dword s38, s[0:3], 0xd ; C213010D s_buffer_load_dword s39, s[0:3], 0xe ; C213810E s_buffer_load_dword s40, s[0:3], 0xf ; C214010F s_buffer_load_dword s41, s[0:3], 0x10 ; C2148110 s_buffer_load_dword s42, s[0:3], 0x11 ; C2150111 s_buffer_load_dword s43, s[0:3], 0x12 ; C2158112 s_buffer_load_dword s44, s[0:3], 0x13 ; C2160113 s_buffer_load_dword s45, s[0:3], 0x14 ; C2168114 s_buffer_load_dword s46, s[0:3], 0x15 ; C2170115 s_buffer_load_dword s47, s[0:3], 0x16 ; C2178116 s_buffer_load_dword s48, s[0:3], 0x17 ; C2180117 s_buffer_load_dword s49, s[0:3], 0x18 ; C2188118 s_buffer_load_dword s50, s[0:3], 0x19 ; C2190119 s_buffer_load_dword s51, s[0:3], 0x1a ; C219811A s_buffer_load_dword s52, s[0:3], 0x1b ; C21A011B s_buffer_load_dword s53, s[0:3], 0x1c ; C21A811C s_buffer_load_dword s54, s[0:3], 0x1d ; C21B011D s_buffer_load_dword s55, s[0:3], 0x1e ; C21B811E s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s11 ; 7E00020B s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v7, s14 ; 7E0E020E v_mov_b32_e32 v8, s17 ; 7E100211 v_mov_b32_e32 v9, s20 ; 7E120214 v_mov_b32_e32 v10, s23 ; 7E140217 v_mov_b32_e32 v11, s26 ; 7E16021A v_mac_f32_e32 v0, s9, v5 ; 3E000A09 v_mac_f32_e32 v7, s12, v5 ; 3E0E0A0C v_mac_f32_e32 v8, s15, v5 ; 3E100A0F v_mac_f32_e32 v9, s18, v5 ; 3E120A12 v_mac_f32_e32 v10, s21, v5 ; 3E140A15 v_mac_f32_e32 v11, s24, v5 ; 3E160A18 v_mac_f32_e32 v0, s10, v6 ; 3E000C0A v_mac_f32_e32 v7, s13, v6 ; 3E0E0C0D v_mac_f32_e32 v8, s16, v6 ; 3E100C10 v_mac_f32_e32 v9, s19, v6 ; 3E120C13 exp 15, 32, 0, 0, 0, v5, v6, v0, v7 ; F800020F 07000605 v_mac_f32_e32 v10, s22, v6 ; 3E140C16 v_mac_f32_e32 v11, s25, v6 ; 3E160C19 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v0, v3, s8, -v4 ; D2820000 84101103 v_mad_f32 v5, s27, v4, v1 ; D2820005 0406081B v_mad_f32 v6, s4, v4, -v2 ; D2820006 840A0804 v_mul_f32_e32 v7, s6, v2 ; 100E0406 exp 15, 33, 0, 0, 0, v8, v9, v10, v11 ; F800021F 0B0A0908 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v8, s30, v2 ; 1010041E v_mac_f32_e32 v7, s5, v1 ; 3E0E0205 exp 15, 12, 0, 0, 0, v5, v6, v0, v4 ; F80000CF 04000605 v_mac_f32_e32 v8, s29, v1 ; 3E10021D s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s34, v2 ; 10000422 v_mac_f32_e32 v0, s33, v1 ; 3E000221 v_mul_f32_e32 v5, s38, v2 ; 100A0426 v_mac_f32_e32 v5, s37, v1 ; 3E0A0225 v_mul_f32_e32 v6, s42, v2 ; 100C042A v_mul_f32_e32 v9, s46, v2 ; 1012042E v_mul_f32_e32 v10, s50, v2 ; 10140432 v_mul_f32_e32 v2, s54, v2 ; 10040436 v_mac_f32_e32 v6, s41, v1 ; 3E0C0229 v_mac_f32_e32 v9, s45, v1 ; 3E12022D v_mac_f32_e32 v10, s49, v1 ; 3E140231 v_mac_f32_e32 v2, s53, v1 ; 3E040235 v_mac_f32_e32 v7, s7, v3 ; 3E0E0607 v_mac_f32_e32 v8, s31, v3 ; 3E10061F v_mac_f32_e32 v0, s35, v3 ; 3E000623 v_mac_f32_e32 v5, s39, v3 ; 3E0A0627 v_mac_f32_e32 v6, s43, v3 ; 3E0C062B v_mac_f32_e32 v9, s47, v3 ; 3E12062F v_mac_f32_e32 v10, s51, v3 ; 3E140633 v_mac_f32_e32 v2, s55, v3 ; 3E040637 v_mac_f32_e32 v7, s28, v4 ; 3E0E081C v_mac_f32_e32 v8, s32, v4 ; 3E100820 v_mac_f32_e32 v0, s36, v4 ; 3E000824 v_mac_f32_e32 v5, s40, v4 ; 3E0A0828 v_mac_f32_e32 v6, s44, v4 ; 3E0C082C v_mac_f32_e32 v9, s48, v4 ; 3E120830 v_mac_f32_e32 v10, s52, v4 ; 3E140834 v_mac_f32_e32 v2, s0, v4 ; 3E040800 exp 15, 13, 0, 0, 0, v7, v8, v0, v5 ; F80000DF 05000807 exp 15, 14, 0, 1, 0, v6, v9, v10, v2 ; F80008EF 020A0906 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 12 Code Size: 544 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL SVIEW[5], 2D, FLOAT DCL CONST[0..3] DCL TEMP[0..9], LOCAL IMM[0] FLT32 { 1.0000, 0.1000, 0.7500, 0.0800} IMM[1] FLT32 { 6.6667, -2.0000, 3.0000, -0.1000} IMM[2] FLT32 { 0.3000, 0.5900, 0.1100, 10.0000} IMM[3] FLT32 { 0.2500, -1.0000, -0.0800, 14.2857} IMM[4] FLT32 { 0.0050, 2.0000, 0.0000, 0.0000} 0: SQRT TEMP[0].x, CONST[3].wwww 1: ADD TEMP[1].x, -CONST[1].xxxx, CONST[0].wwww 2: ADD TEMP[2].x, -CONST[1].yyyy, CONST[1].wwww 3: MOV TEMP[1].y, TEMP[2].xxxx 4: ADD TEMP[3].x, -CONST[1].zzzz, CONST[2].wwww 5: MOV TEMP[1].z, TEMP[3].xxxx 6: MAD TEMP[0].xyz, TEMP[0].xxxx, TEMP[1].xyzz, CONST[1].xyzz 7: ADD TEMP[1].xyz, -CONST[1].xyzz, CONST[2].xyzz 8: MAD TEMP[2].xyz, CONST[3].wwww, TEMP[1].xyzz, CONST[1].xyzz 9: MOV TEMP[3].xy, IN[1].zwww 10: TEX TEMP[3], TEMP[3], SAMP[4], 2D 11: MUL TEMP[4].x, TEMP[3].yyyy, TEMP[3].xxxx 12: MUL TEMP[5].xy, IMM[0].zyyy, CONST[3].wwww 13: MOV TEMP[6].xy, IN[0].xyyy 14: TEX TEMP[6].xyz, TEMP[6], SAMP[0], 2D 15: MAD TEMP[7].x, TEMP[6].xxxx, TEMP[6].yyyy, -TEMP[5].yyyy 16: MOV_SAT TEMP[7].x, TEMP[7].xxxx 17: MAD TEMP[4].x, TEMP[4].xxxx, -TEMP[3].zzzz, TEMP[7].xxxx 18: ADD TEMP[4].x, TEMP[4].xxxx, IMM[0].wwww 19: MUL TEMP[4].x, TEMP[4].xxxx, IMM[1].xxxx 20: MOV_SAT TEMP[4].x, TEMP[4].xxxx 21: MAD TEMP[7].x, TEMP[4].xxxx, IMM[1].yyyy, IMM[1].zzzz 22: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[4].xxxx 23: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[7].xxxx 24: LRP TEMP[7].xyz, TEMP[4].xxxx, TEMP[2].xyzz, TEMP[0].xyzz 25: MOV TEMP[8].xy, IN[0].zwww 26: TEX TEMP[8].xyz, TEMP[8], SAMP[5], 2D 27: MUL TEMP[0].xyz, TEMP[8].xyzz, TEMP[7].xyzz 28: DP3 TEMP[8].x, TEMP[8].xyzz, IMM[2].xyzz 29: MAD TEMP[2].xyz, CONST[0].xyzz, TEMP[8].xxxx, -TEMP[0].xyzz 30: MOV TEMP[8].xy, IN[1].xyyy 31: TEX TEMP[8].y, TEMP[8], SAMP[1], 2D 32: MUL TEMP[8].x, TEMP[6].yyyy, TEMP[8].yyyy 33: MUL TEMP[9].x, TEMP[6].xxxx, TEMP[6].xxxx 34: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx 35: MAD TEMP[8].x, TEMP[8].xxxx, CONST[3].wwww, IMM[1].wwww 36: MUL TEMP[8].x, TEMP[8].xxxx, IMM[2].wwww 37: MOV_SAT TEMP[8].x, TEMP[8].xxxx 38: MAD TEMP[9].x, TEMP[8].xxxx, IMM[1].yyyy, IMM[1].zzzz 39: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[8].xxxx 40: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx 41: MAD TEMP[0].xyz, TEMP[8].xxxx, TEMP[2].xyzz, TEMP[0].xyzz 42: ADD TEMP[2].x, -TEMP[6].xxxx, IMM[0].xxxx 43: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx 44: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx 45: MAD TEMP[2].x, TEMP[2].xxxx, IMM[3].xxxx, TEMP[5].xxxx 46: ADD TEMP[3], TEMP[3], IMM[3].yyyy 47: MAD TEMP[7], TEMP[2].xxxx, TEMP[3], IMM[0].xxxx 48: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[7].xyzz 49: DP3 TEMP[3].x, TEMP[0].xyzz, IMM[2].xyzz 50: ADD TEMP[2].x, TEMP[3].xxxx, IMM[3].zzzz 51: MUL TEMP[2].x, TEMP[2].xxxx, IMM[3].wwww 52: MOV_SAT TEMP[3].x, TEMP[2].xxxx 53: MAD TEMP[5].x, TEMP[3].xxxx, IMM[1].yyyy, IMM[1].zzzz 54: MUL TEMP[2].x, TEMP[3].xxxx, TEMP[3].xxxx 55: MAD TEMP[2].x, TEMP[5].xxxx, -TEMP[2].xxxx, IMM[0].xxxx 56: MAD TEMP[2].xyz, TEMP[2].xxxx, IMM[4].xxxx, TEMP[0].xyzz 57: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[6].zzzz 58: MAD TEMP[0].xyz, TEMP[2].xyzz, IMM[4].yyyy, TEMP[0].xyzz 59: MOV_SAT TEMP[3].xyz, TEMP[0].xyzz 60: MUL TEMP[2].xyz, TEMP[6].yyyy, TEMP[3].xyzz 61: MOV TEMP[5].xy, IN[0].xyyy 62: TEX TEMP[5], TEMP[5], SAMP[2], 2D 63: MAD TEMP[0].xyz, TEMP[3].xyzz, -TEMP[6].yyyy, TEMP[5].xyzz 64: MUL TEMP[1].x, TEMP[6].yyyy, CONST[3].zzzz 65: MUL TEMP[1].x, TEMP[7].wwww, TEMP[1].xxxx 66: MOV TEMP[3].xy, IN[0].xyyy 67: TEX TEMP[3].x, TEMP[3], SAMP[3], 2D 68: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].xxxx 69: MAD TEMP[2].xyz, TEMP[3].xxxx, TEMP[0].xyzz, TEMP[2].xyzz 70: MAD TEMP[0].x, TEMP[8].xxxx, -CONST[3].wwww, IMM[0].xxxx 71: MUL TEMP[6].x, TEMP[0].xxxx, TEMP[4].xxxx 72: MAD TEMP[0].x, TEMP[4].xxxx, -TEMP[0].xxxx, IMM[0].xxxx 73: MAD TEMP[0].x, TEMP[8].xxxx, TEMP[0].xxxx, TEMP[6].xxxx 74: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[1].xxxx 75: MAD TEMP[1].x, TEMP[0].xxxx, -CONST[3].xxxx, TEMP[5].wwww 76: MUL TEMP[0].x, TEMP[0].xxxx, CONST[3].xxxx 77: MAD TEMP[0].x, TEMP[3].xxxx, TEMP[1].xxxx, TEMP[0].xxxx 78: MOV TEMP[2].w, TEMP[0].xxxx 79: MOV OUT[0], TEMP[2] 80: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0) %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 4) %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 8) %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 12) %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 16) %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 20) %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 24) %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 28) %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 32) %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 36) %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 40) %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 44) %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 48) %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 56) %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 60) %40 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %41 = load <8 x i32>, <8 x i32> addrspace(2)* %40, align 32, !tbaa !0 %42 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %43 = load <4 x i32>, <4 x i32> addrspace(2)* %42, align 16, !tbaa !0 %44 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %45 = load <8 x i32>, <8 x i32> addrspace(2)* %44, align 32, !tbaa !0 %46 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %47 = load <4 x i32>, <4 x i32> addrspace(2)* %46, align 16, !tbaa !0 %48 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %49 = load <8 x i32>, <8 x i32> addrspace(2)* %48, align 32, !tbaa !0 %50 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %51 = load <4 x i32>, <4 x i32> addrspace(2)* %50, align 16, !tbaa !0 %52 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %53 = load <8 x i32>, <8 x i32> addrspace(2)* %52, align 32, !tbaa !0 %54 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %55 = load <4 x i32>, <4 x i32> addrspace(2)* %54, align 16, !tbaa !0 %56 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %57 = load <8 x i32>, <8 x i32> addrspace(2)* %56, align 32, !tbaa !0 %58 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %59 = load <4 x i32>, <4 x i32> addrspace(2)* %58, align 16, !tbaa !0 %60 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %61 = load <8 x i32>, <8 x i32> addrspace(2)* %60, align 32, !tbaa !0 %62 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %63 = load <4 x i32>, <4 x i32> addrspace(2)* %62, align 16, !tbaa !0 %64 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %65 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %66 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %67 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %68 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %69 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %70 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) %71 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %72 = call float @llvm.sqrt.f32(float %39) %73 = fsub float %28, %29 %74 = fsub float %32, %30 %75 = fsub float %36, %31 %76 = fmul float %72, %73 %77 = fadd float %76, %29 %78 = fmul float %72, %74 %79 = fadd float %78, %30 %80 = fmul float %72, %75 %81 = fadd float %80, %31 %82 = fsub float %33, %29 %83 = fsub float %34, %30 %84 = fsub float %35, %31 %85 = fmul float %39, %82 %86 = fadd float %85, %29 %87 = fmul float %39, %83 %88 = fadd float %87, %30 %89 = fmul float %39, %84 %90 = fadd float %89, %31 %91 = bitcast float %70 to i32 %92 = bitcast float %71 to i32 %93 = insertelement <2 x i32> undef, i32 %91, i32 0 %94 = insertelement <2 x i32> %93, i32 %92, i32 1 %95 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %94, <8 x i32> %57, <4 x i32> %59, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %96 = extractelement <4 x float> %95, i32 0 %97 = extractelement <4 x float> %95, i32 1 %98 = extractelement <4 x float> %95, i32 2 %99 = extractelement <4 x float> %95, i32 3 %100 = fmul float %97, %96 %101 = fmul float %39, 7.500000e-01 %102 = fmul float %39, 0x3FB99999A0000000 %103 = bitcast float %64 to i32 %104 = bitcast float %65 to i32 %105 = insertelement <2 x i32> undef, i32 %103, i32 0 %106 = insertelement <2 x i32> %105, i32 %104, i32 1 %107 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %106, <8 x i32> %41, <4 x i32> %43, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %108 = extractelement <4 x float> %107, i32 0 %109 = extractelement <4 x float> %107, i32 1 %110 = extractelement <4 x float> %107, i32 2 %111 = fmul float %108, %109 %112 = fsub float %111, %102 %113 = call float @llvm.AMDIL.clamp.(float %112, float 0.000000e+00, float 1.000000e+00) %114 = fmul float %98, %100 %115 = fsub float %113, %114 %116 = fadd float %115, 0x3FB47AE140000000 %117 = fmul float %116, 0x401AAAAAA0000000 %118 = call float @llvm.AMDIL.clamp.(float %117, float 0.000000e+00, float 1.000000e+00) %119 = fmul float %118, -2.000000e+00 %120 = fadd float %119, 3.000000e+00 %121 = fmul float %118, %118 %122 = fmul float %121, %120 %123 = fsub float 1.000000e+00, %122 %124 = fmul float %86, %122 %125 = fmul float %77, %123 %126 = fadd float %124, %125 %127 = fsub float 1.000000e+00, %122 %128 = fmul float %88, %122 %129 = fmul float %79, %127 %130 = fadd float %128, %129 %131 = fsub float 1.000000e+00, %122 %132 = fmul float %90, %122 %133 = fmul float %81, %131 %134 = fadd float %132, %133 %135 = bitcast float %66 to i32 %136 = bitcast float %67 to i32 %137 = insertelement <2 x i32> undef, i32 %135, i32 0 %138 = insertelement <2 x i32> %137, i32 %136, i32 1 %139 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %138, <8 x i32> %61, <4 x i32> %63, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %140 = extractelement <4 x float> %139, i32 0 %141 = extractelement <4 x float> %139, i32 1 %142 = extractelement <4 x float> %139, i32 2 %143 = fmul float %140, %126 %144 = fmul float %141, %130 %145 = fmul float %142, %134 %146 = fmul float %140, 0x3FD3333340000000 %147 = fmul float %141, 0x3FE2E147A0000000 %148 = fadd float %147, %146 %149 = fmul float %142, 0x3FBC28F5C0000000 %150 = fadd float %148, %149 %151 = fmul float %25, %150 %152 = fsub float %151, %143 %153 = fmul float %26, %150 %154 = fsub float %153, %144 %155 = fmul float %27, %150 %156 = fsub float %155, %145 %157 = bitcast float %68 to i32 %158 = bitcast float %69 to i32 %159 = insertelement <2 x i32> undef, i32 %157, i32 0 %160 = insertelement <2 x i32> %159, i32 %158, i32 1 %161 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %160, <8 x i32> %45, <4 x i32> %47, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %162 = extractelement <4 x float> %161, i32 1 %163 = fmul float %109, %162 %164 = fmul float %108, %108 %165 = fmul float %163, %164 %166 = fmul float %165, %39 %167 = fadd float %166, 0xBFB99999A0000000 %168 = fmul float %167, 1.000000e+01 %169 = call float @llvm.AMDIL.clamp.(float %168, float 0.000000e+00, float 1.000000e+00) %170 = fmul float %169, -2.000000e+00 %171 = fadd float %170, 3.000000e+00 %172 = fmul float %169, %169 %173 = fmul float %172, %171 %174 = fmul float %173, %152 %175 = fadd float %174, %143 %176 = fmul float %173, %154 %177 = fadd float %176, %144 %178 = fmul float %173, %156 %179 = fadd float %178, %145 %180 = fsub float 1.000000e+00, %108 %181 = fmul float %180, %180 %182 = fmul float %181, %181 %183 = fmul float %182, 2.500000e-01 %184 = fadd float %183, %101 %185 = fadd float %96, -1.000000e+00 %186 = fadd float %97, -1.000000e+00 %187 = fadd float %98, -1.000000e+00 %188 = fadd float %99, -1.000000e+00 %189 = fmul float %184, %185 %190 = fadd float %189, 1.000000e+00 %191 = fmul float %184, %186 %192 = fadd float %191, 1.000000e+00 %193 = fmul float %184, %187 %194 = fadd float %193, 1.000000e+00 %195 = fmul float %184, %188 %196 = fadd float %195, 1.000000e+00 %197 = fmul float %175, %190 %198 = fmul float %177, %192 %199 = fmul float %179, %194 %200 = fmul float %197, 0x3FD3333340000000 %201 = fmul float %198, 0x3FE2E147A0000000 %202 = fadd float %201, %200 %203 = fmul float %199, 0x3FBC28F5C0000000 %204 = fadd float %202, %203 %205 = fadd float %204, 0xBFB47AE140000000 %206 = fmul float %205, 0x402C924920000000 %207 = call float @llvm.AMDIL.clamp.(float %206, float 0.000000e+00, float 1.000000e+00) %208 = fmul float %207, -2.000000e+00 %209 = fadd float %208, 3.000000e+00 %210 = fmul float %207, %207 %211 = fmul float %210, %209 %212 = fsub float 1.000000e+00, %211 %213 = fmul float %212, 0x3F747AE140000000 %214 = fadd float %213, %197 %215 = fmul float %212, 0x3F747AE140000000 %216 = fadd float %215, %198 %217 = fmul float %212, 0x3F747AE140000000 %218 = fadd float %217, %199 %219 = fmul float %214, %110 %220 = fmul float %216, %110 %221 = fmul float %218, %110 %222 = fmul float %219, 2.000000e+00 %223 = fadd float %222, %197 %224 = fmul float %220, 2.000000e+00 %225 = fadd float %224, %198 %226 = fmul float %221, 2.000000e+00 %227 = fadd float %226, %199 %228 = call float @llvm.AMDIL.clamp.(float %223, float 0.000000e+00, float 1.000000e+00) %229 = call float @llvm.AMDIL.clamp.(float %225, float 0.000000e+00, float 1.000000e+00) %230 = call float @llvm.AMDIL.clamp.(float %227, float 0.000000e+00, float 1.000000e+00) %231 = fmul float %109, %228 %232 = fmul float %109, %229 %233 = fmul float %109, %230 %234 = bitcast float %64 to i32 %235 = bitcast float %65 to i32 %236 = insertelement <2 x i32> undef, i32 %234, i32 0 %237 = insertelement <2 x i32> %236, i32 %235, i32 1 %238 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %237, <8 x i32> %49, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %239 = extractelement <4 x float> %238, i32 0 %240 = extractelement <4 x float> %238, i32 1 %241 = extractelement <4 x float> %238, i32 2 %242 = extractelement <4 x float> %238, i32 3 %243 = fmul float %109, %228 %244 = fsub float %239, %243 %245 = fmul float %109, %229 %246 = fsub float %240, %245 %247 = fmul float %109, %230 %248 = fsub float %241, %247 %249 = fmul float %109, %38 %250 = fmul float %196, %249 %251 = bitcast float %64 to i32 %252 = bitcast float %65 to i32 %253 = insertelement <2 x i32> undef, i32 %251, i32 0 %254 = insertelement <2 x i32> %253, i32 %252, i32 1 %255 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %254, <8 x i32> %53, <4 x i32> %55, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %256 = extractelement <4 x float> %255, i32 0 %257 = fsub float 1.000000e+00, %256 %258 = fmul float %257, %244 %259 = fadd float %258, %231 %260 = fmul float %257, %246 %261 = fadd float %260, %232 %262 = fmul float %257, %248 %263 = fadd float %262, %233 %264 = fmul float %39, %173 %265 = fsub float 1.000000e+00, %264 %266 = fmul float %265, %122 %267 = fmul float %265, %122 %268 = fsub float 1.000000e+00, %267 %269 = fmul float %173, %268 %270 = fadd float %269, %266 %271 = fmul float %270, %250 %272 = fmul float %37, %271 %273 = fsub float %242, %272 %274 = fmul float %271, %37 %275 = fmul float %257, %273 %276 = fadd float %275, %274 %277 = call i32 @llvm.SI.packf16(float %259, float %261) %278 = bitcast i32 %277 to float %279 = call i32 @llvm.SI.packf16(float %263, float %276) %280 = bitcast i32 %279 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %278, float %280, float %278, float %280) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s10 ; BEFC030A v_mov_b32_e32 v2, 0x3f400000 ; 7E0402FF 3F400000 v_mov_b32_e32 v3, 0xbdcccccd ; 7E0602FF BDCCCCCD v_mov_b32_e32 v4, 0x40400000 ; 7E0802FF 40400000 v_mov_b32_e32 v5, 0x3e99999a ; 7E0A02FF 3E99999A v_mov_b32_e32 v6, 0x3f170a3d ; 7E0C02FF 3F170A3D v_mov_b32_e32 v7, 0x3de147ae ; 7E0E02FF 3DE147AE v_mov_b32_e32 v8, 0xbda3d70a ; 7E1002FF BDA3D70A v_mov_b32_e32 v9, 0x3ba3d70a ; 7E1202FF 3BA3D70A v_interp_p1_f32 v10, v0, 0, 0, [m0] ; C8280000 v_interp_p2_f32 v10, [v10], v1, 0, 0, [m0] ; C8290001 v_interp_p1_f32 v11, v0, 1, 0, [m0] ; C82C0100 v_interp_p2_f32 v11, [v11], v1, 1, 0, [m0] ; C82D0101 v_interp_p1_f32 v12, v0, 2, 0, [m0] ; C8300200 v_interp_p2_f32 v12, [v12], v1, 2, 0, [m0] ; C8310201 v_interp_p1_f32 v13, v0, 3, 0, [m0] ; C8340300 v_interp_p2_f32 v13, [v13], v1, 3, 0, [m0] ; C8350301 v_interp_p1_f32 v14, v0, 0, 1, [m0] ; C8380400 v_interp_p2_f32 v14, [v14], v1, 0, 1, [m0] ; C8390401 v_interp_p1_f32 v15, v0, 1, 1, [m0] ; C83C0500 v_interp_p2_f32 v15, [v15], v1, 1, 1, [m0] ; C83D0501 v_interp_p1_f32 v16, v0, 2, 1, [m0] ; C8400600 v_interp_p2_f32 v16, [v16], v1, 2, 1, [m0] ; C8410601 v_interp_p1_f32 v17, v0, 3, 1, [m0] ; C8440700 v_interp_p2_f32 v17, [v17], v1, 3, 1, [m0] ; C8450701 s_load_dwordx4 s[28:31], s[2:3], 0x0 ; C08E0300 s_load_dwordx4 s[56:59], s[4:5], 0x0 ; C09C0500 s_load_dwordx4 s[32:35], s[4:5], 0x4 ; C0900504 s_load_dwordx4 s[12:15], s[4:5], 0x8 ; C0860508 s_load_dwordx4 s[8:11], s[4:5], 0xc ; C084050C s_load_dwordx4 s[24:27], s[4:5], 0x10 ; C08C0510 s_load_dwordx4 s[44:47], s[4:5], 0x14 ; C0960514 s_load_dwordx8 s[60:67], s[6:7], 0x0 ; C0DE0700 s_load_dwordx8 s[36:43], s[6:7], 0x8 ; C0D20708 s_load_dwordx8 s[16:23], s[6:7], 0x10 ; C0C80710 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s1, s[28:31], 0x0 ; C2009D00 s_buffer_load_dword s2, s[28:31], 0x1 ; C2011D01 s_buffer_load_dword s3, s[28:31], 0x2 ; C2019D02 s_buffer_load_dword s4, s[28:31], 0x3 ; C2021D03 s_buffer_load_dword s5, s[28:31], 0x4 ; C2029D04 s_buffer_load_dword s76, s[28:31], 0x5 ; C2261D05 s_buffer_load_dword s77, s[28:31], 0x6 ; C2269D06 s_buffer_load_dword s78, s[28:31], 0x7 ; C2271D07 s_buffer_load_dword s79, s[28:31], 0x8 ; C2279D08 s_buffer_load_dword s80, s[28:31], 0x9 ; C2281D09 s_buffer_load_dword s81, s[28:31], 0xa ; C2289D0A s_buffer_load_dword s82, s[28:31], 0xb ; C2291D0B s_buffer_load_dword s0, s[28:31], 0xc ; C2001D0C s_buffer_load_dword s83, s[28:31], 0xe ; C2299D0E s_buffer_load_dword s84, s[28:31], 0xf ; C22A1D0F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s5 ; 7E000205 v_sub_f32_e32 v1, s4, v0 ; 08020004 v_mov_b32_e32 v18, s76 ; 7E24024C v_mov_b32_e32 v19, s77 ; 7E26024D s_load_dwordx8 s[68:75], s[6:7], 0x20 ; C0E20720 s_load_dwordx8 s[48:55], s[6:7], 0x28 ; C0D80728 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[20:23], 15, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[68:75], s[24:27] ; F0800F00 00D11410 s_load_dwordx8 s[24:31], s[6:7], 0x18 ; C0CC0718 image_sample v[24:26], 7, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[60:67], s[56:59] ; F0800700 01CF180A image_sample v[27:29], 7, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[48:55], s[44:47] ; F0800700 016C1B0C image_sample v12, 2, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[36:43], s[32:35] ; F0800200 01090C0E v_sqrt_f32_e32 v13, s84 ; 7E1A6654 v_mad_f32 v1, v13, v1, s5 ; D2820001 0016030D v_sub_f32_e32 v14, s78, v18 ; 081C244E v_mad_f32 v14, v13, v14, s76 ; D282000E 01321D0D v_sub_f32_e32 v15, s82, v19 ; 081E2652 v_mad_f32 v13, v13, v15, s77 ; D282000D 01361F0D v_sub_f32_e32 v15, s79, v0 ; 081E004F v_mad_f32 v0, s84, v15, v0 ; D2820000 04021E54 v_sub_f32_e32 v15, s80, v18 ; 081E2450 v_mad_f32 v15, s84, v15, v18 ; D282000F 044A1E54 v_sub_f32_e32 v16, s81, v19 ; 08202651 v_mad_f32 v16, s84, v16, v19 ; D2820010 044E2054 v_mul_f32_e32 v17, s84, v3 ; 10220654 s_waitcnt vmcnt(2) ; BF8C0772 v_mac_f32_e32 v17, v25, v24 ; 3E223119 v_add_f32_e64 v17, 0, v17 clamp ; D2060811 00022280 v_mul_f32_e32 v18, v20, v21 ; 10242B14 v_mad_f32 v17, -v22, v18, v17 ; D2820011 24462516 v_add_f32_e32 v17, 0x3da3d70a, v17 ; 062222FF 3DA3D70A v_mul_f32_e32 v17, 0x40d55555, v17 ; 102222FF 40D55555 v_add_f32_e64 v17, 0, v17 clamp ; D2060811 00022280 v_mad_f32 v18, -2.0, v17, v4 ; D2820012 041222F5 v_mul_f32_e32 v17, v17, v17 ; 10222311 v_mul_f32_e32 v17, v18, v17 ; 10222312 v_mad_f32 v1, -v17, v1, v1 ; D2820001 24060311 v_mac_f32_e32 v1, v17, v0 ; 3E020111 v_mad_f32 v0, -v17, v14, v14 ; D2820000 243A1D11 v_mac_f32_e32 v0, v17, v15 ; 3E001F11 v_mad_f32 v13, -v17, v13, v13 ; D282000D 24361B11 v_mac_f32_e32 v13, v17, v16 ; 3E1A2111 s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v1, v1, v27 ; 10023701 v_mul_f32_e32 v0, v0, v28 ; 10003900 v_mul_f32_e32 v13, v13, v29 ; 101A3B0D v_mul_f32_e32 v14, v5, v27 ; 101C3705 v_mac_f32_e32 v14, v6, v28 ; 3E1C3906 v_mac_f32_e32 v14, v7, v29 ; 3E1C3B07 v_mul_f32_e32 v2, s84, v2 ; 10040454 v_sub_f32_e32 v15, 1.0, v24 ; 081E30F2 v_mad_f32 v15, -v24, v15, v15 ; D282000F 243E1F18 v_mul_f32_e32 v15, v15, v15 ; 101E1F0F v_madmk_f32_e32 v2, v15, v2, 0x3e800000 ; 4004050F 3E800000 v_add_f32_e32 v15, -1.0, v20 ; 061E28F3 v_add_f32_e32 v16, -1.0, v21 ; 06202AF3 v_add_f32_e32 v18, -1.0, v22 ; 06242CF3 v_mad_f32 v19, v20, v2, -v2 ; D2820013 840A0514 v_mad_f32 v20, v21, v2, -v2 ; D2820014 840A0515 v_mad_f32 v21, v22, v2, -v2 ; D2820015 840A0516 v_mad_f32 v22, v23, v2, -v2 ; D2820016 840A0517 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v12, v12, v25 ; 1018330C v_mul_f32_e32 v23, v24, v24 ; 102E3118 v_mul_f32_e32 v12, v23, v12 ; 10181917 v_mac_f32_e32 v3, s84, v12 ; 3E061854 v_mul_f32_e32 v3, 0x41200000, v3 ; 100606FF 41200000 v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680 v_mad_f32 v12, -2.0, v3, v4 ; D282000C 041206F5 v_mul_f32_e32 v3, v3, v3 ; 10060703 v_mul_f32_e32 v3, v12, v3 ; 1006070C v_mad_f32 v12, s1, v14, -v1 ; D282000C 84061C01 v_mac_f32_e32 v1, v12, v3 ; 3E02070C v_mad_f32 v12, s2, v14, -v0 ; D282000C 84021C02 v_mac_f32_e32 v0, v12, v3 ; 3E00070C v_mad_f32 v12, v1, v19, v1 ; D282000C 04062701 v_mul_f32_e32 v5, v5, v12 ; 100A1905 v_mad_f32 v12, v0, v20, v0 ; D282000C 04022900 v_mac_f32_e32 v5, v6, v12 ; 3E0A1906 v_mad_f32 v6, s3, v14, -v13 ; D2820006 84361C03 v_mac_f32_e32 v13, v6, v3 ; 3E1A0706 v_mad_f32 v6, v2, v15, 1.0 ; D2820006 03CA1F02 v_mad_f32 v12, v2, v16, 1.0 ; D282000C 03CA2102 v_mad_f32 v2, v2, v18, 1.0 ; D2820002 03CA2502 v_mad_f32 v14, v13, v21, v13 ; D282000E 04362B0D v_mac_f32_e32 v5, v7, v14 ; 3E0A1D07 v_add_f32_e32 v5, v5, v8 ; 060A1105 v_mul_f32_e32 v5, 0x41649249, v5 ; 100A0AFF 41649249 v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80 v_mac_f32_e32 v4, -2.0, v5 ; 3E080AF5 v_mul_f32_e32 v5, v5, v5 ; 100A0B05 v_mul_f32_e32 v4, v4, v5 ; 10080B04 v_mad_f32 v4, -v4, v9, v9 ; D2820004 24261304 v_mad_f32 v5, v6, v1, v4 ; D2820005 04120306 v_mad_f32 v7, v12, v0, v4 ; D2820007 0412010C v_mad_f32 v4, v2, v13, v4 ; D2820004 04121B02 v_mul_f32_e32 v8, v26, v5 ; 10100B1A v_mul_f32_e32 v9, v26, v7 ; 10120F1A v_mul_f32_e32 v14, v26, v4 ; 101C091A v_mac_f32_e32 v8, v26, v5 ; 3E100B1A v_mac_f32_e32 v8, v6, v1 ; 3E100306 v_mac_f32_e32 v9, v26, v7 ; 3E120F1A v_mac_f32_e32 v9, v12, v0 ; 3E12010C v_mac_f32_e32 v14, v26, v4 ; 3E1C091A v_mac_f32_e32 v14, v2, v13 ; 3E1C1B02 v_add_f32_e64 v0, 0, v8 clamp ; D2060800 00021080 v_add_f32_e64 v1, 0, v9 clamp ; D2060801 00021280 v_add_f32_e64 v2, 0, v14 clamp ; D2060802 00021C80 v_mul_f32_e32 v4, v0, v25 ; 10083300 v_mul_f32_e32 v5, v1, v25 ; 100A3301 v_mul_f32_e32 v6, v2, v25 ; 100C3302 image_sample v[12:15], 15, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[16:23], s[12:15] ; F0800F00 00640C0A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, -v25, v0, v12 ; D2820000 24320119 v_mad_f32 v1, -v25, v1, v13 ; D2820001 24360319 v_mad_f32 v2, -v25, v2, v14 ; D2820002 243A0519 v_mul_f32_e32 v7, s83, v25 ; 100E3253 v_mac_f32_e32 v7, v7, v22 ; 3E0E2D07 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v8, 1, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[24:31], s[8:11] ; F0800100 0046080A s_waitcnt vmcnt(0) ; BF8C0770 v_sub_f32_e32 v8, 1.0, v8 ; 081010F2 v_mac_f32_e32 v4, v0, v8 ; 3E081100 v_mac_f32_e32 v5, v1, v8 ; 3E0A1101 v_mac_f32_e32 v6, v2, v8 ; 3E0C1102 v_mul_f32_e32 v0, s84, v3 ; 10000654 v_mad_f32 v1, -s84, v3, 1.0 ; D2820001 23CA0654 v_mad_f32 v0, -v0, v17, v17 ; D2820000 24462300 v_mad_f32 v1, -v1, v17, 1.0 ; D2820001 23CA2301 v_mac_f32_e32 v0, v1, v3 ; 3E000701 v_mul_f32_e32 v0, v7, v0 ; 10000107 v_mul_f32_e32 v1, s0, v0 ; 10020000 v_mad_f32 v0, -s0, v0, v15 ; D2820000 243E0000 v_mac_f32_e32 v1, v0, v8 ; 3E021100 v_cvt_pkrtz_f16_f32_e32 v0, v4, v5 ; 5E000B04 v_cvt_pkrtz_f16_f32_e32 v1, v6, v1 ; 5E020306 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 88 VGPRS: 32 Code Size: 1020 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], CLIPVERTEX DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL CONST[0..54] DCL TEMP[0..3], LOCAL 0: MUL TEMP[0].xy, CONST[48].xyyy, IN[1].xyyy 1: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 2: ADD TEMP[1].x, TEMP[0].xxxx, CONST[48].wwww 3: MOV TEMP[1].z, TEMP[1].xxxx 4: MUL TEMP[0].xy, CONST[49].xyyy, IN[1].xyyy 5: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 6: ADD TEMP[2].x, TEMP[0].xxxx, CONST[49].wwww 7: MOV TEMP[1].w, TEMP[2].xxxx 8: MUL TEMP[0].xy, CONST[50].xyyy, IN[1].xyyy 9: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 10: ADD TEMP[2].x, TEMP[0].xxxx, CONST[50].wwww 11: MUL TEMP[0].xy, CONST[51].xyyy, IN[1].xyyy 12: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 13: ADD TEMP[3].x, TEMP[0].xxxx, CONST[51].wwww 14: MOV TEMP[2].y, TEMP[3].xxxx 15: MUL TEMP[0].xy, CONST[52].xyyy, IN[1].xyyy 16: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 17: ADD TEMP[3].x, TEMP[0].xxxx, CONST[52].wwww 18: MOV TEMP[2].z, TEMP[3].xxxx 19: MUL TEMP[0].xy, CONST[53].xyyy, IN[1].xyyy 20: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 21: ADD TEMP[0].x, TEMP[0].xxxx, CONST[53].wwww 22: MOV TEMP[2].w, TEMP[0].xxxx 23: MOV TEMP[0].xw, IN[0].xxxw 24: MOV TEMP[1].xy, IN[1].xyxx 25: MAD TEMP[3].x, IN[0].zzzz, CONST[0].zzzz, -IN[0].wwww 26: MOV TEMP[0].z, TEMP[3].xxxx 27: MOV TEMP[0].y, -IN[0].yyyy 28: MAD TEMP[0].xy, CONST[54].xyyy, IN[0].wwww, TEMP[0].xyyy 29: MOV OUT[2], TEMP[1] 30: MOV OUT[3], TEMP[2] 31: MOV OUT[0], TEMP[0] 32: MOV OUT[1], IN[0] 33: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 768) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 772) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 780) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 784) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 788) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 796) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 800) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 804) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 812) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 816) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 820) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 828) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 832) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 836) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 844) %30 = call float @llvm.SI.load.const(<16 x i8> %13, i32 848) %31 = call float @llvm.SI.load.const(<16 x i8> %13, i32 852) %32 = call float @llvm.SI.load.const(<16 x i8> %13, i32 860) %33 = call float @llvm.SI.load.const(<16 x i8> %13, i32 864) %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 868) %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 %37 = add i32 %5, %8 %38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = add i32 %5, %8 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = fmul float %15, %47 %50 = fmul float %16, %48 %51 = fadd float %50, %49 %52 = fadd float %51, %17 %53 = fmul float %18, %47 %54 = fmul float %19, %48 %55 = fadd float %54, %53 %56 = fadd float %55, %20 %57 = fmul float %21, %47 %58 = fmul float %22, %48 %59 = fadd float %58, %57 %60 = fadd float %59, %23 %61 = fmul float %24, %47 %62 = fmul float %25, %48 %63 = fadd float %62, %61 %64 = fadd float %63, %26 %65 = fmul float %27, %47 %66 = fmul float %28, %48 %67 = fadd float %66, %65 %68 = fadd float %67, %29 %69 = fmul float %30, %47 %70 = fmul float %31, %48 %71 = fadd float %70, %69 %72 = fadd float %71, %32 %73 = fmul float %41, %14 %74 = fsub float %73, %42 %75 = fmul float %33, %42 %76 = fadd float %75, %39 %77 = fmul float %34, %42 %78 = fsub float %77, %40 %79 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 16 %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 %81 = call float @llvm.SI.load.const(<16 x i8> %80, i32 0) %82 = fmul float %81, %39 %83 = call float @llvm.SI.load.const(<16 x i8> %80, i32 4) %84 = fmul float %83, %40 %85 = fadd float %82, %84 %86 = call float @llvm.SI.load.const(<16 x i8> %80, i32 8) %87 = fmul float %86, %41 %88 = fadd float %85, %87 %89 = call float @llvm.SI.load.const(<16 x i8> %80, i32 12) %90 = fmul float %89, %42 %91 = fadd float %88, %90 %92 = call float @llvm.SI.load.const(<16 x i8> %80, i32 16) %93 = fmul float %92, %39 %94 = call float @llvm.SI.load.const(<16 x i8> %80, i32 20) %95 = fmul float %94, %40 %96 = fadd float %93, %95 %97 = call float @llvm.SI.load.const(<16 x i8> %80, i32 24) %98 = fmul float %97, %41 %99 = fadd float %96, %98 %100 = call float @llvm.SI.load.const(<16 x i8> %80, i32 28) %101 = fmul float %100, %42 %102 = fadd float %99, %101 %103 = call float @llvm.SI.load.const(<16 x i8> %80, i32 32) %104 = fmul float %103, %39 %105 = call float @llvm.SI.load.const(<16 x i8> %80, i32 36) %106 = fmul float %105, %40 %107 = fadd float %104, %106 %108 = call float @llvm.SI.load.const(<16 x i8> %80, i32 40) %109 = fmul float %108, %41 %110 = fadd float %107, %109 %111 = call float @llvm.SI.load.const(<16 x i8> %80, i32 44) %112 = fmul float %111, %42 %113 = fadd float %110, %112 %114 = call float @llvm.SI.load.const(<16 x i8> %80, i32 48) %115 = fmul float %114, %39 %116 = call float @llvm.SI.load.const(<16 x i8> %80, i32 52) %117 = fmul float %116, %40 %118 = fadd float %115, %117 %119 = call float @llvm.SI.load.const(<16 x i8> %80, i32 56) %120 = fmul float %119, %41 %121 = fadd float %118, %120 %122 = call float @llvm.SI.load.const(<16 x i8> %80, i32 60) %123 = fmul float %122, %42 %124 = fadd float %121, %123 %125 = call float @llvm.SI.load.const(<16 x i8> %80, i32 64) %126 = fmul float %125, %39 %127 = call float @llvm.SI.load.const(<16 x i8> %80, i32 68) %128 = fmul float %127, %40 %129 = fadd float %126, %128 %130 = call float @llvm.SI.load.const(<16 x i8> %80, i32 72) %131 = fmul float %130, %41 %132 = fadd float %129, %131 %133 = call float @llvm.SI.load.const(<16 x i8> %80, i32 76) %134 = fmul float %133, %42 %135 = fadd float %132, %134 %136 = call float @llvm.SI.load.const(<16 x i8> %80, i32 80) %137 = fmul float %136, %39 %138 = call float @llvm.SI.load.const(<16 x i8> %80, i32 84) %139 = fmul float %138, %40 %140 = fadd float %137, %139 %141 = call float @llvm.SI.load.const(<16 x i8> %80, i32 88) %142 = fmul float %141, %41 %143 = fadd float %140, %142 %144 = call float @llvm.SI.load.const(<16 x i8> %80, i32 92) %145 = fmul float %144, %42 %146 = fadd float %143, %145 %147 = call float @llvm.SI.load.const(<16 x i8> %80, i32 96) %148 = fmul float %147, %39 %149 = call float @llvm.SI.load.const(<16 x i8> %80, i32 100) %150 = fmul float %149, %40 %151 = fadd float %148, %150 %152 = call float @llvm.SI.load.const(<16 x i8> %80, i32 104) %153 = fmul float %152, %41 %154 = fadd float %151, %153 %155 = call float @llvm.SI.load.const(<16 x i8> %80, i32 108) %156 = fmul float %155, %42 %157 = fadd float %154, %156 %158 = call float @llvm.SI.load.const(<16 x i8> %80, i32 112) %159 = fmul float %158, %39 %160 = call float @llvm.SI.load.const(<16 x i8> %80, i32 116) %161 = fmul float %160, %40 %162 = fadd float %159, %161 %163 = call float @llvm.SI.load.const(<16 x i8> %80, i32 120) %164 = fmul float %163, %41 %165 = fadd float %162, %164 %166 = call float @llvm.SI.load.const(<16 x i8> %80, i32 124) %167 = fmul float %166, %42 %168 = fadd float %165, %167 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %47, float %48, float %52, float %56) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %60, float %64, float %68, float %72) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %76, float %78, float %74, float %42) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 13, i32 0, float %91, float %102, float %113, float %124) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 14, i32 0, float %135, float %146, float %157, float %168) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_load_dwordx4 s[4:7], s[2:3], 0x0 ; C0820300 s_load_dwordx4 s[0:3], s[2:3], 0x40 ; C0800340 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[4:7], 0x2 ; C2040502 s_buffer_load_dword s9, s[4:7], 0xc0 ; C20485C0 s_buffer_load_dword s10, s[4:7], 0xc1 ; C20505C1 s_buffer_load_dword s11, s[4:7], 0xc3 ; C20585C3 s_buffer_load_dword s12, s[4:7], 0xc4 ; C20605C4 s_buffer_load_dword s13, s[4:7], 0xc5 ; C20685C5 s_buffer_load_dword s14, s[4:7], 0xc7 ; C20705C7 s_buffer_load_dword s15, s[4:7], 0xc8 ; C20785C8 s_buffer_load_dword s16, s[4:7], 0xc9 ; C20805C9 s_buffer_load_dword s17, s[4:7], 0xcb ; C20885CB s_buffer_load_dword s18, s[4:7], 0xcc ; C20905CC s_buffer_load_dword s19, s[4:7], 0xcd ; C20985CD s_buffer_load_dword s20, s[4:7], 0xcf ; C20A05CF s_buffer_load_dword s21, s[4:7], 0xd0 ; C20A85D0 s_buffer_load_dword s22, s[4:7], 0xd1 ; C20B05D1 s_buffer_load_dword s23, s[4:7], 0xd3 ; C20B85D3 s_buffer_load_dword s24, s[4:7], 0xd4 ; C20C05D4 s_buffer_load_dword s25, s[4:7], 0xd5 ; C20C85D5 s_buffer_load_dword s26, s[4:7], 0xd7 ; C20D05D7 s_buffer_load_dword s27, s[4:7], 0xd8 ; C20D85D8 s_buffer_load_dword s4, s[4:7], 0xd9 ; C20205D9 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x2 ; C2038102 s_buffer_load_dword s28, s[0:3], 0x3 ; C20E0103 s_buffer_load_dword s29, s[0:3], 0x4 ; C20E8104 s_buffer_load_dword s30, s[0:3], 0x5 ; C20F0105 s_buffer_load_dword s31, s[0:3], 0x6 ; C20F8106 s_buffer_load_dword s32, s[0:3], 0x7 ; C2100107 s_buffer_load_dword s33, s[0:3], 0x8 ; C2108108 s_buffer_load_dword s34, s[0:3], 0x9 ; C2110109 s_buffer_load_dword s35, s[0:3], 0xa ; C211810A s_buffer_load_dword s36, s[0:3], 0xb ; C212010B s_buffer_load_dword s37, s[0:3], 0xc ; C212810C s_buffer_load_dword s38, s[0:3], 0xd ; C213010D s_buffer_load_dword s39, s[0:3], 0xe ; C213810E s_buffer_load_dword s40, s[0:3], 0xf ; C214010F s_buffer_load_dword s41, s[0:3], 0x10 ; C2148110 s_buffer_load_dword s42, s[0:3], 0x11 ; C2150111 s_buffer_load_dword s43, s[0:3], 0x12 ; C2158112 s_buffer_load_dword s44, s[0:3], 0x13 ; C2160113 s_buffer_load_dword s45, s[0:3], 0x14 ; C2168114 s_buffer_load_dword s46, s[0:3], 0x15 ; C2170115 s_buffer_load_dword s47, s[0:3], 0x16 ; C2178116 s_buffer_load_dword s48, s[0:3], 0x17 ; C2180117 s_buffer_load_dword s49, s[0:3], 0x18 ; C2188118 s_buffer_load_dword s50, s[0:3], 0x19 ; C2190119 s_buffer_load_dword s51, s[0:3], 0x1a ; C219811A s_buffer_load_dword s52, s[0:3], 0x1b ; C21A011B s_buffer_load_dword s53, s[0:3], 0x1c ; C21A811C s_buffer_load_dword s54, s[0:3], 0x1d ; C21B011D s_buffer_load_dword s55, s[0:3], 0x1e ; C21B811E s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s11 ; 7E00020B s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v7, s14 ; 7E0E020E v_mov_b32_e32 v8, s17 ; 7E100211 v_mov_b32_e32 v9, s20 ; 7E120214 v_mov_b32_e32 v10, s23 ; 7E140217 v_mov_b32_e32 v11, s26 ; 7E16021A v_mac_f32_e32 v0, s9, v5 ; 3E000A09 v_mac_f32_e32 v7, s12, v5 ; 3E0E0A0C v_mac_f32_e32 v8, s15, v5 ; 3E100A0F v_mac_f32_e32 v9, s18, v5 ; 3E120A12 v_mac_f32_e32 v10, s21, v5 ; 3E140A15 v_mac_f32_e32 v11, s24, v5 ; 3E160A18 v_mac_f32_e32 v0, s10, v6 ; 3E000C0A v_mac_f32_e32 v7, s13, v6 ; 3E0E0C0D v_mac_f32_e32 v8, s16, v6 ; 3E100C10 v_mac_f32_e32 v9, s19, v6 ; 3E120C13 exp 15, 32, 0, 0, 0, v5, v6, v0, v7 ; F800020F 07000605 v_mac_f32_e32 v10, s22, v6 ; 3E140C16 v_mac_f32_e32 v11, s25, v6 ; 3E160C19 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v0, v3, s8, -v4 ; D2820000 84101103 v_mad_f32 v5, s27, v4, v1 ; D2820005 0406081B v_mad_f32 v6, s4, v4, -v2 ; D2820006 840A0804 v_mul_f32_e32 v7, s6, v2 ; 100E0406 exp 15, 33, 0, 0, 0, v8, v9, v10, v11 ; F800021F 0B0A0908 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v8, s30, v2 ; 1010041E v_mac_f32_e32 v7, s5, v1 ; 3E0E0205 exp 15, 12, 0, 0, 0, v5, v6, v0, v4 ; F80000CF 04000605 v_mac_f32_e32 v8, s29, v1 ; 3E10021D s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s34, v2 ; 10000422 v_mac_f32_e32 v0, s33, v1 ; 3E000221 v_mul_f32_e32 v5, s38, v2 ; 100A0426 v_mac_f32_e32 v5, s37, v1 ; 3E0A0225 v_mul_f32_e32 v6, s42, v2 ; 100C042A v_mul_f32_e32 v9, s46, v2 ; 1012042E v_mul_f32_e32 v10, s50, v2 ; 10140432 v_mul_f32_e32 v2, s54, v2 ; 10040436 v_mac_f32_e32 v6, s41, v1 ; 3E0C0229 v_mac_f32_e32 v9, s45, v1 ; 3E12022D v_mac_f32_e32 v10, s49, v1 ; 3E140231 v_mac_f32_e32 v2, s53, v1 ; 3E040235 v_mac_f32_e32 v7, s7, v3 ; 3E0E0607 v_mac_f32_e32 v8, s31, v3 ; 3E10061F v_mac_f32_e32 v0, s35, v3 ; 3E000623 v_mac_f32_e32 v5, s39, v3 ; 3E0A0627 v_mac_f32_e32 v6, s43, v3 ; 3E0C062B v_mac_f32_e32 v9, s47, v3 ; 3E12062F v_mac_f32_e32 v10, s51, v3 ; 3E140633 v_mac_f32_e32 v2, s55, v3 ; 3E040637 v_mac_f32_e32 v7, s28, v4 ; 3E0E081C v_mac_f32_e32 v8, s32, v4 ; 3E100820 v_mac_f32_e32 v0, s36, v4 ; 3E000824 v_mac_f32_e32 v5, s40, v4 ; 3E0A0828 v_mac_f32_e32 v6, s44, v4 ; 3E0C082C v_mac_f32_e32 v9, s48, v4 ; 3E120830 v_mac_f32_e32 v10, s52, v4 ; 3E140834 v_mac_f32_e32 v2, s0, v4 ; 3E040800 exp 15, 13, 0, 0, 0, v7, v8, v0, v5 ; F80000DF 05000807 exp 15, 14, 0, 1, 0, v6, v9, v10, v2 ; F80008EF 020A0906 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 12 Code Size: 544 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL CONST[0..3] DCL TEMP[0..4], LOCAL IMM[0] FLT32 { -0.5000, -1.0000, 10.0000, -10.0000} IMM[1] FLT32 { -2.0000, 3.0000, 6.0000, 1.0000} IMM[2] FLT32 { -0.5800, -1.0000, -0.0000, 1.0000} IMM[3] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0].xw, TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].xy, IN[1].xyyy 3: TEX TEMP[1].y, TEMP[1], SAMP[1], 2D 4: MAD TEMP[0].x, TEMP[1].yyyy, TEMP[0].xxxx, TEMP[0].wwww 5: MOV TEMP[1].xy, IN[0].zwww 6: TEX TEMP[1].w, TEMP[1], SAMP[3], 2D 7: ADD TEMP[2].xy, TEMP[1].wwww, IMM[0].xyyy 8: ADD TEMP[3].x, TEMP[1].wwww, TEMP[1].wwww 9: MOV_SAT TEMP[3].x, TEMP[3].xxxx 10: MUL TEMP[2].xy, TEMP[2].xyyy, IMM[0].zwww 11: MOV_SAT TEMP[2].xy, TEMP[2].xyyy 12: MAD TEMP[1].xy, TEMP[2].xyyy, IMM[1].xxxx, IMM[1].yyyy 13: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[2].xyyy 14: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[1].xyyy 15: MUL TEMP[2].x, TEMP[2].yyyy, TEMP[2].xxxx 16: MAD TEMP[4].x, CONST[3].wwww, IMM[1].zzzz, IMM[1].wwww 17: MAD TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx, TEMP[2].xxxx 18: MAD TEMP[2].x, TEMP[3].xxxx, IMM[1].xxxx, IMM[1].yyyy 19: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx 20: MUL TEMP[2].x, TEMP[3].xxxx, TEMP[2].xxxx 21: MAD TEMP[0].x, TEMP[0].xxxx, TEMP[2].xxxx, IMM[2].xxxx 22: MUL TEMP[0].x, TEMP[0].xxxx, IMM[0].zzzz 23: MOV_SAT TEMP[2].x, TEMP[0].xxxx 24: MAD TEMP[3].x, TEMP[2].xxxx, IMM[1].xxxx, IMM[1].yyyy 25: MUL TEMP[0].x, TEMP[2].xxxx, TEMP[2].xxxx 26: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[3].xxxx 27: MOV TEMP[2].xy, IN[0].xyyy 28: TEX TEMP[2].xyz, TEMP[2], SAMP[2], 2D 29: MAD TEMP[2].xyz, CONST[3].yyyy, IMM[2].yzzz, TEMP[2].xyzz 30: MUL TEMP[1].xyz, IMM[3].xyyy, CONST[3].yyyy 31: MAD TEMP[0].xyz, TEMP[0].xxxx, TEMP[2].xyzz, TEMP[1].xyzz 32: MOV TEMP[0].w, IMM[1].wwww 33: MOV OUT[0], TEMP[0] 34: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 52) %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 60) %27 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %28 = load <8 x i32>, <8 x i32> addrspace(2)* %27, align 32, !tbaa !0 %29 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %30 = load <4 x i32>, <4 x i32> addrspace(2)* %29, align 16, !tbaa !0 %31 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %32 = load <8 x i32>, <8 x i32> addrspace(2)* %31, align 32, !tbaa !0 %33 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %34 = load <4 x i32>, <4 x i32> addrspace(2)* %33, align 16, !tbaa !0 %35 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %36 = load <8 x i32>, <8 x i32> addrspace(2)* %35, align 32, !tbaa !0 %37 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %38 = load <4 x i32>, <4 x i32> addrspace(2)* %37, align 16, !tbaa !0 %39 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %40 = load <8 x i32>, <8 x i32> addrspace(2)* %39, align 32, !tbaa !0 %41 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %42 = load <4 x i32>, <4 x i32> addrspace(2)* %41, align 16, !tbaa !0 %43 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %44 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %45 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %46 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %47 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %48 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %49 = bitcast float %43 to i32 %50 = bitcast float %44 to i32 %51 = insertelement <2 x i32> undef, i32 %49, i32 0 %52 = insertelement <2 x i32> %51, i32 %50, i32 1 %53 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %52, <8 x i32> %28, <4 x i32> %30, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %54 = extractelement <4 x float> %53, i32 0 %55 = extractelement <4 x float> %53, i32 3 %56 = bitcast float %47 to i32 %57 = bitcast float %48 to i32 %58 = insertelement <2 x i32> undef, i32 %56, i32 0 %59 = insertelement <2 x i32> %58, i32 %57, i32 1 %60 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %59, <8 x i32> %32, <4 x i32> %34, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %61 = extractelement <4 x float> %60, i32 1 %62 = fmul float %61, %54 %63 = fadd float %62, %55 %64 = bitcast float %45 to i32 %65 = bitcast float %46 to i32 %66 = insertelement <2 x i32> undef, i32 %64, i32 0 %67 = insertelement <2 x i32> %66, i32 %65, i32 1 %68 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %67, <8 x i32> %40, <4 x i32> %42, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %69 = extractelement <4 x float> %68, i32 3 %70 = fadd float %69, -5.000000e-01 %71 = fadd float %69, -1.000000e+00 %72 = fadd float %69, %69 %73 = call float @llvm.AMDIL.clamp.(float %72, float 0.000000e+00, float 1.000000e+00) %74 = fmul float %70, 1.000000e+01 %75 = fmul float %71, -1.000000e+01 %76 = call float @llvm.AMDIL.clamp.(float %74, float 0.000000e+00, float 1.000000e+00) %77 = call float @llvm.AMDIL.clamp.(float %75, float 0.000000e+00, float 1.000000e+00) %78 = fmul float %76, -2.000000e+00 %79 = fadd float %78, 3.000000e+00 %80 = fmul float %77, -2.000000e+00 %81 = fadd float %80, 3.000000e+00 %82 = fmul float %76, %76 %83 = fmul float %77, %77 %84 = fmul float %82, %79 %85 = fmul float %83, %81 %86 = fmul float %85, %84 %87 = fmul float %26, 6.000000e+00 %88 = fadd float %87, 1.000000e+00 %89 = fmul float %63, %88 %90 = fadd float %89, %86 %91 = fmul float %73, -2.000000e+00 %92 = fadd float %91, 3.000000e+00 %93 = fmul float %73, %73 %94 = fmul float %93, %92 %95 = fmul float %90, %94 %96 = fadd float %95, 0xBFE28F5C20000000 %97 = fmul float %96, 1.000000e+01 %98 = call float @llvm.AMDIL.clamp.(float %97, float 0.000000e+00, float 1.000000e+00) %99 = fmul float %98, -2.000000e+00 %100 = fadd float %99, 3.000000e+00 %101 = fmul float %98, %98 %102 = fmul float %101, %100 %103 = bitcast float %43 to i32 %104 = bitcast float %44 to i32 %105 = insertelement <2 x i32> undef, i32 %103, i32 0 %106 = insertelement <2 x i32> %105, i32 %104, i32 1 %107 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %106, <8 x i32> %36, <4 x i32> %38, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %108 = extractelement <4 x float> %107, i32 0 %109 = extractelement <4 x float> %107, i32 1 %110 = extractelement <4 x float> %107, i32 2 %111 = fsub float %108, %25 %112 = fmul float %25, -0.000000e+00 %113 = fadd float %112, %109 %114 = fmul float %25, -0.000000e+00 %115 = fadd float %114, %110 %116 = fmul float %25, 0.000000e+00 %117 = fmul float %25, 0.000000e+00 %118 = fmul float %102, %111 %119 = fadd float %118, %25 %120 = fmul float %102, %113 %121 = fadd float %120, %116 %122 = fmul float %102, %115 %123 = fadd float %122, %117 %124 = call i32 @llvm.SI.packf16(float %119, float %121) %125 = bitcast i32 %124 to float %126 = call i32 @llvm.SI.packf16(float %123, float 1.000000e+00) %127 = bitcast i32 %126 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %125, float %127, float %125, float %127) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s10 ; BEFC030A v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 s_load_dwordx4 s[16:19], s[2:3], 0x0 ; C0880300 s_load_dwordx4 s[20:23], s[4:5], 0x0 ; C08A0500 s_load_dwordx4 s[24:27], s[4:5], 0x4 ; C08C0504 s_load_dwordx4 s[0:3], s[4:5], 0x8 ; C0800508 s_load_dwordx4 s[28:31], s[4:5], 0xc ; C08E050C s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700 s_load_dwordx8 s[40:47], s[6:7], 0x8 ; C0D40708 s_load_dwordx8 s[8:15], s[6:7], 0x10 ; C0C40710 s_load_dwordx8 s[48:55], s[6:7], 0x18 ; C0D80718 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[16:19], 0xd ; C202110D s_buffer_load_dword s5, s[16:19], 0xf ; C202910F image_sample v[0:1], 9, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[32:39], s[20:23] ; F0800900 00A80002 image_sample v6, 2, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[40:47], s[24:27] ; F0800200 00CA0606 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v1, v0, v6 ; 3E020D00 image_sample v0, 8, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[48:55], s[28:31] ; F0800800 00EC0004 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v4, -0.5, v0 ; 060800F1 v_add_f32_e32 v5, v0, v0 ; 060A0100 v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80 image_sample v[6:8], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[8:15], s[0:3] ; F0800700 00020602 v_mov_b32_e32 v2, 0x41200000 ; 7E0402FF 41200000 v_mul_f32_e32 v3, v2, v4 ; 10060902 v_madmk_f32_e32 v0, v0, v2, 0xc1200000 ; 40000500 C1200000 v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mov_b32_e32 v4, 0x40400000 ; 7E0802FF 40400000 v_mad_f32 v9, -2.0, v3, v4 ; D2820009 041206F5 v_mul_f32_e32 v3, v3, v3 ; 10060703 v_mul_f32_e32 v3, v9, v3 ; 10060709 v_mad_f32 v9, -2.0, v0, v4 ; D2820009 041200F5 v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mul_f32_e32 v0, v9, v0 ; 10000109 v_mul_f32_e32 v0, v3, v0 ; 10000103 v_mov_b32_e32 v3, 0x40c00000 ; 7E0602FF 40C00000 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v3, s5, 1.0 ; D2820003 03C80B03 v_mac_f32_e32 v0, v3, v1 ; 3E000303 v_mad_f32 v1, -2.0, v5, v4 ; D2820001 04120AF5 v_mul_f32_e32 v3, v5, v5 ; 10060B05 v_mul_f32_e32 v1, v1, v3 ; 10020701 v_madak_f32_e32 v0, v0, v1, 0xbf147ae1 ; 42000300 BF147AE1 v_mul_f32_e32 v0, v2, v0 ; 10000102 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mac_f32_e32 v4, -2.0, v0 ; 3E0800F5 v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mul_f32_e32 v0, v4, v0 ; 10000104 s_waitcnt vmcnt(0) ; BF8C0770 v_subrev_f32_e32 v1, s4, v6 ; 0A020C04 v_mov_b32_e32 v2, 0x80000000 ; 7E0402FF 80000000 v_mad_f32 v3, s4, v2, v7 ; D2820003 041E0404 v_mac_f32_e32 v8, s4, v2 ; 3E100404 v_mul_f32_e64 v2, 0, s4 ; D2100002 00000880 v_mad_f32 v1, v0, v1, s4 ; D2820001 00120300 v_mad_f32 v3, v3, v0, v2 ; D2820003 040A0103 v_mac_f32_e32 v2, v8, v0 ; 3E040108 v_cvt_pkrtz_f16_f32_e32 v0, v1, v3 ; 5E000701 v_cvt_pkrtz_f16_f32_e64 v1, v2, 1.0 ; D25E0001 0001E502 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 12 Code Size: 396 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], CLIPVERTEX DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL CONST[0..54] DCL TEMP[0..3], LOCAL 0: MUL TEMP[0].xy, CONST[48].xyyy, IN[1].xyyy 1: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 2: ADD TEMP[1].x, TEMP[0].xxxx, CONST[48].wwww 3: MOV TEMP[1].z, TEMP[1].xxxx 4: MUL TEMP[0].xy, CONST[49].xyyy, IN[1].xyyy 5: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 6: ADD TEMP[2].x, TEMP[0].xxxx, CONST[49].wwww 7: MOV TEMP[1].w, TEMP[2].xxxx 8: MUL TEMP[0].xy, CONST[50].xyyy, IN[1].xyyy 9: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 10: ADD TEMP[2].x, TEMP[0].xxxx, CONST[50].wwww 11: MUL TEMP[0].xy, CONST[51].xyyy, IN[1].xyyy 12: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 13: ADD TEMP[3].x, TEMP[0].xxxx, CONST[51].wwww 14: MOV TEMP[2].y, TEMP[3].xxxx 15: MUL TEMP[0].xy, CONST[52].xyyy, IN[1].xyyy 16: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 17: ADD TEMP[3].x, TEMP[0].xxxx, CONST[52].wwww 18: MOV TEMP[2].z, TEMP[3].xxxx 19: MUL TEMP[0].xy, CONST[53].xyyy, IN[1].xyyy 20: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 21: ADD TEMP[0].x, TEMP[0].xxxx, CONST[53].wwww 22: MOV TEMP[2].w, TEMP[0].xxxx 23: MOV TEMP[0].xw, IN[0].xxxw 24: MOV TEMP[1].xy, IN[1].xyxx 25: MAD TEMP[3].x, IN[0].zzzz, CONST[0].zzzz, -IN[0].wwww 26: MOV TEMP[0].z, TEMP[3].xxxx 27: MOV TEMP[0].y, -IN[0].yyyy 28: MAD TEMP[0].xy, CONST[54].xyyy, IN[0].wwww, TEMP[0].xyyy 29: MOV OUT[2], TEMP[1] 30: MOV OUT[3], TEMP[2] 31: MOV OUT[0], TEMP[0] 32: MOV OUT[1], IN[0] 33: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 768) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 772) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 780) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 784) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 788) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 796) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 800) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 804) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 812) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 816) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 820) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 828) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 832) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 836) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 844) %30 = call float @llvm.SI.load.const(<16 x i8> %13, i32 848) %31 = call float @llvm.SI.load.const(<16 x i8> %13, i32 852) %32 = call float @llvm.SI.load.const(<16 x i8> %13, i32 860) %33 = call float @llvm.SI.load.const(<16 x i8> %13, i32 864) %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 868) %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 %37 = add i32 %5, %8 %38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = add i32 %5, %8 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = fmul float %15, %47 %50 = fmul float %16, %48 %51 = fadd float %50, %49 %52 = fadd float %51, %17 %53 = fmul float %18, %47 %54 = fmul float %19, %48 %55 = fadd float %54, %53 %56 = fadd float %55, %20 %57 = fmul float %21, %47 %58 = fmul float %22, %48 %59 = fadd float %58, %57 %60 = fadd float %59, %23 %61 = fmul float %24, %47 %62 = fmul float %25, %48 %63 = fadd float %62, %61 %64 = fadd float %63, %26 %65 = fmul float %27, %47 %66 = fmul float %28, %48 %67 = fadd float %66, %65 %68 = fadd float %67, %29 %69 = fmul float %30, %47 %70 = fmul float %31, %48 %71 = fadd float %70, %69 %72 = fadd float %71, %32 %73 = fmul float %41, %14 %74 = fsub float %73, %42 %75 = fmul float %33, %42 %76 = fadd float %75, %39 %77 = fmul float %34, %42 %78 = fsub float %77, %40 %79 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 16 %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 %81 = call float @llvm.SI.load.const(<16 x i8> %80, i32 0) %82 = fmul float %81, %39 %83 = call float @llvm.SI.load.const(<16 x i8> %80, i32 4) %84 = fmul float %83, %40 %85 = fadd float %82, %84 %86 = call float @llvm.SI.load.const(<16 x i8> %80, i32 8) %87 = fmul float %86, %41 %88 = fadd float %85, %87 %89 = call float @llvm.SI.load.const(<16 x i8> %80, i32 12) %90 = fmul float %89, %42 %91 = fadd float %88, %90 %92 = call float @llvm.SI.load.const(<16 x i8> %80, i32 16) %93 = fmul float %92, %39 %94 = call float @llvm.SI.load.const(<16 x i8> %80, i32 20) %95 = fmul float %94, %40 %96 = fadd float %93, %95 %97 = call float @llvm.SI.load.const(<16 x i8> %80, i32 24) %98 = fmul float %97, %41 %99 = fadd float %96, %98 %100 = call float @llvm.SI.load.const(<16 x i8> %80, i32 28) %101 = fmul float %100, %42 %102 = fadd float %99, %101 %103 = call float @llvm.SI.load.const(<16 x i8> %80, i32 32) %104 = fmul float %103, %39 %105 = call float @llvm.SI.load.const(<16 x i8> %80, i32 36) %106 = fmul float %105, %40 %107 = fadd float %104, %106 %108 = call float @llvm.SI.load.const(<16 x i8> %80, i32 40) %109 = fmul float %108, %41 %110 = fadd float %107, %109 %111 = call float @llvm.SI.load.const(<16 x i8> %80, i32 44) %112 = fmul float %111, %42 %113 = fadd float %110, %112 %114 = call float @llvm.SI.load.const(<16 x i8> %80, i32 48) %115 = fmul float %114, %39 %116 = call float @llvm.SI.load.const(<16 x i8> %80, i32 52) %117 = fmul float %116, %40 %118 = fadd float %115, %117 %119 = call float @llvm.SI.load.const(<16 x i8> %80, i32 56) %120 = fmul float %119, %41 %121 = fadd float %118, %120 %122 = call float @llvm.SI.load.const(<16 x i8> %80, i32 60) %123 = fmul float %122, %42 %124 = fadd float %121, %123 %125 = call float @llvm.SI.load.const(<16 x i8> %80, i32 64) %126 = fmul float %125, %39 %127 = call float @llvm.SI.load.const(<16 x i8> %80, i32 68) %128 = fmul float %127, %40 %129 = fadd float %126, %128 %130 = call float @llvm.SI.load.const(<16 x i8> %80, i32 72) %131 = fmul float %130, %41 %132 = fadd float %129, %131 %133 = call float @llvm.SI.load.const(<16 x i8> %80, i32 76) %134 = fmul float %133, %42 %135 = fadd float %132, %134 %136 = call float @llvm.SI.load.const(<16 x i8> %80, i32 80) %137 = fmul float %136, %39 %138 = call float @llvm.SI.load.const(<16 x i8> %80, i32 84) %139 = fmul float %138, %40 %140 = fadd float %137, %139 %141 = call float @llvm.SI.load.const(<16 x i8> %80, i32 88) %142 = fmul float %141, %41 %143 = fadd float %140, %142 %144 = call float @llvm.SI.load.const(<16 x i8> %80, i32 92) %145 = fmul float %144, %42 %146 = fadd float %143, %145 %147 = call float @llvm.SI.load.const(<16 x i8> %80, i32 96) %148 = fmul float %147, %39 %149 = call float @llvm.SI.load.const(<16 x i8> %80, i32 100) %150 = fmul float %149, %40 %151 = fadd float %148, %150 %152 = call float @llvm.SI.load.const(<16 x i8> %80, i32 104) %153 = fmul float %152, %41 %154 = fadd float %151, %153 %155 = call float @llvm.SI.load.const(<16 x i8> %80, i32 108) %156 = fmul float %155, %42 %157 = fadd float %154, %156 %158 = call float @llvm.SI.load.const(<16 x i8> %80, i32 112) %159 = fmul float %158, %39 %160 = call float @llvm.SI.load.const(<16 x i8> %80, i32 116) %161 = fmul float %160, %40 %162 = fadd float %159, %161 %163 = call float @llvm.SI.load.const(<16 x i8> %80, i32 120) %164 = fmul float %163, %41 %165 = fadd float %162, %164 %166 = call float @llvm.SI.load.const(<16 x i8> %80, i32 124) %167 = fmul float %166, %42 %168 = fadd float %165, %167 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %47, float %48, float %52, float %56) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %60, float %64, float %68, float %72) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %76, float %78, float %74, float %42) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 13, i32 0, float %91, float %102, float %113, float %124) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 14, i32 0, float %135, float %146, float %157, float %168) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_load_dwordx4 s[4:7], s[2:3], 0x0 ; C0820300 s_load_dwordx4 s[0:3], s[2:3], 0x40 ; C0800340 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[4:7], 0x2 ; C2040502 s_buffer_load_dword s9, s[4:7], 0xc0 ; C20485C0 s_buffer_load_dword s10, s[4:7], 0xc1 ; C20505C1 s_buffer_load_dword s11, s[4:7], 0xc3 ; C20585C3 s_buffer_load_dword s12, s[4:7], 0xc4 ; C20605C4 s_buffer_load_dword s13, s[4:7], 0xc5 ; C20685C5 s_buffer_load_dword s14, s[4:7], 0xc7 ; C20705C7 s_buffer_load_dword s15, s[4:7], 0xc8 ; C20785C8 s_buffer_load_dword s16, s[4:7], 0xc9 ; C20805C9 s_buffer_load_dword s17, s[4:7], 0xcb ; C20885CB s_buffer_load_dword s18, s[4:7], 0xcc ; C20905CC s_buffer_load_dword s19, s[4:7], 0xcd ; C20985CD s_buffer_load_dword s20, s[4:7], 0xcf ; C20A05CF s_buffer_load_dword s21, s[4:7], 0xd0 ; C20A85D0 s_buffer_load_dword s22, s[4:7], 0xd1 ; C20B05D1 s_buffer_load_dword s23, s[4:7], 0xd3 ; C20B85D3 s_buffer_load_dword s24, s[4:7], 0xd4 ; C20C05D4 s_buffer_load_dword s25, s[4:7], 0xd5 ; C20C85D5 s_buffer_load_dword s26, s[4:7], 0xd7 ; C20D05D7 s_buffer_load_dword s27, s[4:7], 0xd8 ; C20D85D8 s_buffer_load_dword s4, s[4:7], 0xd9 ; C20205D9 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x2 ; C2038102 s_buffer_load_dword s28, s[0:3], 0x3 ; C20E0103 s_buffer_load_dword s29, s[0:3], 0x4 ; C20E8104 s_buffer_load_dword s30, s[0:3], 0x5 ; C20F0105 s_buffer_load_dword s31, s[0:3], 0x6 ; C20F8106 s_buffer_load_dword s32, s[0:3], 0x7 ; C2100107 s_buffer_load_dword s33, s[0:3], 0x8 ; C2108108 s_buffer_load_dword s34, s[0:3], 0x9 ; C2110109 s_buffer_load_dword s35, s[0:3], 0xa ; C211810A s_buffer_load_dword s36, s[0:3], 0xb ; C212010B s_buffer_load_dword s37, s[0:3], 0xc ; C212810C s_buffer_load_dword s38, s[0:3], 0xd ; C213010D s_buffer_load_dword s39, s[0:3], 0xe ; C213810E s_buffer_load_dword s40, s[0:3], 0xf ; C214010F s_buffer_load_dword s41, s[0:3], 0x10 ; C2148110 s_buffer_load_dword s42, s[0:3], 0x11 ; C2150111 s_buffer_load_dword s43, s[0:3], 0x12 ; C2158112 s_buffer_load_dword s44, s[0:3], 0x13 ; C2160113 s_buffer_load_dword s45, s[0:3], 0x14 ; C2168114 s_buffer_load_dword s46, s[0:3], 0x15 ; C2170115 s_buffer_load_dword s47, s[0:3], 0x16 ; C2178116 s_buffer_load_dword s48, s[0:3], 0x17 ; C2180117 s_buffer_load_dword s49, s[0:3], 0x18 ; C2188118 s_buffer_load_dword s50, s[0:3], 0x19 ; C2190119 s_buffer_load_dword s51, s[0:3], 0x1a ; C219811A s_buffer_load_dword s52, s[0:3], 0x1b ; C21A011B s_buffer_load_dword s53, s[0:3], 0x1c ; C21A811C s_buffer_load_dword s54, s[0:3], 0x1d ; C21B011D s_buffer_load_dword s55, s[0:3], 0x1e ; C21B811E s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s11 ; 7E00020B s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v7, s14 ; 7E0E020E v_mov_b32_e32 v8, s17 ; 7E100211 v_mov_b32_e32 v9, s20 ; 7E120214 v_mov_b32_e32 v10, s23 ; 7E140217 v_mov_b32_e32 v11, s26 ; 7E16021A v_mac_f32_e32 v0, s9, v5 ; 3E000A09 v_mac_f32_e32 v7, s12, v5 ; 3E0E0A0C v_mac_f32_e32 v8, s15, v5 ; 3E100A0F v_mac_f32_e32 v9, s18, v5 ; 3E120A12 v_mac_f32_e32 v10, s21, v5 ; 3E140A15 v_mac_f32_e32 v11, s24, v5 ; 3E160A18 v_mac_f32_e32 v0, s10, v6 ; 3E000C0A v_mac_f32_e32 v7, s13, v6 ; 3E0E0C0D v_mac_f32_e32 v8, s16, v6 ; 3E100C10 v_mac_f32_e32 v9, s19, v6 ; 3E120C13 exp 15, 32, 0, 0, 0, v5, v6, v0, v7 ; F800020F 07000605 v_mac_f32_e32 v10, s22, v6 ; 3E140C16 v_mac_f32_e32 v11, s25, v6 ; 3E160C19 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v0, v3, s8, -v4 ; D2820000 84101103 v_mad_f32 v5, s27, v4, v1 ; D2820005 0406081B v_mad_f32 v6, s4, v4, -v2 ; D2820006 840A0804 v_mul_f32_e32 v7, s6, v2 ; 100E0406 exp 15, 33, 0, 0, 0, v8, v9, v10, v11 ; F800021F 0B0A0908 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v8, s30, v2 ; 1010041E v_mac_f32_e32 v7, s5, v1 ; 3E0E0205 exp 15, 12, 0, 0, 0, v5, v6, v0, v4 ; F80000CF 04000605 v_mac_f32_e32 v8, s29, v1 ; 3E10021D s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s34, v2 ; 10000422 v_mac_f32_e32 v0, s33, v1 ; 3E000221 v_mul_f32_e32 v5, s38, v2 ; 100A0426 v_mac_f32_e32 v5, s37, v1 ; 3E0A0225 v_mul_f32_e32 v6, s42, v2 ; 100C042A v_mul_f32_e32 v9, s46, v2 ; 1012042E v_mul_f32_e32 v10, s50, v2 ; 10140432 v_mul_f32_e32 v2, s54, v2 ; 10040436 v_mac_f32_e32 v6, s41, v1 ; 3E0C0229 v_mac_f32_e32 v9, s45, v1 ; 3E12022D v_mac_f32_e32 v10, s49, v1 ; 3E140231 v_mac_f32_e32 v2, s53, v1 ; 3E040235 v_mac_f32_e32 v7, s7, v3 ; 3E0E0607 v_mac_f32_e32 v8, s31, v3 ; 3E10061F v_mac_f32_e32 v0, s35, v3 ; 3E000623 v_mac_f32_e32 v5, s39, v3 ; 3E0A0627 v_mac_f32_e32 v6, s43, v3 ; 3E0C062B v_mac_f32_e32 v9, s47, v3 ; 3E12062F v_mac_f32_e32 v10, s51, v3 ; 3E140633 v_mac_f32_e32 v2, s55, v3 ; 3E040637 v_mac_f32_e32 v7, s28, v4 ; 3E0E081C v_mac_f32_e32 v8, s32, v4 ; 3E100820 v_mac_f32_e32 v0, s36, v4 ; 3E000824 v_mac_f32_e32 v5, s40, v4 ; 3E0A0828 v_mac_f32_e32 v6, s44, v4 ; 3E0C082C v_mac_f32_e32 v9, s48, v4 ; 3E120830 v_mac_f32_e32 v10, s52, v4 ; 3E140834 v_mac_f32_e32 v2, s0, v4 ; 3E040800 exp 15, 13, 0, 0, 0, v7, v8, v0, v5 ; F80000DF 05000807 exp 15, 14, 0, 1, 0, v6, v9, v10, v2 ; F80008EF 020A0906 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 12 Code Size: 544 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL CONST[0..3] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 6.0000, 1.0000, 0.7500, 0.2500} IMM[1] FLT32 { -1.0000, 0.3000, 0.5900, 0.1100} IMM[2] FLT32 { -0.0800, 14.2857, -2.0000, 3.0000} IMM[3] FLT32 { 0.0300, 0.5000, -0.5000, -1.0000} IMM[4] FLT32 { 10.0000, -10.0000, -0.5800, -100.0000} IMM[5] FLT32 { -0.9000, -0.0100, 0.0000, 0.0000} 0: MUL TEMP[0].x, IMM[0].zzzz, CONST[3].wwww 1: MOV TEMP[1].xy, IN[0].xyyy 2: TEX TEMP[1], TEMP[1], SAMP[0], 2D 3: ADD TEMP[2].x, -TEMP[1].xxxx, IMM[0].yyyy 4: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx 5: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx 6: MAD TEMP[0].x, TEMP[2].xxxx, IMM[0].wwww, TEMP[0].xxxx 7: MOV TEMP[2].xy, IN[1].zwww 8: TEX TEMP[2], TEMP[2], SAMP[3], 2D 9: ADD TEMP[2], TEMP[2], IMM[1].xxxx 10: MAD TEMP[2], TEMP[0].xxxx, TEMP[2], IMM[0].yyyy 11: MOV TEMP[3].xy, IN[0].zwww 12: TEX TEMP[3], TEMP[3], SAMP[4], 2D 13: MUL TEMP[4].xyz, TEMP[2].xyzz, TEMP[3].xyzz 14: DP3 TEMP[5].x, TEMP[4].xyzz, IMM[1].yzww 15: ADD TEMP[0].x, TEMP[5].xxxx, IMM[2].xxxx 16: MUL TEMP[0].x, TEMP[0].xxxx, IMM[2].yyyy 17: MOV_SAT TEMP[5].x, TEMP[0].xxxx 18: MAD TEMP[6].x, TEMP[5].xxxx, IMM[2].zzzz, IMM[2].wwww 19: MUL TEMP[0].x, TEMP[5].xxxx, TEMP[5].xxxx 20: MAD TEMP[0].x, TEMP[6].xxxx, -TEMP[0].xxxx, IMM[0].yyyy 21: MAD TEMP[2].xyz, TEMP[0].xxxx, IMM[3].xxxx, TEMP[4].xyzz 22: MUL TEMP[2].xyz, TEMP[1].zzzz, TEMP[2].xyzz 23: MAD TEMP[4].xyz, TEMP[2].xyzz, IMM[3].yyyy, TEMP[4].xyzz 24: MOV_SAT TEMP[5].xyz, TEMP[4].xyzz 25: MUL TEMP[2].xyz, TEMP[1].yyyy, TEMP[5].xyzz 26: MOV TEMP[6].xy, IN[0].xyyy 27: TEX TEMP[6], TEMP[6], SAMP[2], 2D 28: MAD TEMP[4].xyz, TEMP[5].xyzz, -TEMP[1].yyyy, TEMP[6].xyzz 29: ADD TEMP[5].xy, TEMP[3].wwww, IMM[3].zwww 30: ADD TEMP[7].x, TEMP[3].wwww, TEMP[3].wwww 31: MOV_SAT TEMP[7].x, TEMP[7].xxxx 32: MUL TEMP[5].xy, TEMP[5].xyyy, IMM[4].xyyy 33: MOV_SAT TEMP[5].xy, TEMP[5].xyyy 34: MAD TEMP[3].xy, TEMP[5].xyyy, IMM[2].zzzz, IMM[2].wwww 35: MUL TEMP[5].xy, TEMP[5].xyyy, TEMP[5].xyyy 36: MUL TEMP[3].xy, TEMP[5].xyyy, TEMP[3].xyyy 37: MUL TEMP[0].x, TEMP[3].yyyy, TEMP[3].xxxx 38: MOV TEMP[3].xy, IN[1].xyyy 39: TEX TEMP[3].y, TEMP[3], SAMP[1], 2D 40: MAD TEMP[3].x, TEMP[3].yyyy, TEMP[1].xxxx, TEMP[1].wwww 41: MUL TEMP[1].x, TEMP[1].yyyy, CONST[3].zzzz 42: MUL TEMP[1].x, TEMP[2].wwww, TEMP[1].xxxx 43: MAD TEMP[5].x, CONST[3].wwww, IMM[0].xxxx, IMM[0].yyyy 44: MAD TEMP[0].x, TEMP[3].xxxx, TEMP[5].xxxx, TEMP[0].xxxx 45: MAD TEMP[3].x, TEMP[7].xxxx, IMM[2].zzzz, IMM[2].wwww 46: MUL TEMP[5].x, TEMP[7].xxxx, TEMP[7].xxxx 47: MUL TEMP[3].x, TEMP[5].xxxx, TEMP[3].xxxx 48: MAD TEMP[3].x, TEMP[0].xxxx, TEMP[3].xxxx, IMM[4].zzzz 49: MUL TEMP[3].x, TEMP[3].xxxx, IMM[4].xxxx 50: MOV_SAT TEMP[3].x, TEMP[3].xxxx 51: MAD TEMP[0].x, TEMP[3].xxxx, IMM[2].zzzz, IMM[2].wwww 52: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx 53: MUL TEMP[5].x, TEMP[3].xxxx, TEMP[0].xxxx 54: MAD TEMP[0].xy, TEMP[0].xxxx, TEMP[3].xxxx, IMM[5].xyyy 55: MUL TEMP[0].xy, TEMP[0].xyyy, IMM[4].xwww 56: MAD TEMP[2].xyz, TEMP[5].xxxx, TEMP[4].xyzz, TEMP[2].xyzz 57: MAX TEMP[3].x, TEMP[0].yyyy, IMM[5].zzzz 58: MOV_SAT TEMP[0].x, TEMP[0].xxxx 59: MAD TEMP[5].x, TEMP[3].xxxx, IMM[2].zzzz, IMM[2].wwww 60: MUL TEMP[4].x, TEMP[3].xxxx, TEMP[3].xxxx 61: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 62: MAD TEMP[3].x, TEMP[1].xxxx, -TEMP[4].xxxx, TEMP[6].wwww 63: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[1].xxxx 64: MAD TEMP[1].x, TEMP[0].xxxx, IMM[2].zzzz, IMM[2].wwww 65: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[0].xxxx 66: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[1].xxxx 67: MAD TEMP[0].x, TEMP[0].xxxx, TEMP[3].xxxx, TEMP[4].xxxx 68: MOV TEMP[2].w, TEMP[0].xxxx 69: MOV OUT[0], TEMP[2] 70: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 56) %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 60) %27 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %28 = load <8 x i32>, <8 x i32> addrspace(2)* %27, align 32, !tbaa !0 %29 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %30 = load <4 x i32>, <4 x i32> addrspace(2)* %29, align 16, !tbaa !0 %31 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %32 = load <8 x i32>, <8 x i32> addrspace(2)* %31, align 32, !tbaa !0 %33 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %34 = load <4 x i32>, <4 x i32> addrspace(2)* %33, align 16, !tbaa !0 %35 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %36 = load <8 x i32>, <8 x i32> addrspace(2)* %35, align 32, !tbaa !0 %37 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %38 = load <4 x i32>, <4 x i32> addrspace(2)* %37, align 16, !tbaa !0 %39 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %40 = load <8 x i32>, <8 x i32> addrspace(2)* %39, align 32, !tbaa !0 %41 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %42 = load <4 x i32>, <4 x i32> addrspace(2)* %41, align 16, !tbaa !0 %43 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %44 = load <8 x i32>, <8 x i32> addrspace(2)* %43, align 32, !tbaa !0 %45 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %46 = load <4 x i32>, <4 x i32> addrspace(2)* %45, align 16, !tbaa !0 %47 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %48 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %49 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %50 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %51 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %52 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %53 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) %54 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %55 = fmul float %26, 7.500000e-01 %56 = bitcast float %47 to i32 %57 = bitcast float %48 to i32 %58 = insertelement <2 x i32> undef, i32 %56, i32 0 %59 = insertelement <2 x i32> %58, i32 %57, i32 1 %60 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %59, <8 x i32> %28, <4 x i32> %30, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %61 = extractelement <4 x float> %60, i32 0 %62 = extractelement <4 x float> %60, i32 1 %63 = extractelement <4 x float> %60, i32 2 %64 = extractelement <4 x float> %60, i32 3 %65 = fsub float 1.000000e+00, %61 %66 = fmul float %65, %65 %67 = fmul float %66, %66 %68 = fmul float %67, 2.500000e-01 %69 = fadd float %68, %55 %70 = bitcast float %53 to i32 %71 = bitcast float %54 to i32 %72 = insertelement <2 x i32> undef, i32 %70, i32 0 %73 = insertelement <2 x i32> %72, i32 %71, i32 1 %74 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %73, <8 x i32> %40, <4 x i32> %42, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %75 = extractelement <4 x float> %74, i32 0 %76 = extractelement <4 x float> %74, i32 1 %77 = extractelement <4 x float> %74, i32 2 %78 = extractelement <4 x float> %74, i32 3 %79 = fadd float %75, -1.000000e+00 %80 = fadd float %76, -1.000000e+00 %81 = fadd float %77, -1.000000e+00 %82 = fadd float %78, -1.000000e+00 %83 = fmul float %69, %79 %84 = fadd float %83, 1.000000e+00 %85 = fmul float %69, %80 %86 = fadd float %85, 1.000000e+00 %87 = fmul float %69, %81 %88 = fadd float %87, 1.000000e+00 %89 = fmul float %69, %82 %90 = fadd float %89, 1.000000e+00 %91 = bitcast float %49 to i32 %92 = bitcast float %50 to i32 %93 = insertelement <2 x i32> undef, i32 %91, i32 0 %94 = insertelement <2 x i32> %93, i32 %92, i32 1 %95 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %94, <8 x i32> %44, <4 x i32> %46, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %96 = extractelement <4 x float> %95, i32 0 %97 = extractelement <4 x float> %95, i32 1 %98 = extractelement <4 x float> %95, i32 2 %99 = extractelement <4 x float> %95, i32 3 %100 = fmul float %84, %96 %101 = fmul float %86, %97 %102 = fmul float %88, %98 %103 = fmul float %100, 0x3FD3333340000000 %104 = fmul float %101, 0x3FE2E147A0000000 %105 = fadd float %104, %103 %106 = fmul float %102, 0x3FBC28F5C0000000 %107 = fadd float %105, %106 %108 = fadd float %107, 0xBFB47AE140000000 %109 = fmul float %108, 0x402C924920000000 %110 = call float @llvm.AMDIL.clamp.(float %109, float 0.000000e+00, float 1.000000e+00) %111 = fmul float %110, -2.000000e+00 %112 = fadd float %111, 3.000000e+00 %113 = fmul float %110, %110 %114 = fmul float %113, %112 %115 = fsub float 1.000000e+00, %114 %116 = fmul float %115, 0x3F9EB851E0000000 %117 = fadd float %116, %100 %118 = fmul float %115, 0x3F9EB851E0000000 %119 = fadd float %118, %101 %120 = fmul float %115, 0x3F9EB851E0000000 %121 = fadd float %120, %102 %122 = fmul float %63, %117 %123 = fmul float %63, %119 %124 = fmul float %63, %121 %125 = fmul float %122, 5.000000e-01 %126 = fadd float %125, %100 %127 = fmul float %123, 5.000000e-01 %128 = fadd float %127, %101 %129 = fmul float %124, 5.000000e-01 %130 = fadd float %129, %102 %131 = call float @llvm.AMDIL.clamp.(float %126, float 0.000000e+00, float 1.000000e+00) %132 = call float @llvm.AMDIL.clamp.(float %128, float 0.000000e+00, float 1.000000e+00) %133 = call float @llvm.AMDIL.clamp.(float %130, float 0.000000e+00, float 1.000000e+00) %134 = fmul float %62, %131 %135 = fmul float %62, %132 %136 = fmul float %62, %133 %137 = bitcast float %47 to i32 %138 = bitcast float %48 to i32 %139 = insertelement <2 x i32> undef, i32 %137, i32 0 %140 = insertelement <2 x i32> %139, i32 %138, i32 1 %141 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %140, <8 x i32> %36, <4 x i32> %38, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %142 = extractelement <4 x float> %141, i32 0 %143 = extractelement <4 x float> %141, i32 1 %144 = extractelement <4 x float> %141, i32 2 %145 = extractelement <4 x float> %141, i32 3 %146 = fmul float %62, %131 %147 = fsub float %142, %146 %148 = fmul float %62, %132 %149 = fsub float %143, %148 %150 = fmul float %62, %133 %151 = fsub float %144, %150 %152 = fadd float %99, -5.000000e-01 %153 = fadd float %99, -1.000000e+00 %154 = fadd float %99, %99 %155 = call float @llvm.AMDIL.clamp.(float %154, float 0.000000e+00, float 1.000000e+00) %156 = fmul float %152, 1.000000e+01 %157 = fmul float %153, -1.000000e+01 %158 = call float @llvm.AMDIL.clamp.(float %156, float 0.000000e+00, float 1.000000e+00) %159 = call float @llvm.AMDIL.clamp.(float %157, float 0.000000e+00, float 1.000000e+00) %160 = fmul float %158, -2.000000e+00 %161 = fadd float %160, 3.000000e+00 %162 = fmul float %159, -2.000000e+00 %163 = fadd float %162, 3.000000e+00 %164 = fmul float %158, %158 %165 = fmul float %159, %159 %166 = fmul float %164, %161 %167 = fmul float %165, %163 %168 = fmul float %167, %166 %169 = bitcast float %51 to i32 %170 = bitcast float %52 to i32 %171 = insertelement <2 x i32> undef, i32 %169, i32 0 %172 = insertelement <2 x i32> %171, i32 %170, i32 1 %173 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %172, <8 x i32> %32, <4 x i32> %34, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %174 = extractelement <4 x float> %173, i32 1 %175 = fmul float %174, %61 %176 = fadd float %175, %64 %177 = fmul float %62, %25 %178 = fmul float %90, %177 %179 = fmul float %26, 6.000000e+00 %180 = fadd float %179, 1.000000e+00 %181 = fmul float %176, %180 %182 = fadd float %181, %168 %183 = fmul float %155, -2.000000e+00 %184 = fadd float %183, 3.000000e+00 %185 = fmul float %155, %155 %186 = fmul float %185, %184 %187 = fmul float %182, %186 %188 = fadd float %187, 0xBFE28F5C20000000 %189 = fmul float %188, 1.000000e+01 %190 = call float @llvm.AMDIL.clamp.(float %189, float 0.000000e+00, float 1.000000e+00) %191 = fmul float %190, -2.000000e+00 %192 = fadd float %191, 3.000000e+00 %193 = fmul float %190, %190 %194 = fmul float %193, %192 %195 = fmul float %192, %193 %196 = fadd float %195, 0xBFECCCCCC0000000 %197 = fmul float %192, %193 %198 = fadd float %197, 0xBF847AE140000000 %199 = fmul float %196, 1.000000e+01 %200 = fmul float %198, -1.000000e+02 %201 = fmul float %194, %147 %202 = fadd float %201, %134 %203 = fmul float %194, %149 %204 = fadd float %203, %135 %205 = fmul float %194, %151 %206 = fadd float %205, %136 %207 = call float @llvm.maxnum.f32(float %200, float 0.000000e+00) %208 = call float @llvm.AMDIL.clamp.(float %199, float 0.000000e+00, float 1.000000e+00) %209 = fmul float %207, -2.000000e+00 %210 = fadd float %209, 3.000000e+00 %211 = fmul float %207, %207 %212 = fmul float %211, %210 %213 = fmul float %212, %178 %214 = fsub float %145, %213 %215 = fmul float %212, %178 %216 = fmul float %208, -2.000000e+00 %217 = fadd float %216, 3.000000e+00 %218 = fmul float %208, %208 %219 = fmul float %218, %217 %220 = fmul float %219, %214 %221 = fadd float %220, %215 %222 = call i32 @llvm.SI.packf16(float %202, float %204) %223 = bitcast i32 %222 to float %224 = call i32 @llvm.SI.packf16(float %206, float %221) %225 = bitcast i32 %224 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %223, float %225, float %223, float %225) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s10 ; BEFC030A v_mov_b32_e32 v2, 0x3f400000 ; 7E0402FF 3F400000 v_mov_b32_e32 v3, 0xbda3d70a ; 7E0602FF BDA3D70A v_mov_b32_e32 v4, 0x40400000 ; 7E0802FF 40400000 v_mov_b32_e32 v5, 0x3cf5c28f ; 7E0A02FF 3CF5C28F v_mov_b32_e32 v6, 0x41200000 ; 7E0C02FF 41200000 v_mov_b32_e32 v7, 0x40c00000 ; 7E0E02FF 40C00000 v_mov_b32_e32 v8, 0xc2c80000 ; 7E1002FF C2C80000 v_interp_p1_f32 v9, v0, 0, 0, [m0] ; C8240000 v_interp_p2_f32 v9, [v9], v1, 0, 0, [m0] ; C8250001 v_interp_p1_f32 v10, v0, 1, 0, [m0] ; C8280100 v_interp_p2_f32 v10, [v10], v1, 1, 0, [m0] ; C8290101 v_interp_p1_f32 v11, v0, 2, 0, [m0] ; C82C0200 v_interp_p2_f32 v11, [v11], v1, 2, 0, [m0] ; C82D0201 v_interp_p1_f32 v12, v0, 3, 0, [m0] ; C8300300 v_interp_p2_f32 v12, [v12], v1, 3, 0, [m0] ; C8310301 v_interp_p1_f32 v13, v0, 0, 1, [m0] ; C8340400 v_interp_p2_f32 v13, [v13], v1, 0, 1, [m0] ; C8350401 v_interp_p1_f32 v14, v0, 1, 1, [m0] ; C8380500 v_interp_p2_f32 v14, [v14], v1, 1, 1, [m0] ; C8390501 v_interp_p1_f32 v15, v0, 2, 1, [m0] ; C83C0600 v_interp_p2_f32 v15, [v15], v1, 2, 1, [m0] ; C83D0601 v_interp_p1_f32 v16, v0, 3, 1, [m0] ; C8400700 v_interp_p2_f32 v16, [v16], v1, 3, 1, [m0] ; C8410701 s_load_dwordx4 s[28:31], s[2:3], 0x0 ; C08E0300 s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 s_load_dwordx4 s[0:3], s[4:5], 0x4 ; C0800504 s_load_dwordx4 s[16:19], s[4:5], 0x8 ; C0880508 s_load_dwordx4 s[36:39], s[4:5], 0xc ; C092050C s_load_dwordx4 s[40:43], s[4:5], 0x10 ; C0940510 s_load_dwordx8 s[44:51], s[6:7], 0x0 ; C0D60700 s_load_dwordx8 s[8:15], s[6:7], 0x8 ; C0C40708 s_load_dwordx8 s[20:27], s[6:7], 0x10 ; C0CA0710 s_load_dwordx8 s[52:59], s[6:7], 0x18 ; C0DA0718 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[28:31], 0xe ; C2021D0E s_buffer_load_dword s5, s[28:31], 0xf ; C2029D0F s_load_dwordx8 s[60:67], s[6:7], 0x20 ; C0DE0720 image_sample v[17:20], 15, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[44:51], s[32:35] ; F0800F00 010B1109 s_waitcnt vmcnt(0) ; BF8C0770 v_sub_f32_e32 v0, 1.0, v17 ; 080022F2 v_mad_f32 v0, -v17, v0, v0 ; D2820000 24020111 image_sample v[21:24], 15, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[52:59], s[36:39] ; F0800F00 012D150F v_mul_f32_e32 v0, v0, v0 ; 10000100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s5, v2 ; 10020405 image_sample v[25:28], 15, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[60:67], s[40:43] ; F0800F00 014F190B v_madmk_f32_e32 v0, v0, v1, 0x3e800000 ; 40000300 3E800000 s_waitcnt vmcnt(1) ; BF8C0771 v_mad_f32 v1, v21, v0, -v0 ; D2820001 84020115 v_mad_f32 v2, v22, v0, -v0 ; D2820002 84020116 v_mad_f32 v11, v23, v0, -v0 ; D282000B 84020117 v_mad_f32 v0, v24, v0, -v0 ; D2820000 84020118 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v25, v1, v25 ; D2820001 04660319 v_mad_f32 v2, v26, v2, v26 ; D2820002 046A051A v_mad_f32 v11, v27, v11, v27 ; D282000B 046E171B v_mul_f32_e32 v12, 0x3e99999a, v1 ; 101802FF 3E99999A v_madmk_f32_e32 v12, v2, v12, 0x3f170a3d ; 40181902 3F170A3D v_madmk_f32_e32 v12, v11, v12, 0x3de147ae ; 4018190B 3DE147AE v_add_f32_e32 v3, v12, v3 ; 0606070C v_mul_f32_e32 v3, 0x41649249, v3 ; 100606FF 41649249 v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680 v_mad_f32 v12, -2.0, v3, v4 ; D282000C 041206F5 v_mul_f32_e32 v3, v3, v3 ; 10060703 v_mad_f32 v3, -v3, v12, 1.0 ; D2820003 23CA1903 v_mad_f32 v12, v5, v3, v1 ; D282000C 04060705 v_mad_f32 v15, v5, v3, v2 ; D282000F 040A0705 v_mad_f32 v3, v5, v3, v11 ; D2820003 042E0705 v_mul_f32_e32 v5, v12, v19 ; 100A270C v_mul_f32_e32 v12, v15, v19 ; 1018270F v_mul_f32_e32 v3, v3, v19 ; 10062703 v_mac_f32_e32 v1, 0.5, v5 ; 3E020AF0 v_mac_f32_e32 v2, 0.5, v12 ; 3E0418F0 v_mac_f32_e32 v11, 0.5, v3 ; 3E1606F0 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_add_f32_e64 v3, 0, v11 clamp ; D2060803 00021680 v_mul_f32_e32 v5, v1, v18 ; 100A2501 v_mul_f32_e32 v11, v2, v18 ; 10162502 v_add_f32_e32 v12, -0.5, v28 ; 061838F1 v_add_f32_e32 v15, v28, v28 ; 061E391C v_madmk_f32_e32 v16, v28, v6, 0xc1200000 ; 40200D1C C1200000 v_mul_f32_e32 v19, v3, v18 ; 10262503 v_mul_f32_e32 v12, v6, v12 ; 10181906 v_add_f32_e64 v12, 0, v12 clamp ; D206080C 00021880 v_add_f32_e64 v16, 0, v16 clamp ; D2060810 00022080 v_mad_f32 v21, -2.0, v12, v4 ; D2820015 041218F5 v_mad_f32 v22, -2.0, v16, v4 ; D2820016 041220F5 v_mul_f32_e32 v12, v12, v12 ; 1018190C v_mul_f32_e32 v16, v16, v16 ; 10202110 v_mul_f32_e32 v12, v21, v12 ; 10181915 v_mul_f32_e32 v16, v22, v16 ; 10202116 image_sample v[21:24], 15, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[20:27], s[16:19] ; F0800F00 00851509 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, -v18, v1, v21 ; D2820001 24560312 v_mad_f32 v2, -v18, v2, v22 ; D2820002 245A0512 v_mad_f32 v3, -v18, v3, v23 ; D2820003 245E0712 v_add_f32_e64 v9, 0, v15 clamp ; D2060809 00021E80 v_mul_f32_e32 v10, v12, v16 ; 1014210C image_sample v12, 2, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[8:15], s[0:3] ; F0800200 00020C0D s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v20, v17, v12 ; 3E281911 v_mul_f32_e32 v12, s4, v18 ; 10182404 v_mac_f32_e32 v12, v12, v0 ; 3E18010C v_mad_f32 v0, v7, s5, 1.0 ; D2820000 03C80B07 v_mac_f32_e32 v10, v0, v20 ; 3E142900 v_mad_f32 v0, -2.0, v9, v4 ; D2820000 041212F5 v_mul_f32_e32 v7, v9, v9 ; 100E1309 v_mul_f32_e32 v0, v0, v7 ; 10000F00 v_madak_f32_e32 v0, v10, v0, 0xbf147ae1 ; 4200010A BF147AE1 v_mul_f32_e32 v0, v6, v0 ; 10000106 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mad_f32 v7, -2.0, v0, v4 ; D2820007 041200F5 v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mul_f32_e32 v9, v7, v0 ; 10120107 v_madak_f32_e32 v10, v0, v7, 0xbf666666 ; 42140F00 BF666666 v_madak_f32_e32 v0, v0, v7, 0xbc23d70a ; 42000F00 BC23D70A v_mul_f32_e32 v6, v6, v10 ; 100C1506 v_mul_f32_e32 v0, v0, v8 ; 10001100 v_mac_f32_e32 v5, v1, v9 ; 3E0A1301 v_mac_f32_e32 v11, v2, v9 ; 3E161302 v_mac_f32_e32 v19, v3, v9 ; 3E261303 v_max_f32_e32 v0, 0, v0 ; 20000080 v_add_f32_e64 v1, 0, v6 clamp ; D2060801 00020C80 v_mad_f32 v2, -2.0, v0, v4 ; D2820002 041200F5 v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mul_f32_e32 v0, v2, v0 ; 10000102 v_mul_f32_e32 v2, v12, v0 ; 1004010C v_mad_f32 v0, -v0, v12, v24 ; D2820000 24621900 v_mac_f32_e32 v4, -2.0, v1 ; 3E0802F5 v_mul_f32_e32 v1, v1, v1 ; 10020301 v_mul_f32_e32 v1, v4, v1 ; 10020304 v_mac_f32_e32 v2, v0, v1 ; 3E040300 v_cvt_pkrtz_f16_f32_e32 v0, v5, v11 ; 5E001705 v_cvt_pkrtz_f16_f32_e32 v1, v19, v2 ; 5E020513 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 32 Code Size: 772 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], CLIPVERTEX DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL CONST[0..54] DCL TEMP[0..3], LOCAL 0: MUL TEMP[0].xy, CONST[48].xyyy, IN[1].xyyy 1: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 2: ADD TEMP[1].x, TEMP[0].xxxx, CONST[48].wwww 3: MOV TEMP[1].z, TEMP[1].xxxx 4: MUL TEMP[0].xy, CONST[49].xyyy, IN[1].xyyy 5: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 6: ADD TEMP[2].x, TEMP[0].xxxx, CONST[49].wwww 7: MOV TEMP[1].w, TEMP[2].xxxx 8: MUL TEMP[0].xy, CONST[50].xyyy, IN[1].xyyy 9: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 10: ADD TEMP[2].x, TEMP[0].xxxx, CONST[50].wwww 11: MUL TEMP[0].xy, CONST[51].xyyy, IN[1].xyyy 12: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 13: ADD TEMP[3].x, TEMP[0].xxxx, CONST[51].wwww 14: MOV TEMP[2].y, TEMP[3].xxxx 15: MUL TEMP[0].xy, CONST[52].xyyy, IN[1].xyyy 16: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 17: ADD TEMP[3].x, TEMP[0].xxxx, CONST[52].wwww 18: MOV TEMP[2].z, TEMP[3].xxxx 19: MUL TEMP[0].xy, CONST[53].xyyy, IN[1].xyyy 20: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 21: ADD TEMP[0].x, TEMP[0].xxxx, CONST[53].wwww 22: MOV TEMP[2].w, TEMP[0].xxxx 23: MOV TEMP[0].xw, IN[0].xxxw 24: MOV TEMP[1].xy, IN[1].xyxx 25: MAD TEMP[3].x, IN[0].zzzz, CONST[0].zzzz, -IN[0].wwww 26: MOV TEMP[0].z, TEMP[3].xxxx 27: MOV TEMP[0].y, -IN[0].yyyy 28: MAD TEMP[0].xy, CONST[54].xyyy, IN[0].wwww, TEMP[0].xyyy 29: MOV OUT[2], TEMP[1] 30: MOV OUT[3], TEMP[2] 31: MOV OUT[0], TEMP[0] 32: MOV OUT[1], IN[0] 33: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 768) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 772) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 780) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 784) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 788) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 796) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 800) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 804) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 812) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 816) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 820) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 828) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 832) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 836) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 844) %30 = call float @llvm.SI.load.const(<16 x i8> %13, i32 848) %31 = call float @llvm.SI.load.const(<16 x i8> %13, i32 852) %32 = call float @llvm.SI.load.const(<16 x i8> %13, i32 860) %33 = call float @llvm.SI.load.const(<16 x i8> %13, i32 864) %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 868) %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 %37 = add i32 %5, %8 %38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = add i32 %5, %8 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = fmul float %15, %47 %50 = fmul float %16, %48 %51 = fadd float %50, %49 %52 = fadd float %51, %17 %53 = fmul float %18, %47 %54 = fmul float %19, %48 %55 = fadd float %54, %53 %56 = fadd float %55, %20 %57 = fmul float %21, %47 %58 = fmul float %22, %48 %59 = fadd float %58, %57 %60 = fadd float %59, %23 %61 = fmul float %24, %47 %62 = fmul float %25, %48 %63 = fadd float %62, %61 %64 = fadd float %63, %26 %65 = fmul float %27, %47 %66 = fmul float %28, %48 %67 = fadd float %66, %65 %68 = fadd float %67, %29 %69 = fmul float %30, %47 %70 = fmul float %31, %48 %71 = fadd float %70, %69 %72 = fadd float %71, %32 %73 = fmul float %41, %14 %74 = fsub float %73, %42 %75 = fmul float %33, %42 %76 = fadd float %75, %39 %77 = fmul float %34, %42 %78 = fsub float %77, %40 %79 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 16 %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 %81 = call float @llvm.SI.load.const(<16 x i8> %80, i32 0) %82 = fmul float %81, %39 %83 = call float @llvm.SI.load.const(<16 x i8> %80, i32 4) %84 = fmul float %83, %40 %85 = fadd float %82, %84 %86 = call float @llvm.SI.load.const(<16 x i8> %80, i32 8) %87 = fmul float %86, %41 %88 = fadd float %85, %87 %89 = call float @llvm.SI.load.const(<16 x i8> %80, i32 12) %90 = fmul float %89, %42 %91 = fadd float %88, %90 %92 = call float @llvm.SI.load.const(<16 x i8> %80, i32 16) %93 = fmul float %92, %39 %94 = call float @llvm.SI.load.const(<16 x i8> %80, i32 20) %95 = fmul float %94, %40 %96 = fadd float %93, %95 %97 = call float @llvm.SI.load.const(<16 x i8> %80, i32 24) %98 = fmul float %97, %41 %99 = fadd float %96, %98 %100 = call float @llvm.SI.load.const(<16 x i8> %80, i32 28) %101 = fmul float %100, %42 %102 = fadd float %99, %101 %103 = call float @llvm.SI.load.const(<16 x i8> %80, i32 32) %104 = fmul float %103, %39 %105 = call float @llvm.SI.load.const(<16 x i8> %80, i32 36) %106 = fmul float %105, %40 %107 = fadd float %104, %106 %108 = call float @llvm.SI.load.const(<16 x i8> %80, i32 40) %109 = fmul float %108, %41 %110 = fadd float %107, %109 %111 = call float @llvm.SI.load.const(<16 x i8> %80, i32 44) %112 = fmul float %111, %42 %113 = fadd float %110, %112 %114 = call float @llvm.SI.load.const(<16 x i8> %80, i32 48) %115 = fmul float %114, %39 %116 = call float @llvm.SI.load.const(<16 x i8> %80, i32 52) %117 = fmul float %116, %40 %118 = fadd float %115, %117 %119 = call float @llvm.SI.load.const(<16 x i8> %80, i32 56) %120 = fmul float %119, %41 %121 = fadd float %118, %120 %122 = call float @llvm.SI.load.const(<16 x i8> %80, i32 60) %123 = fmul float %122, %42 %124 = fadd float %121, %123 %125 = call float @llvm.SI.load.const(<16 x i8> %80, i32 64) %126 = fmul float %125, %39 %127 = call float @llvm.SI.load.const(<16 x i8> %80, i32 68) %128 = fmul float %127, %40 %129 = fadd float %126, %128 %130 = call float @llvm.SI.load.const(<16 x i8> %80, i32 72) %131 = fmul float %130, %41 %132 = fadd float %129, %131 %133 = call float @llvm.SI.load.const(<16 x i8> %80, i32 76) %134 = fmul float %133, %42 %135 = fadd float %132, %134 %136 = call float @llvm.SI.load.const(<16 x i8> %80, i32 80) %137 = fmul float %136, %39 %138 = call float @llvm.SI.load.const(<16 x i8> %80, i32 84) %139 = fmul float %138, %40 %140 = fadd float %137, %139 %141 = call float @llvm.SI.load.const(<16 x i8> %80, i32 88) %142 = fmul float %141, %41 %143 = fadd float %140, %142 %144 = call float @llvm.SI.load.const(<16 x i8> %80, i32 92) %145 = fmul float %144, %42 %146 = fadd float %143, %145 %147 = call float @llvm.SI.load.const(<16 x i8> %80, i32 96) %148 = fmul float %147, %39 %149 = call float @llvm.SI.load.const(<16 x i8> %80, i32 100) %150 = fmul float %149, %40 %151 = fadd float %148, %150 %152 = call float @llvm.SI.load.const(<16 x i8> %80, i32 104) %153 = fmul float %152, %41 %154 = fadd float %151, %153 %155 = call float @llvm.SI.load.const(<16 x i8> %80, i32 108) %156 = fmul float %155, %42 %157 = fadd float %154, %156 %158 = call float @llvm.SI.load.const(<16 x i8> %80, i32 112) %159 = fmul float %158, %39 %160 = call float @llvm.SI.load.const(<16 x i8> %80, i32 116) %161 = fmul float %160, %40 %162 = fadd float %159, %161 %163 = call float @llvm.SI.load.const(<16 x i8> %80, i32 120) %164 = fmul float %163, %41 %165 = fadd float %162, %164 %166 = call float @llvm.SI.load.const(<16 x i8> %80, i32 124) %167 = fmul float %166, %42 %168 = fadd float %165, %167 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %47, float %48, float %52, float %56) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %60, float %64, float %68, float %72) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %76, float %78, float %74, float %42) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 13, i32 0, float %91, float %102, float %113, float %124) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 14, i32 0, float %135, float %146, float %157, float %168) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_load_dwordx4 s[4:7], s[2:3], 0x0 ; C0820300 s_load_dwordx4 s[0:3], s[2:3], 0x40 ; C0800340 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[4:7], 0x2 ; C2040502 s_buffer_load_dword s9, s[4:7], 0xc0 ; C20485C0 s_buffer_load_dword s10, s[4:7], 0xc1 ; C20505C1 s_buffer_load_dword s11, s[4:7], 0xc3 ; C20585C3 s_buffer_load_dword s12, s[4:7], 0xc4 ; C20605C4 s_buffer_load_dword s13, s[4:7], 0xc5 ; C20685C5 s_buffer_load_dword s14, s[4:7], 0xc7 ; C20705C7 s_buffer_load_dword s15, s[4:7], 0xc8 ; C20785C8 s_buffer_load_dword s16, s[4:7], 0xc9 ; C20805C9 s_buffer_load_dword s17, s[4:7], 0xcb ; C20885CB s_buffer_load_dword s18, s[4:7], 0xcc ; C20905CC s_buffer_load_dword s19, s[4:7], 0xcd ; C20985CD s_buffer_load_dword s20, s[4:7], 0xcf ; C20A05CF s_buffer_load_dword s21, s[4:7], 0xd0 ; C20A85D0 s_buffer_load_dword s22, s[4:7], 0xd1 ; C20B05D1 s_buffer_load_dword s23, s[4:7], 0xd3 ; C20B85D3 s_buffer_load_dword s24, s[4:7], 0xd4 ; C20C05D4 s_buffer_load_dword s25, s[4:7], 0xd5 ; C20C85D5 s_buffer_load_dword s26, s[4:7], 0xd7 ; C20D05D7 s_buffer_load_dword s27, s[4:7], 0xd8 ; C20D85D8 s_buffer_load_dword s4, s[4:7], 0xd9 ; C20205D9 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x2 ; C2038102 s_buffer_load_dword s28, s[0:3], 0x3 ; C20E0103 s_buffer_load_dword s29, s[0:3], 0x4 ; C20E8104 s_buffer_load_dword s30, s[0:3], 0x5 ; C20F0105 s_buffer_load_dword s31, s[0:3], 0x6 ; C20F8106 s_buffer_load_dword s32, s[0:3], 0x7 ; C2100107 s_buffer_load_dword s33, s[0:3], 0x8 ; C2108108 s_buffer_load_dword s34, s[0:3], 0x9 ; C2110109 s_buffer_load_dword s35, s[0:3], 0xa ; C211810A s_buffer_load_dword s36, s[0:3], 0xb ; C212010B s_buffer_load_dword s37, s[0:3], 0xc ; C212810C s_buffer_load_dword s38, s[0:3], 0xd ; C213010D s_buffer_load_dword s39, s[0:3], 0xe ; C213810E s_buffer_load_dword s40, s[0:3], 0xf ; C214010F s_buffer_load_dword s41, s[0:3], 0x10 ; C2148110 s_buffer_load_dword s42, s[0:3], 0x11 ; C2150111 s_buffer_load_dword s43, s[0:3], 0x12 ; C2158112 s_buffer_load_dword s44, s[0:3], 0x13 ; C2160113 s_buffer_load_dword s45, s[0:3], 0x14 ; C2168114 s_buffer_load_dword s46, s[0:3], 0x15 ; C2170115 s_buffer_load_dword s47, s[0:3], 0x16 ; C2178116 s_buffer_load_dword s48, s[0:3], 0x17 ; C2180117 s_buffer_load_dword s49, s[0:3], 0x18 ; C2188118 s_buffer_load_dword s50, s[0:3], 0x19 ; C2190119 s_buffer_load_dword s51, s[0:3], 0x1a ; C219811A s_buffer_load_dword s52, s[0:3], 0x1b ; C21A011B s_buffer_load_dword s53, s[0:3], 0x1c ; C21A811C s_buffer_load_dword s54, s[0:3], 0x1d ; C21B011D s_buffer_load_dword s55, s[0:3], 0x1e ; C21B811E s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s11 ; 7E00020B s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v7, s14 ; 7E0E020E v_mov_b32_e32 v8, s17 ; 7E100211 v_mov_b32_e32 v9, s20 ; 7E120214 v_mov_b32_e32 v10, s23 ; 7E140217 v_mov_b32_e32 v11, s26 ; 7E16021A v_mac_f32_e32 v0, s9, v5 ; 3E000A09 v_mac_f32_e32 v7, s12, v5 ; 3E0E0A0C v_mac_f32_e32 v8, s15, v5 ; 3E100A0F v_mac_f32_e32 v9, s18, v5 ; 3E120A12 v_mac_f32_e32 v10, s21, v5 ; 3E140A15 v_mac_f32_e32 v11, s24, v5 ; 3E160A18 v_mac_f32_e32 v0, s10, v6 ; 3E000C0A v_mac_f32_e32 v7, s13, v6 ; 3E0E0C0D v_mac_f32_e32 v8, s16, v6 ; 3E100C10 v_mac_f32_e32 v9, s19, v6 ; 3E120C13 exp 15, 32, 0, 0, 0, v5, v6, v0, v7 ; F800020F 07000605 v_mac_f32_e32 v10, s22, v6 ; 3E140C16 v_mac_f32_e32 v11, s25, v6 ; 3E160C19 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v0, v3, s8, -v4 ; D2820000 84101103 v_mad_f32 v5, s27, v4, v1 ; D2820005 0406081B v_mad_f32 v6, s4, v4, -v2 ; D2820006 840A0804 v_mul_f32_e32 v7, s6, v2 ; 100E0406 exp 15, 33, 0, 0, 0, v8, v9, v10, v11 ; F800021F 0B0A0908 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v8, s30, v2 ; 1010041E v_mac_f32_e32 v7, s5, v1 ; 3E0E0205 exp 15, 12, 0, 0, 0, v5, v6, v0, v4 ; F80000CF 04000605 v_mac_f32_e32 v8, s29, v1 ; 3E10021D s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s34, v2 ; 10000422 v_mac_f32_e32 v0, s33, v1 ; 3E000221 v_mul_f32_e32 v5, s38, v2 ; 100A0426 v_mac_f32_e32 v5, s37, v1 ; 3E0A0225 v_mul_f32_e32 v6, s42, v2 ; 100C042A v_mul_f32_e32 v9, s46, v2 ; 1012042E v_mul_f32_e32 v10, s50, v2 ; 10140432 v_mul_f32_e32 v2, s54, v2 ; 10040436 v_mac_f32_e32 v6, s41, v1 ; 3E0C0229 v_mac_f32_e32 v9, s45, v1 ; 3E12022D v_mac_f32_e32 v10, s49, v1 ; 3E140231 v_mac_f32_e32 v2, s53, v1 ; 3E040235 v_mac_f32_e32 v7, s7, v3 ; 3E0E0607 v_mac_f32_e32 v8, s31, v3 ; 3E10061F v_mac_f32_e32 v0, s35, v3 ; 3E000623 v_mac_f32_e32 v5, s39, v3 ; 3E0A0627 v_mac_f32_e32 v6, s43, v3 ; 3E0C062B v_mac_f32_e32 v9, s47, v3 ; 3E12062F v_mac_f32_e32 v10, s51, v3 ; 3E140633 v_mac_f32_e32 v2, s55, v3 ; 3E040637 v_mac_f32_e32 v7, s28, v4 ; 3E0E081C v_mac_f32_e32 v8, s32, v4 ; 3E100820 v_mac_f32_e32 v0, s36, v4 ; 3E000824 v_mac_f32_e32 v5, s40, v4 ; 3E0A0828 v_mac_f32_e32 v6, s44, v4 ; 3E0C082C v_mac_f32_e32 v9, s48, v4 ; 3E120830 v_mac_f32_e32 v10, s52, v4 ; 3E140834 v_mac_f32_e32 v2, s0, v4 ; 3E040800 exp 15, 13, 0, 0, 0, v7, v8, v0, v5 ; F80000DF 05000807 exp 15, 14, 0, 1, 0, v6, v9, v10, v2 ; F80008EF 020A0906 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 12 Code Size: 544 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL CONST[0..3] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 6.0000, 1.0000, -0.9900, 100.0000} IMM[1] FLT32 { -2.0000, 3.0000, -1.0000, 0.0000} IMM[2] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0].xw, TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].xy, IN[1].xyyy 3: TEX TEMP[1].y, TEMP[1], SAMP[1], 2D 4: MAD TEMP[0].x, TEMP[1].yyyy, TEMP[0].xxxx, TEMP[0].wwww 5: MAD TEMP[1].x, CONST[3].wwww, IMM[0].xxxx, IMM[0].yyyy 6: MAD TEMP[0].x, TEMP[0].xxxx, TEMP[1].xxxx, IMM[0].zzzz 7: MUL TEMP[0].x, TEMP[0].xxxx, IMM[0].wwww 8: MOV_SAT TEMP[1].x, TEMP[0].xxxx 9: MAD TEMP[2].x, TEMP[1].xxxx, IMM[1].xxxx, IMM[1].yyyy 10: MUL TEMP[0].x, TEMP[1].xxxx, TEMP[1].xxxx 11: MAD TEMP[0].x, TEMP[2].xxxx, TEMP[0].xxxx, IMM[0].yyyy 12: MOV TEMP[1].xy, IN[0].xyyy 13: TEX TEMP[1].x, TEMP[1], SAMP[3], 2D 14: ADD TEMP[0].x, TEMP[0].xxxx, -TEMP[1].xxxx 15: MOV_SAT TEMP[0].x, TEMP[0].xxxx 16: MOV TEMP[1].xy, IN[0].xyyy 17: TEX TEMP[1].xyz, TEMP[1], SAMP[2], 2D 18: ADD TEMP[2].x, TEMP[1].xxxx, -CONST[3].yyyy 19: MAD TEMP[2].x, TEMP[0].xxxx, TEMP[2].xxxx, CONST[3].yyyy 20: ADD TEMP[3].x, TEMP[1].yyyy, IMM[1].zzzz 21: MAD TEMP[1].xy, TEMP[1].zzzz, IMM[2].xyyy, IMM[2].yxxx 22: MOV TEMP[2].zw, TEMP[1].yyxy 23: MAD TEMP[0].x, TEMP[0].xxxx, TEMP[3].xxxx, IMM[0].yyyy 24: MOV TEMP[2].y, TEMP[0].xxxx 25: MOV OUT[0], TEMP[2] 26: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 52) %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 60) %27 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %28 = load <8 x i32>, <8 x i32> addrspace(2)* %27, align 32, !tbaa !0 %29 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %30 = load <4 x i32>, <4 x i32> addrspace(2)* %29, align 16, !tbaa !0 %31 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %32 = load <8 x i32>, <8 x i32> addrspace(2)* %31, align 32, !tbaa !0 %33 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %34 = load <4 x i32>, <4 x i32> addrspace(2)* %33, align 16, !tbaa !0 %35 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %36 = load <8 x i32>, <8 x i32> addrspace(2)* %35, align 32, !tbaa !0 %37 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %38 = load <4 x i32>, <4 x i32> addrspace(2)* %37, align 16, !tbaa !0 %39 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %40 = load <8 x i32>, <8 x i32> addrspace(2)* %39, align 32, !tbaa !0 %41 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %42 = load <4 x i32>, <4 x i32> addrspace(2)* %41, align 16, !tbaa !0 %43 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %44 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %45 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %46 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %47 = bitcast float %43 to i32 %48 = bitcast float %44 to i32 %49 = insertelement <2 x i32> undef, i32 %47, i32 0 %50 = insertelement <2 x i32> %49, i32 %48, i32 1 %51 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %50, <8 x i32> %28, <4 x i32> %30, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %52 = extractelement <4 x float> %51, i32 0 %53 = extractelement <4 x float> %51, i32 3 %54 = bitcast float %45 to i32 %55 = bitcast float %46 to i32 %56 = insertelement <2 x i32> undef, i32 %54, i32 0 %57 = insertelement <2 x i32> %56, i32 %55, i32 1 %58 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %57, <8 x i32> %32, <4 x i32> %34, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %59 = extractelement <4 x float> %58, i32 1 %60 = fmul float %59, %52 %61 = fadd float %60, %53 %62 = fmul float %26, 6.000000e+00 %63 = fadd float %62, 1.000000e+00 %64 = fmul float %61, %63 %65 = fadd float %64, 0xBFEFAE1480000000 %66 = fmul float %65, 1.000000e+02 %67 = call float @llvm.AMDIL.clamp.(float %66, float 0.000000e+00, float 1.000000e+00) %68 = fmul float %67, -2.000000e+00 %69 = fadd float %68, 3.000000e+00 %70 = fmul float %67, %67 %71 = fmul float %69, %70 %72 = fadd float %71, 1.000000e+00 %73 = bitcast float %43 to i32 %74 = bitcast float %44 to i32 %75 = insertelement <2 x i32> undef, i32 %73, i32 0 %76 = insertelement <2 x i32> %75, i32 %74, i32 1 %77 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %76, <8 x i32> %40, <4 x i32> %42, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %78 = extractelement <4 x float> %77, i32 0 %79 = fsub float %72, %78 %80 = call float @llvm.AMDIL.clamp.(float %79, float 0.000000e+00, float 1.000000e+00) %81 = bitcast float %43 to i32 %82 = bitcast float %44 to i32 %83 = insertelement <2 x i32> undef, i32 %81, i32 0 %84 = insertelement <2 x i32> %83, i32 %82, i32 1 %85 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %84, <8 x i32> %36, <4 x i32> %38, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %86 = extractelement <4 x float> %85, i32 0 %87 = extractelement <4 x float> %85, i32 1 %88 = extractelement <4 x float> %85, i32 2 %89 = fsub float %86, %25 %90 = fmul float %80, %89 %91 = fadd float %90, %25 %92 = fadd float %87, -1.000000e+00 %93 = fadd float %88, 0.000000e+00 %94 = fmul float %88, 0.000000e+00 %95 = fadd float %94, 1.000000e+00 %96 = fmul float %80, %92 %97 = fadd float %96, 1.000000e+00 %98 = call i32 @llvm.SI.packf16(float %91, float %97) %99 = bitcast i32 %98 to float %100 = call i32 @llvm.SI.packf16(float %93, float %95) %101 = bitcast i32 %100 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %99, float %101, float %99, float %101) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_mov_b32 m0, s10 ; BEFC030A s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 s_load_dwordx8 s[16:23], s[6:7], 0x8 ; C0C80708 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 s_load_dwordx4 s[24:27], s[4:5], 0x0 ; C08C0500 s_load_dwordx4 s[28:31], s[4:5], 0x4 ; C08E0504 s_load_dwordx4 s[32:35], s[4:5], 0x8 ; C0900508 s_load_dwordx8 s[36:43], s[6:7], 0x18 ; C0D20718 s_load_dwordx4 s[44:47], s[4:5], 0xc ; C096050C s_load_dwordx8 s[48:55], s[6:7], 0x10 ; C0D80710 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0xf ; C202010F v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 image_sample v[0:1], 9, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[8:15], s[24:27] ; F0800900 00C20002 image_sample v4, 2, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[16:23], s[28:31] ; F0800200 00E40404 image_sample v5, 1, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[36:43], s[44:47] ; F0800100 01690502 image_sample v[6:8], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[48:55], s[32:35] ; F0800700 010C0602 s_buffer_load_dword s0, s[0:3], 0xd ; C200010D v_mov_b32_e32 v2, 0x40c00000 ; 7E0402FF 40C00000 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v2, s4, 1.0 ; D2820002 03C80902 s_waitcnt vmcnt(2) ; BF8C0772 v_mac_f32_e32 v1, v0, v4 ; 3E020900 v_madak_f32_e32 v0, v1, v2, 0xbf7d70a4 ; 42000501 BF7D70A4 v_mul_f32_e32 v0, 0x42c80000, v0 ; 100000FF 42C80000 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_madak_f32_e32 v1, -2.0, v0, 0x40400000 ; 420200F5 40400000 v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mad_f32 v0, v1, v0, 1.0 ; D2820000 03CA0101 s_waitcnt vmcnt(1) ; BF8C0771 v_subrev_f32_e32 v0, v5, v0 ; 0A000105 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 s_waitcnt vmcnt(0) ; BF8C0770 v_subrev_f32_e32 v1, s0, v6 ; 0A020C00 v_mad_f32 v1, v0, v1, s0 ; D2820001 00020300 v_add_f32_e32 v2, -1.0, v7 ; 06040EF3 v_add_f32_e32 v3, 0, v8 ; 06061080 v_mad_f32 v4, 0, v8, 1.0 ; D2820004 03CA1080 v_mad_f32 v0, v0, v2, 1.0 ; D2820000 03CA0500 v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101 v_cvt_pkrtz_f16_f32_e32 v1, v3, v4 ; 5E020903 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 12 Code Size: 268 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], CLIPVERTEX DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL CONST[0..54] DCL TEMP[0..3], LOCAL 0: MUL TEMP[0].xy, CONST[48].xyyy, IN[1].xyyy 1: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 2: ADD TEMP[1].x, TEMP[0].xxxx, CONST[48].wwww 3: MOV TEMP[1].z, TEMP[1].xxxx 4: MUL TEMP[0].xy, CONST[49].xyyy, IN[1].xyyy 5: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 6: ADD TEMP[2].x, TEMP[0].xxxx, CONST[49].wwww 7: MOV TEMP[1].w, TEMP[2].xxxx 8: MUL TEMP[0].xy, CONST[50].xyyy, IN[1].xyyy 9: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 10: ADD TEMP[2].x, TEMP[0].xxxx, CONST[50].wwww 11: MUL TEMP[0].xy, CONST[51].xyyy, IN[1].xyyy 12: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 13: ADD TEMP[3].x, TEMP[0].xxxx, CONST[51].wwww 14: MOV TEMP[2].y, TEMP[3].xxxx 15: MUL TEMP[0].xy, CONST[52].xyyy, IN[1].xyyy 16: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 17: ADD TEMP[3].x, TEMP[0].xxxx, CONST[52].wwww 18: MOV TEMP[2].z, TEMP[3].xxxx 19: MUL TEMP[0].xy, CONST[53].xyyy, IN[1].xyyy 20: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 21: ADD TEMP[0].x, TEMP[0].xxxx, CONST[53].wwww 22: MOV TEMP[2].w, TEMP[0].xxxx 23: MOV TEMP[0].xw, IN[0].xxxw 24: MOV TEMP[1].xy, IN[1].xyxx 25: MAD TEMP[3].x, IN[0].zzzz, CONST[0].zzzz, -IN[0].wwww 26: MOV TEMP[0].z, TEMP[3].xxxx 27: MOV TEMP[0].y, -IN[0].yyyy 28: MAD TEMP[0].xy, CONST[54].xyyy, IN[0].wwww, TEMP[0].xyyy 29: MOV OUT[2], TEMP[1] 30: MOV OUT[3], TEMP[2] 31: MOV OUT[0], TEMP[0] 32: MOV OUT[1], IN[0] 33: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 768) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 772) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 780) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 784) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 788) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 796) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 800) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 804) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 812) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 816) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 820) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 828) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 832) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 836) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 844) %30 = call float @llvm.SI.load.const(<16 x i8> %13, i32 848) %31 = call float @llvm.SI.load.const(<16 x i8> %13, i32 852) %32 = call float @llvm.SI.load.const(<16 x i8> %13, i32 860) %33 = call float @llvm.SI.load.const(<16 x i8> %13, i32 864) %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 868) %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 %37 = add i32 %5, %8 %38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = add i32 %5, %8 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = fmul float %15, %47 %50 = fmul float %16, %48 %51 = fadd float %50, %49 %52 = fadd float %51, %17 %53 = fmul float %18, %47 %54 = fmul float %19, %48 %55 = fadd float %54, %53 %56 = fadd float %55, %20 %57 = fmul float %21, %47 %58 = fmul float %22, %48 %59 = fadd float %58, %57 %60 = fadd float %59, %23 %61 = fmul float %24, %47 %62 = fmul float %25, %48 %63 = fadd float %62, %61 %64 = fadd float %63, %26 %65 = fmul float %27, %47 %66 = fmul float %28, %48 %67 = fadd float %66, %65 %68 = fadd float %67, %29 %69 = fmul float %30, %47 %70 = fmul float %31, %48 %71 = fadd float %70, %69 %72 = fadd float %71, %32 %73 = fmul float %41, %14 %74 = fsub float %73, %42 %75 = fmul float %33, %42 %76 = fadd float %75, %39 %77 = fmul float %34, %42 %78 = fsub float %77, %40 %79 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 16 %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 %81 = call float @llvm.SI.load.const(<16 x i8> %80, i32 0) %82 = fmul float %81, %39 %83 = call float @llvm.SI.load.const(<16 x i8> %80, i32 4) %84 = fmul float %83, %40 %85 = fadd float %82, %84 %86 = call float @llvm.SI.load.const(<16 x i8> %80, i32 8) %87 = fmul float %86, %41 %88 = fadd float %85, %87 %89 = call float @llvm.SI.load.const(<16 x i8> %80, i32 12) %90 = fmul float %89, %42 %91 = fadd float %88, %90 %92 = call float @llvm.SI.load.const(<16 x i8> %80, i32 16) %93 = fmul float %92, %39 %94 = call float @llvm.SI.load.const(<16 x i8> %80, i32 20) %95 = fmul float %94, %40 %96 = fadd float %93, %95 %97 = call float @llvm.SI.load.const(<16 x i8> %80, i32 24) %98 = fmul float %97, %41 %99 = fadd float %96, %98 %100 = call float @llvm.SI.load.const(<16 x i8> %80, i32 28) %101 = fmul float %100, %42 %102 = fadd float %99, %101 %103 = call float @llvm.SI.load.const(<16 x i8> %80, i32 32) %104 = fmul float %103, %39 %105 = call float @llvm.SI.load.const(<16 x i8> %80, i32 36) %106 = fmul float %105, %40 %107 = fadd float %104, %106 %108 = call float @llvm.SI.load.const(<16 x i8> %80, i32 40) %109 = fmul float %108, %41 %110 = fadd float %107, %109 %111 = call float @llvm.SI.load.const(<16 x i8> %80, i32 44) %112 = fmul float %111, %42 %113 = fadd float %110, %112 %114 = call float @llvm.SI.load.const(<16 x i8> %80, i32 48) %115 = fmul float %114, %39 %116 = call float @llvm.SI.load.const(<16 x i8> %80, i32 52) %117 = fmul float %116, %40 %118 = fadd float %115, %117 %119 = call float @llvm.SI.load.const(<16 x i8> %80, i32 56) %120 = fmul float %119, %41 %121 = fadd float %118, %120 %122 = call float @llvm.SI.load.const(<16 x i8> %80, i32 60) %123 = fmul float %122, %42 %124 = fadd float %121, %123 %125 = call float @llvm.SI.load.const(<16 x i8> %80, i32 64) %126 = fmul float %125, %39 %127 = call float @llvm.SI.load.const(<16 x i8> %80, i32 68) %128 = fmul float %127, %40 %129 = fadd float %126, %128 %130 = call float @llvm.SI.load.const(<16 x i8> %80, i32 72) %131 = fmul float %130, %41 %132 = fadd float %129, %131 %133 = call float @llvm.SI.load.const(<16 x i8> %80, i32 76) %134 = fmul float %133, %42 %135 = fadd float %132, %134 %136 = call float @llvm.SI.load.const(<16 x i8> %80, i32 80) %137 = fmul float %136, %39 %138 = call float @llvm.SI.load.const(<16 x i8> %80, i32 84) %139 = fmul float %138, %40 %140 = fadd float %137, %139 %141 = call float @llvm.SI.load.const(<16 x i8> %80, i32 88) %142 = fmul float %141, %41 %143 = fadd float %140, %142 %144 = call float @llvm.SI.load.const(<16 x i8> %80, i32 92) %145 = fmul float %144, %42 %146 = fadd float %143, %145 %147 = call float @llvm.SI.load.const(<16 x i8> %80, i32 96) %148 = fmul float %147, %39 %149 = call float @llvm.SI.load.const(<16 x i8> %80, i32 100) %150 = fmul float %149, %40 %151 = fadd float %148, %150 %152 = call float @llvm.SI.load.const(<16 x i8> %80, i32 104) %153 = fmul float %152, %41 %154 = fadd float %151, %153 %155 = call float @llvm.SI.load.const(<16 x i8> %80, i32 108) %156 = fmul float %155, %42 %157 = fadd float %154, %156 %158 = call float @llvm.SI.load.const(<16 x i8> %80, i32 112) %159 = fmul float %158, %39 %160 = call float @llvm.SI.load.const(<16 x i8> %80, i32 116) %161 = fmul float %160, %40 %162 = fadd float %159, %161 %163 = call float @llvm.SI.load.const(<16 x i8> %80, i32 120) %164 = fmul float %163, %41 %165 = fadd float %162, %164 %166 = call float @llvm.SI.load.const(<16 x i8> %80, i32 124) %167 = fmul float %166, %42 %168 = fadd float %165, %167 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %47, float %48, float %52, float %56) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %60, float %64, float %68, float %72) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %76, float %78, float %74, float %42) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 13, i32 0, float %91, float %102, float %113, float %124) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 14, i32 0, float %135, float %146, float %157, float %168) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_load_dwordx4 s[4:7], s[2:3], 0x0 ; C0820300 s_load_dwordx4 s[0:3], s[2:3], 0x40 ; C0800340 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[4:7], 0x2 ; C2040502 s_buffer_load_dword s9, s[4:7], 0xc0 ; C20485C0 s_buffer_load_dword s10, s[4:7], 0xc1 ; C20505C1 s_buffer_load_dword s11, s[4:7], 0xc3 ; C20585C3 s_buffer_load_dword s12, s[4:7], 0xc4 ; C20605C4 s_buffer_load_dword s13, s[4:7], 0xc5 ; C20685C5 s_buffer_load_dword s14, s[4:7], 0xc7 ; C20705C7 s_buffer_load_dword s15, s[4:7], 0xc8 ; C20785C8 s_buffer_load_dword s16, s[4:7], 0xc9 ; C20805C9 s_buffer_load_dword s17, s[4:7], 0xcb ; C20885CB s_buffer_load_dword s18, s[4:7], 0xcc ; C20905CC s_buffer_load_dword s19, s[4:7], 0xcd ; C20985CD s_buffer_load_dword s20, s[4:7], 0xcf ; C20A05CF s_buffer_load_dword s21, s[4:7], 0xd0 ; C20A85D0 s_buffer_load_dword s22, s[4:7], 0xd1 ; C20B05D1 s_buffer_load_dword s23, s[4:7], 0xd3 ; C20B85D3 s_buffer_load_dword s24, s[4:7], 0xd4 ; C20C05D4 s_buffer_load_dword s25, s[4:7], 0xd5 ; C20C85D5 s_buffer_load_dword s26, s[4:7], 0xd7 ; C20D05D7 s_buffer_load_dword s27, s[4:7], 0xd8 ; C20D85D8 s_buffer_load_dword s4, s[4:7], 0xd9 ; C20205D9 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x2 ; C2038102 s_buffer_load_dword s28, s[0:3], 0x3 ; C20E0103 s_buffer_load_dword s29, s[0:3], 0x4 ; C20E8104 s_buffer_load_dword s30, s[0:3], 0x5 ; C20F0105 s_buffer_load_dword s31, s[0:3], 0x6 ; C20F8106 s_buffer_load_dword s32, s[0:3], 0x7 ; C2100107 s_buffer_load_dword s33, s[0:3], 0x8 ; C2108108 s_buffer_load_dword s34, s[0:3], 0x9 ; C2110109 s_buffer_load_dword s35, s[0:3], 0xa ; C211810A s_buffer_load_dword s36, s[0:3], 0xb ; C212010B s_buffer_load_dword s37, s[0:3], 0xc ; C212810C s_buffer_load_dword s38, s[0:3], 0xd ; C213010D s_buffer_load_dword s39, s[0:3], 0xe ; C213810E s_buffer_load_dword s40, s[0:3], 0xf ; C214010F s_buffer_load_dword s41, s[0:3], 0x10 ; C2148110 s_buffer_load_dword s42, s[0:3], 0x11 ; C2150111 s_buffer_load_dword s43, s[0:3], 0x12 ; C2158112 s_buffer_load_dword s44, s[0:3], 0x13 ; C2160113 s_buffer_load_dword s45, s[0:3], 0x14 ; C2168114 s_buffer_load_dword s46, s[0:3], 0x15 ; C2170115 s_buffer_load_dword s47, s[0:3], 0x16 ; C2178116 s_buffer_load_dword s48, s[0:3], 0x17 ; C2180117 s_buffer_load_dword s49, s[0:3], 0x18 ; C2188118 s_buffer_load_dword s50, s[0:3], 0x19 ; C2190119 s_buffer_load_dword s51, s[0:3], 0x1a ; C219811A s_buffer_load_dword s52, s[0:3], 0x1b ; C21A011B s_buffer_load_dword s53, s[0:3], 0x1c ; C21A811C s_buffer_load_dword s54, s[0:3], 0x1d ; C21B011D s_buffer_load_dword s55, s[0:3], 0x1e ; C21B811E s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s11 ; 7E00020B s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v7, s14 ; 7E0E020E v_mov_b32_e32 v8, s17 ; 7E100211 v_mov_b32_e32 v9, s20 ; 7E120214 v_mov_b32_e32 v10, s23 ; 7E140217 v_mov_b32_e32 v11, s26 ; 7E16021A v_mac_f32_e32 v0, s9, v5 ; 3E000A09 v_mac_f32_e32 v7, s12, v5 ; 3E0E0A0C v_mac_f32_e32 v8, s15, v5 ; 3E100A0F v_mac_f32_e32 v9, s18, v5 ; 3E120A12 v_mac_f32_e32 v10, s21, v5 ; 3E140A15 v_mac_f32_e32 v11, s24, v5 ; 3E160A18 v_mac_f32_e32 v0, s10, v6 ; 3E000C0A v_mac_f32_e32 v7, s13, v6 ; 3E0E0C0D v_mac_f32_e32 v8, s16, v6 ; 3E100C10 v_mac_f32_e32 v9, s19, v6 ; 3E120C13 exp 15, 32, 0, 0, 0, v5, v6, v0, v7 ; F800020F 07000605 v_mac_f32_e32 v10, s22, v6 ; 3E140C16 v_mac_f32_e32 v11, s25, v6 ; 3E160C19 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v0, v3, s8, -v4 ; D2820000 84101103 v_mad_f32 v5, s27, v4, v1 ; D2820005 0406081B v_mad_f32 v6, s4, v4, -v2 ; D2820006 840A0804 v_mul_f32_e32 v7, s6, v2 ; 100E0406 exp 15, 33, 0, 0, 0, v8, v9, v10, v11 ; F800021F 0B0A0908 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v8, s30, v2 ; 1010041E v_mac_f32_e32 v7, s5, v1 ; 3E0E0205 exp 15, 12, 0, 0, 0, v5, v6, v0, v4 ; F80000CF 04000605 v_mac_f32_e32 v8, s29, v1 ; 3E10021D s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s34, v2 ; 10000422 v_mac_f32_e32 v0, s33, v1 ; 3E000221 v_mul_f32_e32 v5, s38, v2 ; 100A0426 v_mac_f32_e32 v5, s37, v1 ; 3E0A0225 v_mul_f32_e32 v6, s42, v2 ; 100C042A v_mul_f32_e32 v9, s46, v2 ; 1012042E v_mul_f32_e32 v10, s50, v2 ; 10140432 v_mul_f32_e32 v2, s54, v2 ; 10040436 v_mac_f32_e32 v6, s41, v1 ; 3E0C0229 v_mac_f32_e32 v9, s45, v1 ; 3E12022D v_mac_f32_e32 v10, s49, v1 ; 3E140231 v_mac_f32_e32 v2, s53, v1 ; 3E040235 v_mac_f32_e32 v7, s7, v3 ; 3E0E0607 v_mac_f32_e32 v8, s31, v3 ; 3E10061F v_mac_f32_e32 v0, s35, v3 ; 3E000623 v_mac_f32_e32 v5, s39, v3 ; 3E0A0627 v_mac_f32_e32 v6, s43, v3 ; 3E0C062B v_mac_f32_e32 v9, s47, v3 ; 3E12062F v_mac_f32_e32 v10, s51, v3 ; 3E140633 v_mac_f32_e32 v2, s55, v3 ; 3E040637 v_mac_f32_e32 v7, s28, v4 ; 3E0E081C v_mac_f32_e32 v8, s32, v4 ; 3E100820 v_mac_f32_e32 v0, s36, v4 ; 3E000824 v_mac_f32_e32 v5, s40, v4 ; 3E0A0828 v_mac_f32_e32 v6, s44, v4 ; 3E0C082C v_mac_f32_e32 v9, s48, v4 ; 3E120830 v_mac_f32_e32 v10, s52, v4 ; 3E140834 v_mac_f32_e32 v2, s0, v4 ; 3E040800 exp 15, 13, 0, 0, 0, v7, v8, v0, v5 ; F80000DF 05000807 exp 15, 14, 0, 1, 0, v6, v9, v10, v2 ; F80008EF 020A0906 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 12 Code Size: 544 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL CONST[0..3] DCL TEMP[0..6], LOCAL IMM[0] FLT32 { 6.0000, 1.0000, 0.7500, 0.2500} IMM[1] FLT32 { -1.0000, -0.5800, 10.0000, -2.0000} IMM[2] FLT32 { 3.0000, 100.0000, 0.0500, -0.0800} IMM[3] FLT32 { 0.3000, 0.5900, 0.1100, 14.2857} IMM[4] FLT32 { 0.0050, 2.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[1].zwww 1: TEX TEMP[0], TEMP[0], SAMP[4], 2D 2: MUL TEMP[1].x, IMM[0].zzzz, CONST[3].wwww 3: MOV TEMP[2].xy, IN[0].xyyy 4: TEX TEMP[2], TEMP[2], SAMP[0], 2D 5: ADD TEMP[3].x, -TEMP[2].xxxx, IMM[0].yyyy 6: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx 7: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx 8: MAD TEMP[1].x, TEMP[3].xxxx, IMM[0].wwww, TEMP[1].xxxx 9: ADD TEMP[3], TEMP[0], IMM[1].xxxx 10: MAD TEMP[1], TEMP[1].xxxx, TEMP[3], IMM[0].yyyy 11: MAD TEMP[0].x, CONST[3].wwww, IMM[0].xxxx, IMM[0].yyyy 12: MOV TEMP[3].xy, IN[1].xyyy 13: TEX TEMP[3].y, TEMP[3], SAMP[1], 2D 14: MAD TEMP[4].x, TEMP[3].yyyy, TEMP[2].xxxx, TEMP[2].wwww 15: MAD TEMP[0].x, TEMP[4].xxxx, TEMP[0].xxxx, IMM[1].yyyy 16: MUL TEMP[0].x, TEMP[0].xxxx, IMM[1].zzzz 17: MOV_SAT TEMP[4].x, TEMP[0].xxxx 18: MAD TEMP[5].x, TEMP[4].xxxx, IMM[1].wwww, IMM[2].xxxx 19: MUL TEMP[0].x, TEMP[4].xxxx, TEMP[4].xxxx 20: MUL TEMP[4].x, TEMP[0].xxxx, TEMP[5].xxxx 21: MAD TEMP[0].x, TEMP[5].xxxx, TEMP[0].xxxx, IMM[0].yyyy 22: MUL TEMP[4].x, TEMP[4].xxxx, IMM[2].yyyy 23: MOV_SAT TEMP[4].x, TEMP[4].xxxx 24: MAD TEMP[5].x, TEMP[4].xxxx, IMM[1].wwww, IMM[2].xxxx 25: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[4].xxxx 26: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 27: LRP TEMP[5].xyz, TEMP[4].xxxx, IMM[0].yyyy, TEMP[1].xyzz 28: LRP TEMP[3].xyz, TEMP[4].xxxx, IMM[2].zzzz, CONST[0].xyzz 29: MUL TEMP[3].xyz, TEMP[5].xyzz, TEMP[3].xyzz 30: DP3 TEMP[5].x, TEMP[3].xyzz, IMM[3].xyzz 31: ADD TEMP[5].x, TEMP[5].xxxx, IMM[2].wwww 32: MUL TEMP[5].x, TEMP[5].xxxx, IMM[3].wwww 33: MOV_SAT TEMP[5].x, TEMP[5].xxxx 34: MAD TEMP[6].x, TEMP[5].xxxx, IMM[1].wwww, IMM[2].xxxx 35: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[5].xxxx 36: MAD TEMP[5].x, TEMP[6].xxxx, -TEMP[5].xxxx, IMM[0].yyyy 37: MAD TEMP[1].xyz, TEMP[5].xxxx, IMM[4].xxxx, TEMP[3].xyzz 38: MUL TEMP[5].xyz, TEMP[2].zzzz, TEMP[1].xyzz 39: MAD TEMP[3].xyz, TEMP[5].xyzz, IMM[4].yyyy, TEMP[3].xyzz 40: MOV_SAT TEMP[3].xyz, TEMP[3].xyzz 41: MUL TEMP[5].xyz, TEMP[2].yyyy, TEMP[3].xyzz 42: MOV TEMP[6].xy, IN[0].xyyy 43: TEX TEMP[6], TEMP[6], SAMP[2], 2D 44: MAD TEMP[3].xyz, TEMP[3].xyzz, -TEMP[2].yyyy, TEMP[6].xyzz 45: MUL TEMP[2].x, TEMP[2].yyyy, CONST[3].zzzz 46: MUL TEMP[1].x, TEMP[1].wwww, TEMP[2].xxxx 47: MOV TEMP[2].xy, IN[0].xyyy 48: TEX TEMP[2].x, TEMP[2], SAMP[3], 2D 49: ADD TEMP[0].x, TEMP[0].xxxx, -TEMP[2].xxxx 50: MOV_SAT TEMP[2].x, TEMP[0].xxxx 51: MAD TEMP[3].xyz, TEMP[2].xxxx, TEMP[3].xyzz, TEMP[5].xyzz 52: LRP TEMP[0].x, TEMP[4].xxxx, IMM[0].yyyy, CONST[3].zzzz 53: MUL TEMP[0].x, TEMP[1].xxxx, TEMP[0].xxxx 54: MAD TEMP[1].x, TEMP[0].xxxx, -CONST[3].xxxx, TEMP[6].wwww 55: MUL TEMP[0].x, TEMP[0].xxxx, CONST[3].xxxx 56: MAD TEMP[0].x, TEMP[2].xxxx, TEMP[1].xxxx, TEMP[0].xxxx 57: MOV TEMP[3].w, TEMP[0].xxxx 58: MOV OUT[0], TEMP[3] 59: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0) %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 4) %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 8) %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 48) %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 56) %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 60) %31 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %32 = load <8 x i32>, <8 x i32> addrspace(2)* %31, align 32, !tbaa !0 %33 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %34 = load <4 x i32>, <4 x i32> addrspace(2)* %33, align 16, !tbaa !0 %35 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %36 = load <8 x i32>, <8 x i32> addrspace(2)* %35, align 32, !tbaa !0 %37 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %38 = load <4 x i32>, <4 x i32> addrspace(2)* %37, align 16, !tbaa !0 %39 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %40 = load <8 x i32>, <8 x i32> addrspace(2)* %39, align 32, !tbaa !0 %41 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %42 = load <4 x i32>, <4 x i32> addrspace(2)* %41, align 16, !tbaa !0 %43 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %44 = load <8 x i32>, <8 x i32> addrspace(2)* %43, align 32, !tbaa !0 %45 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %46 = load <4 x i32>, <4 x i32> addrspace(2)* %45, align 16, !tbaa !0 %47 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %48 = load <8 x i32>, <8 x i32> addrspace(2)* %47, align 32, !tbaa !0 %49 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %50 = load <4 x i32>, <4 x i32> addrspace(2)* %49, align 16, !tbaa !0 %51 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %52 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %53 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %54 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %55 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) %56 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %57 = bitcast float %55 to i32 %58 = bitcast float %56 to i32 %59 = insertelement <2 x i32> undef, i32 %57, i32 0 %60 = insertelement <2 x i32> %59, i32 %58, i32 1 %61 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %60, <8 x i32> %48, <4 x i32> %50, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %62 = extractelement <4 x float> %61, i32 0 %63 = extractelement <4 x float> %61, i32 1 %64 = extractelement <4 x float> %61, i32 2 %65 = extractelement <4 x float> %61, i32 3 %66 = fmul float %30, 7.500000e-01 %67 = bitcast float %51 to i32 %68 = bitcast float %52 to i32 %69 = insertelement <2 x i32> undef, i32 %67, i32 0 %70 = insertelement <2 x i32> %69, i32 %68, i32 1 %71 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %70, <8 x i32> %32, <4 x i32> %34, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %72 = extractelement <4 x float> %71, i32 0 %73 = extractelement <4 x float> %71, i32 1 %74 = extractelement <4 x float> %71, i32 2 %75 = extractelement <4 x float> %71, i32 3 %76 = fsub float 1.000000e+00, %72 %77 = fmul float %76, %76 %78 = fmul float %77, %77 %79 = fmul float %78, 2.500000e-01 %80 = fadd float %79, %66 %81 = fadd float %62, -1.000000e+00 %82 = fadd float %63, -1.000000e+00 %83 = fadd float %64, -1.000000e+00 %84 = fadd float %65, -1.000000e+00 %85 = fmul float %80, %81 %86 = fadd float %85, 1.000000e+00 %87 = fmul float %80, %82 %88 = fadd float %87, 1.000000e+00 %89 = fmul float %80, %83 %90 = fadd float %89, 1.000000e+00 %91 = fmul float %80, %84 %92 = fadd float %91, 1.000000e+00 %93 = fmul float %30, 6.000000e+00 %94 = fadd float %93, 1.000000e+00 %95 = bitcast float %53 to i32 %96 = bitcast float %54 to i32 %97 = insertelement <2 x i32> undef, i32 %95, i32 0 %98 = insertelement <2 x i32> %97, i32 %96, i32 1 %99 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %98, <8 x i32> %36, <4 x i32> %38, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %100 = extractelement <4 x float> %99, i32 1 %101 = fmul float %100, %72 %102 = fadd float %101, %75 %103 = fmul float %102, %94 %104 = fadd float %103, 0xBFE28F5C20000000 %105 = fmul float %104, 1.000000e+01 %106 = call float @llvm.AMDIL.clamp.(float %105, float 0.000000e+00, float 1.000000e+00) %107 = fmul float %106, -2.000000e+00 %108 = fadd float %107, 3.000000e+00 %109 = fmul float %106, %106 %110 = fmul float %109, %108 %111 = fmul float %108, %109 %112 = fadd float %111, 1.000000e+00 %113 = fmul float %110, 1.000000e+02 %114 = call float @llvm.AMDIL.clamp.(float %113, float 0.000000e+00, float 1.000000e+00) %115 = fmul float %114, -2.000000e+00 %116 = fadd float %115, 3.000000e+00 %117 = fmul float %114, %114 %118 = fmul float %117, %116 %119 = fsub float 1.000000e+00, %118 %120 = fmul float %86, %119 %121 = fadd float %118, %120 %122 = fsub float 1.000000e+00, %118 %123 = fmul float %88, %122 %124 = fadd float %118, %123 %125 = fsub float 1.000000e+00, %118 %126 = fmul float %90, %125 %127 = fadd float %118, %126 %128 = fsub float 1.000000e+00, %118 %129 = fmul float %118, 0x3FA99999A0000000 %130 = fmul float %25, %128 %131 = fadd float %129, %130 %132 = fsub float 1.000000e+00, %118 %133 = fmul float %118, 0x3FA99999A0000000 %134 = fmul float %26, %132 %135 = fadd float %133, %134 %136 = fsub float 1.000000e+00, %118 %137 = fmul float %118, 0x3FA99999A0000000 %138 = fmul float %27, %136 %139 = fadd float %137, %138 %140 = fmul float %121, %131 %141 = fmul float %124, %135 %142 = fmul float %127, %139 %143 = fmul float %140, 0x3FD3333340000000 %144 = fmul float %141, 0x3FE2E147A0000000 %145 = fadd float %144, %143 %146 = fmul float %142, 0x3FBC28F5C0000000 %147 = fadd float %145, %146 %148 = fadd float %147, 0xBFB47AE140000000 %149 = fmul float %148, 0x402C924920000000 %150 = call float @llvm.AMDIL.clamp.(float %149, float 0.000000e+00, float 1.000000e+00) %151 = fmul float %150, -2.000000e+00 %152 = fadd float %151, 3.000000e+00 %153 = fmul float %150, %150 %154 = fmul float %153, %152 %155 = fsub float 1.000000e+00, %154 %156 = fmul float %155, 0x3F747AE140000000 %157 = fadd float %156, %140 %158 = fmul float %155, 0x3F747AE140000000 %159 = fadd float %158, %141 %160 = fmul float %155, 0x3F747AE140000000 %161 = fadd float %160, %142 %162 = fmul float %74, %157 %163 = fmul float %74, %159 %164 = fmul float %74, %161 %165 = fmul float %162, 2.000000e+00 %166 = fadd float %165, %140 %167 = fmul float %163, 2.000000e+00 %168 = fadd float %167, %141 %169 = fmul float %164, 2.000000e+00 %170 = fadd float %169, %142 %171 = call float @llvm.AMDIL.clamp.(float %166, float 0.000000e+00, float 1.000000e+00) %172 = call float @llvm.AMDIL.clamp.(float %168, float 0.000000e+00, float 1.000000e+00) %173 = call float @llvm.AMDIL.clamp.(float %170, float 0.000000e+00, float 1.000000e+00) %174 = fmul float %73, %171 %175 = fmul float %73, %172 %176 = fmul float %73, %173 %177 = bitcast float %51 to i32 %178 = bitcast float %52 to i32 %179 = insertelement <2 x i32> undef, i32 %177, i32 0 %180 = insertelement <2 x i32> %179, i32 %178, i32 1 %181 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %180, <8 x i32> %40, <4 x i32> %42, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %182 = extractelement <4 x float> %181, i32 0 %183 = extractelement <4 x float> %181, i32 1 %184 = extractelement <4 x float> %181, i32 2 %185 = extractelement <4 x float> %181, i32 3 %186 = fmul float %73, %171 %187 = fsub float %182, %186 %188 = fmul float %73, %172 %189 = fsub float %183, %188 %190 = fmul float %73, %173 %191 = fsub float %184, %190 %192 = fmul float %73, %29 %193 = fmul float %92, %192 %194 = bitcast float %51 to i32 %195 = bitcast float %52 to i32 %196 = insertelement <2 x i32> undef, i32 %194, i32 0 %197 = insertelement <2 x i32> %196, i32 %195, i32 1 %198 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %197, <8 x i32> %44, <4 x i32> %46, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %199 = extractelement <4 x float> %198, i32 0 %200 = fsub float %112, %199 %201 = call float @llvm.AMDIL.clamp.(float %200, float 0.000000e+00, float 1.000000e+00) %202 = fmul float %201, %187 %203 = fadd float %202, %174 %204 = fmul float %201, %189 %205 = fadd float %204, %175 %206 = fmul float %201, %191 %207 = fadd float %206, %176 %208 = fsub float 1.000000e+00, %118 %209 = fmul float %29, %208 %210 = fadd float %118, %209 %211 = fmul float %193, %210 %212 = fmul float %28, %211 %213 = fsub float %185, %212 %214 = fmul float %211, %28 %215 = fmul float %201, %213 %216 = fadd float %215, %214 %217 = call i32 @llvm.SI.packf16(float %203, float %205) %218 = bitcast i32 %217 to float %219 = call i32 @llvm.SI.packf16(float %207, float %216) %220 = bitcast i32 %219 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %218, float %220, float %218, float %220) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s10 ; BEFC030A v_mov_b32_e32 v2, 0x3f400000 ; 7E0402FF 3F400000 v_mov_b32_e32 v3, 0x40c00000 ; 7E0602FF 40C00000 v_mov_b32_e32 v4, 0x40400000 ; 7E0802FF 40400000 v_mov_b32_e32 v5, 0xbda3d70a ; 7E0A02FF BDA3D70A v_mov_b32_e32 v6, 0x3ba3d70a ; 7E0C02FF 3BA3D70A v_interp_p1_f32 v7, v0, 0, 0, [m0] ; C81C0000 v_interp_p2_f32 v7, [v7], v1, 0, 0, [m0] ; C81D0001 v_interp_p1_f32 v8, v0, 1, 0, [m0] ; C8200100 v_interp_p2_f32 v8, [v8], v1, 1, 0, [m0] ; C8210101 v_interp_p1_f32 v9, v0, 0, 1, [m0] ; C8240400 v_interp_p2_f32 v9, [v9], v1, 0, 1, [m0] ; C8250401 v_interp_p1_f32 v10, v0, 1, 1, [m0] ; C8280500 v_interp_p2_f32 v10, [v10], v1, 1, 1, [m0] ; C8290501 v_interp_p1_f32 v11, v0, 2, 1, [m0] ; C82C0600 v_interp_p2_f32 v11, [v11], v1, 2, 1, [m0] ; C82D0601 v_interp_p1_f32 v12, v0, 3, 1, [m0] ; C8300700 v_interp_p2_f32 v12, [v12], v1, 3, 1, [m0] ; C8310701 s_load_dwordx4 s[28:31], s[2:3], 0x0 ; C08E0300 s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 s_load_dwordx4 s[36:39], s[4:5], 0x4 ; C0920504 s_load_dwordx4 s[16:19], s[4:5], 0x8 ; C0880508 s_load_dwordx4 s[0:3], s[4:5], 0xc ; C080050C s_load_dwordx4 s[40:43], s[4:5], 0x10 ; C0940510 s_load_dwordx8 s[44:51], s[6:7], 0x0 ; C0D60700 s_load_dwordx8 s[52:59], s[6:7], 0x8 ; C0DA0708 s_load_dwordx8 s[20:27], s[6:7], 0x10 ; C0CA0710 s_load_dwordx8 s[8:15], s[6:7], 0x18 ; C0C40718 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[28:31], 0x0 ; C2021D00 s_buffer_load_dword s5, s[28:31], 0x1 ; C2029D01 s_buffer_load_dword s60, s[28:31], 0x2 ; C21E1D02 s_buffer_load_dword s61, s[28:31], 0xc ; C21E9D0C s_buffer_load_dword s62, s[28:31], 0xe ; C21F1D0E s_buffer_load_dword s28, s[28:31], 0xf ; C20E1D0F s_load_dwordx8 s[64:71], s[6:7], 0x20 ; C0E00720 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s28, v2 ; 1000041C image_sample v[11:14], 15, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[64:71], s[40:43] ; F0800F00 01500B0B image_sample v[15:18], 15, 0, 0, 0, 0, 0, 0, 0, v[7:8], s[44:51], s[32:35] ; F0800F00 010B0F07 image_sample v1, 2, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[52:59], s[36:39] ; F0800200 012D0109 s_waitcnt vmcnt(1) ; BF8C0771 v_sub_f32_e32 v2, 1.0, v15 ; 08041EF2 v_mad_f32 v2, -v15, v2, v2 ; D2820002 240A050F v_mul_f32_e32 v2, v2, v2 ; 10040502 v_madmk_f32_e32 v0, v2, v0, 0x3e800000 ; 40000102 3E800000 v_add_f32_e32 v2, -1.0, v11 ; 060416F3 v_add_f32_e32 v9, -1.0, v12 ; 061218F3 v_add_f32_e32 v10, -1.0, v13 ; 06141AF3 v_mad_f32 v2, v0, v2, 1.0 ; D2820002 03CA0500 v_mad_f32 v9, v0, v9, 1.0 ; D2820009 03CA1300 v_mad_f32 v10, v0, v10, 1.0 ; D282000A 03CA1500 v_mad_f32 v0, v14, v0, -v0 ; D2820000 8402010E v_mad_f32 v3, v3, s28, 1.0 ; D2820003 03C83903 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v18, v15, v1 ; 3E24030F v_madak_f32_e32 v1, v18, v3, 0xbf147ae1 ; 42020712 BF147AE1 v_mul_f32_e32 v1, 0x41200000, v1 ; 100202FF 41200000 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_mad_f32 v3, -2.0, v1, v4 ; D2820003 041202F5 v_mul_f32_e32 v1, v1, v1 ; 10020301 v_mul_f32_e32 v11, v3, v1 ; 10160303 v_mad_f32 v1, v1, v3, 1.0 ; D2820001 03CA0701 v_mul_f32_e32 v3, 0x42c80000, v11 ; 100616FF 42C80000 v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680 v_mad_f32 v11, -2.0, v3, v4 ; D282000B 041206F5 v_mul_f32_e32 v3, v3, v3 ; 10060703 v_mul_f32_e32 v12, v11, v3 ; 1018070B v_mad_f32 v3, -v3, v11, 1.0 ; D2820003 23CA1703 v_mad_f32 v2, v3, v2, v12 ; D2820002 04320503 v_mad_f32 v9, v3, v9, v12 ; D2820009 04321303 v_mad_f32 v10, v3, v10, v12 ; D282000A 04321503 v_mul_f32_e32 v11, 0x3d4ccccd, v12 ; 101618FF 3D4CCCCD v_mad_f32 v13, s4, v3, v11 ; D282000D 042E0604 v_mad_f32 v14, s5, v3, v11 ; D282000E 042E0605 v_mad_f32 v11, s60, v3, v11 ; D282000B 042E063C v_mul_f32_e32 v15, v13, v2 ; 101E050D v_mul_f32_e32 v18, v14, v9 ; 1024130E v_mul_f32_e32 v19, v11, v10 ; 1026150B v_mul_f32_e32 v15, 0x3e99999a, v15 ; 101E1EFF 3E99999A v_madmk_f32_e32 v15, v18, v15, 0x3f170a3d ; 401E1F12 3F170A3D v_madmk_f32_e32 v15, v19, v15, 0x3de147ae ; 401E1F13 3DE147AE v_add_f32_e32 v5, v15, v5 ; 060A0B0F v_mul_f32_e32 v5, 0x41649249, v5 ; 100A0AFF 41649249 v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80 v_mac_f32_e32 v4, -2.0, v5 ; 3E080AF5 v_mul_f32_e32 v5, v5, v5 ; 100A0B05 v_mul_f32_e32 v4, v4, v5 ; 10080B04 v_mad_f32 v4, -v4, v6, v6 ; D2820004 241A0D04 v_mad_f32 v5, v13, v2, v4 ; D2820005 0412050D v_mad_f32 v6, v14, v9, v4 ; D2820006 0412130E v_mad_f32 v4, v11, v10, v4 ; D2820004 0412150B v_mul_f32_e32 v15, v5, v17 ; 101E2305 v_mul_f32_e32 v18, v6, v17 ; 10242306 v_mul_f32_e32 v19, v4, v17 ; 10262304 v_mac_f32_e32 v15, v5, v17 ; 3E1E2305 v_mac_f32_e32 v15, v13, v2 ; 3E1E050D v_mac_f32_e32 v18, v6, v17 ; 3E242306 v_mac_f32_e32 v18, v14, v9 ; 3E24130E v_mac_f32_e32 v19, v4, v17 ; 3E262304 v_mac_f32_e32 v19, v11, v10 ; 3E26150B v_add_f32_e64 v2, 0, v15 clamp ; D2060802 00021E80 v_add_f32_e64 v4, 0, v18 clamp ; D2060804 00022480 v_add_f32_e64 v5, 0, v19 clamp ; D2060805 00022680 v_mul_f32_e32 v6, v2, v16 ; 100C2102 v_mul_f32_e32 v9, v4, v16 ; 10122104 v_mul_f32_e32 v10, v5, v16 ; 10142105 image_sample v[17:20], 15, 0, 0, 0, 0, 0, 0, 0, v[7:8], s[20:27], s[16:19] ; F0800F00 00851107 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, -v16, v2, v17 ; D2820002 24460510 v_mad_f32 v4, -v16, v4, v18 ; D2820004 244A0910 v_mad_f32 v5, -v16, v5, v19 ; D2820005 244E0B10 v_mul_f32_e32 v11, s62, v16 ; 1016203E v_mac_f32_e32 v11, v11, v0 ; 3E16010B image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[7:8], s[8:15], s[0:3] ; F0800100 00020007 s_waitcnt vmcnt(0) ; BF8C0770 v_subrev_f32_e32 v0, v0, v1 ; 0A000300 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mac_f32_e32 v6, v2, v0 ; 3E0C0102 v_mac_f32_e32 v9, v4, v0 ; 3E120104 v_mac_f32_e32 v10, v5, v0 ; 3E140105 v_mac_f32_e32 v12, s62, v3 ; 3E18063E v_mul_f32_e32 v1, v12, v11 ; 1002170C v_mul_f32_e32 v2, s61, v1 ; 1004023D v_mad_f32 v1, -s61, v1, v20 ; D2820001 2452023D v_mac_f32_e32 v2, v1, v0 ; 3E040101 v_cvt_pkrtz_f16_f32_e32 v0, v6, v9 ; 5E001306 v_cvt_pkrtz_f16_f32_e32 v1, v10, v2 ; 5E02050A exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 24 Code Size: 728 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], CLIPVERTEX DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL CONST[0..54] DCL TEMP[0..3], LOCAL 0: MUL TEMP[0].xy, CONST[48].xyyy, IN[1].xyyy 1: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 2: ADD TEMP[1].x, TEMP[0].xxxx, CONST[48].wwww 3: MOV TEMP[1].z, TEMP[1].xxxx 4: MUL TEMP[0].xy, CONST[49].xyyy, IN[1].xyyy 5: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 6: ADD TEMP[2].x, TEMP[0].xxxx, CONST[49].wwww 7: MOV TEMP[1].w, TEMP[2].xxxx 8: MUL TEMP[0].xy, CONST[50].xyyy, IN[1].xyyy 9: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 10: ADD TEMP[2].x, TEMP[0].xxxx, CONST[50].wwww 11: MUL TEMP[0].xy, CONST[51].xyyy, IN[1].xyyy 12: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 13: ADD TEMP[3].x, TEMP[0].xxxx, CONST[51].wwww 14: MOV TEMP[2].y, TEMP[3].xxxx 15: MUL TEMP[0].xy, CONST[52].xyyy, IN[1].xyyy 16: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 17: ADD TEMP[3].x, TEMP[0].xxxx, CONST[52].wwww 18: MOV TEMP[2].z, TEMP[3].xxxx 19: MUL TEMP[0].xy, CONST[53].xyyy, IN[1].xyyy 20: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 21: ADD TEMP[0].x, TEMP[0].xxxx, CONST[53].wwww 22: MOV TEMP[2].w, TEMP[0].xxxx 23: MOV TEMP[0].xw, IN[0].xxxw 24: MOV TEMP[1].xy, IN[1].xyxx 25: MAD TEMP[3].x, IN[0].zzzz, CONST[0].zzzz, -IN[0].wwww 26: MOV TEMP[0].z, TEMP[3].xxxx 27: MOV TEMP[0].y, -IN[0].yyyy 28: MAD TEMP[0].xy, CONST[54].xyyy, IN[0].wwww, TEMP[0].xyyy 29: MOV OUT[2], TEMP[1] 30: MOV OUT[3], TEMP[2] 31: MOV OUT[0], TEMP[0] 32: MOV OUT[1], IN[0] 33: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 768) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 772) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 780) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 784) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 788) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 796) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 800) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 804) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 812) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 816) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 820) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 828) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 832) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 836) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 844) %30 = call float @llvm.SI.load.const(<16 x i8> %13, i32 848) %31 = call float @llvm.SI.load.const(<16 x i8> %13, i32 852) %32 = call float @llvm.SI.load.const(<16 x i8> %13, i32 860) %33 = call float @llvm.SI.load.const(<16 x i8> %13, i32 864) %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 868) %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 %37 = add i32 %5, %8 %38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = add i32 %5, %8 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = fmul float %15, %47 %50 = fmul float %16, %48 %51 = fadd float %50, %49 %52 = fadd float %51, %17 %53 = fmul float %18, %47 %54 = fmul float %19, %48 %55 = fadd float %54, %53 %56 = fadd float %55, %20 %57 = fmul float %21, %47 %58 = fmul float %22, %48 %59 = fadd float %58, %57 %60 = fadd float %59, %23 %61 = fmul float %24, %47 %62 = fmul float %25, %48 %63 = fadd float %62, %61 %64 = fadd float %63, %26 %65 = fmul float %27, %47 %66 = fmul float %28, %48 %67 = fadd float %66, %65 %68 = fadd float %67, %29 %69 = fmul float %30, %47 %70 = fmul float %31, %48 %71 = fadd float %70, %69 %72 = fadd float %71, %32 %73 = fmul float %41, %14 %74 = fsub float %73, %42 %75 = fmul float %33, %42 %76 = fadd float %75, %39 %77 = fmul float %34, %42 %78 = fsub float %77, %40 %79 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 16 %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 %81 = call float @llvm.SI.load.const(<16 x i8> %80, i32 0) %82 = fmul float %81, %39 %83 = call float @llvm.SI.load.const(<16 x i8> %80, i32 4) %84 = fmul float %83, %40 %85 = fadd float %82, %84 %86 = call float @llvm.SI.load.const(<16 x i8> %80, i32 8) %87 = fmul float %86, %41 %88 = fadd float %85, %87 %89 = call float @llvm.SI.load.const(<16 x i8> %80, i32 12) %90 = fmul float %89, %42 %91 = fadd float %88, %90 %92 = call float @llvm.SI.load.const(<16 x i8> %80, i32 16) %93 = fmul float %92, %39 %94 = call float @llvm.SI.load.const(<16 x i8> %80, i32 20) %95 = fmul float %94, %40 %96 = fadd float %93, %95 %97 = call float @llvm.SI.load.const(<16 x i8> %80, i32 24) %98 = fmul float %97, %41 %99 = fadd float %96, %98 %100 = call float @llvm.SI.load.const(<16 x i8> %80, i32 28) %101 = fmul float %100, %42 %102 = fadd float %99, %101 %103 = call float @llvm.SI.load.const(<16 x i8> %80, i32 32) %104 = fmul float %103, %39 %105 = call float @llvm.SI.load.const(<16 x i8> %80, i32 36) %106 = fmul float %105, %40 %107 = fadd float %104, %106 %108 = call float @llvm.SI.load.const(<16 x i8> %80, i32 40) %109 = fmul float %108, %41 %110 = fadd float %107, %109 %111 = call float @llvm.SI.load.const(<16 x i8> %80, i32 44) %112 = fmul float %111, %42 %113 = fadd float %110, %112 %114 = call float @llvm.SI.load.const(<16 x i8> %80, i32 48) %115 = fmul float %114, %39 %116 = call float @llvm.SI.load.const(<16 x i8> %80, i32 52) %117 = fmul float %116, %40 %118 = fadd float %115, %117 %119 = call float @llvm.SI.load.const(<16 x i8> %80, i32 56) %120 = fmul float %119, %41 %121 = fadd float %118, %120 %122 = call float @llvm.SI.load.const(<16 x i8> %80, i32 60) %123 = fmul float %122, %42 %124 = fadd float %121, %123 %125 = call float @llvm.SI.load.const(<16 x i8> %80, i32 64) %126 = fmul float %125, %39 %127 = call float @llvm.SI.load.const(<16 x i8> %80, i32 68) %128 = fmul float %127, %40 %129 = fadd float %126, %128 %130 = call float @llvm.SI.load.const(<16 x i8> %80, i32 72) %131 = fmul float %130, %41 %132 = fadd float %129, %131 %133 = call float @llvm.SI.load.const(<16 x i8> %80, i32 76) %134 = fmul float %133, %42 %135 = fadd float %132, %134 %136 = call float @llvm.SI.load.const(<16 x i8> %80, i32 80) %137 = fmul float %136, %39 %138 = call float @llvm.SI.load.const(<16 x i8> %80, i32 84) %139 = fmul float %138, %40 %140 = fadd float %137, %139 %141 = call float @llvm.SI.load.const(<16 x i8> %80, i32 88) %142 = fmul float %141, %41 %143 = fadd float %140, %142 %144 = call float @llvm.SI.load.const(<16 x i8> %80, i32 92) %145 = fmul float %144, %42 %146 = fadd float %143, %145 %147 = call float @llvm.SI.load.const(<16 x i8> %80, i32 96) %148 = fmul float %147, %39 %149 = call float @llvm.SI.load.const(<16 x i8> %80, i32 100) %150 = fmul float %149, %40 %151 = fadd float %148, %150 %152 = call float @llvm.SI.load.const(<16 x i8> %80, i32 104) %153 = fmul float %152, %41 %154 = fadd float %151, %153 %155 = call float @llvm.SI.load.const(<16 x i8> %80, i32 108) %156 = fmul float %155, %42 %157 = fadd float %154, %156 %158 = call float @llvm.SI.load.const(<16 x i8> %80, i32 112) %159 = fmul float %158, %39 %160 = call float @llvm.SI.load.const(<16 x i8> %80, i32 116) %161 = fmul float %160, %40 %162 = fadd float %159, %161 %163 = call float @llvm.SI.load.const(<16 x i8> %80, i32 120) %164 = fmul float %163, %41 %165 = fadd float %162, %164 %166 = call float @llvm.SI.load.const(<16 x i8> %80, i32 124) %167 = fmul float %166, %42 %168 = fadd float %165, %167 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %47, float %48, float %52, float %56) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %60, float %64, float %68, float %72) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %76, float %78, float %74, float %42) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 13, i32 0, float %91, float %102, float %113, float %124) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 14, i32 0, float %135, float %146, float %157, float %168) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_load_dwordx4 s[4:7], s[2:3], 0x0 ; C0820300 s_load_dwordx4 s[0:3], s[2:3], 0x40 ; C0800340 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[4:7], 0x2 ; C2040502 s_buffer_load_dword s9, s[4:7], 0xc0 ; C20485C0 s_buffer_load_dword s10, s[4:7], 0xc1 ; C20505C1 s_buffer_load_dword s11, s[4:7], 0xc3 ; C20585C3 s_buffer_load_dword s12, s[4:7], 0xc4 ; C20605C4 s_buffer_load_dword s13, s[4:7], 0xc5 ; C20685C5 s_buffer_load_dword s14, s[4:7], 0xc7 ; C20705C7 s_buffer_load_dword s15, s[4:7], 0xc8 ; C20785C8 s_buffer_load_dword s16, s[4:7], 0xc9 ; C20805C9 s_buffer_load_dword s17, s[4:7], 0xcb ; C20885CB s_buffer_load_dword s18, s[4:7], 0xcc ; C20905CC s_buffer_load_dword s19, s[4:7], 0xcd ; C20985CD s_buffer_load_dword s20, s[4:7], 0xcf ; C20A05CF s_buffer_load_dword s21, s[4:7], 0xd0 ; C20A85D0 s_buffer_load_dword s22, s[4:7], 0xd1 ; C20B05D1 s_buffer_load_dword s23, s[4:7], 0xd3 ; C20B85D3 s_buffer_load_dword s24, s[4:7], 0xd4 ; C20C05D4 s_buffer_load_dword s25, s[4:7], 0xd5 ; C20C85D5 s_buffer_load_dword s26, s[4:7], 0xd7 ; C20D05D7 s_buffer_load_dword s27, s[4:7], 0xd8 ; C20D85D8 s_buffer_load_dword s4, s[4:7], 0xd9 ; C20205D9 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x2 ; C2038102 s_buffer_load_dword s28, s[0:3], 0x3 ; C20E0103 s_buffer_load_dword s29, s[0:3], 0x4 ; C20E8104 s_buffer_load_dword s30, s[0:3], 0x5 ; C20F0105 s_buffer_load_dword s31, s[0:3], 0x6 ; C20F8106 s_buffer_load_dword s32, s[0:3], 0x7 ; C2100107 s_buffer_load_dword s33, s[0:3], 0x8 ; C2108108 s_buffer_load_dword s34, s[0:3], 0x9 ; C2110109 s_buffer_load_dword s35, s[0:3], 0xa ; C211810A s_buffer_load_dword s36, s[0:3], 0xb ; C212010B s_buffer_load_dword s37, s[0:3], 0xc ; C212810C s_buffer_load_dword s38, s[0:3], 0xd ; C213010D s_buffer_load_dword s39, s[0:3], 0xe ; C213810E s_buffer_load_dword s40, s[0:3], 0xf ; C214010F s_buffer_load_dword s41, s[0:3], 0x10 ; C2148110 s_buffer_load_dword s42, s[0:3], 0x11 ; C2150111 s_buffer_load_dword s43, s[0:3], 0x12 ; C2158112 s_buffer_load_dword s44, s[0:3], 0x13 ; C2160113 s_buffer_load_dword s45, s[0:3], 0x14 ; C2168114 s_buffer_load_dword s46, s[0:3], 0x15 ; C2170115 s_buffer_load_dword s47, s[0:3], 0x16 ; C2178116 s_buffer_load_dword s48, s[0:3], 0x17 ; C2180117 s_buffer_load_dword s49, s[0:3], 0x18 ; C2188118 s_buffer_load_dword s50, s[0:3], 0x19 ; C2190119 s_buffer_load_dword s51, s[0:3], 0x1a ; C219811A s_buffer_load_dword s52, s[0:3], 0x1b ; C21A011B s_buffer_load_dword s53, s[0:3], 0x1c ; C21A811C s_buffer_load_dword s54, s[0:3], 0x1d ; C21B011D s_buffer_load_dword s55, s[0:3], 0x1e ; C21B811E s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s11 ; 7E00020B s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v7, s14 ; 7E0E020E v_mov_b32_e32 v8, s17 ; 7E100211 v_mov_b32_e32 v9, s20 ; 7E120214 v_mov_b32_e32 v10, s23 ; 7E140217 v_mov_b32_e32 v11, s26 ; 7E16021A v_mac_f32_e32 v0, s9, v5 ; 3E000A09 v_mac_f32_e32 v7, s12, v5 ; 3E0E0A0C v_mac_f32_e32 v8, s15, v5 ; 3E100A0F v_mac_f32_e32 v9, s18, v5 ; 3E120A12 v_mac_f32_e32 v10, s21, v5 ; 3E140A15 v_mac_f32_e32 v11, s24, v5 ; 3E160A18 v_mac_f32_e32 v0, s10, v6 ; 3E000C0A v_mac_f32_e32 v7, s13, v6 ; 3E0E0C0D v_mac_f32_e32 v8, s16, v6 ; 3E100C10 v_mac_f32_e32 v9, s19, v6 ; 3E120C13 exp 15, 32, 0, 0, 0, v5, v6, v0, v7 ; F800020F 07000605 v_mac_f32_e32 v10, s22, v6 ; 3E140C16 v_mac_f32_e32 v11, s25, v6 ; 3E160C19 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v0, v3, s8, -v4 ; D2820000 84101103 v_mad_f32 v5, s27, v4, v1 ; D2820005 0406081B v_mad_f32 v6, s4, v4, -v2 ; D2820006 840A0804 v_mul_f32_e32 v7, s6, v2 ; 100E0406 exp 15, 33, 0, 0, 0, v8, v9, v10, v11 ; F800021F 0B0A0908 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v8, s30, v2 ; 1010041E v_mac_f32_e32 v7, s5, v1 ; 3E0E0205 exp 15, 12, 0, 0, 0, v5, v6, v0, v4 ; F80000CF 04000605 v_mac_f32_e32 v8, s29, v1 ; 3E10021D s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s34, v2 ; 10000422 v_mac_f32_e32 v0, s33, v1 ; 3E000221 v_mul_f32_e32 v5, s38, v2 ; 100A0426 v_mac_f32_e32 v5, s37, v1 ; 3E0A0225 v_mul_f32_e32 v6, s42, v2 ; 100C042A v_mul_f32_e32 v9, s46, v2 ; 1012042E v_mul_f32_e32 v10, s50, v2 ; 10140432 v_mul_f32_e32 v2, s54, v2 ; 10040436 v_mac_f32_e32 v6, s41, v1 ; 3E0C0229 v_mac_f32_e32 v9, s45, v1 ; 3E12022D v_mac_f32_e32 v10, s49, v1 ; 3E140231 v_mac_f32_e32 v2, s53, v1 ; 3E040235 v_mac_f32_e32 v7, s7, v3 ; 3E0E0607 v_mac_f32_e32 v8, s31, v3 ; 3E10061F v_mac_f32_e32 v0, s35, v3 ; 3E000623 v_mac_f32_e32 v5, s39, v3 ; 3E0A0627 v_mac_f32_e32 v6, s43, v3 ; 3E0C062B v_mac_f32_e32 v9, s47, v3 ; 3E12062F v_mac_f32_e32 v10, s51, v3 ; 3E140633 v_mac_f32_e32 v2, s55, v3 ; 3E040637 v_mac_f32_e32 v7, s28, v4 ; 3E0E081C v_mac_f32_e32 v8, s32, v4 ; 3E100820 v_mac_f32_e32 v0, s36, v4 ; 3E000824 v_mac_f32_e32 v5, s40, v4 ; 3E0A0828 v_mac_f32_e32 v6, s44, v4 ; 3E0C082C v_mac_f32_e32 v9, s48, v4 ; 3E120830 v_mac_f32_e32 v10, s52, v4 ; 3E140834 v_mac_f32_e32 v2, s0, v4 ; 3E040800 exp 15, 13, 0, 0, 0, v7, v8, v0, v5 ; F80000DF 05000807 exp 15, 14, 0, 1, 0, v6, v9, v10, v2 ; F80008EF 020A0906 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 12 Code Size: 544 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL CONST[0..3] DCL TEMP[0..6], LOCAL IMM[0] FLT32 { 6.0000, 1.0000, 0.7500, 0.2500} IMM[1] FLT32 { -1.0000, 0.3000, 0.5900, 0.1100} IMM[2] FLT32 { -0.0800, 14.2857, -2.0000, 3.0000} IMM[3] FLT32 { 0.0300, 0.5000, -0.5800, 10.0000} IMM[4] FLT32 { -0.9000, -0.0100, 10.0000, -100.0000} IMM[5] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: ADD TEMP[0].xyz, -CONST[0].xyzz, CONST[1].xyzz 1: MOV TEMP[1].xy, IN[0].xyyy 2: TEX TEMP[1].xyz, TEMP[1], SAMP[3], 2D 3: MAD TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[0].xyzz 4: LRP TEMP[2].xyz, TEMP[1].yyyy, CONST[2].xyzz, TEMP[0].xyzz 5: ADD TEMP[0].x, -TEMP[2].xxxx, CONST[0].wwww 6: ADD TEMP[3].x, -TEMP[2].yyyy, CONST[1].wwww 7: MOV TEMP[0].y, TEMP[3].xxxx 8: ADD TEMP[3].x, -TEMP[2].zzzz, CONST[2].wwww 9: MOV TEMP[0].z, TEMP[3].xxxx 10: MAD TEMP[0].xyz, TEMP[1].zzzz, TEMP[0].xyzz, TEMP[2].xyzz 11: MOV TEMP[1].xy, IN[1].zwww 12: TEX TEMP[1], TEMP[1], SAMP[4], 2D 13: MUL TEMP[3].x, IMM[0].zzzz, CONST[3].wwww 14: MOV TEMP[4].xy, IN[0].xyyy 15: TEX TEMP[4], TEMP[4], SAMP[0], 2D 16: ADD TEMP[2].x, -TEMP[4].xxxx, IMM[0].yyyy 17: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx 18: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx 19: MAD TEMP[3].x, TEMP[2].xxxx, IMM[0].wwww, TEMP[3].xxxx 20: ADD TEMP[5], TEMP[1], IMM[1].xxxx 21: MAD TEMP[3], TEMP[3].xxxx, TEMP[5], IMM[0].yyyy 22: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[3].xyzz 23: DP3 TEMP[5].x, TEMP[0].xyzz, IMM[1].yzww 24: ADD TEMP[5].x, TEMP[5].xxxx, IMM[2].xxxx 25: MUL TEMP[5].x, TEMP[5].xxxx, IMM[2].yyyy 26: MOV_SAT TEMP[5].x, TEMP[5].xxxx 27: MAD TEMP[1].x, TEMP[5].xxxx, IMM[2].zzzz, IMM[2].wwww 28: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[5].xxxx 29: MAD TEMP[5].x, TEMP[1].xxxx, -TEMP[5].xxxx, IMM[0].yyyy 30: MAD TEMP[1].xyz, TEMP[5].xxxx, IMM[3].xxxx, TEMP[0].xyzz 31: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[4].zzzz 32: MAD TEMP[0].xyz, TEMP[1].xyzz, IMM[3].yyyy, TEMP[0].xyzz 33: MOV_SAT TEMP[5].xyz, TEMP[0].xyzz 34: MUL TEMP[1].xyz, TEMP[4].yyyy, TEMP[5].xyzz 35: MOV TEMP[6].xy, IN[0].xyyy 36: TEX TEMP[6], TEMP[6], SAMP[2], 2D 37: MAD TEMP[0].xyz, TEMP[5].xyzz, -TEMP[4].yyyy, TEMP[6].xyzz 38: MOV TEMP[5].xy, IN[1].xyyy 39: TEX TEMP[5].y, TEMP[5], SAMP[1], 2D 40: MAD TEMP[5].x, TEMP[5].yyyy, TEMP[4].xxxx, TEMP[4].wwww 41: MUL TEMP[4].x, TEMP[4].yyyy, CONST[3].zzzz 42: MUL TEMP[3].x, TEMP[3].wwww, TEMP[4].xxxx 43: MAD TEMP[2].x, CONST[3].wwww, IMM[0].xxxx, IMM[0].yyyy 44: MAD TEMP[4].x, TEMP[5].xxxx, TEMP[2].xxxx, IMM[3].zzzz 45: MUL TEMP[4].x, TEMP[4].xxxx, IMM[3].wwww 46: MOV_SAT TEMP[4].x, TEMP[4].xxxx 47: MAD TEMP[2].x, TEMP[4].xxxx, IMM[2].zzzz, IMM[2].wwww 48: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[4].xxxx 49: MUL TEMP[5].x, TEMP[4].xxxx, TEMP[2].xxxx 50: MAD TEMP[2].xy, TEMP[2].xxxx, TEMP[4].xxxx, IMM[4].xyyy 51: MUL TEMP[2].xy, TEMP[2].xyyy, IMM[4].zwww 52: MAD TEMP[1].xyz, TEMP[5].xxxx, TEMP[0].xyzz, TEMP[1].xyzz 53: MAX TEMP[4].x, TEMP[2].yyyy, IMM[5].xxxx 54: MOV_SAT TEMP[2].x, TEMP[2].xxxx 55: MAD TEMP[5].x, TEMP[4].xxxx, IMM[2].zzzz, IMM[2].wwww 56: MUL TEMP[0].x, TEMP[4].xxxx, TEMP[4].xxxx 57: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[5].xxxx 58: MAD TEMP[4].x, TEMP[3].xxxx, -TEMP[0].xxxx, TEMP[6].wwww 59: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[3].xxxx 60: MAD TEMP[3].x, TEMP[2].xxxx, IMM[2].zzzz, IMM[2].wwww 61: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx 62: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[3].xxxx 63: MAD TEMP[0].x, TEMP[2].xxxx, TEMP[4].xxxx, TEMP[0].xxxx 64: MOV TEMP[1].w, TEMP[0].xxxx 65: MOV OUT[0], TEMP[1] 66: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0) %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 4) %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 8) %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 12) %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 16) %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 20) %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 24) %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 28) %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 32) %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 36) %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 40) %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 44) %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 56) %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 60) %39 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %40 = load <8 x i32>, <8 x i32> addrspace(2)* %39, align 32, !tbaa !0 %41 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %42 = load <4 x i32>, <4 x i32> addrspace(2)* %41, align 16, !tbaa !0 %43 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %44 = load <8 x i32>, <8 x i32> addrspace(2)* %43, align 32, !tbaa !0 %45 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %46 = load <4 x i32>, <4 x i32> addrspace(2)* %45, align 16, !tbaa !0 %47 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %48 = load <8 x i32>, <8 x i32> addrspace(2)* %47, align 32, !tbaa !0 %49 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %50 = load <4 x i32>, <4 x i32> addrspace(2)* %49, align 16, !tbaa !0 %51 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %52 = load <8 x i32>, <8 x i32> addrspace(2)* %51, align 32, !tbaa !0 %53 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %54 = load <4 x i32>, <4 x i32> addrspace(2)* %53, align 16, !tbaa !0 %55 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %56 = load <8 x i32>, <8 x i32> addrspace(2)* %55, align 32, !tbaa !0 %57 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %58 = load <4 x i32>, <4 x i32> addrspace(2)* %57, align 16, !tbaa !0 %59 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %60 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %61 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %62 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %63 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) %64 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %65 = fsub float %29, %25 %66 = fsub float %30, %26 %67 = fsub float %31, %27 %68 = bitcast float %59 to i32 %69 = bitcast float %60 to i32 %70 = insertelement <2 x i32> undef, i32 %68, i32 0 %71 = insertelement <2 x i32> %70, i32 %69, i32 1 %72 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %71, <8 x i32> %52, <4 x i32> %54, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %73 = extractelement <4 x float> %72, i32 0 %74 = extractelement <4 x float> %72, i32 1 %75 = extractelement <4 x float> %72, i32 2 %76 = fmul float %73, %65 %77 = fadd float %76, %25 %78 = fmul float %73, %66 %79 = fadd float %78, %26 %80 = fmul float %73, %67 %81 = fadd float %80, %27 %82 = fsub float 1.000000e+00, %74 %83 = fmul float %33, %74 %84 = fmul float %77, %82 %85 = fadd float %83, %84 %86 = fsub float 1.000000e+00, %74 %87 = fmul float %34, %74 %88 = fmul float %79, %86 %89 = fadd float %87, %88 %90 = fsub float 1.000000e+00, %74 %91 = fmul float %35, %74 %92 = fmul float %81, %90 %93 = fadd float %91, %92 %94 = fsub float %28, %85 %95 = fsub float %32, %89 %96 = fsub float %36, %93 %97 = fmul float %75, %94 %98 = fadd float %97, %85 %99 = fmul float %75, %95 %100 = fadd float %99, %89 %101 = fmul float %75, %96 %102 = fadd float %101, %93 %103 = bitcast float %63 to i32 %104 = bitcast float %64 to i32 %105 = insertelement <2 x i32> undef, i32 %103, i32 0 %106 = insertelement <2 x i32> %105, i32 %104, i32 1 %107 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %106, <8 x i32> %56, <4 x i32> %58, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %108 = extractelement <4 x float> %107, i32 0 %109 = extractelement <4 x float> %107, i32 1 %110 = extractelement <4 x float> %107, i32 2 %111 = extractelement <4 x float> %107, i32 3 %112 = fmul float %38, 7.500000e-01 %113 = bitcast float %59 to i32 %114 = bitcast float %60 to i32 %115 = insertelement <2 x i32> undef, i32 %113, i32 0 %116 = insertelement <2 x i32> %115, i32 %114, i32 1 %117 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %116, <8 x i32> %40, <4 x i32> %42, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %118 = extractelement <4 x float> %117, i32 0 %119 = extractelement <4 x float> %117, i32 1 %120 = extractelement <4 x float> %117, i32 2 %121 = extractelement <4 x float> %117, i32 3 %122 = fsub float 1.000000e+00, %118 %123 = fmul float %122, %122 %124 = fmul float %123, %123 %125 = fmul float %124, 2.500000e-01 %126 = fadd float %125, %112 %127 = fadd float %108, -1.000000e+00 %128 = fadd float %109, -1.000000e+00 %129 = fadd float %110, -1.000000e+00 %130 = fadd float %111, -1.000000e+00 %131 = fmul float %126, %127 %132 = fadd float %131, 1.000000e+00 %133 = fmul float %126, %128 %134 = fadd float %133, 1.000000e+00 %135 = fmul float %126, %129 %136 = fadd float %135, 1.000000e+00 %137 = fmul float %126, %130 %138 = fadd float %137, 1.000000e+00 %139 = fmul float %98, %132 %140 = fmul float %100, %134 %141 = fmul float %102, %136 %142 = fmul float %139, 0x3FD3333340000000 %143 = fmul float %140, 0x3FE2E147A0000000 %144 = fadd float %143, %142 %145 = fmul float %141, 0x3FBC28F5C0000000 %146 = fadd float %144, %145 %147 = fadd float %146, 0xBFB47AE140000000 %148 = fmul float %147, 0x402C924920000000 %149 = call float @llvm.AMDIL.clamp.(float %148, float 0.000000e+00, float 1.000000e+00) %150 = fmul float %149, -2.000000e+00 %151 = fadd float %150, 3.000000e+00 %152 = fmul float %149, %149 %153 = fmul float %152, %151 %154 = fsub float 1.000000e+00, %153 %155 = fmul float %154, 0x3F9EB851E0000000 %156 = fadd float %155, %139 %157 = fmul float %154, 0x3F9EB851E0000000 %158 = fadd float %157, %140 %159 = fmul float %154, 0x3F9EB851E0000000 %160 = fadd float %159, %141 %161 = fmul float %156, %120 %162 = fmul float %158, %120 %163 = fmul float %160, %120 %164 = fmul float %161, 5.000000e-01 %165 = fadd float %164, %139 %166 = fmul float %162, 5.000000e-01 %167 = fadd float %166, %140 %168 = fmul float %163, 5.000000e-01 %169 = fadd float %168, %141 %170 = call float @llvm.AMDIL.clamp.(float %165, float 0.000000e+00, float 1.000000e+00) %171 = call float @llvm.AMDIL.clamp.(float %167, float 0.000000e+00, float 1.000000e+00) %172 = call float @llvm.AMDIL.clamp.(float %169, float 0.000000e+00, float 1.000000e+00) %173 = fmul float %119, %170 %174 = fmul float %119, %171 %175 = fmul float %119, %172 %176 = bitcast float %59 to i32 %177 = bitcast float %60 to i32 %178 = insertelement <2 x i32> undef, i32 %176, i32 0 %179 = insertelement <2 x i32> %178, i32 %177, i32 1 %180 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %179, <8 x i32> %48, <4 x i32> %50, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %181 = extractelement <4 x float> %180, i32 0 %182 = extractelement <4 x float> %180, i32 1 %183 = extractelement <4 x float> %180, i32 2 %184 = extractelement <4 x float> %180, i32 3 %185 = fmul float %119, %170 %186 = fsub float %181, %185 %187 = fmul float %119, %171 %188 = fsub float %182, %187 %189 = fmul float %119, %172 %190 = fsub float %183, %189 %191 = bitcast float %61 to i32 %192 = bitcast float %62 to i32 %193 = insertelement <2 x i32> undef, i32 %191, i32 0 %194 = insertelement <2 x i32> %193, i32 %192, i32 1 %195 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %194, <8 x i32> %44, <4 x i32> %46, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %196 = extractelement <4 x float> %195, i32 1 %197 = fmul float %196, %118 %198 = fadd float %197, %121 %199 = fmul float %119, %37 %200 = fmul float %138, %199 %201 = fmul float %38, 6.000000e+00 %202 = fadd float %201, 1.000000e+00 %203 = fmul float %198, %202 %204 = fadd float %203, 0xBFE28F5C20000000 %205 = fmul float %204, 1.000000e+01 %206 = call float @llvm.AMDIL.clamp.(float %205, float 0.000000e+00, float 1.000000e+00) %207 = fmul float %206, -2.000000e+00 %208 = fadd float %207, 3.000000e+00 %209 = fmul float %206, %206 %210 = fmul float %209, %208 %211 = fmul float %208, %209 %212 = fadd float %211, 0xBFECCCCCC0000000 %213 = fmul float %208, %209 %214 = fadd float %213, 0xBF847AE140000000 %215 = fmul float %212, 1.000000e+01 %216 = fmul float %214, -1.000000e+02 %217 = fmul float %210, %186 %218 = fadd float %217, %173 %219 = fmul float %210, %188 %220 = fadd float %219, %174 %221 = fmul float %210, %190 %222 = fadd float %221, %175 %223 = call float @llvm.maxnum.f32(float %216, float 0.000000e+00) %224 = call float @llvm.AMDIL.clamp.(float %215, float 0.000000e+00, float 1.000000e+00) %225 = fmul float %223, -2.000000e+00 %226 = fadd float %225, 3.000000e+00 %227 = fmul float %223, %223 %228 = fmul float %227, %226 %229 = fmul float %228, %200 %230 = fsub float %184, %229 %231 = fmul float %228, %200 %232 = fmul float %224, -2.000000e+00 %233 = fadd float %232, 3.000000e+00 %234 = fmul float %224, %224 %235 = fmul float %234, %233 %236 = fmul float %235, %230 %237 = fadd float %236, %231 %238 = call i32 @llvm.SI.packf16(float %218, float %220) %239 = bitcast i32 %238 to float %240 = call i32 @llvm.SI.packf16(float %222, float %237) %241 = bitcast i32 %240 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %239, float %241, float %239, float %241) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s10 ; BEFC030A v_mov_b32_e32 v2, 0x3f400000 ; 7E0402FF 3F400000 v_mov_b32_e32 v3, 0xbda3d70a ; 7E0602FF BDA3D70A v_mov_b32_e32 v4, 0x40400000 ; 7E0802FF 40400000 v_mov_b32_e32 v5, 0x3cf5c28f ; 7E0A02FF 3CF5C28F v_mov_b32_e32 v6, 0x40c00000 ; 7E0C02FF 40C00000 v_mov_b32_e32 v7, 0x41200000 ; 7E0E02FF 41200000 v_mov_b32_e32 v8, 0xc2c80000 ; 7E1002FF C2C80000 v_interp_p1_f32 v9, v0, 0, 0, [m0] ; C8240000 v_interp_p2_f32 v9, [v9], v1, 0, 0, [m0] ; C8250001 v_interp_p1_f32 v10, v0, 1, 0, [m0] ; C8280100 v_interp_p2_f32 v10, [v10], v1, 1, 0, [m0] ; C8290101 v_interp_p1_f32 v11, v0, 0, 1, [m0] ; C82C0400 v_interp_p2_f32 v11, [v11], v1, 0, 1, [m0] ; C82D0401 v_interp_p1_f32 v12, v0, 1, 1, [m0] ; C8300500 v_interp_p2_f32 v12, [v12], v1, 1, 1, [m0] ; C8310501 v_interp_p1_f32 v13, v0, 2, 1, [m0] ; C8340600 v_interp_p2_f32 v13, [v13], v1, 2, 1, [m0] ; C8350601 v_interp_p1_f32 v14, v0, 3, 1, [m0] ; C8380700 v_interp_p2_f32 v14, [v14], v1, 3, 1, [m0] ; C8390701 s_load_dwordx4 s[44:47], s[2:3], 0x0 ; C0960300 s_load_dwordx4 s[28:31], s[4:5], 0x0 ; C08E0500 s_load_dwordx4 s[0:3], s[4:5], 0x4 ; C0800504 s_load_dwordx4 s[16:19], s[4:5], 0x8 ; C0880508 s_load_dwordx4 s[52:55], s[4:5], 0xc ; C09A050C s_load_dwordx4 s[40:43], s[4:5], 0x10 ; C0940510 s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700 s_load_dwordx8 s[8:15], s[6:7], 0x8 ; C0C40708 s_load_dwordx8 s[20:27], s[6:7], 0x10 ; C0CA0710 s_load_dwordx8 s[56:63], s[6:7], 0x18 ; C0DC0718 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[44:47], 0x0 ; C2022D00 s_buffer_load_dword s5, s[44:47], 0x1 ; C202AD01 s_buffer_load_dword s64, s[44:47], 0x2 ; C2202D02 s_buffer_load_dword s65, s[44:47], 0x3 ; C220AD03 s_buffer_load_dword s48, s[44:47], 0x4 ; C2182D04 s_buffer_load_dword s49, s[44:47], 0x5 ; C218AD05 s_buffer_load_dword s50, s[44:47], 0x6 ; C2192D06 s_buffer_load_dword s66, s[44:47], 0x7 ; C2212D07 s_buffer_load_dword s67, s[44:47], 0x8 ; C221AD08 s_buffer_load_dword s68, s[44:47], 0x9 ; C2222D09 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s4 ; 7E000204 s_buffer_load_dword s69, s[44:47], 0xa ; C222AD0A v_mov_b32_e32 v1, s5 ; 7E020205 s_buffer_load_dword s70, s[44:47], 0xb ; C2232D0B v_mov_b32_e32 v15, s64 ; 7E1E0240 s_buffer_load_dword s71, s[44:47], 0xe ; C223AD0E s_buffer_load_dword s72, s[44:47], 0xf ; C2242D0F v_sub_f32_e32 v0, s48, v0 ; 08000030 v_sub_f32_e32 v1, s49, v1 ; 08020231 v_sub_f32_e32 v15, s50, v15 ; 081E1E32 s_load_dwordx8 s[44:51], s[6:7], 0x20 ; C0D60720 image_sample v[16:18], 7, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[56:63], s[52:55] ; F0800700 01AE1009 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v16, v0, s4 ; D2820000 00120110 v_mad_f32 v1, v16, v1, s5 ; D2820001 00160310 v_mad_f32 v19, -v17, v0, v0 ; D2820013 24020111 v_mac_f32_e32 v19, s67, v17 ; 3E262243 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[20:23], 15, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[44:51], s[40:43] ; F0800F00 014B140D image_sample v[24:27], 15, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[32:39], s[28:31] ; F0800F00 00E81809 v_sub_f32_e32 v13, 1.0, v17 ; 081A22F2 v_mad_f32 v0, -v0, v13, s65 ; D2820000 21061B00 v_mad_f32 v0, -s67, v17, v0 ; D2820000 24022243 v_mac_f32_e32 v19, v0, v18 ; 3E262500 v_mad_f32 v0, -v17, v1, v1 ; D2820000 24060311 v_mac_f32_e32 v0, s68, v17 ; 3E002244 v_mad_f32 v1, -v1, v13, s66 ; D2820001 210A1B01 v_mad_f32 v1, -s68, v17, v1 ; D2820001 24062244 v_mac_f32_e32 v0, v1, v18 ; 3E002501 v_mad_f32 v1, v16, v15, s64 ; D2820001 01021F10 v_mad_f32 v13, -v1, v13, s70 ; D282000D 211A1B01 v_mad_f32 v1, -v17, v1, v1 ; D2820001 24060311 v_mac_f32_e32 v1, s69, v17 ; 3E022245 v_mad_f32 v13, -s69, v17, v13 ; D282000D 24362245 v_mac_f32_e32 v1, v13, v18 ; 3E02250D v_mul_f32_e32 v2, s72, v2 ; 10040448 s_waitcnt vmcnt(0) ; BF8C0770 v_sub_f32_e32 v13, 1.0, v24 ; 081A30F2 v_mad_f32 v13, -v24, v13, v13 ; D282000D 24361B18 v_mul_f32_e32 v13, v13, v13 ; 101A1B0D v_madmk_f32_e32 v2, v13, v2, 0x3e800000 ; 4004050D 3E800000 v_mad_f32 v13, v20, v2, -v2 ; D282000D 840A0514 v_mad_f32 v14, v21, v2, -v2 ; D282000E 840A0515 v_mad_f32 v15, v22, v2, -v2 ; D282000F 840A0516 v_mad_f32 v2, v23, v2, -v2 ; D2820002 840A0517 v_mac_f32_e32 v19, v19, v13 ; 3E261B13 v_mac_f32_e32 v0, v0, v14 ; 3E001D00 v_mac_f32_e32 v1, v1, v15 ; 3E021F01 v_mul_f32_e32 v13, 0x3e99999a, v19 ; 101A26FF 3E99999A v_madmk_f32_e32 v13, v0, v13, 0x3f170a3d ; 401A1B00 3F170A3D v_madmk_f32_e32 v13, v1, v13, 0x3de147ae ; 401A1B01 3DE147AE v_add_f32_e32 v3, v13, v3 ; 0606070D v_mul_f32_e32 v3, 0x41649249, v3 ; 100606FF 41649249 v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680 v_mad_f32 v13, -2.0, v3, v4 ; D282000D 041206F5 v_mul_f32_e32 v3, v3, v3 ; 10060703 v_mad_f32 v3, -v3, v13, 1.0 ; D2820003 23CA1B03 v_mad_f32 v13, v5, v3, v19 ; D282000D 044E0705 v_mad_f32 v14, v5, v3, v0 ; D282000E 04020705 v_mad_f32 v3, v5, v3, v1 ; D2820003 04060705 v_mul_f32_e32 v5, v26, v13 ; 100A1B1A v_mul_f32_e32 v13, v26, v14 ; 101A1D1A v_mul_f32_e32 v3, v26, v3 ; 1006071A v_mac_f32_e32 v19, 0.5, v5 ; 3E260AF0 v_mac_f32_e32 v0, 0.5, v13 ; 3E001AF0 v_mac_f32_e32 v1, 0.5, v3 ; 3E0206F0 v_add_f32_e64 v3, 0, v19 clamp ; D2060803 00022680 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_mul_f32_e32 v5, v3, v25 ; 100A3303 v_mul_f32_e32 v13, v0, v25 ; 101A3300 image_sample v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[20:27], s[16:19] ; F0800F00 00850E09 image_sample v9, 2, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[8:15], s[0:3] ; F0800200 0002090B v_mul_f32_e32 v10, v1, v25 ; 10143301 s_waitcnt vmcnt(1) ; BF8C0771 v_mad_f32 v3, -v25, v3, v14 ; D2820003 243A0719 v_mad_f32 v0, -v25, v0, v15 ; D2820000 243E0119 v_mad_f32 v1, -v25, v1, v16 ; D2820001 24420319 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v27, v24, v9 ; 3E361318 v_mul_f32_e32 v9, s71, v25 ; 10123247 v_mac_f32_e32 v9, v9, v2 ; 3E120509 v_mad_f32 v2, v6, s72, 1.0 ; D2820002 03C89106 v_madak_f32_e32 v2, v27, v2, 0xbf147ae1 ; 4204051B BF147AE1 v_mul_f32_e32 v2, v7, v2 ; 10040507 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_mad_f32 v6, -2.0, v2, v4 ; D2820006 041204F5 v_mul_f32_e32 v2, v2, v2 ; 10040502 v_mul_f32_e32 v11, v6, v2 ; 10160506 v_madak_f32_e32 v12, v2, v6, 0xbf666666 ; 42180D02 BF666666 v_madak_f32_e32 v2, v2, v6, 0xbc23d70a ; 42040D02 BC23D70A v_mul_f32_e32 v6, v7, v12 ; 100C1907 v_mul_f32_e32 v2, v2, v8 ; 10041102 v_mac_f32_e32 v5, v3, v11 ; 3E0A1703 v_mac_f32_e32 v13, v0, v11 ; 3E1A1700 v_mac_f32_e32 v10, v1, v11 ; 3E141701 v_max_f32_e32 v0, 0, v2 ; 20000480 v_add_f32_e64 v1, 0, v6 clamp ; D2060801 00020C80 v_mad_f32 v2, -2.0, v0, v4 ; D2820002 041200F5 v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mul_f32_e32 v0, v2, v0 ; 10000102 v_mul_f32_e32 v2, v9, v0 ; 10040109 v_mad_f32 v0, -v0, v9, v17 ; D2820000 24461300 v_mac_f32_e32 v4, -2.0, v1 ; 3E0802F5 v_mul_f32_e32 v1, v1, v1 ; 10020301 v_mul_f32_e32 v1, v4, v1 ; 10020304 v_mac_f32_e32 v2, v0, v1 ; 3E040300 v_cvt_pkrtz_f16_f32_e32 v0, v5, v13 ; 5E001B05 v_cvt_pkrtz_f16_f32_e32 v1, v10, v2 ; 5E02050A exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 80 VGPRS: 28 Code Size: 840 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], CLIPVERTEX DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL CONST[0..54] DCL TEMP[0..3], LOCAL 0: MUL TEMP[0].xy, CONST[48].xyyy, IN[1].xyyy 1: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 2: ADD TEMP[1].x, TEMP[0].xxxx, CONST[48].wwww 3: MOV TEMP[1].z, TEMP[1].xxxx 4: MUL TEMP[0].xy, CONST[49].xyyy, IN[1].xyyy 5: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 6: ADD TEMP[2].x, TEMP[0].xxxx, CONST[49].wwww 7: MOV TEMP[1].w, TEMP[2].xxxx 8: MUL TEMP[0].xy, CONST[50].xyyy, IN[1].xyyy 9: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 10: ADD TEMP[2].x, TEMP[0].xxxx, CONST[50].wwww 11: MUL TEMP[0].xy, CONST[51].xyyy, IN[1].xyyy 12: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 13: ADD TEMP[3].x, TEMP[0].xxxx, CONST[51].wwww 14: MOV TEMP[2].y, TEMP[3].xxxx 15: MUL TEMP[0].xy, CONST[52].xyyy, IN[1].xyyy 16: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 17: ADD TEMP[3].x, TEMP[0].xxxx, CONST[52].wwww 18: MOV TEMP[2].z, TEMP[3].xxxx 19: MUL TEMP[0].xy, CONST[53].xyyy, IN[1].xyyy 20: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 21: ADD TEMP[0].x, TEMP[0].xxxx, CONST[53].wwww 22: MOV TEMP[2].w, TEMP[0].xxxx 23: MOV TEMP[0].xw, IN[0].xxxw 24: MOV TEMP[1].xy, IN[1].xyxx 25: MAD TEMP[3].x, IN[0].zzzz, CONST[0].zzzz, -IN[0].wwww 26: MOV TEMP[0].z, TEMP[3].xxxx 27: MOV TEMP[0].y, -IN[0].yyyy 28: MAD TEMP[0].xy, CONST[54].xyyy, IN[0].wwww, TEMP[0].xyyy 29: MOV OUT[2], TEMP[1] 30: MOV OUT[3], TEMP[2] 31: MOV OUT[0], TEMP[0] 32: MOV OUT[1], IN[0] 33: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 768) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 772) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 780) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 784) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 788) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 796) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 800) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 804) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 812) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 816) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 820) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 828) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 832) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 836) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 844) %30 = call float @llvm.SI.load.const(<16 x i8> %13, i32 848) %31 = call float @llvm.SI.load.const(<16 x i8> %13, i32 852) %32 = call float @llvm.SI.load.const(<16 x i8> %13, i32 860) %33 = call float @llvm.SI.load.const(<16 x i8> %13, i32 864) %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 868) %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 %37 = add i32 %5, %8 %38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = add i32 %5, %8 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = fmul float %15, %47 %50 = fmul float %16, %48 %51 = fadd float %50, %49 %52 = fadd float %51, %17 %53 = fmul float %18, %47 %54 = fmul float %19, %48 %55 = fadd float %54, %53 %56 = fadd float %55, %20 %57 = fmul float %21, %47 %58 = fmul float %22, %48 %59 = fadd float %58, %57 %60 = fadd float %59, %23 %61 = fmul float %24, %47 %62 = fmul float %25, %48 %63 = fadd float %62, %61 %64 = fadd float %63, %26 %65 = fmul float %27, %47 %66 = fmul float %28, %48 %67 = fadd float %66, %65 %68 = fadd float %67, %29 %69 = fmul float %30, %47 %70 = fmul float %31, %48 %71 = fadd float %70, %69 %72 = fadd float %71, %32 %73 = fmul float %41, %14 %74 = fsub float %73, %42 %75 = fmul float %33, %42 %76 = fadd float %75, %39 %77 = fmul float %34, %42 %78 = fsub float %77, %40 %79 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 16 %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 %81 = call float @llvm.SI.load.const(<16 x i8> %80, i32 0) %82 = fmul float %81, %39 %83 = call float @llvm.SI.load.const(<16 x i8> %80, i32 4) %84 = fmul float %83, %40 %85 = fadd float %82, %84 %86 = call float @llvm.SI.load.const(<16 x i8> %80, i32 8) %87 = fmul float %86, %41 %88 = fadd float %85, %87 %89 = call float @llvm.SI.load.const(<16 x i8> %80, i32 12) %90 = fmul float %89, %42 %91 = fadd float %88, %90 %92 = call float @llvm.SI.load.const(<16 x i8> %80, i32 16) %93 = fmul float %92, %39 %94 = call float @llvm.SI.load.const(<16 x i8> %80, i32 20) %95 = fmul float %94, %40 %96 = fadd float %93, %95 %97 = call float @llvm.SI.load.const(<16 x i8> %80, i32 24) %98 = fmul float %97, %41 %99 = fadd float %96, %98 %100 = call float @llvm.SI.load.const(<16 x i8> %80, i32 28) %101 = fmul float %100, %42 %102 = fadd float %99, %101 %103 = call float @llvm.SI.load.const(<16 x i8> %80, i32 32) %104 = fmul float %103, %39 %105 = call float @llvm.SI.load.const(<16 x i8> %80, i32 36) %106 = fmul float %105, %40 %107 = fadd float %104, %106 %108 = call float @llvm.SI.load.const(<16 x i8> %80, i32 40) %109 = fmul float %108, %41 %110 = fadd float %107, %109 %111 = call float @llvm.SI.load.const(<16 x i8> %80, i32 44) %112 = fmul float %111, %42 %113 = fadd float %110, %112 %114 = call float @llvm.SI.load.const(<16 x i8> %80, i32 48) %115 = fmul float %114, %39 %116 = call float @llvm.SI.load.const(<16 x i8> %80, i32 52) %117 = fmul float %116, %40 %118 = fadd float %115, %117 %119 = call float @llvm.SI.load.const(<16 x i8> %80, i32 56) %120 = fmul float %119, %41 %121 = fadd float %118, %120 %122 = call float @llvm.SI.load.const(<16 x i8> %80, i32 60) %123 = fmul float %122, %42 %124 = fadd float %121, %123 %125 = call float @llvm.SI.load.const(<16 x i8> %80, i32 64) %126 = fmul float %125, %39 %127 = call float @llvm.SI.load.const(<16 x i8> %80, i32 68) %128 = fmul float %127, %40 %129 = fadd float %126, %128 %130 = call float @llvm.SI.load.const(<16 x i8> %80, i32 72) %131 = fmul float %130, %41 %132 = fadd float %129, %131 %133 = call float @llvm.SI.load.const(<16 x i8> %80, i32 76) %134 = fmul float %133, %42 %135 = fadd float %132, %134 %136 = call float @llvm.SI.load.const(<16 x i8> %80, i32 80) %137 = fmul float %136, %39 %138 = call float @llvm.SI.load.const(<16 x i8> %80, i32 84) %139 = fmul float %138, %40 %140 = fadd float %137, %139 %141 = call float @llvm.SI.load.const(<16 x i8> %80, i32 88) %142 = fmul float %141, %41 %143 = fadd float %140, %142 %144 = call float @llvm.SI.load.const(<16 x i8> %80, i32 92) %145 = fmul float %144, %42 %146 = fadd float %143, %145 %147 = call float @llvm.SI.load.const(<16 x i8> %80, i32 96) %148 = fmul float %147, %39 %149 = call float @llvm.SI.load.const(<16 x i8> %80, i32 100) %150 = fmul float %149, %40 %151 = fadd float %148, %150 %152 = call float @llvm.SI.load.const(<16 x i8> %80, i32 104) %153 = fmul float %152, %41 %154 = fadd float %151, %153 %155 = call float @llvm.SI.load.const(<16 x i8> %80, i32 108) %156 = fmul float %155, %42 %157 = fadd float %154, %156 %158 = call float @llvm.SI.load.const(<16 x i8> %80, i32 112) %159 = fmul float %158, %39 %160 = call float @llvm.SI.load.const(<16 x i8> %80, i32 116) %161 = fmul float %160, %40 %162 = fadd float %159, %161 %163 = call float @llvm.SI.load.const(<16 x i8> %80, i32 120) %164 = fmul float %163, %41 %165 = fadd float %162, %164 %166 = call float @llvm.SI.load.const(<16 x i8> %80, i32 124) %167 = fmul float %166, %42 %168 = fadd float %165, %167 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %47, float %48, float %52, float %56) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %60, float %64, float %68, float %72) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %76, float %78, float %74, float %42) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 13, i32 0, float %91, float %102, float %113, float %124) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 14, i32 0, float %135, float %146, float %157, float %168) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_load_dwordx4 s[4:7], s[2:3], 0x0 ; C0820300 s_load_dwordx4 s[0:3], s[2:3], 0x40 ; C0800340 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[4:7], 0x2 ; C2040502 s_buffer_load_dword s9, s[4:7], 0xc0 ; C20485C0 s_buffer_load_dword s10, s[4:7], 0xc1 ; C20505C1 s_buffer_load_dword s11, s[4:7], 0xc3 ; C20585C3 s_buffer_load_dword s12, s[4:7], 0xc4 ; C20605C4 s_buffer_load_dword s13, s[4:7], 0xc5 ; C20685C5 s_buffer_load_dword s14, s[4:7], 0xc7 ; C20705C7 s_buffer_load_dword s15, s[4:7], 0xc8 ; C20785C8 s_buffer_load_dword s16, s[4:7], 0xc9 ; C20805C9 s_buffer_load_dword s17, s[4:7], 0xcb ; C20885CB s_buffer_load_dword s18, s[4:7], 0xcc ; C20905CC s_buffer_load_dword s19, s[4:7], 0xcd ; C20985CD s_buffer_load_dword s20, s[4:7], 0xcf ; C20A05CF s_buffer_load_dword s21, s[4:7], 0xd0 ; C20A85D0 s_buffer_load_dword s22, s[4:7], 0xd1 ; C20B05D1 s_buffer_load_dword s23, s[4:7], 0xd3 ; C20B85D3 s_buffer_load_dword s24, s[4:7], 0xd4 ; C20C05D4 s_buffer_load_dword s25, s[4:7], 0xd5 ; C20C85D5 s_buffer_load_dword s26, s[4:7], 0xd7 ; C20D05D7 s_buffer_load_dword s27, s[4:7], 0xd8 ; C20D85D8 s_buffer_load_dword s4, s[4:7], 0xd9 ; C20205D9 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x2 ; C2038102 s_buffer_load_dword s28, s[0:3], 0x3 ; C20E0103 s_buffer_load_dword s29, s[0:3], 0x4 ; C20E8104 s_buffer_load_dword s30, s[0:3], 0x5 ; C20F0105 s_buffer_load_dword s31, s[0:3], 0x6 ; C20F8106 s_buffer_load_dword s32, s[0:3], 0x7 ; C2100107 s_buffer_load_dword s33, s[0:3], 0x8 ; C2108108 s_buffer_load_dword s34, s[0:3], 0x9 ; C2110109 s_buffer_load_dword s35, s[0:3], 0xa ; C211810A s_buffer_load_dword s36, s[0:3], 0xb ; C212010B s_buffer_load_dword s37, s[0:3], 0xc ; C212810C s_buffer_load_dword s38, s[0:3], 0xd ; C213010D s_buffer_load_dword s39, s[0:3], 0xe ; C213810E s_buffer_load_dword s40, s[0:3], 0xf ; C214010F s_buffer_load_dword s41, s[0:3], 0x10 ; C2148110 s_buffer_load_dword s42, s[0:3], 0x11 ; C2150111 s_buffer_load_dword s43, s[0:3], 0x12 ; C2158112 s_buffer_load_dword s44, s[0:3], 0x13 ; C2160113 s_buffer_load_dword s45, s[0:3], 0x14 ; C2168114 s_buffer_load_dword s46, s[0:3], 0x15 ; C2170115 s_buffer_load_dword s47, s[0:3], 0x16 ; C2178116 s_buffer_load_dword s48, s[0:3], 0x17 ; C2180117 s_buffer_load_dword s49, s[0:3], 0x18 ; C2188118 s_buffer_load_dword s50, s[0:3], 0x19 ; C2190119 s_buffer_load_dword s51, s[0:3], 0x1a ; C219811A s_buffer_load_dword s52, s[0:3], 0x1b ; C21A011B s_buffer_load_dword s53, s[0:3], 0x1c ; C21A811C s_buffer_load_dword s54, s[0:3], 0x1d ; C21B011D s_buffer_load_dword s55, s[0:3], 0x1e ; C21B811E s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s11 ; 7E00020B s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v7, s14 ; 7E0E020E v_mov_b32_e32 v8, s17 ; 7E100211 v_mov_b32_e32 v9, s20 ; 7E120214 v_mov_b32_e32 v10, s23 ; 7E140217 v_mov_b32_e32 v11, s26 ; 7E16021A v_mac_f32_e32 v0, s9, v5 ; 3E000A09 v_mac_f32_e32 v7, s12, v5 ; 3E0E0A0C v_mac_f32_e32 v8, s15, v5 ; 3E100A0F v_mac_f32_e32 v9, s18, v5 ; 3E120A12 v_mac_f32_e32 v10, s21, v5 ; 3E140A15 v_mac_f32_e32 v11, s24, v5 ; 3E160A18 v_mac_f32_e32 v0, s10, v6 ; 3E000C0A v_mac_f32_e32 v7, s13, v6 ; 3E0E0C0D v_mac_f32_e32 v8, s16, v6 ; 3E100C10 v_mac_f32_e32 v9, s19, v6 ; 3E120C13 exp 15, 32, 0, 0, 0, v5, v6, v0, v7 ; F800020F 07000605 v_mac_f32_e32 v10, s22, v6 ; 3E140C16 v_mac_f32_e32 v11, s25, v6 ; 3E160C19 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v0, v3, s8, -v4 ; D2820000 84101103 v_mad_f32 v5, s27, v4, v1 ; D2820005 0406081B v_mad_f32 v6, s4, v4, -v2 ; D2820006 840A0804 v_mul_f32_e32 v7, s6, v2 ; 100E0406 exp 15, 33, 0, 0, 0, v8, v9, v10, v11 ; F800021F 0B0A0908 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v8, s30, v2 ; 1010041E v_mac_f32_e32 v7, s5, v1 ; 3E0E0205 exp 15, 12, 0, 0, 0, v5, v6, v0, v4 ; F80000CF 04000605 v_mac_f32_e32 v8, s29, v1 ; 3E10021D s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s34, v2 ; 10000422 v_mac_f32_e32 v0, s33, v1 ; 3E000221 v_mul_f32_e32 v5, s38, v2 ; 100A0426 v_mac_f32_e32 v5, s37, v1 ; 3E0A0225 v_mul_f32_e32 v6, s42, v2 ; 100C042A v_mul_f32_e32 v9, s46, v2 ; 1012042E v_mul_f32_e32 v10, s50, v2 ; 10140432 v_mul_f32_e32 v2, s54, v2 ; 10040436 v_mac_f32_e32 v6, s41, v1 ; 3E0C0229 v_mac_f32_e32 v9, s45, v1 ; 3E12022D v_mac_f32_e32 v10, s49, v1 ; 3E140231 v_mac_f32_e32 v2, s53, v1 ; 3E040235 v_mac_f32_e32 v7, s7, v3 ; 3E0E0607 v_mac_f32_e32 v8, s31, v3 ; 3E10061F v_mac_f32_e32 v0, s35, v3 ; 3E000623 v_mac_f32_e32 v5, s39, v3 ; 3E0A0627 v_mac_f32_e32 v6, s43, v3 ; 3E0C062B v_mac_f32_e32 v9, s47, v3 ; 3E12062F v_mac_f32_e32 v10, s51, v3 ; 3E140633 v_mac_f32_e32 v2, s55, v3 ; 3E040637 v_mac_f32_e32 v7, s28, v4 ; 3E0E081C v_mac_f32_e32 v8, s32, v4 ; 3E100820 v_mac_f32_e32 v0, s36, v4 ; 3E000824 v_mac_f32_e32 v5, s40, v4 ; 3E0A0828 v_mac_f32_e32 v6, s44, v4 ; 3E0C082C v_mac_f32_e32 v9, s48, v4 ; 3E120830 v_mac_f32_e32 v10, s52, v4 ; 3E140834 v_mac_f32_e32 v2, s0, v4 ; 3E040800 exp 15, 13, 0, 0, 0, v7, v8, v0, v5 ; F80000DF 05000807 exp 15, 14, 0, 1, 0, v6, v9, v10, v2 ; F80008EF 020A0906 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 12 Code Size: 544 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL CONST[0..3] DCL TEMP[0..4], LOCAL IMM[0] FLT32 { -0.5000, -1.0000, 10.0000, -10.0000} IMM[1] FLT32 { -2.0000, 3.0000, 6.0000, 1.0000} IMM[2] FLT32 { -0.5800, -1.0000, -0.0000, 1.0000} IMM[3] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0].xw, TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].xy, IN[1].xyyy 3: TEX TEMP[1].y, TEMP[1], SAMP[1], 2D 4: MAD TEMP[0].x, TEMP[1].yyyy, TEMP[0].xxxx, TEMP[0].wwww 5: MOV TEMP[1].xy, IN[0].zwww 6: TEX TEMP[1].w, TEMP[1], SAMP[4], 2D 7: ADD TEMP[2].xy, TEMP[1].wwww, IMM[0].xyyy 8: ADD TEMP[3].x, TEMP[1].wwww, TEMP[1].wwww 9: MOV_SAT TEMP[3].x, TEMP[3].xxxx 10: MUL TEMP[2].xy, TEMP[2].xyyy, IMM[0].zwww 11: MOV_SAT TEMP[2].xy, TEMP[2].xyyy 12: MAD TEMP[1].xy, TEMP[2].xyyy, IMM[1].xxxx, IMM[1].yyyy 13: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[2].xyyy 14: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[1].xyyy 15: MUL TEMP[2].x, TEMP[2].yyyy, TEMP[2].xxxx 16: MAD TEMP[4].x, CONST[3].wwww, IMM[1].zzzz, IMM[1].wwww 17: MAD TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx, TEMP[2].xxxx 18: MAD TEMP[2].x, TEMP[3].xxxx, IMM[1].xxxx, IMM[1].yyyy 19: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx 20: MUL TEMP[2].x, TEMP[3].xxxx, TEMP[2].xxxx 21: MOV TEMP[3].xy, IN[0].xyyy 22: TEX TEMP[3].yz, TEMP[3], SAMP[3], 2D 23: ADD TEMP[3].x, TEMP[3].zzzz, TEMP[3].yyyy 24: MOV_SAT TEMP[3].x, TEMP[3].xxxx 25: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[1].wwww 26: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[1].wwww 27: MAX TEMP[2].x, TEMP[3].xxxx, TEMP[2].xxxx 28: MAD TEMP[0].x, TEMP[0].xxxx, TEMP[2].xxxx, IMM[2].xxxx 29: MUL TEMP[0].x, TEMP[0].xxxx, IMM[0].zzzz 30: MOV_SAT TEMP[2].x, TEMP[0].xxxx 31: MAD TEMP[3].x, TEMP[2].xxxx, IMM[1].xxxx, IMM[1].yyyy 32: MUL TEMP[0].x, TEMP[2].xxxx, TEMP[2].xxxx 33: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[3].xxxx 34: MOV TEMP[2].xy, IN[0].xyyy 35: TEX TEMP[2].xyz, TEMP[2], SAMP[2], 2D 36: MAD TEMP[2].xyz, CONST[3].yyyy, IMM[2].yzzz, TEMP[2].xyzz 37: MUL TEMP[1].xyz, IMM[3].xyyy, CONST[3].yyyy 38: MAD TEMP[0].xyz, TEMP[0].xxxx, TEMP[2].xyzz, TEMP[1].xyzz 39: MOV TEMP[0].w, IMM[1].wwww 40: MOV OUT[0], TEMP[0] 41: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 52) %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 60) %27 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %28 = load <8 x i32>, <8 x i32> addrspace(2)* %27, align 32, !tbaa !0 %29 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %30 = load <4 x i32>, <4 x i32> addrspace(2)* %29, align 16, !tbaa !0 %31 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %32 = load <8 x i32>, <8 x i32> addrspace(2)* %31, align 32, !tbaa !0 %33 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %34 = load <4 x i32>, <4 x i32> addrspace(2)* %33, align 16, !tbaa !0 %35 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %36 = load <8 x i32>, <8 x i32> addrspace(2)* %35, align 32, !tbaa !0 %37 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %38 = load <4 x i32>, <4 x i32> addrspace(2)* %37, align 16, !tbaa !0 %39 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %40 = load <8 x i32>, <8 x i32> addrspace(2)* %39, align 32, !tbaa !0 %41 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %42 = load <4 x i32>, <4 x i32> addrspace(2)* %41, align 16, !tbaa !0 %43 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %44 = load <8 x i32>, <8 x i32> addrspace(2)* %43, align 32, !tbaa !0 %45 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %46 = load <4 x i32>, <4 x i32> addrspace(2)* %45, align 16, !tbaa !0 %47 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %48 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %49 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %50 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %51 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %52 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %53 = bitcast float %47 to i32 %54 = bitcast float %48 to i32 %55 = insertelement <2 x i32> undef, i32 %53, i32 0 %56 = insertelement <2 x i32> %55, i32 %54, i32 1 %57 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %56, <8 x i32> %28, <4 x i32> %30, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %58 = extractelement <4 x float> %57, i32 0 %59 = extractelement <4 x float> %57, i32 3 %60 = bitcast float %51 to i32 %61 = bitcast float %52 to i32 %62 = insertelement <2 x i32> undef, i32 %60, i32 0 %63 = insertelement <2 x i32> %62, i32 %61, i32 1 %64 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %63, <8 x i32> %32, <4 x i32> %34, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %65 = extractelement <4 x float> %64, i32 1 %66 = fmul float %65, %58 %67 = fadd float %66, %59 %68 = bitcast float %49 to i32 %69 = bitcast float %50 to i32 %70 = insertelement <2 x i32> undef, i32 %68, i32 0 %71 = insertelement <2 x i32> %70, i32 %69, i32 1 %72 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %71, <8 x i32> %44, <4 x i32> %46, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %73 = extractelement <4 x float> %72, i32 3 %74 = fadd float %73, -5.000000e-01 %75 = fadd float %73, -1.000000e+00 %76 = fadd float %73, %73 %77 = call float @llvm.AMDIL.clamp.(float %76, float 0.000000e+00, float 1.000000e+00) %78 = fmul float %74, 1.000000e+01 %79 = fmul float %75, -1.000000e+01 %80 = call float @llvm.AMDIL.clamp.(float %78, float 0.000000e+00, float 1.000000e+00) %81 = call float @llvm.AMDIL.clamp.(float %79, float 0.000000e+00, float 1.000000e+00) %82 = fmul float %80, -2.000000e+00 %83 = fadd float %82, 3.000000e+00 %84 = fmul float %81, -2.000000e+00 %85 = fadd float %84, 3.000000e+00 %86 = fmul float %80, %80 %87 = fmul float %81, %81 %88 = fmul float %86, %83 %89 = fmul float %87, %85 %90 = fmul float %89, %88 %91 = fmul float %26, 6.000000e+00 %92 = fadd float %91, 1.000000e+00 %93 = fmul float %67, %92 %94 = fadd float %93, %90 %95 = fmul float %77, -2.000000e+00 %96 = fadd float %95, 3.000000e+00 %97 = fmul float %77, %77 %98 = fmul float %97, %96 %99 = bitcast float %47 to i32 %100 = bitcast float %48 to i32 %101 = insertelement <2 x i32> undef, i32 %99, i32 0 %102 = insertelement <2 x i32> %101, i32 %100, i32 1 %103 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %102, <8 x i32> %40, <4 x i32> %42, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %104 = extractelement <4 x float> %103, i32 1 %105 = extractelement <4 x float> %103, i32 2 %106 = fadd float %105, %104 %107 = call float @llvm.AMDIL.clamp.(float %106, float 0.000000e+00, float 1.000000e+00) %108 = fsub float 1.000000e+00, %107 %109 = fsub float 1.000000e+00, %108 %110 = call float @llvm.maxnum.f32(float %109, float %98) %111 = fmul float %94, %110 %112 = fadd float %111, 0xBFE28F5C20000000 %113 = fmul float %112, 1.000000e+01 %114 = call float @llvm.AMDIL.clamp.(float %113, float 0.000000e+00, float 1.000000e+00) %115 = fmul float %114, -2.000000e+00 %116 = fadd float %115, 3.000000e+00 %117 = fmul float %114, %114 %118 = fmul float %117, %116 %119 = bitcast float %47 to i32 %120 = bitcast float %48 to i32 %121 = insertelement <2 x i32> undef, i32 %119, i32 0 %122 = insertelement <2 x i32> %121, i32 %120, i32 1 %123 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %122, <8 x i32> %36, <4 x i32> %38, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %124 = extractelement <4 x float> %123, i32 0 %125 = extractelement <4 x float> %123, i32 1 %126 = extractelement <4 x float> %123, i32 2 %127 = fsub float %124, %25 %128 = fmul float %25, -0.000000e+00 %129 = fadd float %128, %125 %130 = fmul float %25, -0.000000e+00 %131 = fadd float %130, %126 %132 = fmul float %25, 0.000000e+00 %133 = fmul float %25, 0.000000e+00 %134 = fmul float %118, %127 %135 = fadd float %134, %25 %136 = fmul float %118, %129 %137 = fadd float %136, %132 %138 = fmul float %118, %131 %139 = fadd float %138, %133 %140 = call i32 @llvm.SI.packf16(float %135, float %137) %141 = bitcast i32 %140 to float %142 = call i32 @llvm.SI.packf16(float %139, float 1.000000e+00) %143 = bitcast i32 %142 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %141, float %143, float %141, float %143) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s10 ; BEFC030A v_mov_b32_e32 v2, 0x41200000 ; 7E0402FF 41200000 v_mov_b32_e32 v3, 0x40400000 ; 7E0602FF 40400000 v_mov_b32_e32 v4, 0x40c00000 ; 7E0802FF 40C00000 v_mov_b32_e32 v5, 0x80000000 ; 7E0A02FF 80000000 v_interp_p1_f32 v6, v0, 0, 0, [m0] ; C8180000 v_interp_p2_f32 v6, [v6], v1, 0, 0, [m0] ; C8190001 v_interp_p1_f32 v7, v0, 1, 0, [m0] ; C81C0100 v_interp_p2_f32 v7, [v7], v1, 1, 0, [m0] ; C81D0101 v_interp_p1_f32 v8, v0, 2, 0, [m0] ; C8200200 v_interp_p2_f32 v8, [v8], v1, 2, 0, [m0] ; C8210201 v_interp_p1_f32 v9, v0, 3, 0, [m0] ; C8240300 v_interp_p2_f32 v9, [v9], v1, 3, 0, [m0] ; C8250301 v_interp_p1_f32 v10, v0, 0, 1, [m0] ; C8280400 v_interp_p2_f32 v10, [v10], v1, 0, 1, [m0] ; C8290401 v_interp_p1_f32 v11, v0, 1, 1, [m0] ; C82C0500 v_interp_p2_f32 v11, [v11], v1, 1, 1, [m0] ; C82D0501 s_load_dwordx4 s[28:31], s[2:3], 0x0 ; C08E0300 s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 s_load_dwordx4 s[36:39], s[4:5], 0x4 ; C0920504 s_load_dwordx4 s[0:3], s[4:5], 0x8 ; C0800508 s_load_dwordx4 s[16:19], s[4:5], 0xc ; C088050C s_load_dwordx4 s[40:43], s[4:5], 0x10 ; C0940510 s_load_dwordx8 s[44:51], s[6:7], 0x0 ; C0D60700 s_load_dwordx8 s[52:59], s[6:7], 0x8 ; C0DA0708 s_load_dwordx8 s[8:15], s[6:7], 0x10 ; C0C40710 s_load_dwordx8 s[20:27], s[6:7], 0x18 ; C0CA0718 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[28:31], 0xd ; C2021D0D s_buffer_load_dword s5, s[28:31], 0xf ; C2029D0F s_load_dwordx8 s[60:67], s[6:7], 0x20 ; C0DE0720 image_sample v[0:1], 9, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[44:51], s[32:35] ; F0800900 010B0006 image_sample v10, 2, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[52:59], s[36:39] ; F0800200 012D0A0A s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v1, v0, v10 ; 3E021500 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e64 v0, 0, s4 ; D2100000 00000880 v_mad_f32 v4, v4, s5, 1.0 ; D2820004 03C80B04 image_sample v8, 8, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[60:67], s[40:43] ; F0800800 014F0808 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v9, -0.5, v8 ; 061210F1 v_mul_f32_e32 v9, v2, v9 ; 10121302 v_madmk_f32_e32 v10, v8, v2, 0xc1200000 ; 40140508 C1200000 v_add_f32_e64 v9, 0, v9 clamp ; D2060809 00021280 v_add_f32_e64 v10, 0, v10 clamp ; D206080A 00021480 v_mad_f32 v11, -2.0, v9, v3 ; D282000B 040E12F5 v_mul_f32_e32 v9, v9, v9 ; 10121309 v_mul_f32_e32 v9, v11, v9 ; 1012130B v_mad_f32 v11, -2.0, v10, v3 ; D282000B 040E14F5 v_mul_f32_e32 v10, v10, v10 ; 1014150A v_mul_f32_e32 v10, v11, v10 ; 1014150B v_mul_f32_e32 v9, v9, v10 ; 10121509 v_add_f32_e32 v8, v8, v8 ; 06101108 v_add_f32_e64 v8, 0, v8 clamp ; D2060808 00021080 v_mac_f32_e32 v9, v4, v1 ; 3E120304 v_mad_f32 v1, -2.0, v8, v3 ; D2820001 040E10F5 v_mul_f32_e32 v4, v8, v8 ; 10081108 v_mul_f32_e32 v1, v1, v4 ; 10020901 image_sample v[10:11], 6, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[20:27], s[16:19] ; F0800600 00850A06 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v4, v10, v11 ; 0608170A v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 v_sub_f32_e32 v4, 1.0, v4 ; 080808F2 v_sub_f32_e32 v4, 1.0, v4 ; 080808F2 v_max_f32_e32 v1, v1, v4 ; 20020901 v_madak_f32_e32 v1, v9, v1, 0xbf147ae1 ; 42020309 BF147AE1 v_mul_f32_e32 v1, v2, v1 ; 10020302 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_mac_f32_e32 v3, -2.0, v1 ; 3E0602F5 v_mul_f32_e32 v1, v1, v1 ; 10020301 v_mul_f32_e32 v1, v3, v1 ; 10020303 image_sample v[2:4], 7, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[8:15], s[0:3] ; F0800700 00020206 s_waitcnt vmcnt(0) ; BF8C0770 v_subrev_f32_e32 v2, s4, v2 ; 0A040404 v_mad_f32 v3, s4, v5, v3 ; D2820003 040E0A04 v_mac_f32_e32 v4, s4, v5 ; 3E080A04 v_mad_f32 v2, v1, v2, s4 ; D2820002 00120501 v_mad_f32 v3, v3, v1, v0 ; D2820003 04020303 v_mac_f32_e32 v0, v4, v1 ; 3E000304 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e64 v0, v0, 1.0 ; D25E0000 0001E500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 12 Code Size: 440 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], CLIPVERTEX DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL CONST[0..54] DCL TEMP[0..3], LOCAL 0: MUL TEMP[0].xy, CONST[48].xyyy, IN[1].xyyy 1: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 2: ADD TEMP[1].x, TEMP[0].xxxx, CONST[48].wwww 3: MOV TEMP[1].z, TEMP[1].xxxx 4: MUL TEMP[0].xy, CONST[49].xyyy, IN[1].xyyy 5: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 6: ADD TEMP[2].x, TEMP[0].xxxx, CONST[49].wwww 7: MOV TEMP[1].w, TEMP[2].xxxx 8: MUL TEMP[0].xy, CONST[50].xyyy, IN[1].xyyy 9: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 10: ADD TEMP[2].x, TEMP[0].xxxx, CONST[50].wwww 11: MUL TEMP[0].xy, CONST[51].xyyy, IN[1].xyyy 12: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 13: ADD TEMP[3].x, TEMP[0].xxxx, CONST[51].wwww 14: MOV TEMP[2].y, TEMP[3].xxxx 15: MUL TEMP[0].xy, CONST[52].xyyy, IN[1].xyyy 16: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 17: ADD TEMP[3].x, TEMP[0].xxxx, CONST[52].wwww 18: MOV TEMP[2].z, TEMP[3].xxxx 19: MUL TEMP[0].xy, CONST[53].xyyy, IN[1].xyyy 20: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 21: ADD TEMP[0].x, TEMP[0].xxxx, CONST[53].wwww 22: MOV TEMP[2].w, TEMP[0].xxxx 23: MOV TEMP[0].xw, IN[0].xxxw 24: MOV TEMP[1].xy, IN[1].xyxx 25: MAD TEMP[3].x, IN[0].zzzz, CONST[0].zzzz, -IN[0].wwww 26: MOV TEMP[0].z, TEMP[3].xxxx 27: MOV TEMP[0].y, -IN[0].yyyy 28: MAD TEMP[0].xy, CONST[54].xyyy, IN[0].wwww, TEMP[0].xyyy 29: MOV OUT[2], TEMP[1] 30: MOV OUT[3], TEMP[2] 31: MOV OUT[0], TEMP[0] 32: MOV OUT[1], IN[0] 33: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 768) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 772) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 780) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 784) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 788) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 796) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 800) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 804) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 812) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 816) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 820) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 828) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 832) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 836) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 844) %30 = call float @llvm.SI.load.const(<16 x i8> %13, i32 848) %31 = call float @llvm.SI.load.const(<16 x i8> %13, i32 852) %32 = call float @llvm.SI.load.const(<16 x i8> %13, i32 860) %33 = call float @llvm.SI.load.const(<16 x i8> %13, i32 864) %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 868) %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 %37 = add i32 %5, %8 %38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = add i32 %5, %8 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = fmul float %15, %47 %50 = fmul float %16, %48 %51 = fadd float %50, %49 %52 = fadd float %51, %17 %53 = fmul float %18, %47 %54 = fmul float %19, %48 %55 = fadd float %54, %53 %56 = fadd float %55, %20 %57 = fmul float %21, %47 %58 = fmul float %22, %48 %59 = fadd float %58, %57 %60 = fadd float %59, %23 %61 = fmul float %24, %47 %62 = fmul float %25, %48 %63 = fadd float %62, %61 %64 = fadd float %63, %26 %65 = fmul float %27, %47 %66 = fmul float %28, %48 %67 = fadd float %66, %65 %68 = fadd float %67, %29 %69 = fmul float %30, %47 %70 = fmul float %31, %48 %71 = fadd float %70, %69 %72 = fadd float %71, %32 %73 = fmul float %41, %14 %74 = fsub float %73, %42 %75 = fmul float %33, %42 %76 = fadd float %75, %39 %77 = fmul float %34, %42 %78 = fsub float %77, %40 %79 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 16 %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 %81 = call float @llvm.SI.load.const(<16 x i8> %80, i32 0) %82 = fmul float %81, %39 %83 = call float @llvm.SI.load.const(<16 x i8> %80, i32 4) %84 = fmul float %83, %40 %85 = fadd float %82, %84 %86 = call float @llvm.SI.load.const(<16 x i8> %80, i32 8) %87 = fmul float %86, %41 %88 = fadd float %85, %87 %89 = call float @llvm.SI.load.const(<16 x i8> %80, i32 12) %90 = fmul float %89, %42 %91 = fadd float %88, %90 %92 = call float @llvm.SI.load.const(<16 x i8> %80, i32 16) %93 = fmul float %92, %39 %94 = call float @llvm.SI.load.const(<16 x i8> %80, i32 20) %95 = fmul float %94, %40 %96 = fadd float %93, %95 %97 = call float @llvm.SI.load.const(<16 x i8> %80, i32 24) %98 = fmul float %97, %41 %99 = fadd float %96, %98 %100 = call float @llvm.SI.load.const(<16 x i8> %80, i32 28) %101 = fmul float %100, %42 %102 = fadd float %99, %101 %103 = call float @llvm.SI.load.const(<16 x i8> %80, i32 32) %104 = fmul float %103, %39 %105 = call float @llvm.SI.load.const(<16 x i8> %80, i32 36) %106 = fmul float %105, %40 %107 = fadd float %104, %106 %108 = call float @llvm.SI.load.const(<16 x i8> %80, i32 40) %109 = fmul float %108, %41 %110 = fadd float %107, %109 %111 = call float @llvm.SI.load.const(<16 x i8> %80, i32 44) %112 = fmul float %111, %42 %113 = fadd float %110, %112 %114 = call float @llvm.SI.load.const(<16 x i8> %80, i32 48) %115 = fmul float %114, %39 %116 = call float @llvm.SI.load.const(<16 x i8> %80, i32 52) %117 = fmul float %116, %40 %118 = fadd float %115, %117 %119 = call float @llvm.SI.load.const(<16 x i8> %80, i32 56) %120 = fmul float %119, %41 %121 = fadd float %118, %120 %122 = call float @llvm.SI.load.const(<16 x i8> %80, i32 60) %123 = fmul float %122, %42 %124 = fadd float %121, %123 %125 = call float @llvm.SI.load.const(<16 x i8> %80, i32 64) %126 = fmul float %125, %39 %127 = call float @llvm.SI.load.const(<16 x i8> %80, i32 68) %128 = fmul float %127, %40 %129 = fadd float %126, %128 %130 = call float @llvm.SI.load.const(<16 x i8> %80, i32 72) %131 = fmul float %130, %41 %132 = fadd float %129, %131 %133 = call float @llvm.SI.load.const(<16 x i8> %80, i32 76) %134 = fmul float %133, %42 %135 = fadd float %132, %134 %136 = call float @llvm.SI.load.const(<16 x i8> %80, i32 80) %137 = fmul float %136, %39 %138 = call float @llvm.SI.load.const(<16 x i8> %80, i32 84) %139 = fmul float %138, %40 %140 = fadd float %137, %139 %141 = call float @llvm.SI.load.const(<16 x i8> %80, i32 88) %142 = fmul float %141, %41 %143 = fadd float %140, %142 %144 = call float @llvm.SI.load.const(<16 x i8> %80, i32 92) %145 = fmul float %144, %42 %146 = fadd float %143, %145 %147 = call float @llvm.SI.load.const(<16 x i8> %80, i32 96) %148 = fmul float %147, %39 %149 = call float @llvm.SI.load.const(<16 x i8> %80, i32 100) %150 = fmul float %149, %40 %151 = fadd float %148, %150 %152 = call float @llvm.SI.load.const(<16 x i8> %80, i32 104) %153 = fmul float %152, %41 %154 = fadd float %151, %153 %155 = call float @llvm.SI.load.const(<16 x i8> %80, i32 108) %156 = fmul float %155, %42 %157 = fadd float %154, %156 %158 = call float @llvm.SI.load.const(<16 x i8> %80, i32 112) %159 = fmul float %158, %39 %160 = call float @llvm.SI.load.const(<16 x i8> %80, i32 116) %161 = fmul float %160, %40 %162 = fadd float %159, %161 %163 = call float @llvm.SI.load.const(<16 x i8> %80, i32 120) %164 = fmul float %163, %41 %165 = fadd float %162, %164 %166 = call float @llvm.SI.load.const(<16 x i8> %80, i32 124) %167 = fmul float %166, %42 %168 = fadd float %165, %167 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %47, float %48, float %52, float %56) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %60, float %64, float %68, float %72) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %76, float %78, float %74, float %42) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 13, i32 0, float %91, float %102, float %113, float %124) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 14, i32 0, float %135, float %146, float %157, float %168) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_load_dwordx4 s[4:7], s[2:3], 0x0 ; C0820300 s_load_dwordx4 s[0:3], s[2:3], 0x40 ; C0800340 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[4:7], 0x2 ; C2040502 s_buffer_load_dword s9, s[4:7], 0xc0 ; C20485C0 s_buffer_load_dword s10, s[4:7], 0xc1 ; C20505C1 s_buffer_load_dword s11, s[4:7], 0xc3 ; C20585C3 s_buffer_load_dword s12, s[4:7], 0xc4 ; C20605C4 s_buffer_load_dword s13, s[4:7], 0xc5 ; C20685C5 s_buffer_load_dword s14, s[4:7], 0xc7 ; C20705C7 s_buffer_load_dword s15, s[4:7], 0xc8 ; C20785C8 s_buffer_load_dword s16, s[4:7], 0xc9 ; C20805C9 s_buffer_load_dword s17, s[4:7], 0xcb ; C20885CB s_buffer_load_dword s18, s[4:7], 0xcc ; C20905CC s_buffer_load_dword s19, s[4:7], 0xcd ; C20985CD s_buffer_load_dword s20, s[4:7], 0xcf ; C20A05CF s_buffer_load_dword s21, s[4:7], 0xd0 ; C20A85D0 s_buffer_load_dword s22, s[4:7], 0xd1 ; C20B05D1 s_buffer_load_dword s23, s[4:7], 0xd3 ; C20B85D3 s_buffer_load_dword s24, s[4:7], 0xd4 ; C20C05D4 s_buffer_load_dword s25, s[4:7], 0xd5 ; C20C85D5 s_buffer_load_dword s26, s[4:7], 0xd7 ; C20D05D7 s_buffer_load_dword s27, s[4:7], 0xd8 ; C20D85D8 s_buffer_load_dword s4, s[4:7], 0xd9 ; C20205D9 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x2 ; C2038102 s_buffer_load_dword s28, s[0:3], 0x3 ; C20E0103 s_buffer_load_dword s29, s[0:3], 0x4 ; C20E8104 s_buffer_load_dword s30, s[0:3], 0x5 ; C20F0105 s_buffer_load_dword s31, s[0:3], 0x6 ; C20F8106 s_buffer_load_dword s32, s[0:3], 0x7 ; C2100107 s_buffer_load_dword s33, s[0:3], 0x8 ; C2108108 s_buffer_load_dword s34, s[0:3], 0x9 ; C2110109 s_buffer_load_dword s35, s[0:3], 0xa ; C211810A s_buffer_load_dword s36, s[0:3], 0xb ; C212010B s_buffer_load_dword s37, s[0:3], 0xc ; C212810C s_buffer_load_dword s38, s[0:3], 0xd ; C213010D s_buffer_load_dword s39, s[0:3], 0xe ; C213810E s_buffer_load_dword s40, s[0:3], 0xf ; C214010F s_buffer_load_dword s41, s[0:3], 0x10 ; C2148110 s_buffer_load_dword s42, s[0:3], 0x11 ; C2150111 s_buffer_load_dword s43, s[0:3], 0x12 ; C2158112 s_buffer_load_dword s44, s[0:3], 0x13 ; C2160113 s_buffer_load_dword s45, s[0:3], 0x14 ; C2168114 s_buffer_load_dword s46, s[0:3], 0x15 ; C2170115 s_buffer_load_dword s47, s[0:3], 0x16 ; C2178116 s_buffer_load_dword s48, s[0:3], 0x17 ; C2180117 s_buffer_load_dword s49, s[0:3], 0x18 ; C2188118 s_buffer_load_dword s50, s[0:3], 0x19 ; C2190119 s_buffer_load_dword s51, s[0:3], 0x1a ; C219811A s_buffer_load_dword s52, s[0:3], 0x1b ; C21A011B s_buffer_load_dword s53, s[0:3], 0x1c ; C21A811C s_buffer_load_dword s54, s[0:3], 0x1d ; C21B011D s_buffer_load_dword s55, s[0:3], 0x1e ; C21B811E s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s11 ; 7E00020B s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v7, s14 ; 7E0E020E v_mov_b32_e32 v8, s17 ; 7E100211 v_mov_b32_e32 v9, s20 ; 7E120214 v_mov_b32_e32 v10, s23 ; 7E140217 v_mov_b32_e32 v11, s26 ; 7E16021A v_mac_f32_e32 v0, s9, v5 ; 3E000A09 v_mac_f32_e32 v7, s12, v5 ; 3E0E0A0C v_mac_f32_e32 v8, s15, v5 ; 3E100A0F v_mac_f32_e32 v9, s18, v5 ; 3E120A12 v_mac_f32_e32 v10, s21, v5 ; 3E140A15 v_mac_f32_e32 v11, s24, v5 ; 3E160A18 v_mac_f32_e32 v0, s10, v6 ; 3E000C0A v_mac_f32_e32 v7, s13, v6 ; 3E0E0C0D v_mac_f32_e32 v8, s16, v6 ; 3E100C10 v_mac_f32_e32 v9, s19, v6 ; 3E120C13 exp 15, 32, 0, 0, 0, v5, v6, v0, v7 ; F800020F 07000605 v_mac_f32_e32 v10, s22, v6 ; 3E140C16 v_mac_f32_e32 v11, s25, v6 ; 3E160C19 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v0, v3, s8, -v4 ; D2820000 84101103 v_mad_f32 v5, s27, v4, v1 ; D2820005 0406081B v_mad_f32 v6, s4, v4, -v2 ; D2820006 840A0804 v_mul_f32_e32 v7, s6, v2 ; 100E0406 exp 15, 33, 0, 0, 0, v8, v9, v10, v11 ; F800021F 0B0A0908 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v8, s30, v2 ; 1010041E v_mac_f32_e32 v7, s5, v1 ; 3E0E0205 exp 15, 12, 0, 0, 0, v5, v6, v0, v4 ; F80000CF 04000605 v_mac_f32_e32 v8, s29, v1 ; 3E10021D s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s34, v2 ; 10000422 v_mac_f32_e32 v0, s33, v1 ; 3E000221 v_mul_f32_e32 v5, s38, v2 ; 100A0426 v_mac_f32_e32 v5, s37, v1 ; 3E0A0225 v_mul_f32_e32 v6, s42, v2 ; 100C042A v_mul_f32_e32 v9, s46, v2 ; 1012042E v_mul_f32_e32 v10, s50, v2 ; 10140432 v_mul_f32_e32 v2, s54, v2 ; 10040436 v_mac_f32_e32 v6, s41, v1 ; 3E0C0229 v_mac_f32_e32 v9, s45, v1 ; 3E12022D v_mac_f32_e32 v10, s49, v1 ; 3E140231 v_mac_f32_e32 v2, s53, v1 ; 3E040235 v_mac_f32_e32 v7, s7, v3 ; 3E0E0607 v_mac_f32_e32 v8, s31, v3 ; 3E10061F v_mac_f32_e32 v0, s35, v3 ; 3E000623 v_mac_f32_e32 v5, s39, v3 ; 3E0A0627 v_mac_f32_e32 v6, s43, v3 ; 3E0C062B v_mac_f32_e32 v9, s47, v3 ; 3E12062F v_mac_f32_e32 v10, s51, v3 ; 3E140633 v_mac_f32_e32 v2, s55, v3 ; 3E040637 v_mac_f32_e32 v7, s28, v4 ; 3E0E081C v_mac_f32_e32 v8, s32, v4 ; 3E100820 v_mac_f32_e32 v0, s36, v4 ; 3E000824 v_mac_f32_e32 v5, s40, v4 ; 3E0A0828 v_mac_f32_e32 v6, s44, v4 ; 3E0C082C v_mac_f32_e32 v9, s48, v4 ; 3E120830 v_mac_f32_e32 v10, s52, v4 ; 3E140834 v_mac_f32_e32 v2, s0, v4 ; 3E040800 exp 15, 13, 0, 0, 0, v7, v8, v0, v5 ; F80000DF 05000807 exp 15, 14, 0, 1, 0, v6, v9, v10, v2 ; F80008EF 020A0906 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 12 Code Size: 544 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL SVIEW[5], 2D, FLOAT DCL CONST[0..3] DCL TEMP[0..10], LOCAL IMM[0] FLT32 { 1.0000, 6.0000, 0.7500, 0.2500} IMM[1] FLT32 { -1.0000, 0.3000, 0.5900, 0.1100} IMM[2] FLT32 { -0.0800, 14.2857, -2.0000, 3.0000} IMM[3] FLT32 { 0.0300, 0.5000, -0.5000, -1.0000} IMM[4] FLT32 { 10.0000, -10.0000, -0.5800, -100.0000} IMM[5] FLT32 { -0.9000, -0.0100, 0.0000, 0.0000} 0: ADD TEMP[0].xyz, -CONST[0].xyzz, CONST[1].xyzz 1: MOV TEMP[1].xy, IN[0].zwww 2: TEX TEMP[1], TEMP[1], SAMP[5], 2D 3: MAD TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[0].xyzz 4: LRP TEMP[2].xyz, TEMP[1].yyyy, CONST[2].xyzz, TEMP[0].xyzz 5: MOV TEMP[0].x, CONST[0].wwww 6: MOV TEMP[0].y, CONST[1].wwww 7: MOV TEMP[0].z, CONST[2].wwww 8: LRP TEMP[3].xyz, TEMP[1].zzzz, TEMP[0].xyzz, TEMP[2].xyzz 9: MOV TEMP[4].xy, IN[0].xyyy 10: TEX TEMP[5].yz, TEMP[4], SAMP[3], 2D 11: LRP TEMP[4].xyz, TEMP[5].yyyy, CONST[2].xyzz, TEMP[3].xyzz 12: LRP TEMP[3].xyz, TEMP[5].zzzz, TEMP[0].xyzz, TEMP[4].xyzz 13: ADD TEMP[0].x, TEMP[5].zzzz, TEMP[5].yyyy 14: MOV_SAT TEMP[5].x, TEMP[0].xxxx 15: ADD TEMP[0].x, -TEMP[5].xxxx, IMM[0].xxxx 16: ADD TEMP[0].x, -TEMP[0].xxxx, IMM[0].xxxx 17: MOV TEMP[5].xy, IN[1].zwww 18: TEX TEMP[5], TEMP[5], SAMP[4], 2D 19: MUL TEMP[6].x, IMM[0].zzzz, CONST[3].wwww 20: MOV TEMP[7].xy, IN[0].xyyy 21: TEX TEMP[7], TEMP[7], SAMP[0], 2D 22: ADD TEMP[8].x, -TEMP[7].xxxx, IMM[0].xxxx 23: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[8].xxxx 24: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[8].xxxx 25: MAD TEMP[6].x, TEMP[8].xxxx, IMM[0].wwww, TEMP[6].xxxx 26: ADD TEMP[5], TEMP[5], IMM[1].xxxx 27: MAD TEMP[5], TEMP[6].xxxx, TEMP[5], IMM[0].xxxx 28: MUL TEMP[6].xyz, TEMP[3].xyzz, TEMP[5].xyzz 29: DP3 TEMP[8].x, TEMP[6].xyzz, IMM[1].yzww 30: ADD TEMP[4].x, TEMP[8].xxxx, IMM[2].xxxx 31: MUL TEMP[4].x, TEMP[4].xxxx, IMM[2].yyyy 32: MOV_SAT TEMP[8].x, TEMP[4].xxxx 33: MAD TEMP[2].x, TEMP[8].xxxx, IMM[2].zzzz, IMM[2].wwww 34: MUL TEMP[4].x, TEMP[8].xxxx, TEMP[8].xxxx 35: MAD TEMP[4].x, TEMP[2].xxxx, -TEMP[4].xxxx, IMM[0].xxxx 36: MAD TEMP[2].xyz, TEMP[4].xxxx, IMM[3].xxxx, TEMP[6].xyzz 37: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[7].zzzz 38: MAD TEMP[6].xyz, TEMP[2].xyzz, IMM[3].yyyy, TEMP[6].xyzz 39: MOV_SAT TEMP[6].xyz, TEMP[6].xyzz 40: MUL TEMP[2].xyz, TEMP[7].yyyy, TEMP[6].xyzz 41: MOV TEMP[8].xy, IN[0].xyyy 42: TEX TEMP[8], TEMP[8], SAMP[2], 2D 43: MAD TEMP[6].xyz, TEMP[6].xyzz, -TEMP[7].yyyy, TEMP[8].xyzz 44: ADD TEMP[4].x, TEMP[1].wwww, TEMP[1].wwww 45: MOV_SAT TEMP[9].x, TEMP[4].xxxx 46: ADD TEMP[3].xy, TEMP[1].wwww, IMM[3].zwww 47: MUL TEMP[3].xy, TEMP[3].xyyy, IMM[4].xyyy 48: MOV_SAT TEMP[1].xy, TEMP[3].xyyy 49: MAD TEMP[10].x, TEMP[9].xxxx, IMM[2].zzzz, IMM[2].wwww 50: MUL TEMP[4].x, TEMP[9].xxxx, TEMP[9].xxxx 51: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[10].xxxx 52: MAX TEMP[9].x, TEMP[0].xxxx, TEMP[4].xxxx 53: MAD TEMP[10].xy, TEMP[1].xyyy, IMM[2].zzzz, IMM[2].wwww 54: MUL TEMP[3].xy, TEMP[1].xyyy, TEMP[1].xyyy 55: MUL TEMP[1].xy, TEMP[10].xyyy, TEMP[3].xyyy 56: MUL TEMP[0].x, TEMP[1].yyyy, TEMP[1].xxxx 57: MOV TEMP[1].xy, IN[1].xyyy 58: TEX TEMP[1].y, TEMP[1], SAMP[1], 2D 59: MAD TEMP[4].x, TEMP[1].yyyy, TEMP[7].xxxx, TEMP[7].wwww 60: MUL TEMP[1].x, TEMP[7].yyyy, CONST[3].zzzz 61: MUL TEMP[1].x, TEMP[5].wwww, TEMP[1].xxxx 62: MAD TEMP[3].x, CONST[3].wwww, IMM[0].yyyy, IMM[0].xxxx 63: MAD TEMP[0].x, TEMP[4].xxxx, TEMP[3].xxxx, TEMP[0].xxxx 64: MAD TEMP[0].x, TEMP[0].xxxx, TEMP[9].xxxx, IMM[4].zzzz 65: MUL TEMP[0].x, TEMP[0].xxxx, IMM[4].xxxx 66: MOV_SAT TEMP[3].x, TEMP[0].xxxx 67: MAD TEMP[4].x, TEMP[3].xxxx, IMM[2].zzzz, IMM[2].wwww 68: MUL TEMP[0].x, TEMP[3].xxxx, TEMP[3].xxxx 69: MUL TEMP[3].x, TEMP[0].xxxx, TEMP[4].xxxx 70: MAD TEMP[4].xy, TEMP[4].xxxx, TEMP[0].xxxx, IMM[5].xyyy 71: MUL TEMP[4].xy, TEMP[4].xyyy, IMM[4].xwww 72: MAD TEMP[2].xyz, TEMP[3].xxxx, TEMP[6].xyzz, TEMP[2].xyzz 73: MAX TEMP[3].x, TEMP[4].yyyy, IMM[5].zzzz 74: MOV_SAT TEMP[4].x, TEMP[4].xxxx 75: MAD TEMP[5].x, TEMP[3].xxxx, IMM[2].zzzz, IMM[2].wwww 76: MUL TEMP[0].x, TEMP[3].xxxx, TEMP[3].xxxx 77: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[5].xxxx 78: MAD TEMP[3].x, TEMP[1].xxxx, -TEMP[0].xxxx, TEMP[8].wwww 79: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[1].xxxx 80: MAD TEMP[1].x, TEMP[4].xxxx, IMM[2].zzzz, IMM[2].wwww 81: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[4].xxxx 82: MUL TEMP[1].x, TEMP[4].xxxx, TEMP[1].xxxx 83: MAD TEMP[0].x, TEMP[1].xxxx, TEMP[3].xxxx, TEMP[0].xxxx 84: MOV TEMP[2].w, TEMP[0].xxxx 85: MOV OUT[0], TEMP[2] 86: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0) %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 4) %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 8) %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 12) %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 16) %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 20) %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 24) %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 28) %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 32) %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 36) %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 40) %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 44) %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 56) %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 60) %39 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %40 = load <8 x i32>, <8 x i32> addrspace(2)* %39, align 32, !tbaa !0 %41 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %42 = load <4 x i32>, <4 x i32> addrspace(2)* %41, align 16, !tbaa !0 %43 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %44 = load <8 x i32>, <8 x i32> addrspace(2)* %43, align 32, !tbaa !0 %45 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %46 = load <4 x i32>, <4 x i32> addrspace(2)* %45, align 16, !tbaa !0 %47 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %48 = load <8 x i32>, <8 x i32> addrspace(2)* %47, align 32, !tbaa !0 %49 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %50 = load <4 x i32>, <4 x i32> addrspace(2)* %49, align 16, !tbaa !0 %51 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %52 = load <8 x i32>, <8 x i32> addrspace(2)* %51, align 32, !tbaa !0 %53 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %54 = load <4 x i32>, <4 x i32> addrspace(2)* %53, align 16, !tbaa !0 %55 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %56 = load <8 x i32>, <8 x i32> addrspace(2)* %55, align 32, !tbaa !0 %57 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %58 = load <4 x i32>, <4 x i32> addrspace(2)* %57, align 16, !tbaa !0 %59 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %60 = load <8 x i32>, <8 x i32> addrspace(2)* %59, align 32, !tbaa !0 %61 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %62 = load <4 x i32>, <4 x i32> addrspace(2)* %61, align 16, !tbaa !0 %63 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %64 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %65 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %66 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %67 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %68 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %69 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) %70 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %71 = fsub float %29, %25 %72 = fsub float %30, %26 %73 = fsub float %31, %27 %74 = bitcast float %65 to i32 %75 = bitcast float %66 to i32 %76 = insertelement <2 x i32> undef, i32 %74, i32 0 %77 = insertelement <2 x i32> %76, i32 %75, i32 1 %78 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %77, <8 x i32> %60, <4 x i32> %62, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %79 = extractelement <4 x float> %78, i32 0 %80 = extractelement <4 x float> %78, i32 1 %81 = extractelement <4 x float> %78, i32 2 %82 = extractelement <4 x float> %78, i32 3 %83 = fmul float %79, %71 %84 = fadd float %83, %25 %85 = fmul float %79, %72 %86 = fadd float %85, %26 %87 = fmul float %79, %73 %88 = fadd float %87, %27 %89 = fsub float 1.000000e+00, %80 %90 = fmul float %33, %80 %91 = fmul float %84, %89 %92 = fadd float %90, %91 %93 = fsub float 1.000000e+00, %80 %94 = fmul float %34, %80 %95 = fmul float %86, %93 %96 = fadd float %94, %95 %97 = fsub float 1.000000e+00, %80 %98 = fmul float %35, %80 %99 = fmul float %88, %97 %100 = fadd float %98, %99 %101 = fsub float 1.000000e+00, %81 %102 = fmul float %28, %81 %103 = fmul float %92, %101 %104 = fadd float %102, %103 %105 = fsub float 1.000000e+00, %81 %106 = fmul float %32, %81 %107 = fmul float %96, %105 %108 = fadd float %106, %107 %109 = fsub float 1.000000e+00, %81 %110 = fmul float %36, %81 %111 = fmul float %100, %109 %112 = fadd float %110, %111 %113 = bitcast float %63 to i32 %114 = bitcast float %64 to i32 %115 = insertelement <2 x i32> undef, i32 %113, i32 0 %116 = insertelement <2 x i32> %115, i32 %114, i32 1 %117 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %116, <8 x i32> %52, <4 x i32> %54, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %118 = extractelement <4 x float> %117, i32 1 %119 = extractelement <4 x float> %117, i32 2 %120 = fsub float 1.000000e+00, %118 %121 = fmul float %33, %118 %122 = fmul float %104, %120 %123 = fadd float %121, %122 %124 = fsub float 1.000000e+00, %118 %125 = fmul float %34, %118 %126 = fmul float %108, %124 %127 = fadd float %125, %126 %128 = fsub float 1.000000e+00, %118 %129 = fmul float %35, %118 %130 = fmul float %112, %128 %131 = fadd float %129, %130 %132 = fsub float 1.000000e+00, %119 %133 = fmul float %28, %119 %134 = fmul float %123, %132 %135 = fadd float %133, %134 %136 = fsub float 1.000000e+00, %119 %137 = fmul float %32, %119 %138 = fmul float %127, %136 %139 = fadd float %137, %138 %140 = fsub float 1.000000e+00, %119 %141 = fmul float %36, %119 %142 = fmul float %131, %140 %143 = fadd float %141, %142 %144 = fadd float %119, %118 %145 = call float @llvm.AMDIL.clamp.(float %144, float 0.000000e+00, float 1.000000e+00) %146 = fsub float 1.000000e+00, %145 %147 = fsub float 1.000000e+00, %146 %148 = bitcast float %69 to i32 %149 = bitcast float %70 to i32 %150 = insertelement <2 x i32> undef, i32 %148, i32 0 %151 = insertelement <2 x i32> %150, i32 %149, i32 1 %152 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %151, <8 x i32> %56, <4 x i32> %58, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %153 = extractelement <4 x float> %152, i32 0 %154 = extractelement <4 x float> %152, i32 1 %155 = extractelement <4 x float> %152, i32 2 %156 = extractelement <4 x float> %152, i32 3 %157 = fmul float %38, 7.500000e-01 %158 = bitcast float %63 to i32 %159 = bitcast float %64 to i32 %160 = insertelement <2 x i32> undef, i32 %158, i32 0 %161 = insertelement <2 x i32> %160, i32 %159, i32 1 %162 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %161, <8 x i32> %40, <4 x i32> %42, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %163 = extractelement <4 x float> %162, i32 0 %164 = extractelement <4 x float> %162, i32 1 %165 = extractelement <4 x float> %162, i32 2 %166 = extractelement <4 x float> %162, i32 3 %167 = fsub float 1.000000e+00, %163 %168 = fmul float %167, %167 %169 = fmul float %168, %168 %170 = fmul float %169, 2.500000e-01 %171 = fadd float %170, %157 %172 = fadd float %153, -1.000000e+00 %173 = fadd float %154, -1.000000e+00 %174 = fadd float %155, -1.000000e+00 %175 = fadd float %156, -1.000000e+00 %176 = fmul float %171, %172 %177 = fadd float %176, 1.000000e+00 %178 = fmul float %171, %173 %179 = fadd float %178, 1.000000e+00 %180 = fmul float %171, %174 %181 = fadd float %180, 1.000000e+00 %182 = fmul float %171, %175 %183 = fadd float %182, 1.000000e+00 %184 = fmul float %135, %177 %185 = fmul float %139, %179 %186 = fmul float %143, %181 %187 = fmul float %184, 0x3FD3333340000000 %188 = fmul float %185, 0x3FE2E147A0000000 %189 = fadd float %188, %187 %190 = fmul float %186, 0x3FBC28F5C0000000 %191 = fadd float %189, %190 %192 = fadd float %191, 0xBFB47AE140000000 %193 = fmul float %192, 0x402C924920000000 %194 = call float @llvm.AMDIL.clamp.(float %193, float 0.000000e+00, float 1.000000e+00) %195 = fmul float %194, -2.000000e+00 %196 = fadd float %195, 3.000000e+00 %197 = fmul float %194, %194 %198 = fmul float %197, %196 %199 = fsub float 1.000000e+00, %198 %200 = fmul float %199, 0x3F9EB851E0000000 %201 = fadd float %200, %184 %202 = fmul float %199, 0x3F9EB851E0000000 %203 = fadd float %202, %185 %204 = fmul float %199, 0x3F9EB851E0000000 %205 = fadd float %204, %186 %206 = fmul float %201, %165 %207 = fmul float %203, %165 %208 = fmul float %205, %165 %209 = fmul float %206, 5.000000e-01 %210 = fadd float %209, %184 %211 = fmul float %207, 5.000000e-01 %212 = fadd float %211, %185 %213 = fmul float %208, 5.000000e-01 %214 = fadd float %213, %186 %215 = call float @llvm.AMDIL.clamp.(float %210, float 0.000000e+00, float 1.000000e+00) %216 = call float @llvm.AMDIL.clamp.(float %212, float 0.000000e+00, float 1.000000e+00) %217 = call float @llvm.AMDIL.clamp.(float %214, float 0.000000e+00, float 1.000000e+00) %218 = fmul float %164, %215 %219 = fmul float %164, %216 %220 = fmul float %164, %217 %221 = bitcast float %63 to i32 %222 = bitcast float %64 to i32 %223 = insertelement <2 x i32> undef, i32 %221, i32 0 %224 = insertelement <2 x i32> %223, i32 %222, i32 1 %225 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %224, <8 x i32> %48, <4 x i32> %50, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %226 = extractelement <4 x float> %225, i32 0 %227 = extractelement <4 x float> %225, i32 1 %228 = extractelement <4 x float> %225, i32 2 %229 = extractelement <4 x float> %225, i32 3 %230 = fmul float %164, %215 %231 = fsub float %226, %230 %232 = fmul float %164, %216 %233 = fsub float %227, %232 %234 = fmul float %164, %217 %235 = fsub float %228, %234 %236 = fadd float %82, %82 %237 = call float @llvm.AMDIL.clamp.(float %236, float 0.000000e+00, float 1.000000e+00) %238 = fadd float %82, -5.000000e-01 %239 = fadd float %82, -1.000000e+00 %240 = fmul float %238, 1.000000e+01 %241 = fmul float %239, -1.000000e+01 %242 = call float @llvm.AMDIL.clamp.(float %240, float 0.000000e+00, float 1.000000e+00) %243 = call float @llvm.AMDIL.clamp.(float %241, float 0.000000e+00, float 1.000000e+00) %244 = fmul float %237, -2.000000e+00 %245 = fadd float %244, 3.000000e+00 %246 = fmul float %237, %237 %247 = fmul float %246, %245 %248 = call float @llvm.maxnum.f32(float %147, float %247) %249 = fmul float %242, -2.000000e+00 %250 = fadd float %249, 3.000000e+00 %251 = fmul float %243, -2.000000e+00 %252 = fadd float %251, 3.000000e+00 %253 = fmul float %242, %242 %254 = fmul float %243, %243 %255 = fmul float %250, %253 %256 = fmul float %252, %254 %257 = fmul float %256, %255 %258 = bitcast float %67 to i32 %259 = bitcast float %68 to i32 %260 = insertelement <2 x i32> undef, i32 %258, i32 0 %261 = insertelement <2 x i32> %260, i32 %259, i32 1 %262 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %261, <8 x i32> %44, <4 x i32> %46, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %263 = extractelement <4 x float> %262, i32 1 %264 = fmul float %263, %163 %265 = fadd float %264, %166 %266 = fmul float %164, %37 %267 = fmul float %183, %266 %268 = fmul float %38, 6.000000e+00 %269 = fadd float %268, 1.000000e+00 %270 = fmul float %265, %269 %271 = fadd float %270, %257 %272 = fmul float %271, %248 %273 = fadd float %272, 0xBFE28F5C20000000 %274 = fmul float %273, 1.000000e+01 %275 = call float @llvm.AMDIL.clamp.(float %274, float 0.000000e+00, float 1.000000e+00) %276 = fmul float %275, -2.000000e+00 %277 = fadd float %276, 3.000000e+00 %278 = fmul float %275, %275 %279 = fmul float %278, %277 %280 = fmul float %277, %278 %281 = fadd float %280, 0xBFECCCCCC0000000 %282 = fmul float %277, %278 %283 = fadd float %282, 0xBF847AE140000000 %284 = fmul float %281, 1.000000e+01 %285 = fmul float %283, -1.000000e+02 %286 = fmul float %279, %231 %287 = fadd float %286, %218 %288 = fmul float %279, %233 %289 = fadd float %288, %219 %290 = fmul float %279, %235 %291 = fadd float %290, %220 %292 = call float @llvm.maxnum.f32(float %285, float 0.000000e+00) %293 = call float @llvm.AMDIL.clamp.(float %284, float 0.000000e+00, float 1.000000e+00) %294 = fmul float %292, -2.000000e+00 %295 = fadd float %294, 3.000000e+00 %296 = fmul float %292, %292 %297 = fmul float %296, %295 %298 = fmul float %297, %267 %299 = fsub float %229, %298 %300 = fmul float %297, %267 %301 = fmul float %293, -2.000000e+00 %302 = fadd float %301, 3.000000e+00 %303 = fmul float %293, %293 %304 = fmul float %303, %302 %305 = fmul float %304, %299 %306 = fadd float %305, %300 %307 = call i32 @llvm.SI.packf16(float %287, float %289) %308 = bitcast i32 %307 to float %309 = call i32 @llvm.SI.packf16(float %291, float %306) %310 = bitcast i32 %309 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %308, float %310, float %308, float %310) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s10 ; BEFC030A v_mov_b32_e32 v2, 0x3f400000 ; 7E0402FF 3F400000 v_mov_b32_e32 v3, 0xbda3d70a ; 7E0602FF BDA3D70A v_mov_b32_e32 v4, 0x40400000 ; 7E0802FF 40400000 v_mov_b32_e32 v5, 0x3cf5c28f ; 7E0A02FF 3CF5C28F v_mov_b32_e32 v6, 0x41200000 ; 7E0C02FF 41200000 v_mov_b32_e32 v7, 0x40c00000 ; 7E0E02FF 40C00000 v_mov_b32_e32 v8, 0xc2c80000 ; 7E1002FF C2C80000 v_interp_p1_f32 v9, v0, 0, 0, [m0] ; C8240000 v_interp_p2_f32 v9, [v9], v1, 0, 0, [m0] ; C8250001 v_interp_p1_f32 v10, v0, 1, 0, [m0] ; C8280100 v_interp_p2_f32 v10, [v10], v1, 1, 0, [m0] ; C8290101 v_interp_p1_f32 v11, v0, 2, 0, [m0] ; C82C0200 v_interp_p2_f32 v11, [v11], v1, 2, 0, [m0] ; C82D0201 v_interp_p1_f32 v12, v0, 3, 0, [m0] ; C8300300 v_interp_p2_f32 v12, [v12], v1, 3, 0, [m0] ; C8310301 v_interp_p1_f32 v13, v0, 0, 1, [m0] ; C8340400 v_interp_p2_f32 v13, [v13], v1, 0, 1, [m0] ; C8350401 v_interp_p1_f32 v14, v0, 1, 1, [m0] ; C8380500 v_interp_p2_f32 v14, [v14], v1, 1, 1, [m0] ; C8390501 v_interp_p1_f32 v15, v0, 2, 1, [m0] ; C83C0600 v_interp_p2_f32 v15, [v15], v1, 2, 1, [m0] ; C83D0601 v_interp_p1_f32 v16, v0, 3, 1, [m0] ; C8400700 v_interp_p2_f32 v16, [v16], v1, 3, 1, [m0] ; C8410701 s_load_dwordx4 s[52:55], s[2:3], 0x0 ; C09A0300 s_load_dwordx4 s[28:31], s[4:5], 0x0 ; C08E0500 s_load_dwordx4 s[0:3], s[4:5], 0x4 ; C0800504 s_load_dwordx4 s[16:19], s[4:5], 0x8 ; C0880508 s_load_dwordx4 s[32:35], s[4:5], 0xc ; C090050C s_load_dwordx4 s[36:39], s[4:5], 0x10 ; C0920510 s_load_dwordx4 s[48:51], s[4:5], 0x14 ; C0980514 s_load_dwordx8 s[40:47], s[6:7], 0x0 ; C0D40700 s_load_dwordx8 s[8:15], s[6:7], 0x8 ; C0C40708 s_load_dwordx8 s[20:27], s[6:7], 0x10 ; C0CA0710 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[52:55], 0x0 ; C2023500 s_buffer_load_dword s5, s[52:55], 0x1 ; C202B501 s_buffer_load_dword s56, s[52:55], 0x2 ; C21C3502 s_buffer_load_dword s57, s[52:55], 0x3 ; C21CB503 s_buffer_load_dword s58, s[52:55], 0x4 ; C21D3504 s_buffer_load_dword s59, s[52:55], 0x5 ; C21DB505 s_buffer_load_dword s60, s[52:55], 0x6 ; C21E3506 s_buffer_load_dword s61, s[52:55], 0x7 ; C21EB507 s_buffer_load_dword s62, s[52:55], 0x8 ; C21F3508 s_buffer_load_dword s63, s[52:55], 0x9 ; C21FB509 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s4 ; 7E000204 s_buffer_load_dword s64, s[52:55], 0xa ; C220350A v_mov_b32_e32 v1, s5 ; 7E020205 s_buffer_load_dword s65, s[52:55], 0xb ; C220B50B v_mov_b32_e32 v17, s56 ; 7E220238 s_buffer_load_dword s66, s[52:55], 0xe ; C221350E s_buffer_load_dword s52, s[52:55], 0xf ; C21A350F v_sub_f32_e32 v0, s58, v0 ; 0800003A v_sub_f32_e32 v1, s59, v1 ; 0802023B v_sub_f32_e32 v17, s60, v17 ; 0822223C s_load_dwordx8 s[68:75], s[6:7], 0x18 ; C0E20718 s_load_dwordx8 s[76:83], s[6:7], 0x20 ; C0E60720 s_load_dwordx8 s[84:91], s[6:7], 0x28 ; C0EA0728 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s52, v2 ; 10040434 image_sample v[18:21], 15, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[84:91], s[48:51] ; F0800F00 0195120B image_sample v[11:12], 6, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[68:75], s[32:35] ; F0800600 01110B09 image_sample v[22:25], 15, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[76:83], s[36:39] ; F0800F00 0133160F image_sample v[26:29], 15, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[40:47], s[28:31] ; F0800F00 00EA1A09 s_waitcnt vmcnt(3) ; BF8C0773 v_mad_f32 v0, v18, v0, s4 ; D2820000 00120112 v_mad_f32 v1, v18, v1, s5 ; D2820001 00160312 v_mad_f32 v15, v18, v17, s56 ; D282000F 00E22312 v_mad_f32 v0, -v19, v0, v0 ; D2820000 24020113 v_mac_f32_e32 v0, s62, v19 ; 3E00263E v_mad_f32 v1, -v19, v1, v1 ; D2820001 24060313 v_mac_f32_e32 v1, s63, v19 ; 3E02263F s_waitcnt vmcnt(0) ; BF8C0770 v_sub_f32_e32 v16, 1.0, v26 ; 082034F2 v_mad_f32 v16, -v26, v16, v16 ; D2820010 2442211A v_mul_f32_e32 v16, v16, v16 ; 10202110 v_madmk_f32_e32 v2, v16, v2, 0x3e800000 ; 40040510 3E800000 v_mad_f32 v16, v22, v2, -v2 ; D2820010 840A0516 v_mad_f32 v17, v23, v2, -v2 ; D2820011 840A0517 v_mad_f32 v18, v24, v2, -v2 ; D2820012 840A0518 v_mad_f32 v2, v25, v2, -v2 ; D2820002 840A0519 v_mad_f32 v0, -v20, v0, v0 ; D2820000 24020114 v_mac_f32_e32 v0, s57, v20 ; 3E002839 v_mad_f32 v0, -v11, v0, v0 ; D2820000 2402010B v_mac_f32_e32 v0, s62, v11 ; 3E00163E v_mad_f32 v0, -v12, v0, v0 ; D2820000 2402010C v_mac_f32_e32 v0, s57, v12 ; 3E001839 v_mac_f32_e32 v0, v0, v16 ; 3E002100 v_mad_f32 v1, -v20, v1, v1 ; D2820001 24060314 v_mac_f32_e32 v1, s61, v20 ; 3E02283D v_mad_f32 v1, -v11, v1, v1 ; D2820001 2406030B v_mac_f32_e32 v1, s63, v11 ; 3E02163F v_mad_f32 v1, -v12, v1, v1 ; D2820001 2406030C v_mac_f32_e32 v1, s61, v12 ; 3E02183D v_mac_f32_e32 v1, v1, v17 ; 3E022301 v_mad_f32 v15, -v19, v15, v15 ; D282000F 243E1F13 v_mac_f32_e32 v15, s64, v19 ; 3E1E2640 v_mad_f32 v15, -v20, v15, v15 ; D282000F 243E1F14 v_mac_f32_e32 v15, s65, v20 ; 3E1E2841 v_mad_f32 v15, -v11, v15, v15 ; D282000F 243E1F0B v_mac_f32_e32 v15, s64, v11 ; 3E1E1640 v_mad_f32 v15, -v12, v15, v15 ; D282000F 243E1F0C v_mac_f32_e32 v15, s65, v12 ; 3E1E1841 v_mac_f32_e32 v15, v15, v18 ; 3E1E250F v_mul_f32_e32 v16, 0x3e99999a, v0 ; 102000FF 3E99999A v_madmk_f32_e32 v16, v1, v16, 0x3f170a3d ; 40202101 3F170A3D v_madmk_f32_e32 v16, v15, v16, 0x3de147ae ; 4020210F 3DE147AE v_add_f32_e32 v3, v16, v3 ; 06060710 v_mul_f32_e32 v3, 0x41649249, v3 ; 100606FF 41649249 v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680 v_mad_f32 v16, -2.0, v3, v4 ; D2820010 041206F5 v_mul_f32_e32 v3, v3, v3 ; 10060703 v_mad_f32 v3, -v3, v16, 1.0 ; D2820003 23CA2103 v_mad_f32 v16, v5, v3, v0 ; D2820010 04020705 v_mad_f32 v17, v5, v3, v1 ; D2820011 04060705 v_mad_f32 v3, v5, v3, v15 ; D2820003 043E0705 v_mul_f32_e32 v5, v28, v16 ; 100A211C v_mac_f32_e32 v0, 0.5, v5 ; 3E000AF0 v_mul_f32_e32 v5, v28, v17 ; 100A231C v_mul_f32_e32 v3, v28, v3 ; 1006071C v_mac_f32_e32 v1, 0.5, v5 ; 3E020AF0 v_mac_f32_e32 v15, 0.5, v3 ; 3E1E06F0 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_add_f32_e64 v3, 0, v15 clamp ; D2060803 00021E80 v_add_f32_e32 v5, v21, v21 ; 060A2B15 v_add_f32_e32 v15, -0.5, v21 ; 061E2AF1 v_madmk_f32_e32 v16, v21, v6, 0xc1200000 ; 40200D15 C1200000 v_mul_f32_e32 v17, v0, v27 ; 10223700 v_mul_f32_e32 v18, v1, v27 ; 10243701 v_mul_f32_e32 v15, v6, v15 ; 101E1F06 v_add_f32_e64 v15, 0, v15 clamp ; D206080F 00021E80 v_add_f32_e64 v16, 0, v16 clamp ; D2060810 00022080 v_mad_f32 v19, -2.0, v15, v4 ; D2820013 04121EF5 v_mul_f32_e32 v15, v15, v15 ; 101E1F0F v_mul_f32_e32 v15, v15, v19 ; 101E270F v_mad_f32 v19, -2.0, v16, v4 ; D2820013 041220F5 v_mul_f32_e32 v16, v16, v16 ; 10202110 v_mul_f32_e32 v16, v16, v19 ; 10202710 v_mul_f32_e32 v15, v15, v16 ; 101E210F image_sample v[19:22], 15, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[20:27], s[16:19] ; F0800F00 00851309 image_sample v9, 2, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[8:15], s[0:3] ; F0800200 0002090D v_mul_f32_e32 v10, v3, v27 ; 10143703 s_waitcnt vmcnt(1) ; BF8C0771 v_mad_f32 v0, -v27, v0, v19 ; D2820000 244E011B v_mad_f32 v1, -v27, v1, v20 ; D2820001 2452031B v_mad_f32 v3, -v27, v3, v21 ; D2820003 2456071B s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v29, v26, v9 ; 3E3A131A v_mad_f32 v7, v7, s52, 1.0 ; D2820007 03C86907 v_mac_f32_e32 v15, v7, v29 ; 3E1E3B07 v_mul_f32_e32 v7, s66, v27 ; 100E3642 v_add_f32_e32 v9, v11, v12 ; 0612190B v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80 v_mad_f32 v11, -2.0, v5, v4 ; D282000B 04120AF5 v_mul_f32_e32 v5, v5, v5 ; 100A0B05 v_mul_f32_e32 v5, v11, v5 ; 100A0B0B v_add_f32_e64 v9, 0, v9 clamp ; D2060809 00021280 v_sub_f32_e32 v9, 1.0, v9 ; 081212F2 v_sub_f32_e32 v9, 1.0, v9 ; 081212F2 v_max_f32_e32 v5, v5, v9 ; 200A1305 v_mac_f32_e32 v7, v7, v2 ; 3E0E0507 v_madak_f32_e32 v2, v15, v5, 0xbf147ae1 ; 42040B0F BF147AE1 v_mul_f32_e32 v2, v6, v2 ; 10040506 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_mad_f32 v5, -2.0, v2, v4 ; D2820005 041204F5 v_mul_f32_e32 v2, v2, v2 ; 10040502 v_mul_f32_e32 v9, v5, v2 ; 10120505 v_madak_f32_e32 v11, v2, v5, 0xbf666666 ; 42160B02 BF666666 v_madak_f32_e32 v2, v2, v5, 0xbc23d70a ; 42040B02 BC23D70A v_mul_f32_e32 v5, v6, v11 ; 100A1706 v_mul_f32_e32 v2, v2, v8 ; 10041102 v_mac_f32_e32 v17, v0, v9 ; 3E221300 v_mac_f32_e32 v18, v1, v9 ; 3E241301 v_mac_f32_e32 v10, v3, v9 ; 3E141303 v_max_f32_e32 v0, 0, v2 ; 20000480 v_add_f32_e64 v1, 0, v5 clamp ; D2060801 00020A80 v_mad_f32 v2, -2.0, v0, v4 ; D2820002 041200F5 v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mul_f32_e32 v0, v2, v0 ; 10000102 v_mul_f32_e32 v2, v7, v0 ; 10040107 v_mad_f32 v0, -v0, v7, v22 ; D2820000 245A0F00 v_mac_f32_e32 v4, -2.0, v1 ; 3E0802F5 v_mul_f32_e32 v1, v1, v1 ; 10020301 v_mul_f32_e32 v1, v4, v1 ; 10020304 v_mac_f32_e32 v2, v0, v1 ; 3E040300 v_cvt_pkrtz_f16_f32_e32 v0, v17, v18 ; 5E002511 v_cvt_pkrtz_f16_f32_e32 v1, v10, v2 ; 5E02050A exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 96 VGPRS: 32 Code Size: 1040 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], CLIPVERTEX DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL CONST[0..54] DCL TEMP[0..3], LOCAL 0: MUL TEMP[0].xy, CONST[48].xyyy, IN[1].xyyy 1: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 2: ADD TEMP[1].x, TEMP[0].xxxx, CONST[48].wwww 3: MOV TEMP[1].z, TEMP[1].xxxx 4: MUL TEMP[0].xy, CONST[49].xyyy, IN[1].xyyy 5: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 6: ADD TEMP[2].x, TEMP[0].xxxx, CONST[49].wwww 7: MOV TEMP[1].w, TEMP[2].xxxx 8: MUL TEMP[0].xy, CONST[50].xyyy, IN[1].xyyy 9: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 10: ADD TEMP[2].x, TEMP[0].xxxx, CONST[50].wwww 11: MUL TEMP[0].xy, CONST[51].xyyy, IN[1].xyyy 12: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 13: ADD TEMP[3].x, TEMP[0].xxxx, CONST[51].wwww 14: MOV TEMP[2].y, TEMP[3].xxxx 15: MUL TEMP[0].xy, CONST[52].xyyy, IN[1].xyyy 16: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 17: ADD TEMP[3].x, TEMP[0].xxxx, CONST[52].wwww 18: MOV TEMP[2].z, TEMP[3].xxxx 19: MUL TEMP[0].xy, CONST[53].xyyy, IN[1].xyyy 20: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 21: ADD TEMP[0].x, TEMP[0].xxxx, CONST[53].wwww 22: MOV TEMP[2].w, TEMP[0].xxxx 23: MOV TEMP[0].xw, IN[0].xxxw 24: MOV TEMP[1].xy, IN[1].xyxx 25: MAD TEMP[3].x, IN[0].zzzz, CONST[0].zzzz, -IN[0].wwww 26: MOV TEMP[0].z, TEMP[3].xxxx 27: MOV TEMP[0].y, -IN[0].yyyy 28: MAD TEMP[0].xy, CONST[54].xyyy, IN[0].wwww, TEMP[0].xyyy 29: MOV OUT[2], TEMP[1] 30: MOV OUT[3], TEMP[2] 31: MOV OUT[0], TEMP[0] 32: MOV OUT[1], IN[0] 33: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 768) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 772) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 780) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 784) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 788) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 796) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 800) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 804) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 812) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 816) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 820) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 828) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 832) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 836) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 844) %30 = call float @llvm.SI.load.const(<16 x i8> %13, i32 848) %31 = call float @llvm.SI.load.const(<16 x i8> %13, i32 852) %32 = call float @llvm.SI.load.const(<16 x i8> %13, i32 860) %33 = call float @llvm.SI.load.const(<16 x i8> %13, i32 864) %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 868) %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 %37 = add i32 %5, %8 %38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = add i32 %5, %8 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = fmul float %15, %47 %50 = fmul float %16, %48 %51 = fadd float %50, %49 %52 = fadd float %51, %17 %53 = fmul float %18, %47 %54 = fmul float %19, %48 %55 = fadd float %54, %53 %56 = fadd float %55, %20 %57 = fmul float %21, %47 %58 = fmul float %22, %48 %59 = fadd float %58, %57 %60 = fadd float %59, %23 %61 = fmul float %24, %47 %62 = fmul float %25, %48 %63 = fadd float %62, %61 %64 = fadd float %63, %26 %65 = fmul float %27, %47 %66 = fmul float %28, %48 %67 = fadd float %66, %65 %68 = fadd float %67, %29 %69 = fmul float %30, %47 %70 = fmul float %31, %48 %71 = fadd float %70, %69 %72 = fadd float %71, %32 %73 = fmul float %41, %14 %74 = fsub float %73, %42 %75 = fmul float %33, %42 %76 = fadd float %75, %39 %77 = fmul float %34, %42 %78 = fsub float %77, %40 %79 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 16 %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 %81 = call float @llvm.SI.load.const(<16 x i8> %80, i32 0) %82 = fmul float %81, %39 %83 = call float @llvm.SI.load.const(<16 x i8> %80, i32 4) %84 = fmul float %83, %40 %85 = fadd float %82, %84 %86 = call float @llvm.SI.load.const(<16 x i8> %80, i32 8) %87 = fmul float %86, %41 %88 = fadd float %85, %87 %89 = call float @llvm.SI.load.const(<16 x i8> %80, i32 12) %90 = fmul float %89, %42 %91 = fadd float %88, %90 %92 = call float @llvm.SI.load.const(<16 x i8> %80, i32 16) %93 = fmul float %92, %39 %94 = call float @llvm.SI.load.const(<16 x i8> %80, i32 20) %95 = fmul float %94, %40 %96 = fadd float %93, %95 %97 = call float @llvm.SI.load.const(<16 x i8> %80, i32 24) %98 = fmul float %97, %41 %99 = fadd float %96, %98 %100 = call float @llvm.SI.load.const(<16 x i8> %80, i32 28) %101 = fmul float %100, %42 %102 = fadd float %99, %101 %103 = call float @llvm.SI.load.const(<16 x i8> %80, i32 32) %104 = fmul float %103, %39 %105 = call float @llvm.SI.load.const(<16 x i8> %80, i32 36) %106 = fmul float %105, %40 %107 = fadd float %104, %106 %108 = call float @llvm.SI.load.const(<16 x i8> %80, i32 40) %109 = fmul float %108, %41 %110 = fadd float %107, %109 %111 = call float @llvm.SI.load.const(<16 x i8> %80, i32 44) %112 = fmul float %111, %42 %113 = fadd float %110, %112 %114 = call float @llvm.SI.load.const(<16 x i8> %80, i32 48) %115 = fmul float %114, %39 %116 = call float @llvm.SI.load.const(<16 x i8> %80, i32 52) %117 = fmul float %116, %40 %118 = fadd float %115, %117 %119 = call float @llvm.SI.load.const(<16 x i8> %80, i32 56) %120 = fmul float %119, %41 %121 = fadd float %118, %120 %122 = call float @llvm.SI.load.const(<16 x i8> %80, i32 60) %123 = fmul float %122, %42 %124 = fadd float %121, %123 %125 = call float @llvm.SI.load.const(<16 x i8> %80, i32 64) %126 = fmul float %125, %39 %127 = call float @llvm.SI.load.const(<16 x i8> %80, i32 68) %128 = fmul float %127, %40 %129 = fadd float %126, %128 %130 = call float @llvm.SI.load.const(<16 x i8> %80, i32 72) %131 = fmul float %130, %41 %132 = fadd float %129, %131 %133 = call float @llvm.SI.load.const(<16 x i8> %80, i32 76) %134 = fmul float %133, %42 %135 = fadd float %132, %134 %136 = call float @llvm.SI.load.const(<16 x i8> %80, i32 80) %137 = fmul float %136, %39 %138 = call float @llvm.SI.load.const(<16 x i8> %80, i32 84) %139 = fmul float %138, %40 %140 = fadd float %137, %139 %141 = call float @llvm.SI.load.const(<16 x i8> %80, i32 88) %142 = fmul float %141, %41 %143 = fadd float %140, %142 %144 = call float @llvm.SI.load.const(<16 x i8> %80, i32 92) %145 = fmul float %144, %42 %146 = fadd float %143, %145 %147 = call float @llvm.SI.load.const(<16 x i8> %80, i32 96) %148 = fmul float %147, %39 %149 = call float @llvm.SI.load.const(<16 x i8> %80, i32 100) %150 = fmul float %149, %40 %151 = fadd float %148, %150 %152 = call float @llvm.SI.load.const(<16 x i8> %80, i32 104) %153 = fmul float %152, %41 %154 = fadd float %151, %153 %155 = call float @llvm.SI.load.const(<16 x i8> %80, i32 108) %156 = fmul float %155, %42 %157 = fadd float %154, %156 %158 = call float @llvm.SI.load.const(<16 x i8> %80, i32 112) %159 = fmul float %158, %39 %160 = call float @llvm.SI.load.const(<16 x i8> %80, i32 116) %161 = fmul float %160, %40 %162 = fadd float %159, %161 %163 = call float @llvm.SI.load.const(<16 x i8> %80, i32 120) %164 = fmul float %163, %41 %165 = fadd float %162, %164 %166 = call float @llvm.SI.load.const(<16 x i8> %80, i32 124) %167 = fmul float %166, %42 %168 = fadd float %165, %167 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %47, float %48, float %52, float %56) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %60, float %64, float %68, float %72) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %76, float %78, float %74, float %42) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 13, i32 0, float %91, float %102, float %113, float %124) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 14, i32 0, float %135, float %146, float %157, float %168) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_load_dwordx4 s[4:7], s[2:3], 0x0 ; C0820300 s_load_dwordx4 s[0:3], s[2:3], 0x40 ; C0800340 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[4:7], 0x2 ; C2040502 s_buffer_load_dword s9, s[4:7], 0xc0 ; C20485C0 s_buffer_load_dword s10, s[4:7], 0xc1 ; C20505C1 s_buffer_load_dword s11, s[4:7], 0xc3 ; C20585C3 s_buffer_load_dword s12, s[4:7], 0xc4 ; C20605C4 s_buffer_load_dword s13, s[4:7], 0xc5 ; C20685C5 s_buffer_load_dword s14, s[4:7], 0xc7 ; C20705C7 s_buffer_load_dword s15, s[4:7], 0xc8 ; C20785C8 s_buffer_load_dword s16, s[4:7], 0xc9 ; C20805C9 s_buffer_load_dword s17, s[4:7], 0xcb ; C20885CB s_buffer_load_dword s18, s[4:7], 0xcc ; C20905CC s_buffer_load_dword s19, s[4:7], 0xcd ; C20985CD s_buffer_load_dword s20, s[4:7], 0xcf ; C20A05CF s_buffer_load_dword s21, s[4:7], 0xd0 ; C20A85D0 s_buffer_load_dword s22, s[4:7], 0xd1 ; C20B05D1 s_buffer_load_dword s23, s[4:7], 0xd3 ; C20B85D3 s_buffer_load_dword s24, s[4:7], 0xd4 ; C20C05D4 s_buffer_load_dword s25, s[4:7], 0xd5 ; C20C85D5 s_buffer_load_dword s26, s[4:7], 0xd7 ; C20D05D7 s_buffer_load_dword s27, s[4:7], 0xd8 ; C20D85D8 s_buffer_load_dword s4, s[4:7], 0xd9 ; C20205D9 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x2 ; C2038102 s_buffer_load_dword s28, s[0:3], 0x3 ; C20E0103 s_buffer_load_dword s29, s[0:3], 0x4 ; C20E8104 s_buffer_load_dword s30, s[0:3], 0x5 ; C20F0105 s_buffer_load_dword s31, s[0:3], 0x6 ; C20F8106 s_buffer_load_dword s32, s[0:3], 0x7 ; C2100107 s_buffer_load_dword s33, s[0:3], 0x8 ; C2108108 s_buffer_load_dword s34, s[0:3], 0x9 ; C2110109 s_buffer_load_dword s35, s[0:3], 0xa ; C211810A s_buffer_load_dword s36, s[0:3], 0xb ; C212010B s_buffer_load_dword s37, s[0:3], 0xc ; C212810C s_buffer_load_dword s38, s[0:3], 0xd ; C213010D s_buffer_load_dword s39, s[0:3], 0xe ; C213810E s_buffer_load_dword s40, s[0:3], 0xf ; C214010F s_buffer_load_dword s41, s[0:3], 0x10 ; C2148110 s_buffer_load_dword s42, s[0:3], 0x11 ; C2150111 s_buffer_load_dword s43, s[0:3], 0x12 ; C2158112 s_buffer_load_dword s44, s[0:3], 0x13 ; C2160113 s_buffer_load_dword s45, s[0:3], 0x14 ; C2168114 s_buffer_load_dword s46, s[0:3], 0x15 ; C2170115 s_buffer_load_dword s47, s[0:3], 0x16 ; C2178116 s_buffer_load_dword s48, s[0:3], 0x17 ; C2180117 s_buffer_load_dword s49, s[0:3], 0x18 ; C2188118 s_buffer_load_dword s50, s[0:3], 0x19 ; C2190119 s_buffer_load_dword s51, s[0:3], 0x1a ; C219811A s_buffer_load_dword s52, s[0:3], 0x1b ; C21A011B s_buffer_load_dword s53, s[0:3], 0x1c ; C21A811C s_buffer_load_dword s54, s[0:3], 0x1d ; C21B011D s_buffer_load_dword s55, s[0:3], 0x1e ; C21B811E s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s11 ; 7E00020B s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v7, s14 ; 7E0E020E v_mov_b32_e32 v8, s17 ; 7E100211 v_mov_b32_e32 v9, s20 ; 7E120214 v_mov_b32_e32 v10, s23 ; 7E140217 v_mov_b32_e32 v11, s26 ; 7E16021A v_mac_f32_e32 v0, s9, v5 ; 3E000A09 v_mac_f32_e32 v7, s12, v5 ; 3E0E0A0C v_mac_f32_e32 v8, s15, v5 ; 3E100A0F v_mac_f32_e32 v9, s18, v5 ; 3E120A12 v_mac_f32_e32 v10, s21, v5 ; 3E140A15 v_mac_f32_e32 v11, s24, v5 ; 3E160A18 v_mac_f32_e32 v0, s10, v6 ; 3E000C0A v_mac_f32_e32 v7, s13, v6 ; 3E0E0C0D v_mac_f32_e32 v8, s16, v6 ; 3E100C10 v_mac_f32_e32 v9, s19, v6 ; 3E120C13 exp 15, 32, 0, 0, 0, v5, v6, v0, v7 ; F800020F 07000605 v_mac_f32_e32 v10, s22, v6 ; 3E140C16 v_mac_f32_e32 v11, s25, v6 ; 3E160C19 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v0, v3, s8, -v4 ; D2820000 84101103 v_mad_f32 v5, s27, v4, v1 ; D2820005 0406081B v_mad_f32 v6, s4, v4, -v2 ; D2820006 840A0804 v_mul_f32_e32 v7, s6, v2 ; 100E0406 exp 15, 33, 0, 0, 0, v8, v9, v10, v11 ; F800021F 0B0A0908 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v8, s30, v2 ; 1010041E v_mac_f32_e32 v7, s5, v1 ; 3E0E0205 exp 15, 12, 0, 0, 0, v5, v6, v0, v4 ; F80000CF 04000605 v_mac_f32_e32 v8, s29, v1 ; 3E10021D s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s34, v2 ; 10000422 v_mac_f32_e32 v0, s33, v1 ; 3E000221 v_mul_f32_e32 v5, s38, v2 ; 100A0426 v_mac_f32_e32 v5, s37, v1 ; 3E0A0225 v_mul_f32_e32 v6, s42, v2 ; 100C042A v_mul_f32_e32 v9, s46, v2 ; 1012042E v_mul_f32_e32 v10, s50, v2 ; 10140432 v_mul_f32_e32 v2, s54, v2 ; 10040436 v_mac_f32_e32 v6, s41, v1 ; 3E0C0229 v_mac_f32_e32 v9, s45, v1 ; 3E12022D v_mac_f32_e32 v10, s49, v1 ; 3E140231 v_mac_f32_e32 v2, s53, v1 ; 3E040235 v_mac_f32_e32 v7, s7, v3 ; 3E0E0607 v_mac_f32_e32 v8, s31, v3 ; 3E10061F v_mac_f32_e32 v0, s35, v3 ; 3E000623 v_mac_f32_e32 v5, s39, v3 ; 3E0A0627 v_mac_f32_e32 v6, s43, v3 ; 3E0C062B v_mac_f32_e32 v9, s47, v3 ; 3E12062F v_mac_f32_e32 v10, s51, v3 ; 3E140633 v_mac_f32_e32 v2, s55, v3 ; 3E040637 v_mac_f32_e32 v7, s28, v4 ; 3E0E081C v_mac_f32_e32 v8, s32, v4 ; 3E100820 v_mac_f32_e32 v0, s36, v4 ; 3E000824 v_mac_f32_e32 v5, s40, v4 ; 3E0A0828 v_mac_f32_e32 v6, s44, v4 ; 3E0C082C v_mac_f32_e32 v9, s48, v4 ; 3E120830 v_mac_f32_e32 v10, s52, v4 ; 3E140834 v_mac_f32_e32 v2, s0, v4 ; 3E040800 exp 15, 13, 0, 0, 0, v7, v8, v0, v5 ; F80000DF 05000807 exp 15, 14, 0, 1, 0, v6, v9, v10, v2 ; F80008EF 020A0906 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 12 Code Size: 544 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL CONST[0..3] DCL TEMP[0..4], LOCAL IMM[0] FLT32 { -0.5000, -1.0000, 10.0000, -10.0000} IMM[1] FLT32 { -2.0000, 3.0000, 6.0000, 1.0000} IMM[2] FLT32 { -0.9900, 100.0000, 1.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0].xw, TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].xy, IN[1].xyyy 3: TEX TEMP[1].y, TEMP[1], SAMP[1], 2D 4: MAD TEMP[0].x, TEMP[1].yyyy, TEMP[0].xxxx, TEMP[0].wwww 5: MOV TEMP[1].xy, IN[0].zwww 6: TEX TEMP[1].w, TEMP[1], SAMP[4], 2D 7: ADD TEMP[2].xy, TEMP[1].wwww, IMM[0].xyyy 8: ADD TEMP[3].x, TEMP[1].wwww, TEMP[1].wwww 9: MOV_SAT TEMP[3].x, TEMP[3].xxxx 10: MUL TEMP[2].xy, TEMP[2].xyyy, IMM[0].zwww 11: MOV_SAT TEMP[2].xy, TEMP[2].xyyy 12: MAD TEMP[1].xy, TEMP[2].xyyy, IMM[1].xxxx, IMM[1].yyyy 13: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[2].xyyy 14: MUL TEMP[1].xy, TEMP[2].xyyy, TEMP[1].xyyy 15: MUL TEMP[1].x, TEMP[1].yyyy, TEMP[1].xxxx 16: MAD TEMP[2].x, CONST[3].wwww, IMM[1].zzzz, IMM[1].wwww 17: MAD TEMP[0].x, TEMP[0].xxxx, TEMP[2].xxxx, TEMP[1].xxxx 18: ADD TEMP[0].x, TEMP[0].xxxx, IMM[2].xxxx 19: MUL TEMP[0].x, TEMP[0].xxxx, IMM[2].yyyy 20: MOV_SAT TEMP[1].x, TEMP[0].xxxx 21: MAD TEMP[2].x, TEMP[1].xxxx, IMM[1].xxxx, IMM[1].yyyy 22: MUL TEMP[0].x, TEMP[1].xxxx, TEMP[1].xxxx 23: MAD TEMP[0].x, TEMP[2].xxxx, TEMP[0].xxxx, IMM[1].wwww 24: MOV TEMP[1].xy, IN[0].xyyy 25: TEX TEMP[1].xyz, TEMP[1], SAMP[3], 2D 26: ADD TEMP[0].x, TEMP[0].xxxx, -TEMP[1].xxxx 27: MOV_SAT TEMP[0].x, TEMP[0].xxxx 28: MAX TEMP[1].x, TEMP[1].yyyy, TEMP[1].zzzz 29: MAX TEMP[1].x, TEMP[1].xxxx, TEMP[0].xxxx 30: MOV TEMP[2].xy, IN[0].xyyy 31: TEX TEMP[2].xyz, TEMP[2], SAMP[2], 2D 32: ADD TEMP[4].x, -TEMP[3].xxxx, TEMP[2].xxxx 33: MAD TEMP[1].x, TEMP[1].xxxx, TEMP[4].xxxx, TEMP[3].xxxx 34: ADD TEMP[3].x, TEMP[2].yyyy, IMM[0].yyyy 35: MAD TEMP[2].xy, TEMP[2].zzzz, IMM[2].zwww, IMM[2].wzzz 36: MOV TEMP[1].zw, TEMP[2].yyxy 37: MAD TEMP[0].x, TEMP[0].xxxx, TEMP[3].xxxx, IMM[1].wwww 38: MOV TEMP[1].y, TEMP[0].xxxx 39: MOV OUT[0], TEMP[1] 40: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 60) %26 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %27 = load <8 x i32>, <8 x i32> addrspace(2)* %26, align 32, !tbaa !0 %28 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %29 = load <4 x i32>, <4 x i32> addrspace(2)* %28, align 16, !tbaa !0 %30 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %31 = load <8 x i32>, <8 x i32> addrspace(2)* %30, align 32, !tbaa !0 %32 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %33 = load <4 x i32>, <4 x i32> addrspace(2)* %32, align 16, !tbaa !0 %34 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %35 = load <8 x i32>, <8 x i32> addrspace(2)* %34, align 32, !tbaa !0 %36 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %37 = load <4 x i32>, <4 x i32> addrspace(2)* %36, align 16, !tbaa !0 %38 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %39 = load <8 x i32>, <8 x i32> addrspace(2)* %38, align 32, !tbaa !0 %40 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %41 = load <4 x i32>, <4 x i32> addrspace(2)* %40, align 16, !tbaa !0 %42 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %43 = load <8 x i32>, <8 x i32> addrspace(2)* %42, align 32, !tbaa !0 %44 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %45 = load <4 x i32>, <4 x i32> addrspace(2)* %44, align 16, !tbaa !0 %46 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %47 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %48 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %49 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %50 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %51 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %52 = bitcast float %46 to i32 %53 = bitcast float %47 to i32 %54 = insertelement <2 x i32> undef, i32 %52, i32 0 %55 = insertelement <2 x i32> %54, i32 %53, i32 1 %56 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %55, <8 x i32> %27, <4 x i32> %29, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %57 = extractelement <4 x float> %56, i32 0 %58 = extractelement <4 x float> %56, i32 3 %59 = bitcast float %50 to i32 %60 = bitcast float %51 to i32 %61 = insertelement <2 x i32> undef, i32 %59, i32 0 %62 = insertelement <2 x i32> %61, i32 %60, i32 1 %63 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %62, <8 x i32> %31, <4 x i32> %33, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %64 = extractelement <4 x float> %63, i32 1 %65 = fmul float %64, %57 %66 = fadd float %65, %58 %67 = bitcast float %48 to i32 %68 = bitcast float %49 to i32 %69 = insertelement <2 x i32> undef, i32 %67, i32 0 %70 = insertelement <2 x i32> %69, i32 %68, i32 1 %71 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %70, <8 x i32> %43, <4 x i32> %45, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %72 = extractelement <4 x float> %71, i32 3 %73 = fadd float %72, -5.000000e-01 %74 = fadd float %72, -1.000000e+00 %75 = fadd float %72, %72 %76 = call float @llvm.AMDIL.clamp.(float %75, float 0.000000e+00, float 1.000000e+00) %77 = fmul float %73, 1.000000e+01 %78 = fmul float %74, -1.000000e+01 %79 = call float @llvm.AMDIL.clamp.(float %77, float 0.000000e+00, float 1.000000e+00) %80 = call float @llvm.AMDIL.clamp.(float %78, float 0.000000e+00, float 1.000000e+00) %81 = fmul float %79, -2.000000e+00 %82 = fadd float %81, 3.000000e+00 %83 = fmul float %80, -2.000000e+00 %84 = fadd float %83, 3.000000e+00 %85 = fmul float %79, %79 %86 = fmul float %80, %80 %87 = fmul float %85, %82 %88 = fmul float %86, %84 %89 = fmul float %88, %87 %90 = fmul float %25, 6.000000e+00 %91 = fadd float %90, 1.000000e+00 %92 = fmul float %66, %91 %93 = fadd float %92, %89 %94 = fadd float %93, 0xBFEFAE1480000000 %95 = fmul float %94, 1.000000e+02 %96 = call float @llvm.AMDIL.clamp.(float %95, float 0.000000e+00, float 1.000000e+00) %97 = fmul float %96, -2.000000e+00 %98 = fadd float %97, 3.000000e+00 %99 = fmul float %96, %96 %100 = fmul float %98, %99 %101 = fadd float %100, 1.000000e+00 %102 = bitcast float %46 to i32 %103 = bitcast float %47 to i32 %104 = insertelement <2 x i32> undef, i32 %102, i32 0 %105 = insertelement <2 x i32> %104, i32 %103, i32 1 %106 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %105, <8 x i32> %39, <4 x i32> %41, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %107 = extractelement <4 x float> %106, i32 0 %108 = extractelement <4 x float> %106, i32 1 %109 = extractelement <4 x float> %106, i32 2 %110 = fsub float %101, %107 %111 = call float @llvm.AMDIL.clamp.(float %110, float 0.000000e+00, float 1.000000e+00) %112 = call float @llvm.maxnum.f32(float %108, float %109) %113 = call float @llvm.maxnum.f32(float %112, float %111) %114 = bitcast float %46 to i32 %115 = bitcast float %47 to i32 %116 = insertelement <2 x i32> undef, i32 %114, i32 0 %117 = insertelement <2 x i32> %116, i32 %115, i32 1 %118 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %117, <8 x i32> %35, <4 x i32> %37, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %119 = extractelement <4 x float> %118, i32 0 %120 = extractelement <4 x float> %118, i32 1 %121 = extractelement <4 x float> %118, i32 2 %122 = fsub float %119, %76 %123 = fmul float %113, %122 %124 = fadd float %123, %76 %125 = fadd float %120, -1.000000e+00 %126 = fadd float %121, 0.000000e+00 %127 = fmul float %121, 0.000000e+00 %128 = fadd float %127, 1.000000e+00 %129 = fmul float %111, %125 %130 = fadd float %129, 1.000000e+00 %131 = call i32 @llvm.SI.packf16(float %124, float %130) %132 = bitcast i32 %131 to float %133 = call i32 @llvm.SI.packf16(float %126, float %128) %134 = bitcast i32 %133 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %132, float %134, float %132, float %134) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s10 ; BEFC030A v_mov_b32_e32 v2, 0x41200000 ; 7E0402FF 41200000 v_mov_b32_e32 v3, 0x40400000 ; 7E0602FF 40400000 v_mov_b32_e32 v4, 0x40c00000 ; 7E0802FF 40C00000 v_interp_p1_f32 v5, v0, 0, 0, [m0] ; C8140000 v_interp_p2_f32 v5, [v5], v1, 0, 0, [m0] ; C8150001 v_interp_p1_f32 v6, v0, 1, 0, [m0] ; C8180100 v_interp_p2_f32 v6, [v6], v1, 1, 0, [m0] ; C8190101 v_interp_p1_f32 v7, v0, 2, 0, [m0] ; C81C0200 v_interp_p2_f32 v7, [v7], v1, 2, 0, [m0] ; C81D0201 v_interp_p1_f32 v8, v0, 3, 0, [m0] ; C8200300 v_interp_p2_f32 v8, [v8], v1, 3, 0, [m0] ; C8210301 v_interp_p1_f32 v9, v0, 0, 1, [m0] ; C8240400 v_interp_p2_f32 v9, [v9], v1, 0, 1, [m0] ; C8250401 v_interp_p1_f32 v10, v0, 1, 1, [m0] ; C8280500 v_interp_p2_f32 v10, [v10], v1, 1, 1, [m0] ; C8290501 s_load_dwordx4 s[28:31], s[2:3], 0x0 ; C08E0300 s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 s_load_dwordx4 s[36:39], s[4:5], 0x4 ; C0920504 s_load_dwordx4 s[0:3], s[4:5], 0x8 ; C0800508 s_load_dwordx4 s[16:19], s[4:5], 0xc ; C088050C s_load_dwordx4 s[40:43], s[4:5], 0x10 ; C0940510 s_load_dwordx8 s[44:51], s[6:7], 0x0 ; C0D60700 s_load_dwordx8 s[52:59], s[6:7], 0x8 ; C0DA0708 s_load_dwordx8 s[8:15], s[6:7], 0x10 ; C0C40710 s_load_dwordx8 s[20:27], s[6:7], 0x18 ; C0CA0718 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[28:31], 0xf ; C2021D0F s_load_dwordx8 s[60:67], s[6:7], 0x20 ; C0DE0720 image_sample v[0:1], 9, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[44:51], s[32:35] ; F0800900 010B0005 image_sample v9, 2, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[52:59], s[36:39] ; F0800200 012D0909 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v1, v0, v9 ; 3E021300 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v4, s4, 1.0 ; D2820000 03C80904 image_sample v4, 8, 0, 0, 0, 0, 0, 0, 0, v[7:8], s[60:67], s[40:43] ; F0800800 014F0407 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v7, -0.5, v4 ; 060E08F1 v_add_f32_e32 v8, v4, v4 ; 06100904 v_add_f32_e64 v8, 0, v8 clamp ; D2060808 00021080 v_mul_f32_e32 v7, v2, v7 ; 100E0F02 v_madmk_f32_e32 v2, v4, v2, 0xc1200000 ; 40040504 C1200000 v_add_f32_e64 v4, 0, v7 clamp ; D2060804 00020E80 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_mad_f32 v7, -2.0, v4, v3 ; D2820007 040E08F5 v_mul_f32_e32 v4, v4, v4 ; 10080904 v_mul_f32_e32 v4, v7, v4 ; 10080907 v_mad_f32 v7, -2.0, v2, v3 ; D2820007 040E04F5 v_mul_f32_e32 v2, v2, v2 ; 10040502 v_mul_f32_e32 v2, v7, v2 ; 10040507 v_madak_f32_e32 v2, v2, v4, 0xbf7d70a4 ; 42040902 BF7D70A4 v_mac_f32_e32 v2, v0, v1 ; 3E040300 v_mul_f32_e32 v0, 0x42c80000, v2 ; 100004FF 42C80000 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mac_f32_e32 v3, -2.0, v0 ; 3E0600F5 v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mad_f32 v0, v3, v0, 1.0 ; D2820000 03CA0103 image_sample v[1:3], 7, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[20:27], s[16:19] ; F0800700 00850105 s_waitcnt vmcnt(0) ; BF8C0770 v_subrev_f32_e32 v0, v1, v0 ; 0A000101 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_max3_f32 v1, v2, v3, v0 ; D2A80001 04020702 image_sample v[2:4], 7, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[8:15], s[0:3] ; F0800700 00020205 s_waitcnt vmcnt(0) ; BF8C0770 v_subrev_f32_e32 v2, v8, v2 ; 0A040508 v_mac_f32_e32 v8, v2, v1 ; 3E100302 v_add_f32_e32 v1, -1.0, v3 ; 060206F3 v_add_f32_e32 v2, 0, v4 ; 06040880 v_mad_f32 v3, 0, v4, 1.0 ; D2820003 03CA0880 v_mad_f32 v0, v0, v1, 1.0 ; D2820000 03CA0300 v_cvt_pkrtz_f16_f32_e32 v0, v8, v0 ; 5E000108 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 12 Code Size: 396 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], CLIPVERTEX DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL CONST[0..54] DCL TEMP[0..3], LOCAL 0: MUL TEMP[0].xy, CONST[48].xyyy, IN[1].xyyy 1: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 2: ADD TEMP[1].x, TEMP[0].xxxx, CONST[48].wwww 3: MOV TEMP[1].z, TEMP[1].xxxx 4: MUL TEMP[0].xy, CONST[49].xyyy, IN[1].xyyy 5: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 6: ADD TEMP[2].x, TEMP[0].xxxx, CONST[49].wwww 7: MOV TEMP[1].w, TEMP[2].xxxx 8: MUL TEMP[0].xy, CONST[50].xyyy, IN[1].xyyy 9: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 10: ADD TEMP[2].x, TEMP[0].xxxx, CONST[50].wwww 11: MUL TEMP[0].xy, CONST[51].xyyy, IN[1].xyyy 12: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 13: ADD TEMP[3].x, TEMP[0].xxxx, CONST[51].wwww 14: MOV TEMP[2].y, TEMP[3].xxxx 15: MUL TEMP[0].xy, CONST[52].xyyy, IN[1].xyyy 16: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 17: ADD TEMP[3].x, TEMP[0].xxxx, CONST[52].wwww 18: MOV TEMP[2].z, TEMP[3].xxxx 19: MUL TEMP[0].xy, CONST[53].xyyy, IN[1].xyyy 20: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 21: ADD TEMP[0].x, TEMP[0].xxxx, CONST[53].wwww 22: MOV TEMP[2].w, TEMP[0].xxxx 23: MOV TEMP[0].xw, IN[0].xxxw 24: MOV TEMP[1].xy, IN[1].xyxx 25: MAD TEMP[3].x, IN[0].zzzz, CONST[0].zzzz, -IN[0].wwww 26: MOV TEMP[0].z, TEMP[3].xxxx 27: MOV TEMP[0].y, -IN[0].yyyy 28: MAD TEMP[0].xy, CONST[54].xyyy, IN[0].wwww, TEMP[0].xyyy 29: MOV OUT[2], TEMP[1] 30: MOV OUT[3], TEMP[2] 31: MOV OUT[0], TEMP[0] 32: MOV OUT[1], IN[0] 33: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 768) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 772) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 780) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 784) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 788) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 796) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 800) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 804) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 812) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 816) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 820) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 828) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 832) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 836) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 844) %30 = call float @llvm.SI.load.const(<16 x i8> %13, i32 848) %31 = call float @llvm.SI.load.const(<16 x i8> %13, i32 852) %32 = call float @llvm.SI.load.const(<16 x i8> %13, i32 860) %33 = call float @llvm.SI.load.const(<16 x i8> %13, i32 864) %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 868) %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 %37 = add i32 %5, %8 %38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = add i32 %5, %8 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = fmul float %15, %47 %50 = fmul float %16, %48 %51 = fadd float %50, %49 %52 = fadd float %51, %17 %53 = fmul float %18, %47 %54 = fmul float %19, %48 %55 = fadd float %54, %53 %56 = fadd float %55, %20 %57 = fmul float %21, %47 %58 = fmul float %22, %48 %59 = fadd float %58, %57 %60 = fadd float %59, %23 %61 = fmul float %24, %47 %62 = fmul float %25, %48 %63 = fadd float %62, %61 %64 = fadd float %63, %26 %65 = fmul float %27, %47 %66 = fmul float %28, %48 %67 = fadd float %66, %65 %68 = fadd float %67, %29 %69 = fmul float %30, %47 %70 = fmul float %31, %48 %71 = fadd float %70, %69 %72 = fadd float %71, %32 %73 = fmul float %41, %14 %74 = fsub float %73, %42 %75 = fmul float %33, %42 %76 = fadd float %75, %39 %77 = fmul float %34, %42 %78 = fsub float %77, %40 %79 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 16 %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 %81 = call float @llvm.SI.load.const(<16 x i8> %80, i32 0) %82 = fmul float %81, %39 %83 = call float @llvm.SI.load.const(<16 x i8> %80, i32 4) %84 = fmul float %83, %40 %85 = fadd float %82, %84 %86 = call float @llvm.SI.load.const(<16 x i8> %80, i32 8) %87 = fmul float %86, %41 %88 = fadd float %85, %87 %89 = call float @llvm.SI.load.const(<16 x i8> %80, i32 12) %90 = fmul float %89, %42 %91 = fadd float %88, %90 %92 = call float @llvm.SI.load.const(<16 x i8> %80, i32 16) %93 = fmul float %92, %39 %94 = call float @llvm.SI.load.const(<16 x i8> %80, i32 20) %95 = fmul float %94, %40 %96 = fadd float %93, %95 %97 = call float @llvm.SI.load.const(<16 x i8> %80, i32 24) %98 = fmul float %97, %41 %99 = fadd float %96, %98 %100 = call float @llvm.SI.load.const(<16 x i8> %80, i32 28) %101 = fmul float %100, %42 %102 = fadd float %99, %101 %103 = call float @llvm.SI.load.const(<16 x i8> %80, i32 32) %104 = fmul float %103, %39 %105 = call float @llvm.SI.load.const(<16 x i8> %80, i32 36) %106 = fmul float %105, %40 %107 = fadd float %104, %106 %108 = call float @llvm.SI.load.const(<16 x i8> %80, i32 40) %109 = fmul float %108, %41 %110 = fadd float %107, %109 %111 = call float @llvm.SI.load.const(<16 x i8> %80, i32 44) %112 = fmul float %111, %42 %113 = fadd float %110, %112 %114 = call float @llvm.SI.load.const(<16 x i8> %80, i32 48) %115 = fmul float %114, %39 %116 = call float @llvm.SI.load.const(<16 x i8> %80, i32 52) %117 = fmul float %116, %40 %118 = fadd float %115, %117 %119 = call float @llvm.SI.load.const(<16 x i8> %80, i32 56) %120 = fmul float %119, %41 %121 = fadd float %118, %120 %122 = call float @llvm.SI.load.const(<16 x i8> %80, i32 60) %123 = fmul float %122, %42 %124 = fadd float %121, %123 %125 = call float @llvm.SI.load.const(<16 x i8> %80, i32 64) %126 = fmul float %125, %39 %127 = call float @llvm.SI.load.const(<16 x i8> %80, i32 68) %128 = fmul float %127, %40 %129 = fadd float %126, %128 %130 = call float @llvm.SI.load.const(<16 x i8> %80, i32 72) %131 = fmul float %130, %41 %132 = fadd float %129, %131 %133 = call float @llvm.SI.load.const(<16 x i8> %80, i32 76) %134 = fmul float %133, %42 %135 = fadd float %132, %134 %136 = call float @llvm.SI.load.const(<16 x i8> %80, i32 80) %137 = fmul float %136, %39 %138 = call float @llvm.SI.load.const(<16 x i8> %80, i32 84) %139 = fmul float %138, %40 %140 = fadd float %137, %139 %141 = call float @llvm.SI.load.const(<16 x i8> %80, i32 88) %142 = fmul float %141, %41 %143 = fadd float %140, %142 %144 = call float @llvm.SI.load.const(<16 x i8> %80, i32 92) %145 = fmul float %144, %42 %146 = fadd float %143, %145 %147 = call float @llvm.SI.load.const(<16 x i8> %80, i32 96) %148 = fmul float %147, %39 %149 = call float @llvm.SI.load.const(<16 x i8> %80, i32 100) %150 = fmul float %149, %40 %151 = fadd float %148, %150 %152 = call float @llvm.SI.load.const(<16 x i8> %80, i32 104) %153 = fmul float %152, %41 %154 = fadd float %151, %153 %155 = call float @llvm.SI.load.const(<16 x i8> %80, i32 108) %156 = fmul float %155, %42 %157 = fadd float %154, %156 %158 = call float @llvm.SI.load.const(<16 x i8> %80, i32 112) %159 = fmul float %158, %39 %160 = call float @llvm.SI.load.const(<16 x i8> %80, i32 116) %161 = fmul float %160, %40 %162 = fadd float %159, %161 %163 = call float @llvm.SI.load.const(<16 x i8> %80, i32 120) %164 = fmul float %163, %41 %165 = fadd float %162, %164 %166 = call float @llvm.SI.load.const(<16 x i8> %80, i32 124) %167 = fmul float %166, %42 %168 = fadd float %165, %167 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %47, float %48, float %52, float %56) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %60, float %64, float %68, float %72) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %76, float %78, float %74, float %42) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 13, i32 0, float %91, float %102, float %113, float %124) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 14, i32 0, float %135, float %146, float %157, float %168) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_load_dwordx4 s[4:7], s[2:3], 0x0 ; C0820300 s_load_dwordx4 s[0:3], s[2:3], 0x40 ; C0800340 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[4:7], 0x2 ; C2040502 s_buffer_load_dword s9, s[4:7], 0xc0 ; C20485C0 s_buffer_load_dword s10, s[4:7], 0xc1 ; C20505C1 s_buffer_load_dword s11, s[4:7], 0xc3 ; C20585C3 s_buffer_load_dword s12, s[4:7], 0xc4 ; C20605C4 s_buffer_load_dword s13, s[4:7], 0xc5 ; C20685C5 s_buffer_load_dword s14, s[4:7], 0xc7 ; C20705C7 s_buffer_load_dword s15, s[4:7], 0xc8 ; C20785C8 s_buffer_load_dword s16, s[4:7], 0xc9 ; C20805C9 s_buffer_load_dword s17, s[4:7], 0xcb ; C20885CB s_buffer_load_dword s18, s[4:7], 0xcc ; C20905CC s_buffer_load_dword s19, s[4:7], 0xcd ; C20985CD s_buffer_load_dword s20, s[4:7], 0xcf ; C20A05CF s_buffer_load_dword s21, s[4:7], 0xd0 ; C20A85D0 s_buffer_load_dword s22, s[4:7], 0xd1 ; C20B05D1 s_buffer_load_dword s23, s[4:7], 0xd3 ; C20B85D3 s_buffer_load_dword s24, s[4:7], 0xd4 ; C20C05D4 s_buffer_load_dword s25, s[4:7], 0xd5 ; C20C85D5 s_buffer_load_dword s26, s[4:7], 0xd7 ; C20D05D7 s_buffer_load_dword s27, s[4:7], 0xd8 ; C20D85D8 s_buffer_load_dword s4, s[4:7], 0xd9 ; C20205D9 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x2 ; C2038102 s_buffer_load_dword s28, s[0:3], 0x3 ; C20E0103 s_buffer_load_dword s29, s[0:3], 0x4 ; C20E8104 s_buffer_load_dword s30, s[0:3], 0x5 ; C20F0105 s_buffer_load_dword s31, s[0:3], 0x6 ; C20F8106 s_buffer_load_dword s32, s[0:3], 0x7 ; C2100107 s_buffer_load_dword s33, s[0:3], 0x8 ; C2108108 s_buffer_load_dword s34, s[0:3], 0x9 ; C2110109 s_buffer_load_dword s35, s[0:3], 0xa ; C211810A s_buffer_load_dword s36, s[0:3], 0xb ; C212010B s_buffer_load_dword s37, s[0:3], 0xc ; C212810C s_buffer_load_dword s38, s[0:3], 0xd ; C213010D s_buffer_load_dword s39, s[0:3], 0xe ; C213810E s_buffer_load_dword s40, s[0:3], 0xf ; C214010F s_buffer_load_dword s41, s[0:3], 0x10 ; C2148110 s_buffer_load_dword s42, s[0:3], 0x11 ; C2150111 s_buffer_load_dword s43, s[0:3], 0x12 ; C2158112 s_buffer_load_dword s44, s[0:3], 0x13 ; C2160113 s_buffer_load_dword s45, s[0:3], 0x14 ; C2168114 s_buffer_load_dword s46, s[0:3], 0x15 ; C2170115 s_buffer_load_dword s47, s[0:3], 0x16 ; C2178116 s_buffer_load_dword s48, s[0:3], 0x17 ; C2180117 s_buffer_load_dword s49, s[0:3], 0x18 ; C2188118 s_buffer_load_dword s50, s[0:3], 0x19 ; C2190119 s_buffer_load_dword s51, s[0:3], 0x1a ; C219811A s_buffer_load_dword s52, s[0:3], 0x1b ; C21A011B s_buffer_load_dword s53, s[0:3], 0x1c ; C21A811C s_buffer_load_dword s54, s[0:3], 0x1d ; C21B011D s_buffer_load_dword s55, s[0:3], 0x1e ; C21B811E s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s11 ; 7E00020B s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v7, s14 ; 7E0E020E v_mov_b32_e32 v8, s17 ; 7E100211 v_mov_b32_e32 v9, s20 ; 7E120214 v_mov_b32_e32 v10, s23 ; 7E140217 v_mov_b32_e32 v11, s26 ; 7E16021A v_mac_f32_e32 v0, s9, v5 ; 3E000A09 v_mac_f32_e32 v7, s12, v5 ; 3E0E0A0C v_mac_f32_e32 v8, s15, v5 ; 3E100A0F v_mac_f32_e32 v9, s18, v5 ; 3E120A12 v_mac_f32_e32 v10, s21, v5 ; 3E140A15 v_mac_f32_e32 v11, s24, v5 ; 3E160A18 v_mac_f32_e32 v0, s10, v6 ; 3E000C0A v_mac_f32_e32 v7, s13, v6 ; 3E0E0C0D v_mac_f32_e32 v8, s16, v6 ; 3E100C10 v_mac_f32_e32 v9, s19, v6 ; 3E120C13 exp 15, 32, 0, 0, 0, v5, v6, v0, v7 ; F800020F 07000605 v_mac_f32_e32 v10, s22, v6 ; 3E140C16 v_mac_f32_e32 v11, s25, v6 ; 3E160C19 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v0, v3, s8, -v4 ; D2820000 84101103 v_mad_f32 v5, s27, v4, v1 ; D2820005 0406081B v_mad_f32 v6, s4, v4, -v2 ; D2820006 840A0804 v_mul_f32_e32 v7, s6, v2 ; 100E0406 exp 15, 33, 0, 0, 0, v8, v9, v10, v11 ; F800021F 0B0A0908 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v8, s30, v2 ; 1010041E v_mac_f32_e32 v7, s5, v1 ; 3E0E0205 exp 15, 12, 0, 0, 0, v5, v6, v0, v4 ; F80000CF 04000605 v_mac_f32_e32 v8, s29, v1 ; 3E10021D s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s34, v2 ; 10000422 v_mac_f32_e32 v0, s33, v1 ; 3E000221 v_mul_f32_e32 v5, s38, v2 ; 100A0426 v_mac_f32_e32 v5, s37, v1 ; 3E0A0225 v_mul_f32_e32 v6, s42, v2 ; 100C042A v_mul_f32_e32 v9, s46, v2 ; 1012042E v_mul_f32_e32 v10, s50, v2 ; 10140432 v_mul_f32_e32 v2, s54, v2 ; 10040436 v_mac_f32_e32 v6, s41, v1 ; 3E0C0229 v_mac_f32_e32 v9, s45, v1 ; 3E12022D v_mac_f32_e32 v10, s49, v1 ; 3E140231 v_mac_f32_e32 v2, s53, v1 ; 3E040235 v_mac_f32_e32 v7, s7, v3 ; 3E0E0607 v_mac_f32_e32 v8, s31, v3 ; 3E10061F v_mac_f32_e32 v0, s35, v3 ; 3E000623 v_mac_f32_e32 v5, s39, v3 ; 3E0A0627 v_mac_f32_e32 v6, s43, v3 ; 3E0C062B v_mac_f32_e32 v9, s47, v3 ; 3E12062F v_mac_f32_e32 v10, s51, v3 ; 3E140633 v_mac_f32_e32 v2, s55, v3 ; 3E040637 v_mac_f32_e32 v7, s28, v4 ; 3E0E081C v_mac_f32_e32 v8, s32, v4 ; 3E100820 v_mac_f32_e32 v0, s36, v4 ; 3E000824 v_mac_f32_e32 v5, s40, v4 ; 3E0A0828 v_mac_f32_e32 v6, s44, v4 ; 3E0C082C v_mac_f32_e32 v9, s48, v4 ; 3E120830 v_mac_f32_e32 v10, s52, v4 ; 3E140834 v_mac_f32_e32 v2, s0, v4 ; 3E040800 exp 15, 13, 0, 0, 0, v7, v8, v0, v5 ; F80000DF 05000807 exp 15, 14, 0, 1, 0, v6, v9, v10, v2 ; F80008EF 020A0906 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 12 Code Size: 544 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL SVIEW[5], 2D, FLOAT DCL CONST[0..3] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { -0.5000, -1.0000, 10.0000, -10.0000} IMM[1] FLT32 { -2.0000, 3.0000, 1.0000, 6.0000} IMM[2] FLT32 { -0.5800, 100.0000, 0.0500, 0.7500} IMM[3] FLT32 { 0.2500, 0.3000, 0.5900, 0.1100} IMM[4] FLT32 { -0.0800, 14.2857, 0.0050, 2.0000} 0: ADD TEMP[0].xyz, -CONST[0].xyzz, CONST[1].xyzz 1: MOV TEMP[1].xy, IN[0].zwww 2: TEX TEMP[1], TEMP[1], SAMP[5], 2D 3: MAD TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[0].xyzz 4: LRP TEMP[2].xyz, TEMP[1].yyyy, CONST[2].xyzz, TEMP[0].xyzz 5: MOV TEMP[0].x, CONST[0].wwww 6: MOV TEMP[0].y, CONST[1].wwww 7: MOV TEMP[0].z, CONST[2].wwww 8: LRP TEMP[3].xyz, TEMP[1].zzzz, TEMP[0].xyzz, TEMP[2].xyzz 9: ADD TEMP[1].xy, TEMP[1].wwww, IMM[0].xyyy 10: MUL TEMP[1].xy, TEMP[1].xyyy, IMM[0].zwww 11: MOV_SAT TEMP[1].xy, TEMP[1].xyyy 12: MOV TEMP[4].xy, IN[0].xyyy 13: TEX TEMP[4].xyz, TEMP[4], SAMP[3], 2D 14: LRP TEMP[5].xyz, TEMP[4].yyyy, CONST[2].xyzz, TEMP[3].xyzz 15: LRP TEMP[3].xyz, TEMP[4].zzzz, TEMP[0].xyzz, TEMP[5].xyzz 16: MAD TEMP[0].xy, TEMP[1].xyyy, IMM[1].xxxx, IMM[1].yyyy 17: MUL TEMP[1].xy, TEMP[1].xyyy, TEMP[1].xyyy 18: MUL TEMP[0].xy, TEMP[1].xyyy, TEMP[0].xyyy 19: MUL TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 20: MAD TEMP[1].x, CONST[3].wwww, IMM[1].wwww, IMM[1].zzzz 21: MOV TEMP[6].xy, IN[0].xyyy 22: TEX TEMP[6], TEMP[6], SAMP[0], 2D 23: MOV TEMP[7].xy, IN[1].xyyy 24: TEX TEMP[7].y, TEMP[7], SAMP[1], 2D 25: MAD TEMP[7].x, TEMP[7].yyyy, TEMP[6].xxxx, TEMP[6].wwww 26: MAD TEMP[0].x, TEMP[7].xxxx, TEMP[1].xxxx, TEMP[0].xxxx 27: ADD TEMP[0].x, TEMP[0].xxxx, IMM[2].xxxx 28: MUL TEMP[0].x, TEMP[0].xxxx, IMM[0].zzzz 29: MOV_SAT TEMP[1].x, TEMP[0].xxxx 30: MAD TEMP[7].x, TEMP[1].xxxx, IMM[1].xxxx, IMM[1].yyyy 31: MUL TEMP[0].x, TEMP[1].xxxx, TEMP[1].xxxx 32: MUL TEMP[1].x, TEMP[0].xxxx, TEMP[7].xxxx 33: MAD TEMP[0].x, TEMP[7].xxxx, TEMP[0].xxxx, IMM[1].zzzz 34: ADD TEMP[0].x, -TEMP[4].xxxx, TEMP[0].xxxx 35: MOV_SAT TEMP[0].x, TEMP[0].xxxx 36: MUL TEMP[1].x, TEMP[1].xxxx, IMM[2].yyyy 37: MOV_SAT TEMP[1].x, TEMP[1].xxxx 38: MAD TEMP[4].x, TEMP[1].xxxx, IMM[1].xxxx, IMM[1].yyyy 39: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[1].xxxx 40: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[4].xxxx 41: LRP TEMP[2].xyz, TEMP[1].xxxx, IMM[2].zzzz, TEMP[3].xyzz 42: MUL TEMP[3].x, IMM[2].wwww, CONST[3].wwww 43: ADD TEMP[4].x, -TEMP[6].xxxx, IMM[1].zzzz 44: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[4].xxxx 45: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[4].xxxx 46: MAD TEMP[3].x, TEMP[4].xxxx, IMM[3].xxxx, TEMP[3].xxxx 47: MOV TEMP[4].xy, IN[1].zwww 48: TEX TEMP[4], TEMP[4], SAMP[4], 2D 49: ADD TEMP[4], TEMP[4], IMM[0].yyyy 50: MAD TEMP[5], TEMP[3].xxxx, TEMP[4], IMM[1].zzzz 51: LRP TEMP[3].xyz, TEMP[1].xxxx, IMM[1].zzzz, TEMP[5].xyzz 52: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[2].xyzz 53: DP3 TEMP[4].x, TEMP[3].xyzz, IMM[3].yzww 54: ADD TEMP[4].x, TEMP[4].xxxx, IMM[4].xxxx 55: MUL TEMP[4].x, TEMP[4].xxxx, IMM[4].yyyy 56: MOV_SAT TEMP[4].x, TEMP[4].xxxx 57: MAD TEMP[7].x, TEMP[4].xxxx, IMM[1].xxxx, IMM[1].yyyy 58: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[4].xxxx 59: MAD TEMP[4].x, TEMP[7].xxxx, -TEMP[4].xxxx, IMM[1].zzzz 60: MAD TEMP[2].xyz, TEMP[4].xxxx, IMM[4].zzzz, TEMP[3].xyzz 61: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[6].zzzz 62: MAD TEMP[3].xyz, TEMP[2].xyzz, IMM[4].wwww, TEMP[3].xyzz 63: MOV_SAT TEMP[3].xyz, TEMP[3].xyzz 64: MUL TEMP[2].xyz, TEMP[6].yyyy, TEMP[3].xyzz 65: MOV TEMP[4].xy, IN[0].xyyy 66: TEX TEMP[4], TEMP[4], SAMP[2], 2D 67: MAD TEMP[3].xyz, TEMP[3].xyzz, -TEMP[6].yyyy, TEMP[4].xyzz 68: MUL TEMP[6].x, TEMP[6].yyyy, CONST[3].zzzz 69: MUL TEMP[5].x, TEMP[5].wwww, TEMP[6].xxxx 70: MAD TEMP[3].xyz, TEMP[0].xxxx, TEMP[3].xyzz, TEMP[2].xyzz 71: LRP TEMP[2].x, TEMP[1].xxxx, IMM[1].zzzz, CONST[3].zzzz 72: MUL TEMP[1].x, TEMP[5].xxxx, TEMP[2].xxxx 73: MAD TEMP[2].x, TEMP[1].xxxx, -CONST[3].xxxx, TEMP[4].wwww 74: MUL TEMP[1].x, TEMP[1].xxxx, CONST[3].xxxx 75: MAD TEMP[0].x, TEMP[0].xxxx, TEMP[2].xxxx, TEMP[1].xxxx 76: MOV TEMP[3].w, TEMP[0].xxxx 77: MOV OUT[0], TEMP[3] 78: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0) %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 4) %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 8) %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 12) %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 16) %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 20) %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 24) %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 28) %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 32) %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 36) %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 40) %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 44) %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 48) %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 56) %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 60) %40 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %41 = load <8 x i32>, <8 x i32> addrspace(2)* %40, align 32, !tbaa !0 %42 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %43 = load <4 x i32>, <4 x i32> addrspace(2)* %42, align 16, !tbaa !0 %44 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %45 = load <8 x i32>, <8 x i32> addrspace(2)* %44, align 32, !tbaa !0 %46 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %47 = load <4 x i32>, <4 x i32> addrspace(2)* %46, align 16, !tbaa !0 %48 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %49 = load <8 x i32>, <8 x i32> addrspace(2)* %48, align 32, !tbaa !0 %50 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %51 = load <4 x i32>, <4 x i32> addrspace(2)* %50, align 16, !tbaa !0 %52 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %53 = load <8 x i32>, <8 x i32> addrspace(2)* %52, align 32, !tbaa !0 %54 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %55 = load <4 x i32>, <4 x i32> addrspace(2)* %54, align 16, !tbaa !0 %56 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %57 = load <8 x i32>, <8 x i32> addrspace(2)* %56, align 32, !tbaa !0 %58 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %59 = load <4 x i32>, <4 x i32> addrspace(2)* %58, align 16, !tbaa !0 %60 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %61 = load <8 x i32>, <8 x i32> addrspace(2)* %60, align 32, !tbaa !0 %62 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %63 = load <4 x i32>, <4 x i32> addrspace(2)* %62, align 16, !tbaa !0 %64 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %65 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %66 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %67 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %68 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %69 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %70 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) %71 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %72 = fsub float %29, %25 %73 = fsub float %30, %26 %74 = fsub float %31, %27 %75 = bitcast float %66 to i32 %76 = bitcast float %67 to i32 %77 = insertelement <2 x i32> undef, i32 %75, i32 0 %78 = insertelement <2 x i32> %77, i32 %76, i32 1 %79 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %78, <8 x i32> %61, <4 x i32> %63, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %80 = extractelement <4 x float> %79, i32 0 %81 = extractelement <4 x float> %79, i32 1 %82 = extractelement <4 x float> %79, i32 2 %83 = extractelement <4 x float> %79, i32 3 %84 = fmul float %80, %72 %85 = fadd float %84, %25 %86 = fmul float %80, %73 %87 = fadd float %86, %26 %88 = fmul float %80, %74 %89 = fadd float %88, %27 %90 = fsub float 1.000000e+00, %81 %91 = fmul float %33, %81 %92 = fmul float %85, %90 %93 = fadd float %91, %92 %94 = fsub float 1.000000e+00, %81 %95 = fmul float %34, %81 %96 = fmul float %87, %94 %97 = fadd float %95, %96 %98 = fsub float 1.000000e+00, %81 %99 = fmul float %35, %81 %100 = fmul float %89, %98 %101 = fadd float %99, %100 %102 = fsub float 1.000000e+00, %82 %103 = fmul float %28, %82 %104 = fmul float %93, %102 %105 = fadd float %103, %104 %106 = fsub float 1.000000e+00, %82 %107 = fmul float %32, %82 %108 = fmul float %97, %106 %109 = fadd float %107, %108 %110 = fsub float 1.000000e+00, %82 %111 = fmul float %36, %82 %112 = fmul float %101, %110 %113 = fadd float %111, %112 %114 = fadd float %83, -5.000000e-01 %115 = fadd float %83, -1.000000e+00 %116 = fmul float %114, 1.000000e+01 %117 = fmul float %115, -1.000000e+01 %118 = call float @llvm.AMDIL.clamp.(float %116, float 0.000000e+00, float 1.000000e+00) %119 = call float @llvm.AMDIL.clamp.(float %117, float 0.000000e+00, float 1.000000e+00) %120 = bitcast float %64 to i32 %121 = bitcast float %65 to i32 %122 = insertelement <2 x i32> undef, i32 %120, i32 0 %123 = insertelement <2 x i32> %122, i32 %121, i32 1 %124 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %123, <8 x i32> %53, <4 x i32> %55, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %125 = extractelement <4 x float> %124, i32 0 %126 = extractelement <4 x float> %124, i32 1 %127 = extractelement <4 x float> %124, i32 2 %128 = fsub float 1.000000e+00, %126 %129 = fmul float %33, %126 %130 = fmul float %105, %128 %131 = fadd float %129, %130 %132 = fsub float 1.000000e+00, %126 %133 = fmul float %34, %126 %134 = fmul float %109, %132 %135 = fadd float %133, %134 %136 = fsub float 1.000000e+00, %126 %137 = fmul float %35, %126 %138 = fmul float %113, %136 %139 = fadd float %137, %138 %140 = fsub float 1.000000e+00, %127 %141 = fmul float %28, %127 %142 = fmul float %131, %140 %143 = fadd float %141, %142 %144 = fsub float 1.000000e+00, %127 %145 = fmul float %32, %127 %146 = fmul float %135, %144 %147 = fadd float %145, %146 %148 = fsub float 1.000000e+00, %127 %149 = fmul float %36, %127 %150 = fmul float %139, %148 %151 = fadd float %149, %150 %152 = fmul float %118, -2.000000e+00 %153 = fadd float %152, 3.000000e+00 %154 = fmul float %119, -2.000000e+00 %155 = fadd float %154, 3.000000e+00 %156 = fmul float %118, %118 %157 = fmul float %119, %119 %158 = fmul float %156, %153 %159 = fmul float %157, %155 %160 = fmul float %159, %158 %161 = fmul float %39, 6.000000e+00 %162 = fadd float %161, 1.000000e+00 %163 = bitcast float %64 to i32 %164 = bitcast float %65 to i32 %165 = insertelement <2 x i32> undef, i32 %163, i32 0 %166 = insertelement <2 x i32> %165, i32 %164, i32 1 %167 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %166, <8 x i32> %41, <4 x i32> %43, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %168 = extractelement <4 x float> %167, i32 0 %169 = extractelement <4 x float> %167, i32 1 %170 = extractelement <4 x float> %167, i32 2 %171 = extractelement <4 x float> %167, i32 3 %172 = bitcast float %68 to i32 %173 = bitcast float %69 to i32 %174 = insertelement <2 x i32> undef, i32 %172, i32 0 %175 = insertelement <2 x i32> %174, i32 %173, i32 1 %176 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %175, <8 x i32> %45, <4 x i32> %47, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %177 = extractelement <4 x float> %176, i32 1 %178 = fmul float %177, %168 %179 = fadd float %178, %171 %180 = fmul float %179, %162 %181 = fadd float %180, %160 %182 = fadd float %181, 0xBFE28F5C20000000 %183 = fmul float %182, 1.000000e+01 %184 = call float @llvm.AMDIL.clamp.(float %183, float 0.000000e+00, float 1.000000e+00) %185 = fmul float %184, -2.000000e+00 %186 = fadd float %185, 3.000000e+00 %187 = fmul float %184, %184 %188 = fmul float %187, %186 %189 = fmul float %186, %187 %190 = fadd float %189, 1.000000e+00 %191 = fsub float %190, %125 %192 = call float @llvm.AMDIL.clamp.(float %191, float 0.000000e+00, float 1.000000e+00) %193 = fmul float %188, 1.000000e+02 %194 = call float @llvm.AMDIL.clamp.(float %193, float 0.000000e+00, float 1.000000e+00) %195 = fmul float %194, -2.000000e+00 %196 = fadd float %195, 3.000000e+00 %197 = fmul float %194, %194 %198 = fmul float %197, %196 %199 = fsub float 1.000000e+00, %198 %200 = fmul float %198, 0x3FA99999A0000000 %201 = fmul float %143, %199 %202 = fadd float %200, %201 %203 = fsub float 1.000000e+00, %198 %204 = fmul float %198, 0x3FA99999A0000000 %205 = fmul float %147, %203 %206 = fadd float %204, %205 %207 = fsub float 1.000000e+00, %198 %208 = fmul float %198, 0x3FA99999A0000000 %209 = fmul float %151, %207 %210 = fadd float %208, %209 %211 = fmul float %39, 7.500000e-01 %212 = fsub float 1.000000e+00, %168 %213 = fmul float %212, %212 %214 = fmul float %213, %213 %215 = fmul float %214, 2.500000e-01 %216 = fadd float %215, %211 %217 = bitcast float %70 to i32 %218 = bitcast float %71 to i32 %219 = insertelement <2 x i32> undef, i32 %217, i32 0 %220 = insertelement <2 x i32> %219, i32 %218, i32 1 %221 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %220, <8 x i32> %57, <4 x i32> %59, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %222 = extractelement <4 x float> %221, i32 0 %223 = extractelement <4 x float> %221, i32 1 %224 = extractelement <4 x float> %221, i32 2 %225 = extractelement <4 x float> %221, i32 3 %226 = fadd float %222, -1.000000e+00 %227 = fadd float %223, -1.000000e+00 %228 = fadd float %224, -1.000000e+00 %229 = fadd float %225, -1.000000e+00 %230 = fmul float %216, %226 %231 = fadd float %230, 1.000000e+00 %232 = fmul float %216, %227 %233 = fadd float %232, 1.000000e+00 %234 = fmul float %216, %228 %235 = fadd float %234, 1.000000e+00 %236 = fmul float %216, %229 %237 = fadd float %236, 1.000000e+00 %238 = fsub float 1.000000e+00, %198 %239 = fmul float %231, %238 %240 = fadd float %198, %239 %241 = fsub float 1.000000e+00, %198 %242 = fmul float %233, %241 %243 = fadd float %198, %242 %244 = fsub float 1.000000e+00, %198 %245 = fmul float %235, %244 %246 = fadd float %198, %245 %247 = fmul float %240, %202 %248 = fmul float %243, %206 %249 = fmul float %246, %210 %250 = fmul float %247, 0x3FD3333340000000 %251 = fmul float %248, 0x3FE2E147A0000000 %252 = fadd float %251, %250 %253 = fmul float %249, 0x3FBC28F5C0000000 %254 = fadd float %252, %253 %255 = fadd float %254, 0xBFB47AE140000000 %256 = fmul float %255, 0x402C924920000000 %257 = call float @llvm.AMDIL.clamp.(float %256, float 0.000000e+00, float 1.000000e+00) %258 = fmul float %257, -2.000000e+00 %259 = fadd float %258, 3.000000e+00 %260 = fmul float %257, %257 %261 = fmul float %260, %259 %262 = fsub float 1.000000e+00, %261 %263 = fmul float %262, 0x3F747AE140000000 %264 = fadd float %263, %247 %265 = fmul float %262, 0x3F747AE140000000 %266 = fadd float %265, %248 %267 = fmul float %262, 0x3F747AE140000000 %268 = fadd float %267, %249 %269 = fmul float %264, %170 %270 = fmul float %266, %170 %271 = fmul float %268, %170 %272 = fmul float %269, 2.000000e+00 %273 = fadd float %272, %247 %274 = fmul float %270, 2.000000e+00 %275 = fadd float %274, %248 %276 = fmul float %271, 2.000000e+00 %277 = fadd float %276, %249 %278 = call float @llvm.AMDIL.clamp.(float %273, float 0.000000e+00, float 1.000000e+00) %279 = call float @llvm.AMDIL.clamp.(float %275, float 0.000000e+00, float 1.000000e+00) %280 = call float @llvm.AMDIL.clamp.(float %277, float 0.000000e+00, float 1.000000e+00) %281 = fmul float %169, %278 %282 = fmul float %169, %279 %283 = fmul float %169, %280 %284 = bitcast float %64 to i32 %285 = bitcast float %65 to i32 %286 = insertelement <2 x i32> undef, i32 %284, i32 0 %287 = insertelement <2 x i32> %286, i32 %285, i32 1 %288 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %287, <8 x i32> %49, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %289 = extractelement <4 x float> %288, i32 0 %290 = extractelement <4 x float> %288, i32 1 %291 = extractelement <4 x float> %288, i32 2 %292 = extractelement <4 x float> %288, i32 3 %293 = fmul float %169, %278 %294 = fsub float %289, %293 %295 = fmul float %169, %279 %296 = fsub float %290, %295 %297 = fmul float %169, %280 %298 = fsub float %291, %297 %299 = fmul float %169, %38 %300 = fmul float %237, %299 %301 = fmul float %192, %294 %302 = fadd float %301, %281 %303 = fmul float %192, %296 %304 = fadd float %303, %282 %305 = fmul float %192, %298 %306 = fadd float %305, %283 %307 = fsub float 1.000000e+00, %198 %308 = fmul float %38, %307 %309 = fadd float %198, %308 %310 = fmul float %300, %309 %311 = fmul float %37, %310 %312 = fsub float %292, %311 %313 = fmul float %310, %37 %314 = fmul float %192, %312 %315 = fadd float %314, %313 %316 = call i32 @llvm.SI.packf16(float %302, float %304) %317 = bitcast i32 %316 to float %318 = call i32 @llvm.SI.packf16(float %306, float %315) %319 = bitcast i32 %318 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %317, float %319, float %317, float %319) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s10 ; BEFC030A v_mov_b32_e32 v2, 0x41200000 ; 7E0402FF 41200000 v_mov_b32_e32 v3, 0x40400000 ; 7E0602FF 40400000 v_mov_b32_e32 v4, 0x40c00000 ; 7E0802FF 40C00000 v_mov_b32_e32 v5, 0x3f400000 ; 7E0A02FF 3F400000 v_mov_b32_e32 v6, 0xbda3d70a ; 7E0C02FF BDA3D70A v_mov_b32_e32 v7, 0x3ba3d70a ; 7E0E02FF 3BA3D70A v_interp_p1_f32 v8, v0, 0, 0, [m0] ; C8200000 v_interp_p2_f32 v8, [v8], v1, 0, 0, [m0] ; C8210001 v_interp_p1_f32 v9, v0, 1, 0, [m0] ; C8240100 v_interp_p2_f32 v9, [v9], v1, 1, 0, [m0] ; C8250101 v_interp_p1_f32 v10, v0, 2, 0, [m0] ; C8280200 v_interp_p2_f32 v10, [v10], v1, 2, 0, [m0] ; C8290201 v_interp_p1_f32 v11, v0, 3, 0, [m0] ; C82C0300 v_interp_p2_f32 v11, [v11], v1, 3, 0, [m0] ; C82D0301 v_interp_p1_f32 v12, v0, 0, 1, [m0] ; C8300400 v_interp_p2_f32 v12, [v12], v1, 0, 1, [m0] ; C8310401 v_interp_p1_f32 v13, v0, 1, 1, [m0] ; C8340500 v_interp_p2_f32 v13, [v13], v1, 1, 1, [m0] ; C8350501 v_interp_p1_f32 v14, v0, 2, 1, [m0] ; C8380600 v_interp_p2_f32 v14, [v14], v1, 2, 1, [m0] ; C8390601 v_interp_p1_f32 v15, v0, 3, 1, [m0] ; C83C0700 v_interp_p2_f32 v15, [v15], v1, 3, 1, [m0] ; C83D0701 s_load_dwordx4 s[24:27], s[2:3], 0x0 ; C08C0300 s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 s_load_dwordx4 s[36:39], s[4:5], 0x4 ; C0920504 s_load_dwordx4 s[8:11], s[4:5], 0x8 ; C0840508 s_load_dwordx4 s[40:43], s[4:5], 0xc ; C094050C s_load_dwordx4 s[20:23], s[4:5], 0x10 ; C08A0510 s_load_dwordx4 s[60:63], s[4:5], 0x14 ; C09E0514 s_load_dwordx8 s[44:51], s[6:7], 0x0 ; C0D60700 s_load_dwordx8 s[52:59], s[6:7], 0x8 ; C0DA0708 s_load_dwordx8 s[12:19], s[6:7], 0x10 ; C0C60710 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s2, s[24:27], 0x0 ; C2011900 s_buffer_load_dword s3, s[24:27], 0x1 ; C2019901 s_buffer_load_dword s4, s[24:27], 0x2 ; C2021902 s_buffer_load_dword s5, s[24:27], 0x3 ; C2029903 s_buffer_load_dword s28, s[24:27], 0x4 ; C20E1904 s_buffer_load_dword s29, s[24:27], 0x5 ; C20E9905 s_buffer_load_dword s30, s[24:27], 0x6 ; C20F1906 s_buffer_load_dword s64, s[24:27], 0x7 ; C2201907 s_buffer_load_dword s65, s[24:27], 0x8 ; C2209908 s_buffer_load_dword s66, s[24:27], 0x9 ; C2211909 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s2 ; 7E000202 s_buffer_load_dword s67, s[24:27], 0xa ; C221990A v_mov_b32_e32 v1, s3 ; 7E020203 s_buffer_load_dword s68, s[24:27], 0xb ; C222190B v_mov_b32_e32 v16, s4 ; 7E200204 s_buffer_load_dword s0, s[24:27], 0xc ; C200190C s_buffer_load_dword s1, s[24:27], 0xe ; C200990E s_buffer_load_dword s69, s[24:27], 0xf ; C222990F v_sub_f32_e32 v0, s28, v0 ; 0800001C v_sub_f32_e32 v1, s29, v1 ; 0802021D v_sub_f32_e32 v16, s30, v16 ; 0820201E s_load_dwordx8 s[72:79], s[6:7], 0x18 ; C0E40718 s_load_dwordx8 s[24:31], s[6:7], 0x20 ; C0CC0720 s_load_dwordx8 s[80:87], s[6:7], 0x28 ; C0E80728 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v4, s69, 1.0 ; D2820004 03C88B04 image_sample v[17:20], 15, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[80:87], s[60:63] ; F0800F00 01F4110A image_sample v[21:23], 7, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[72:79], s[40:43] ; F0800700 01521508 image_sample v[24:27], 15, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[44:51], s[32:35] ; F0800F00 010B1808 image_sample v10, 2, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[52:59], s[36:39] ; F0800200 012D0A0C s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v27, v24, v10 ; 3E361518 v_mad_f32 v0, v17, v0, s2 ; D2820000 000A0111 v_mad_f32 v1, v17, v1, s3 ; D2820001 000E0311 v_mad_f32 v10, v17, v16, s4 ; D282000A 00122111 v_mad_f32 v0, -v18, v0, v0 ; D2820000 24020112 v_mac_f32_e32 v0, s65, v18 ; 3E002441 v_mad_f32 v1, -v18, v1, v1 ; D2820001 24060312 v_mac_f32_e32 v1, s66, v18 ; 3E022442 v_mad_f32 v10, -v18, v10, v10 ; D282000A 242A1512 v_mac_f32_e32 v10, s67, v18 ; 3E142443 v_mad_f32 v0, -v19, v0, v0 ; D2820000 24020113 v_mac_f32_e32 v0, s5, v19 ; 3E002605 v_mad_f32 v1, -v19, v1, v1 ; D2820001 24060313 v_mac_f32_e32 v1, s64, v19 ; 3E022640 v_mad_f32 v10, -v19, v10, v10 ; D282000A 242A1513 v_mac_f32_e32 v10, s68, v19 ; 3E142644 v_add_f32_e32 v11, -0.5, v20 ; 061628F1 v_madmk_f32_e32 v12, v20, v2, 0xc1200000 ; 40180514 C1200000 v_mul_f32_e32 v11, v2, v11 ; 10161702 v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_add_f32_e64 v12, 0, v12 clamp ; D206080C 00021880 v_mad_f32 v13, -2.0, v11, v3 ; D282000D 040E16F5 v_mul_f32_e32 v11, v11, v11 ; 1016170B v_mul_f32_e32 v11, v13, v11 ; 1016170D v_mad_f32 v13, -2.0, v12, v3 ; D282000D 040E18F5 v_mul_f32_e32 v12, v12, v12 ; 1018190C v_mul_f32_e32 v12, v13, v12 ; 1018190D v_mad_f32 v0, -v22, v0, v0 ; D2820000 24020116 v_mac_f32_e32 v0, s65, v22 ; 3E002C41 v_mad_f32 v1, -v22, v1, v1 ; D2820001 24060316 v_mac_f32_e32 v1, s66, v22 ; 3E022C42 v_mad_f32 v10, -v22, v10, v10 ; D282000A 242A1516 v_mac_f32_e32 v10, s67, v22 ; 3E142C43 v_mad_f32 v0, -v23, v0, v0 ; D2820000 24020117 v_mac_f32_e32 v0, s5, v23 ; 3E002E05 v_mad_f32 v1, -v23, v1, v1 ; D2820001 24060317 v_mac_f32_e32 v1, s64, v23 ; 3E022E40 v_mad_f32 v10, -v23, v10, v10 ; D282000A 242A1517 v_mac_f32_e32 v10, s68, v23 ; 3E142E44 v_madak_f32_e32 v11, v12, v11, 0xbf147ae1 ; 4216170C BF147AE1 v_mac_f32_e32 v11, v4, v27 ; 3E163704 v_mul_f32_e32 v2, v2, v11 ; 10041702 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_mad_f32 v4, -2.0, v2, v3 ; D2820004 040E04F5 v_mul_f32_e32 v2, v2, v2 ; 10040502 v_mul_f32_e32 v11, v4, v2 ; 10160504 v_mad_f32 v2, v2, v4, 1.0 ; D2820002 03CA0902 v_subrev_f32_e32 v2, v21, v2 ; 0A040515 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_mul_f32_e32 v4, 0x42c80000, v11 ; 100816FF 42C80000 v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 v_mul_f32_e32 v5, s69, v5 ; 100A0A45 v_sub_f32_e32 v11, 1.0, v24 ; 081630F2 v_mad_f32 v11, -v24, v11, v11 ; D282000B 242E1718 v_mul_f32_e32 v11, v11, v11 ; 1016170B v_madmk_f32_e32 v5, v11, v5, 0x3e800000 ; 400A0B0B 3E800000 image_sample v[11:14], 15, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[24:31], s[20:23] ; F0800F00 00A60B0E s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v11, -1.0, v11 ; 061616F3 v_add_f32_e32 v12, -1.0, v12 ; 061818F3 v_add_f32_e32 v13, -1.0, v13 ; 061A1AF3 v_mad_f32 v14, v14, v5, -v5 ; D282000E 84160B0E v_mad_f32 v15, -2.0, v4, v3 ; D282000F 040E08F5 v_mul_f32_e32 v4, v4, v4 ; 10080904 v_mul_f32_e32 v16, v15, v4 ; 1020090F v_mad_f32 v4, -v4, v15, 1.0 ; D2820004 23CA1F04 v_mul_f32_e32 v15, 0x3d4ccccd, v16 ; 101E20FF 3D4CCCCD v_mad_f32 v0, v4, v0, v15 ; D2820000 043E0104 v_mad_f32 v1, v4, v1, v15 ; D2820001 043E0304 v_mad_f32 v10, v4, v10, v15 ; D282000A 043E1504 v_mad_f32 v11, v5, v11, 1.0 ; D282000B 03CA1705 v_mad_f32 v12, v5, v12, 1.0 ; D282000C 03CA1905 v_mad_f32 v5, v5, v13, 1.0 ; D2820005 03CA1B05 v_mad_f32 v11, v4, v11, v16 ; D282000B 04421704 v_mad_f32 v12, v4, v12, v16 ; D282000C 04421904 v_mad_f32 v5, v4, v5, v16 ; D2820005 04420B04 v_mul_f32_e32 v13, v0, v11 ; 101A1700 v_mul_f32_e32 v15, v1, v12 ; 101E1901 v_mul_f32_e32 v17, v10, v5 ; 10220B0A v_mul_f32_e32 v13, 0x3e99999a, v13 ; 101A1AFF 3E99999A v_madmk_f32_e32 v13, v15, v13, 0x3f170a3d ; 401A1B0F 3F170A3D v_madmk_f32_e32 v13, v17, v13, 0x3de147ae ; 401A1B11 3DE147AE v_add_f32_e32 v6, v13, v6 ; 060C0D0D v_mul_f32_e32 v6, 0x41649249, v6 ; 100C0CFF 41649249 v_add_f32_e64 v6, 0, v6 clamp ; D2060806 00020C80 v_mac_f32_e32 v3, -2.0, v6 ; 3E060CF5 v_mul_f32_e32 v6, v6, v6 ; 100C0D06 v_mul_f32_e32 v3, v3, v6 ; 10060D03 v_mad_f32 v3, -v3, v7, v7 ; D2820003 241E0F03 v_mad_f32 v6, v0, v11, v3 ; D2820006 040E1700 v_mad_f32 v7, v1, v12, v3 ; D2820007 040E1901 v_mad_f32 v3, v10, v5, v3 ; D2820003 040E0B0A v_mul_f32_e32 v13, v26, v6 ; 101A0D1A v_mul_f32_e32 v15, v26, v7 ; 101E0F1A v_mul_f32_e32 v17, v26, v3 ; 1022071A v_mac_f32_e32 v13, v26, v6 ; 3E1A0D1A v_mac_f32_e32 v13, v0, v11 ; 3E1A1700 v_mac_f32_e32 v15, v26, v7 ; 3E1E0F1A v_mac_f32_e32 v15, v1, v12 ; 3E1E1901 v_mac_f32_e32 v17, v26, v3 ; 3E22071A v_mac_f32_e32 v17, v10, v5 ; 3E220B0A v_add_f32_e64 v0, 0, v13 clamp ; D2060800 00021A80 v_add_f32_e64 v1, 0, v15 clamp ; D2060801 00021E80 v_add_f32_e64 v3, 0, v17 clamp ; D2060803 00022280 v_mul_f32_e32 v5, v0, v25 ; 100A3300 v_mul_f32_e32 v6, v1, v25 ; 100C3301 v_mul_f32_e32 v7, v3, v25 ; 100E3303 image_sample v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[12:19], s[8:11] ; F0800F00 00430808 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, -v25, v0, v8 ; D2820000 24220119 v_mad_f32 v1, -v25, v1, v9 ; D2820001 24260319 v_mad_f32 v3, -v25, v3, v10 ; D2820003 242A0719 v_mul_f32_e32 v8, s1, v25 ; 10103201 v_mac_f32_e32 v8, v8, v14 ; 3E101D08 v_mac_f32_e32 v5, v0, v2 ; 3E0A0500 v_mac_f32_e32 v6, v1, v2 ; 3E0C0501 v_mac_f32_e32 v7, v3, v2 ; 3E0E0503 v_mac_f32_e32 v16, s1, v4 ; 3E200801 v_mul_f32_e32 v0, v16, v8 ; 10001110 v_mul_f32_e32 v1, s0, v0 ; 10020000 v_mad_f32 v0, -s0, v0, v11 ; D2820000 242E0000 v_mac_f32_e32 v1, v0, v2 ; 3E020500 v_cvt_pkrtz_f16_f32_e32 v0, v5, v6 ; 5E000D05 v_cvt_pkrtz_f16_f32_e32 v1, v7, v1 ; 5E020307 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 88 VGPRS: 28 Code Size: 1060 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], CLIPVERTEX DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL CONST[0..54] DCL TEMP[0..3], LOCAL 0: MUL TEMP[0].xy, CONST[48].xyyy, IN[1].xyyy 1: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 2: ADD TEMP[1].x, TEMP[0].xxxx, CONST[48].wwww 3: MOV TEMP[1].z, TEMP[1].xxxx 4: MUL TEMP[0].xy, CONST[49].xyyy, IN[1].xyyy 5: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 6: ADD TEMP[2].x, TEMP[0].xxxx, CONST[49].wwww 7: MOV TEMP[1].w, TEMP[2].xxxx 8: MUL TEMP[0].xy, CONST[50].xyyy, IN[1].xyyy 9: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 10: ADD TEMP[2].x, TEMP[0].xxxx, CONST[50].wwww 11: MUL TEMP[0].xy, CONST[51].xyyy, IN[1].xyyy 12: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 13: ADD TEMP[3].x, TEMP[0].xxxx, CONST[51].wwww 14: MOV TEMP[2].y, TEMP[3].xxxx 15: MUL TEMP[0].xy, CONST[52].xyyy, IN[1].xyyy 16: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 17: ADD TEMP[3].x, TEMP[0].xxxx, CONST[52].wwww 18: MOV TEMP[2].z, TEMP[3].xxxx 19: MUL TEMP[0].xy, CONST[53].xyyy, IN[1].xyyy 20: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 21: ADD TEMP[0].x, TEMP[0].xxxx, CONST[53].wwww 22: MOV TEMP[2].w, TEMP[0].xxxx 23: MOV TEMP[0].xw, IN[0].xxxw 24: MOV TEMP[1].xy, IN[1].xyxx 25: MAD TEMP[3].x, IN[0].zzzz, CONST[0].zzzz, -IN[0].wwww 26: MOV TEMP[0].z, TEMP[3].xxxx 27: MOV TEMP[0].y, -IN[0].yyyy 28: MAD TEMP[0].xy, CONST[54].xyyy, IN[0].wwww, TEMP[0].xyyy 29: MOV OUT[2], TEMP[1] 30: MOV OUT[3], TEMP[2] 31: MOV OUT[0], TEMP[0] 32: MOV OUT[1], IN[0] 33: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 768) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 772) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 780) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 784) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 788) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 796) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 800) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 804) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 812) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 816) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 820) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 828) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 832) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 836) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 844) %30 = call float @llvm.SI.load.const(<16 x i8> %13, i32 848) %31 = call float @llvm.SI.load.const(<16 x i8> %13, i32 852) %32 = call float @llvm.SI.load.const(<16 x i8> %13, i32 860) %33 = call float @llvm.SI.load.const(<16 x i8> %13, i32 864) %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 868) %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 %37 = add i32 %5, %8 %38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = add i32 %5, %8 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = fmul float %15, %47 %50 = fmul float %16, %48 %51 = fadd float %50, %49 %52 = fadd float %51, %17 %53 = fmul float %18, %47 %54 = fmul float %19, %48 %55 = fadd float %54, %53 %56 = fadd float %55, %20 %57 = fmul float %21, %47 %58 = fmul float %22, %48 %59 = fadd float %58, %57 %60 = fadd float %59, %23 %61 = fmul float %24, %47 %62 = fmul float %25, %48 %63 = fadd float %62, %61 %64 = fadd float %63, %26 %65 = fmul float %27, %47 %66 = fmul float %28, %48 %67 = fadd float %66, %65 %68 = fadd float %67, %29 %69 = fmul float %30, %47 %70 = fmul float %31, %48 %71 = fadd float %70, %69 %72 = fadd float %71, %32 %73 = fmul float %41, %14 %74 = fsub float %73, %42 %75 = fmul float %33, %42 %76 = fadd float %75, %39 %77 = fmul float %34, %42 %78 = fsub float %77, %40 %79 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 16 %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 %81 = call float @llvm.SI.load.const(<16 x i8> %80, i32 0) %82 = fmul float %81, %39 %83 = call float @llvm.SI.load.const(<16 x i8> %80, i32 4) %84 = fmul float %83, %40 %85 = fadd float %82, %84 %86 = call float @llvm.SI.load.const(<16 x i8> %80, i32 8) %87 = fmul float %86, %41 %88 = fadd float %85, %87 %89 = call float @llvm.SI.load.const(<16 x i8> %80, i32 12) %90 = fmul float %89, %42 %91 = fadd float %88, %90 %92 = call float @llvm.SI.load.const(<16 x i8> %80, i32 16) %93 = fmul float %92, %39 %94 = call float @llvm.SI.load.const(<16 x i8> %80, i32 20) %95 = fmul float %94, %40 %96 = fadd float %93, %95 %97 = call float @llvm.SI.load.const(<16 x i8> %80, i32 24) %98 = fmul float %97, %41 %99 = fadd float %96, %98 %100 = call float @llvm.SI.load.const(<16 x i8> %80, i32 28) %101 = fmul float %100, %42 %102 = fadd float %99, %101 %103 = call float @llvm.SI.load.const(<16 x i8> %80, i32 32) %104 = fmul float %103, %39 %105 = call float @llvm.SI.load.const(<16 x i8> %80, i32 36) %106 = fmul float %105, %40 %107 = fadd float %104, %106 %108 = call float @llvm.SI.load.const(<16 x i8> %80, i32 40) %109 = fmul float %108, %41 %110 = fadd float %107, %109 %111 = call float @llvm.SI.load.const(<16 x i8> %80, i32 44) %112 = fmul float %111, %42 %113 = fadd float %110, %112 %114 = call float @llvm.SI.load.const(<16 x i8> %80, i32 48) %115 = fmul float %114, %39 %116 = call float @llvm.SI.load.const(<16 x i8> %80, i32 52) %117 = fmul float %116, %40 %118 = fadd float %115, %117 %119 = call float @llvm.SI.load.const(<16 x i8> %80, i32 56) %120 = fmul float %119, %41 %121 = fadd float %118, %120 %122 = call float @llvm.SI.load.const(<16 x i8> %80, i32 60) %123 = fmul float %122, %42 %124 = fadd float %121, %123 %125 = call float @llvm.SI.load.const(<16 x i8> %80, i32 64) %126 = fmul float %125, %39 %127 = call float @llvm.SI.load.const(<16 x i8> %80, i32 68) %128 = fmul float %127, %40 %129 = fadd float %126, %128 %130 = call float @llvm.SI.load.const(<16 x i8> %80, i32 72) %131 = fmul float %130, %41 %132 = fadd float %129, %131 %133 = call float @llvm.SI.load.const(<16 x i8> %80, i32 76) %134 = fmul float %133, %42 %135 = fadd float %132, %134 %136 = call float @llvm.SI.load.const(<16 x i8> %80, i32 80) %137 = fmul float %136, %39 %138 = call float @llvm.SI.load.const(<16 x i8> %80, i32 84) %139 = fmul float %138, %40 %140 = fadd float %137, %139 %141 = call float @llvm.SI.load.const(<16 x i8> %80, i32 88) %142 = fmul float %141, %41 %143 = fadd float %140, %142 %144 = call float @llvm.SI.load.const(<16 x i8> %80, i32 92) %145 = fmul float %144, %42 %146 = fadd float %143, %145 %147 = call float @llvm.SI.load.const(<16 x i8> %80, i32 96) %148 = fmul float %147, %39 %149 = call float @llvm.SI.load.const(<16 x i8> %80, i32 100) %150 = fmul float %149, %40 %151 = fadd float %148, %150 %152 = call float @llvm.SI.load.const(<16 x i8> %80, i32 104) %153 = fmul float %152, %41 %154 = fadd float %151, %153 %155 = call float @llvm.SI.load.const(<16 x i8> %80, i32 108) %156 = fmul float %155, %42 %157 = fadd float %154, %156 %158 = call float @llvm.SI.load.const(<16 x i8> %80, i32 112) %159 = fmul float %158, %39 %160 = call float @llvm.SI.load.const(<16 x i8> %80, i32 116) %161 = fmul float %160, %40 %162 = fadd float %159, %161 %163 = call float @llvm.SI.load.const(<16 x i8> %80, i32 120) %164 = fmul float %163, %41 %165 = fadd float %162, %164 %166 = call float @llvm.SI.load.const(<16 x i8> %80, i32 124) %167 = fmul float %166, %42 %168 = fadd float %165, %167 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %47, float %48, float %52, float %56) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %60, float %64, float %68, float %72) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %76, float %78, float %74, float %42) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 13, i32 0, float %91, float %102, float %113, float %124) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 14, i32 0, float %135, float %146, float %157, float %168) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_load_dwordx4 s[4:7], s[2:3], 0x0 ; C0820300 s_load_dwordx4 s[0:3], s[2:3], 0x40 ; C0800340 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[4:7], 0x2 ; C2040502 s_buffer_load_dword s9, s[4:7], 0xc0 ; C20485C0 s_buffer_load_dword s10, s[4:7], 0xc1 ; C20505C1 s_buffer_load_dword s11, s[4:7], 0xc3 ; C20585C3 s_buffer_load_dword s12, s[4:7], 0xc4 ; C20605C4 s_buffer_load_dword s13, s[4:7], 0xc5 ; C20685C5 s_buffer_load_dword s14, s[4:7], 0xc7 ; C20705C7 s_buffer_load_dword s15, s[4:7], 0xc8 ; C20785C8 s_buffer_load_dword s16, s[4:7], 0xc9 ; C20805C9 s_buffer_load_dword s17, s[4:7], 0xcb ; C20885CB s_buffer_load_dword s18, s[4:7], 0xcc ; C20905CC s_buffer_load_dword s19, s[4:7], 0xcd ; C20985CD s_buffer_load_dword s20, s[4:7], 0xcf ; C20A05CF s_buffer_load_dword s21, s[4:7], 0xd0 ; C20A85D0 s_buffer_load_dword s22, s[4:7], 0xd1 ; C20B05D1 s_buffer_load_dword s23, s[4:7], 0xd3 ; C20B85D3 s_buffer_load_dword s24, s[4:7], 0xd4 ; C20C05D4 s_buffer_load_dword s25, s[4:7], 0xd5 ; C20C85D5 s_buffer_load_dword s26, s[4:7], 0xd7 ; C20D05D7 s_buffer_load_dword s27, s[4:7], 0xd8 ; C20D85D8 s_buffer_load_dword s4, s[4:7], 0xd9 ; C20205D9 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x2 ; C2038102 s_buffer_load_dword s28, s[0:3], 0x3 ; C20E0103 s_buffer_load_dword s29, s[0:3], 0x4 ; C20E8104 s_buffer_load_dword s30, s[0:3], 0x5 ; C20F0105 s_buffer_load_dword s31, s[0:3], 0x6 ; C20F8106 s_buffer_load_dword s32, s[0:3], 0x7 ; C2100107 s_buffer_load_dword s33, s[0:3], 0x8 ; C2108108 s_buffer_load_dword s34, s[0:3], 0x9 ; C2110109 s_buffer_load_dword s35, s[0:3], 0xa ; C211810A s_buffer_load_dword s36, s[0:3], 0xb ; C212010B s_buffer_load_dword s37, s[0:3], 0xc ; C212810C s_buffer_load_dword s38, s[0:3], 0xd ; C213010D s_buffer_load_dword s39, s[0:3], 0xe ; C213810E s_buffer_load_dword s40, s[0:3], 0xf ; C214010F s_buffer_load_dword s41, s[0:3], 0x10 ; C2148110 s_buffer_load_dword s42, s[0:3], 0x11 ; C2150111 s_buffer_load_dword s43, s[0:3], 0x12 ; C2158112 s_buffer_load_dword s44, s[0:3], 0x13 ; C2160113 s_buffer_load_dword s45, s[0:3], 0x14 ; C2168114 s_buffer_load_dword s46, s[0:3], 0x15 ; C2170115 s_buffer_load_dword s47, s[0:3], 0x16 ; C2178116 s_buffer_load_dword s48, s[0:3], 0x17 ; C2180117 s_buffer_load_dword s49, s[0:3], 0x18 ; C2188118 s_buffer_load_dword s50, s[0:3], 0x19 ; C2190119 s_buffer_load_dword s51, s[0:3], 0x1a ; C219811A s_buffer_load_dword s52, s[0:3], 0x1b ; C21A011B s_buffer_load_dword s53, s[0:3], 0x1c ; C21A811C s_buffer_load_dword s54, s[0:3], 0x1d ; C21B011D s_buffer_load_dword s55, s[0:3], 0x1e ; C21B811E s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s11 ; 7E00020B s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v7, s14 ; 7E0E020E v_mov_b32_e32 v8, s17 ; 7E100211 v_mov_b32_e32 v9, s20 ; 7E120214 v_mov_b32_e32 v10, s23 ; 7E140217 v_mov_b32_e32 v11, s26 ; 7E16021A v_mac_f32_e32 v0, s9, v5 ; 3E000A09 v_mac_f32_e32 v7, s12, v5 ; 3E0E0A0C v_mac_f32_e32 v8, s15, v5 ; 3E100A0F v_mac_f32_e32 v9, s18, v5 ; 3E120A12 v_mac_f32_e32 v10, s21, v5 ; 3E140A15 v_mac_f32_e32 v11, s24, v5 ; 3E160A18 v_mac_f32_e32 v0, s10, v6 ; 3E000C0A v_mac_f32_e32 v7, s13, v6 ; 3E0E0C0D v_mac_f32_e32 v8, s16, v6 ; 3E100C10 v_mac_f32_e32 v9, s19, v6 ; 3E120C13 exp 15, 32, 0, 0, 0, v5, v6, v0, v7 ; F800020F 07000605 v_mac_f32_e32 v10, s22, v6 ; 3E140C16 v_mac_f32_e32 v11, s25, v6 ; 3E160C19 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v0, v3, s8, -v4 ; D2820000 84101103 v_mad_f32 v5, s27, v4, v1 ; D2820005 0406081B v_mad_f32 v6, s4, v4, -v2 ; D2820006 840A0804 v_mul_f32_e32 v7, s6, v2 ; 100E0406 exp 15, 33, 0, 0, 0, v8, v9, v10, v11 ; F800021F 0B0A0908 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v8, s30, v2 ; 1010041E v_mac_f32_e32 v7, s5, v1 ; 3E0E0205 exp 15, 12, 0, 0, 0, v5, v6, v0, v4 ; F80000CF 04000605 v_mac_f32_e32 v8, s29, v1 ; 3E10021D s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s34, v2 ; 10000422 v_mac_f32_e32 v0, s33, v1 ; 3E000221 v_mul_f32_e32 v5, s38, v2 ; 100A0426 v_mac_f32_e32 v5, s37, v1 ; 3E0A0225 v_mul_f32_e32 v6, s42, v2 ; 100C042A v_mul_f32_e32 v9, s46, v2 ; 1012042E v_mul_f32_e32 v10, s50, v2 ; 10140432 v_mul_f32_e32 v2, s54, v2 ; 10040436 v_mac_f32_e32 v6, s41, v1 ; 3E0C0229 v_mac_f32_e32 v9, s45, v1 ; 3E12022D v_mac_f32_e32 v10, s49, v1 ; 3E140231 v_mac_f32_e32 v2, s53, v1 ; 3E040235 v_mac_f32_e32 v7, s7, v3 ; 3E0E0607 v_mac_f32_e32 v8, s31, v3 ; 3E10061F v_mac_f32_e32 v0, s35, v3 ; 3E000623 v_mac_f32_e32 v5, s39, v3 ; 3E0A0627 v_mac_f32_e32 v6, s43, v3 ; 3E0C062B v_mac_f32_e32 v9, s47, v3 ; 3E12062F v_mac_f32_e32 v10, s51, v3 ; 3E140633 v_mac_f32_e32 v2, s55, v3 ; 3E040637 v_mac_f32_e32 v7, s28, v4 ; 3E0E081C v_mac_f32_e32 v8, s32, v4 ; 3E100820 v_mac_f32_e32 v0, s36, v4 ; 3E000824 v_mac_f32_e32 v5, s40, v4 ; 3E0A0828 v_mac_f32_e32 v6, s44, v4 ; 3E0C082C v_mac_f32_e32 v9, s48, v4 ; 3E120830 v_mac_f32_e32 v10, s52, v4 ; 3E140834 v_mac_f32_e32 v2, s0, v4 ; 3E040800 exp 15, 13, 0, 0, 0, v7, v8, v0, v5 ; F80000DF 05000807 exp 15, 14, 0, 1, 0, v6, v9, v10, v2 ; F80008EF 020A0906 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 12 Code Size: 544 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL CONST[0..3] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 6.0000, 1.0000, -0.5800, 10.0000} IMM[1] FLT32 { -2.0000, 3.0000, -1.0000, -0.0000} IMM[2] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0].xw, TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].xy, IN[1].xyyy 3: TEX TEMP[1].y, TEMP[1], SAMP[1], 2D 4: MAD TEMP[0].x, TEMP[1].yyyy, TEMP[0].xxxx, TEMP[0].wwww 5: MAD TEMP[2].x, CONST[3].wwww, IMM[0].xxxx, IMM[0].yyyy 6: MAD TEMP[0].x, TEMP[0].xxxx, TEMP[2].xxxx, IMM[0].zzzz 7: MUL TEMP[0].x, TEMP[0].xxxx, IMM[0].wwww 8: MOV_SAT TEMP[2].x, TEMP[0].xxxx 9: MAD TEMP[3].x, TEMP[2].xxxx, IMM[1].xxxx, IMM[1].yyyy 10: MUL TEMP[0].x, TEMP[2].xxxx, TEMP[2].xxxx 11: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[3].xxxx 12: MOV TEMP[2].xy, IN[0].xyyy 13: TEX TEMP[2].xyz, TEMP[2], SAMP[2], 2D 14: MAD TEMP[2].xyz, CONST[3].yyyy, IMM[1].zwww, TEMP[2].xyzz 15: MUL TEMP[1].xyz, IMM[2].xyyy, CONST[3].yyyy 16: MAD TEMP[0].xyz, TEMP[0].xxxx, TEMP[2].xyzz, TEMP[1].xyzz 17: MOV TEMP[0].w, IMM[0].yyyy 18: MOV OUT[0], TEMP[0] 19: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 52) %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 60) %27 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %28 = load <8 x i32>, <8 x i32> addrspace(2)* %27, align 32, !tbaa !0 %29 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %30 = load <4 x i32>, <4 x i32> addrspace(2)* %29, align 16, !tbaa !0 %31 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %32 = load <8 x i32>, <8 x i32> addrspace(2)* %31, align 32, !tbaa !0 %33 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %34 = load <4 x i32>, <4 x i32> addrspace(2)* %33, align 16, !tbaa !0 %35 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %36 = load <8 x i32>, <8 x i32> addrspace(2)* %35, align 32, !tbaa !0 %37 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %38 = load <4 x i32>, <4 x i32> addrspace(2)* %37, align 16, !tbaa !0 %39 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %40 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %41 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %42 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %43 = bitcast float %39 to i32 %44 = bitcast float %40 to i32 %45 = insertelement <2 x i32> undef, i32 %43, i32 0 %46 = insertelement <2 x i32> %45, i32 %44, i32 1 %47 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %46, <8 x i32> %28, <4 x i32> %30, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %48 = extractelement <4 x float> %47, i32 0 %49 = extractelement <4 x float> %47, i32 3 %50 = bitcast float %41 to i32 %51 = bitcast float %42 to i32 %52 = insertelement <2 x i32> undef, i32 %50, i32 0 %53 = insertelement <2 x i32> %52, i32 %51, i32 1 %54 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %53, <8 x i32> %32, <4 x i32> %34, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %55 = extractelement <4 x float> %54, i32 1 %56 = fmul float %55, %48 %57 = fadd float %56, %49 %58 = fmul float %26, 6.000000e+00 %59 = fadd float %58, 1.000000e+00 %60 = fmul float %57, %59 %61 = fadd float %60, 0xBFE28F5C20000000 %62 = fmul float %61, 1.000000e+01 %63 = call float @llvm.AMDIL.clamp.(float %62, float 0.000000e+00, float 1.000000e+00) %64 = fmul float %63, -2.000000e+00 %65 = fadd float %64, 3.000000e+00 %66 = fmul float %63, %63 %67 = fmul float %66, %65 %68 = bitcast float %39 to i32 %69 = bitcast float %40 to i32 %70 = insertelement <2 x i32> undef, i32 %68, i32 0 %71 = insertelement <2 x i32> %70, i32 %69, i32 1 %72 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %71, <8 x i32> %36, <4 x i32> %38, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %73 = extractelement <4 x float> %72, i32 0 %74 = extractelement <4 x float> %72, i32 1 %75 = extractelement <4 x float> %72, i32 2 %76 = fsub float %73, %25 %77 = fmul float %25, -0.000000e+00 %78 = fadd float %77, %74 %79 = fmul float %25, -0.000000e+00 %80 = fadd float %79, %75 %81 = fmul float %25, 0.000000e+00 %82 = fmul float %25, 0.000000e+00 %83 = fmul float %67, %76 %84 = fadd float %83, %25 %85 = fmul float %67, %78 %86 = fadd float %85, %81 %87 = fmul float %67, %80 %88 = fadd float %87, %82 %89 = call i32 @llvm.SI.packf16(float %84, float %86) %90 = bitcast i32 %89 to float %91 = call i32 @llvm.SI.packf16(float %88, float 1.000000e+00) %92 = bitcast i32 %91 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %90, float %92, float %90, float %92) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s10 ; BEFC030A v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx4 s[16:19], s[4:5], 0x8 ; C0880508 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx8 s[20:27], s[6:7], 0x0 ; C0CA0700 s_load_dwordx8 s[28:35], s[6:7], 0x8 ; C0CE0708 s_load_dwordx8 s[36:43], s[6:7], 0x10 ; C0D20710 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0xf ; C202010F v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 image_sample v[0:1], 9, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[20:27], s[8:11] ; F0800900 00450002 image_sample v4, 2, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[28:35], s[12:15] ; F0800200 00670404 image_sample v[5:7], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[36:43], s[16:19] ; F0800700 00890502 s_buffer_load_dword s0, s[0:3], 0xd ; C200010D s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v1, v0, v4 ; 3E020900 v_mov_b32_e32 v0, 0x40c00000 ; 7E0002FF 40C00000 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, s4, 1.0 ; D2820000 03C80900 v_madak_f32_e32 v0, v1, v0, 0xbf147ae1 ; 42000101 BF147AE1 v_mul_f32_e32 v0, 0x41200000, v0 ; 100000FF 41200000 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_madak_f32_e32 v1, -2.0, v0, 0x40400000 ; 420200F5 40400000 v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mul_f32_e32 v0, v1, v0 ; 10000101 s_waitcnt vmcnt(0) ; BF8C0770 v_subrev_f32_e32 v1, s0, v5 ; 0A020A00 v_mov_b32_e32 v2, 0x80000000 ; 7E0402FF 80000000 v_mad_f32 v3, s0, v2, v6 ; D2820003 041A0400 v_mac_f32_e32 v7, s0, v2 ; 3E0E0400 v_mul_f32_e64 v2, 0, s0 ; D2100002 00000080 v_mad_f32 v1, v0, v1, s0 ; D2820001 00020300 v_mad_f32 v3, v3, v0, v2 ; D2820003 040A0103 v_mac_f32_e32 v2, v7, v0 ; 3E040107 v_cvt_pkrtz_f16_f32_e32 v0, v1, v3 ; 5E000701 v_cvt_pkrtz_f16_f32_e64 v1, v2, 1.0 ; D25E0001 0001E502 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 8 Code Size: 252 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], CLIPVERTEX DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL CONST[0..54] DCL TEMP[0..3], LOCAL 0: MUL TEMP[0].xy, CONST[48].xyyy, IN[1].xyyy 1: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 2: ADD TEMP[1].x, TEMP[0].xxxx, CONST[48].wwww 3: MOV TEMP[1].z, TEMP[1].xxxx 4: MUL TEMP[0].xy, CONST[49].xyyy, IN[1].xyyy 5: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 6: ADD TEMP[2].x, TEMP[0].xxxx, CONST[49].wwww 7: MOV TEMP[1].w, TEMP[2].xxxx 8: MUL TEMP[0].xy, CONST[50].xyyy, IN[1].xyyy 9: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 10: ADD TEMP[2].x, TEMP[0].xxxx, CONST[50].wwww 11: MUL TEMP[0].xy, CONST[51].xyyy, IN[1].xyyy 12: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 13: ADD TEMP[3].x, TEMP[0].xxxx, CONST[51].wwww 14: MOV TEMP[2].y, TEMP[3].xxxx 15: MUL TEMP[0].xy, CONST[52].xyyy, IN[1].xyyy 16: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 17: ADD TEMP[3].x, TEMP[0].xxxx, CONST[52].wwww 18: MOV TEMP[2].z, TEMP[3].xxxx 19: MUL TEMP[0].xy, CONST[53].xyyy, IN[1].xyyy 20: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 21: ADD TEMP[0].x, TEMP[0].xxxx, CONST[53].wwww 22: MOV TEMP[2].w, TEMP[0].xxxx 23: MOV TEMP[0].xw, IN[0].xxxw 24: MOV TEMP[1].xy, IN[1].xyxx 25: MAD TEMP[3].x, IN[0].zzzz, CONST[0].zzzz, -IN[0].wwww 26: MOV TEMP[0].z, TEMP[3].xxxx 27: MOV TEMP[0].y, -IN[0].yyyy 28: MAD TEMP[0].xy, CONST[54].xyyy, IN[0].wwww, TEMP[0].xyyy 29: MOV OUT[2], TEMP[1] 30: MOV OUT[3], TEMP[2] 31: MOV OUT[0], TEMP[0] 32: MOV OUT[1], IN[0] 33: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 768) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 772) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 780) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 784) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 788) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 796) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 800) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 804) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 812) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 816) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 820) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 828) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 832) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 836) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 844) %30 = call float @llvm.SI.load.const(<16 x i8> %13, i32 848) %31 = call float @llvm.SI.load.const(<16 x i8> %13, i32 852) %32 = call float @llvm.SI.load.const(<16 x i8> %13, i32 860) %33 = call float @llvm.SI.load.const(<16 x i8> %13, i32 864) %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 868) %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 %37 = add i32 %5, %8 %38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = add i32 %5, %8 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = fmul float %15, %47 %50 = fmul float %16, %48 %51 = fadd float %50, %49 %52 = fadd float %51, %17 %53 = fmul float %18, %47 %54 = fmul float %19, %48 %55 = fadd float %54, %53 %56 = fadd float %55, %20 %57 = fmul float %21, %47 %58 = fmul float %22, %48 %59 = fadd float %58, %57 %60 = fadd float %59, %23 %61 = fmul float %24, %47 %62 = fmul float %25, %48 %63 = fadd float %62, %61 %64 = fadd float %63, %26 %65 = fmul float %27, %47 %66 = fmul float %28, %48 %67 = fadd float %66, %65 %68 = fadd float %67, %29 %69 = fmul float %30, %47 %70 = fmul float %31, %48 %71 = fadd float %70, %69 %72 = fadd float %71, %32 %73 = fmul float %41, %14 %74 = fsub float %73, %42 %75 = fmul float %33, %42 %76 = fadd float %75, %39 %77 = fmul float %34, %42 %78 = fsub float %77, %40 %79 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 16 %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 %81 = call float @llvm.SI.load.const(<16 x i8> %80, i32 0) %82 = fmul float %81, %39 %83 = call float @llvm.SI.load.const(<16 x i8> %80, i32 4) %84 = fmul float %83, %40 %85 = fadd float %82, %84 %86 = call float @llvm.SI.load.const(<16 x i8> %80, i32 8) %87 = fmul float %86, %41 %88 = fadd float %85, %87 %89 = call float @llvm.SI.load.const(<16 x i8> %80, i32 12) %90 = fmul float %89, %42 %91 = fadd float %88, %90 %92 = call float @llvm.SI.load.const(<16 x i8> %80, i32 16) %93 = fmul float %92, %39 %94 = call float @llvm.SI.load.const(<16 x i8> %80, i32 20) %95 = fmul float %94, %40 %96 = fadd float %93, %95 %97 = call float @llvm.SI.load.const(<16 x i8> %80, i32 24) %98 = fmul float %97, %41 %99 = fadd float %96, %98 %100 = call float @llvm.SI.load.const(<16 x i8> %80, i32 28) %101 = fmul float %100, %42 %102 = fadd float %99, %101 %103 = call float @llvm.SI.load.const(<16 x i8> %80, i32 32) %104 = fmul float %103, %39 %105 = call float @llvm.SI.load.const(<16 x i8> %80, i32 36) %106 = fmul float %105, %40 %107 = fadd float %104, %106 %108 = call float @llvm.SI.load.const(<16 x i8> %80, i32 40) %109 = fmul float %108, %41 %110 = fadd float %107, %109 %111 = call float @llvm.SI.load.const(<16 x i8> %80, i32 44) %112 = fmul float %111, %42 %113 = fadd float %110, %112 %114 = call float @llvm.SI.load.const(<16 x i8> %80, i32 48) %115 = fmul float %114, %39 %116 = call float @llvm.SI.load.const(<16 x i8> %80, i32 52) %117 = fmul float %116, %40 %118 = fadd float %115, %117 %119 = call float @llvm.SI.load.const(<16 x i8> %80, i32 56) %120 = fmul float %119, %41 %121 = fadd float %118, %120 %122 = call float @llvm.SI.load.const(<16 x i8> %80, i32 60) %123 = fmul float %122, %42 %124 = fadd float %121, %123 %125 = call float @llvm.SI.load.const(<16 x i8> %80, i32 64) %126 = fmul float %125, %39 %127 = call float @llvm.SI.load.const(<16 x i8> %80, i32 68) %128 = fmul float %127, %40 %129 = fadd float %126, %128 %130 = call float @llvm.SI.load.const(<16 x i8> %80, i32 72) %131 = fmul float %130, %41 %132 = fadd float %129, %131 %133 = call float @llvm.SI.load.const(<16 x i8> %80, i32 76) %134 = fmul float %133, %42 %135 = fadd float %132, %134 %136 = call float @llvm.SI.load.const(<16 x i8> %80, i32 80) %137 = fmul float %136, %39 %138 = call float @llvm.SI.load.const(<16 x i8> %80, i32 84) %139 = fmul float %138, %40 %140 = fadd float %137, %139 %141 = call float @llvm.SI.load.const(<16 x i8> %80, i32 88) %142 = fmul float %141, %41 %143 = fadd float %140, %142 %144 = call float @llvm.SI.load.const(<16 x i8> %80, i32 92) %145 = fmul float %144, %42 %146 = fadd float %143, %145 %147 = call float @llvm.SI.load.const(<16 x i8> %80, i32 96) %148 = fmul float %147, %39 %149 = call float @llvm.SI.load.const(<16 x i8> %80, i32 100) %150 = fmul float %149, %40 %151 = fadd float %148, %150 %152 = call float @llvm.SI.load.const(<16 x i8> %80, i32 104) %153 = fmul float %152, %41 %154 = fadd float %151, %153 %155 = call float @llvm.SI.load.const(<16 x i8> %80, i32 108) %156 = fmul float %155, %42 %157 = fadd float %154, %156 %158 = call float @llvm.SI.load.const(<16 x i8> %80, i32 112) %159 = fmul float %158, %39 %160 = call float @llvm.SI.load.const(<16 x i8> %80, i32 116) %161 = fmul float %160, %40 %162 = fadd float %159, %161 %163 = call float @llvm.SI.load.const(<16 x i8> %80, i32 120) %164 = fmul float %163, %41 %165 = fadd float %162, %164 %166 = call float @llvm.SI.load.const(<16 x i8> %80, i32 124) %167 = fmul float %166, %42 %168 = fadd float %165, %167 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %47, float %48, float %52, float %56) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %60, float %64, float %68, float %72) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %76, float %78, float %74, float %42) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 13, i32 0, float %91, float %102, float %113, float %124) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 14, i32 0, float %135, float %146, float %157, float %168) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_load_dwordx4 s[4:7], s[2:3], 0x0 ; C0820300 s_load_dwordx4 s[0:3], s[2:3], 0x40 ; C0800340 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[4:7], 0x2 ; C2040502 s_buffer_load_dword s9, s[4:7], 0xc0 ; C20485C0 s_buffer_load_dword s10, s[4:7], 0xc1 ; C20505C1 s_buffer_load_dword s11, s[4:7], 0xc3 ; C20585C3 s_buffer_load_dword s12, s[4:7], 0xc4 ; C20605C4 s_buffer_load_dword s13, s[4:7], 0xc5 ; C20685C5 s_buffer_load_dword s14, s[4:7], 0xc7 ; C20705C7 s_buffer_load_dword s15, s[4:7], 0xc8 ; C20785C8 s_buffer_load_dword s16, s[4:7], 0xc9 ; C20805C9 s_buffer_load_dword s17, s[4:7], 0xcb ; C20885CB s_buffer_load_dword s18, s[4:7], 0xcc ; C20905CC s_buffer_load_dword s19, s[4:7], 0xcd ; C20985CD s_buffer_load_dword s20, s[4:7], 0xcf ; C20A05CF s_buffer_load_dword s21, s[4:7], 0xd0 ; C20A85D0 s_buffer_load_dword s22, s[4:7], 0xd1 ; C20B05D1 s_buffer_load_dword s23, s[4:7], 0xd3 ; C20B85D3 s_buffer_load_dword s24, s[4:7], 0xd4 ; C20C05D4 s_buffer_load_dword s25, s[4:7], 0xd5 ; C20C85D5 s_buffer_load_dword s26, s[4:7], 0xd7 ; C20D05D7 s_buffer_load_dword s27, s[4:7], 0xd8 ; C20D85D8 s_buffer_load_dword s4, s[4:7], 0xd9 ; C20205D9 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x2 ; C2038102 s_buffer_load_dword s28, s[0:3], 0x3 ; C20E0103 s_buffer_load_dword s29, s[0:3], 0x4 ; C20E8104 s_buffer_load_dword s30, s[0:3], 0x5 ; C20F0105 s_buffer_load_dword s31, s[0:3], 0x6 ; C20F8106 s_buffer_load_dword s32, s[0:3], 0x7 ; C2100107 s_buffer_load_dword s33, s[0:3], 0x8 ; C2108108 s_buffer_load_dword s34, s[0:3], 0x9 ; C2110109 s_buffer_load_dword s35, s[0:3], 0xa ; C211810A s_buffer_load_dword s36, s[0:3], 0xb ; C212010B s_buffer_load_dword s37, s[0:3], 0xc ; C212810C s_buffer_load_dword s38, s[0:3], 0xd ; C213010D s_buffer_load_dword s39, s[0:3], 0xe ; C213810E s_buffer_load_dword s40, s[0:3], 0xf ; C214010F s_buffer_load_dword s41, s[0:3], 0x10 ; C2148110 s_buffer_load_dword s42, s[0:3], 0x11 ; C2150111 s_buffer_load_dword s43, s[0:3], 0x12 ; C2158112 s_buffer_load_dword s44, s[0:3], 0x13 ; C2160113 s_buffer_load_dword s45, s[0:3], 0x14 ; C2168114 s_buffer_load_dword s46, s[0:3], 0x15 ; C2170115 s_buffer_load_dword s47, s[0:3], 0x16 ; C2178116 s_buffer_load_dword s48, s[0:3], 0x17 ; C2180117 s_buffer_load_dword s49, s[0:3], 0x18 ; C2188118 s_buffer_load_dword s50, s[0:3], 0x19 ; C2190119 s_buffer_load_dword s51, s[0:3], 0x1a ; C219811A s_buffer_load_dword s52, s[0:3], 0x1b ; C21A011B s_buffer_load_dword s53, s[0:3], 0x1c ; C21A811C s_buffer_load_dword s54, s[0:3], 0x1d ; C21B011D s_buffer_load_dword s55, s[0:3], 0x1e ; C21B811E s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s11 ; 7E00020B s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v7, s14 ; 7E0E020E v_mov_b32_e32 v8, s17 ; 7E100211 v_mov_b32_e32 v9, s20 ; 7E120214 v_mov_b32_e32 v10, s23 ; 7E140217 v_mov_b32_e32 v11, s26 ; 7E16021A v_mac_f32_e32 v0, s9, v5 ; 3E000A09 v_mac_f32_e32 v7, s12, v5 ; 3E0E0A0C v_mac_f32_e32 v8, s15, v5 ; 3E100A0F v_mac_f32_e32 v9, s18, v5 ; 3E120A12 v_mac_f32_e32 v10, s21, v5 ; 3E140A15 v_mac_f32_e32 v11, s24, v5 ; 3E160A18 v_mac_f32_e32 v0, s10, v6 ; 3E000C0A v_mac_f32_e32 v7, s13, v6 ; 3E0E0C0D v_mac_f32_e32 v8, s16, v6 ; 3E100C10 v_mac_f32_e32 v9, s19, v6 ; 3E120C13 exp 15, 32, 0, 0, 0, v5, v6, v0, v7 ; F800020F 07000605 v_mac_f32_e32 v10, s22, v6 ; 3E140C16 v_mac_f32_e32 v11, s25, v6 ; 3E160C19 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v0, v3, s8, -v4 ; D2820000 84101103 v_mad_f32 v5, s27, v4, v1 ; D2820005 0406081B v_mad_f32 v6, s4, v4, -v2 ; D2820006 840A0804 v_mul_f32_e32 v7, s6, v2 ; 100E0406 exp 15, 33, 0, 0, 0, v8, v9, v10, v11 ; F800021F 0B0A0908 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v8, s30, v2 ; 1010041E v_mac_f32_e32 v7, s5, v1 ; 3E0E0205 exp 15, 12, 0, 0, 0, v5, v6, v0, v4 ; F80000CF 04000605 v_mac_f32_e32 v8, s29, v1 ; 3E10021D s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s34, v2 ; 10000422 v_mac_f32_e32 v0, s33, v1 ; 3E000221 v_mul_f32_e32 v5, s38, v2 ; 100A0426 v_mac_f32_e32 v5, s37, v1 ; 3E0A0225 v_mul_f32_e32 v6, s42, v2 ; 100C042A v_mul_f32_e32 v9, s46, v2 ; 1012042E v_mul_f32_e32 v10, s50, v2 ; 10140432 v_mul_f32_e32 v2, s54, v2 ; 10040436 v_mac_f32_e32 v6, s41, v1 ; 3E0C0229 v_mac_f32_e32 v9, s45, v1 ; 3E12022D v_mac_f32_e32 v10, s49, v1 ; 3E140231 v_mac_f32_e32 v2, s53, v1 ; 3E040235 v_mac_f32_e32 v7, s7, v3 ; 3E0E0607 v_mac_f32_e32 v8, s31, v3 ; 3E10061F v_mac_f32_e32 v0, s35, v3 ; 3E000623 v_mac_f32_e32 v5, s39, v3 ; 3E0A0627 v_mac_f32_e32 v6, s43, v3 ; 3E0C062B v_mac_f32_e32 v9, s47, v3 ; 3E12062F v_mac_f32_e32 v10, s51, v3 ; 3E140633 v_mac_f32_e32 v2, s55, v3 ; 3E040637 v_mac_f32_e32 v7, s28, v4 ; 3E0E081C v_mac_f32_e32 v8, s32, v4 ; 3E100820 v_mac_f32_e32 v0, s36, v4 ; 3E000824 v_mac_f32_e32 v5, s40, v4 ; 3E0A0828 v_mac_f32_e32 v6, s44, v4 ; 3E0C082C v_mac_f32_e32 v9, s48, v4 ; 3E120830 v_mac_f32_e32 v10, s52, v4 ; 3E140834 v_mac_f32_e32 v2, s0, v4 ; 3E040800 exp 15, 13, 0, 0, 0, v7, v8, v0, v5 ; F80000DF 05000807 exp 15, 14, 0, 1, 0, v6, v9, v10, v2 ; F80008EF 020A0906 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 12 Code Size: 544 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL SVIEW[5], 2D, FLOAT DCL SVIEW[6], 2D, FLOAT DCL CONST[0..11] DCL TEMP[0..8], LOCAL IMM[0] FLT32 { 1.0000, -1.0000, 0.0588, 16.0000} IMM[1] FLT32 { -0.0011, 0.9960, 0.0020, 0.9966} IMM[2] FLT32 { 0.0040, 0.9989, -0.0007, 0.9973} IMM[3] FLT32 { 0.0010, 0.9983, 0.0027, 0.9993} IMM[4] FLT32 { -0.0034, 0.9980, -0.0017, 0.9990} IMM[5] FLT32 { 0.0000, 1.0000, 0.0017, 1.0010} IMM[6] FLT32 { 0.0034, 1.0020, -0.0027, 1.0007} IMM[7] FLT32 { -0.0010, 1.0017, 0.0007, 1.0027} IMM[8] FLT32 { -0.0040, 1.0011, -0.0020, 1.0034} IMM[9] FLT32 { 0.0011, 1.0040, 2.0000, 7.0000} IMM[10] FLT32 { 0.0600, 0.1200, 0.1800, 6.0000} IMM[11] FLT32 { -0.0600, -0.1200, -0.1800, -0.5800} IMM[12] FLT32 { 0.5600, -0.0200, 0.5400, 0.5200} IMM[13] FLT32 { 10.0000, -2.0000, 3.0000, 0.7500} IMM[14] FLT32 { 0.2500, 0.3000, 0.5900, 0.1100} IMM[15] FLT32 { -0.0800, 14.2857, 0.0300, 0.5000} IMM[16] FLT32 { -0.9000, -0.0100, 10.0000, -100.0000} 0: MAD TEMP[0], IN[0].xyxy, IMM[0].xyxy, IMM[1] 1: MOV TEMP[1].xy, TEMP[0].zwww 2: TEX TEMP[1], TEMP[1], SAMP[5], 2D 3: MUL TEMP[1], TEMP[1], IMM[0].zzzz 4: MOV TEMP[2].xy, TEMP[0].xyyy 5: TEX TEMP[2], TEMP[2], SAMP[5], 2D 6: MAD TEMP[0], TEMP[2], IMM[0].zzzz, TEMP[1] 7: MAD TEMP[1], IN[0].xyxy, IMM[0].xyxy, IMM[2] 8: MOV TEMP[2].xy, TEMP[1].xyyy 9: TEX TEMP[2], TEMP[2], SAMP[5], 2D 10: MAD TEMP[0], TEMP[2], IMM[0].zzzz, TEMP[0] 11: MOV TEMP[2].xy, TEMP[1].zwww 12: TEX TEMP[2], TEMP[2], SAMP[5], 2D 13: MAD TEMP[0], TEMP[2], IMM[0].zzzz, TEMP[0] 14: MAD TEMP[1], IN[0].xyxy, IMM[0].xyxy, IMM[3] 15: MOV TEMP[2].xy, TEMP[1].xyyy 16: TEX TEMP[2], TEMP[2], SAMP[5], 2D 17: MAD TEMP[0], TEMP[2], IMM[0].zzzz, TEMP[0] 18: MOV TEMP[2].xy, TEMP[1].zwww 19: TEX TEMP[2], TEMP[2], SAMP[5], 2D 20: MAD TEMP[0], TEMP[2], IMM[0].zzzz, TEMP[0] 21: MAD TEMP[1], IN[0].xyxy, IMM[0].xyxy, IMM[4] 22: MOV TEMP[2].xy, TEMP[1].xyyy 23: TEX TEMP[2], TEMP[2], SAMP[5], 2D 24: MAD TEMP[0], TEMP[2], IMM[0].zzzz, TEMP[0] 25: MOV TEMP[2].xy, TEMP[1].zwww 26: TEX TEMP[2], TEMP[2], SAMP[5], 2D 27: MAD TEMP[0], TEMP[2], IMM[0].zzzz, TEMP[0] 28: MAD TEMP[1], IN[0].xyxy, IMM[0].xyxy, IMM[5] 29: MOV TEMP[2].xy, TEMP[1].xyyy 30: TEX TEMP[2], TEMP[2], SAMP[5], 2D 31: MAD TEMP[0], TEMP[2], IMM[0].zzzz, TEMP[0] 32: MOV TEMP[2].xy, TEMP[1].zwww 33: TEX TEMP[2], TEMP[2], SAMP[5], 2D 34: MAD TEMP[0], TEMP[2], IMM[0].zzzz, TEMP[0] 35: MAD TEMP[1], IN[0].xyxy, IMM[0].xyxy, IMM[6] 36: MOV TEMP[2].xy, TEMP[1].xyyy 37: TEX TEMP[2], TEMP[2], SAMP[5], 2D 38: MAD TEMP[0], TEMP[2], IMM[0].zzzz, TEMP[0] 39: MOV TEMP[2].xy, TEMP[1].zwww 40: TEX TEMP[2], TEMP[2], SAMP[5], 2D 41: MAD TEMP[0], TEMP[2], IMM[0].zzzz, TEMP[0] 42: MAD TEMP[1], IN[0].xyxy, IMM[0].xyxy, IMM[7] 43: MOV TEMP[2].xy, TEMP[1].xyyy 44: TEX TEMP[2], TEMP[2], SAMP[5], 2D 45: MAD TEMP[0], TEMP[2], IMM[0].zzzz, TEMP[0] 46: MOV TEMP[2].xy, TEMP[1].zwww 47: TEX TEMP[2], TEMP[2], SAMP[5], 2D 48: MAD TEMP[0], TEMP[2], IMM[0].zzzz, TEMP[0] 49: MAD TEMP[2], IN[0].xyxy, IMM[0].xyxy, IMM[8] 50: MOV TEMP[3].xy, TEMP[2].xyyy 51: TEX TEMP[3], TEMP[3], SAMP[5], 2D 52: MAD TEMP[0], TEMP[3], IMM[0].zzzz, TEMP[0] 53: MOV TEMP[3].xy, TEMP[2].zwww 54: TEX TEMP[3], TEMP[3], SAMP[5], 2D 55: MAD TEMP[2], TEMP[3], IMM[0].zzzz, TEMP[0] 56: MAD TEMP[0].xy, IN[0].xyyy, IMM[0].xyyy, IMM[9].xyyy 57: MOV TEMP[3].xy, TEMP[0].xyyy 58: TEX TEMP[3], TEMP[3], SAMP[5], 2D 59: MAD TEMP[2], TEMP[3], IMM[0].zzzz, TEMP[2] 60: MUL TEMP[3].x, TEMP[2].wwww, IMM[0].wwww 61: MUL TEMP[2].xyz, TEMP[3].xxxx, TEMP[2].xyzz 62: DP2 TEMP[3].x, TEMP[2].yzzz, CONST[10].xyyy 63: ADD TEMP[0].x, TEMP[3].xxxx, CONST[10].wwww 64: DP2 TEMP[3].x, TEMP[2].yzzz, CONST[11].xyyy 65: ADD TEMP[3].x, TEMP[3].xxxx, CONST[11].wwww 66: MOV TEMP[0].y, TEMP[3].xxxx 67: MOV TEMP[3].xy, TEMP[0].xyyy 68: TEX TEMP[3].xyz, TEMP[3], SAMP[6], 2D 69: DP2 TEMP[4].x, TEMP[2].xzzz, CONST[10].xyyy 70: ADD TEMP[1].x, TEMP[4].xxxx, CONST[10].wwww 71: DP2 TEMP[4].x, TEMP[2].xzzz, CONST[11].xyyy 72: ADD TEMP[4].x, TEMP[4].xxxx, CONST[11].wwww 73: MOV TEMP[1].y, TEMP[4].xxxx 74: MOV TEMP[4].xy, IN[0].xyyy 75: TEX TEMP[4].xyz, TEMP[4], SAMP[4], 2D 76: MAD TEMP[4].xyz, TEMP[4].xyzz, IMM[9].zzzz, IMM[0].yyyy 77: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 78: RSQ TEMP[5].x, TEMP[5].xxxx 79: MUL TEMP[5].xy, TEMP[5].xxxx, TEMP[4].yzzz 80: ABS TEMP[6].x, TEMP[5].xxxx 81: POW TEMP[6].x, TEMP[6].xxxx, IMM[9].wwww 82: MOV TEMP[7].xy, TEMP[1].xyyy 83: TEX TEMP[7].xyz, TEMP[7], SAMP[6], 2D 84: LRP TEMP[4].xyz, TEMP[6].xxxx, TEMP[7].xyzz, TEMP[3].xyzz 85: DP2 TEMP[3].x, TEMP[2].yxxx, CONST[10].xyyy 86: ADD TEMP[0].x, TEMP[3].xxxx, CONST[10].wwww 87: DP2 TEMP[3].x, TEMP[2].yxxx, CONST[11].xyyy 88: ADD TEMP[3].x, TEMP[3].xxxx, CONST[11].wwww 89: MOV TEMP[0].y, TEMP[3].xxxx 90: MOV TEMP[3].xy, TEMP[0].xyyy 91: TEX TEMP[3].xyz, TEMP[3], SAMP[6], 2D 92: ABS TEMP[5].x, TEMP[5].yyyy 93: POW TEMP[5].x, TEMP[5].xxxx, IMM[9].wwww 94: LRP TEMP[0].xyz, TEMP[5].xxxx, TEMP[3].xyzz, TEMP[4].xyzz 95: MAD TEMP[1], CONST[3].wwww, IMM[11].xxyz, IMM[12] 96: ADD TEMP[2].xy, -TEMP[1].xzzz, TEMP[1].zwww 97: RCP TEMP[2].x, TEMP[2].xxxx 98: RCP TEMP[3].x, TEMP[2].yyyy 99: MOV TEMP[2].y, TEMP[3].xxxx 100: MAD TEMP[3].x, CONST[3].wwww, IMM[10].wwww, IMM[0].xxxx 101: MOV TEMP[5].xy, IN[1].xyyy 102: TEX TEMP[5].y, TEMP[5], SAMP[1], 2D 103: MOV TEMP[6].xy, IN[0].xyyy 104: TEX TEMP[6], TEMP[6], SAMP[0], 2D 105: MAD TEMP[5].x, TEMP[5].yyyy, TEMP[6].xxxx, TEMP[6].wwww 106: MAD TEMP[7].xy, TEMP[5].xxxx, TEMP[3].xxxx, -TEMP[1].xzzz 107: MAD TEMP[3].x, TEMP[5].xxxx, TEMP[3].xxxx, IMM[11].wwww 108: RCP TEMP[5].x, TEMP[1].yyyy 109: MUL TEMP[5].x, TEMP[3].xxxx, TEMP[5].xxxx 110: MOV_SAT TEMP[5].x, TEMP[5].xxxx 111: MUL TEMP[3].x, TEMP[3].xxxx, IMM[13].xxxx 112: MOV_SAT TEMP[3].x, TEMP[3].xxxx 113: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[7].xyyy 114: MOV_SAT TEMP[7].xy, TEMP[2].xyyy 115: MAD TEMP[1].x, TEMP[7].xxxx, IMM[13].yyyy, IMM[13].zzzz 116: MUL TEMP[2].x, TEMP[7].xxxx, TEMP[7].xxxx 117: MUL TEMP[8].x, TEMP[2].xxxx, TEMP[1].xxxx 118: MOV TEMP[1].y, TEMP[8].xxxx 119: MAD TEMP[2].x, TEMP[7].yyyy, IMM[13].yyyy, IMM[13].zzzz 120: MUL TEMP[7].x, TEMP[7].yyyy, TEMP[7].yyyy 121: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[2].xxxx 122: MOV TEMP[1].z, TEMP[7].xxxx 123: MAD TEMP[2].x, TEMP[5].xxxx, IMM[13].yyyy, IMM[13].zzzz 124: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[5].xxxx 125: MUL TEMP[1].x, TEMP[5].xxxx, TEMP[2].xxxx 126: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xyzz 127: ADD TEMP[1].xyz, -CONST[0].xyzz, CONST[1].xyzz 128: MAD TEMP[1].xyz, TEMP[0].xxxx, TEMP[1].xyzz, CONST[0].xyzz 129: LRP TEMP[4].xyz, TEMP[0].yyyy, CONST[2].xyzz, TEMP[1].xyzz 130: ADD TEMP[1].x, -TEMP[4].xxxx, CONST[0].wwww 131: ADD TEMP[5].x, -TEMP[4].yyyy, CONST[1].wwww 132: MOV TEMP[1].y, TEMP[5].xxxx 133: ADD TEMP[5].x, -TEMP[4].zzzz, CONST[2].wwww 134: MOV TEMP[1].z, TEMP[5].xxxx 135: MAD TEMP[0].xyz, TEMP[0].zzzz, TEMP[1].xyzz, TEMP[4].xyzz 136: MUL TEMP[2].x, CONST[3].wwww, IMM[13].wwww 137: ADD TEMP[5].x, -TEMP[6].xxxx, IMM[0].xxxx 138: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[5].xxxx 139: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[5].xxxx 140: MAD TEMP[2].x, TEMP[5].xxxx, IMM[14].xxxx, TEMP[2].xxxx 141: MOV TEMP[5].xy, IN[1].zwww 142: TEX TEMP[5], TEMP[5], SAMP[3], 2D 143: ADD TEMP[5], TEMP[5], IMM[0].yyyy 144: MAD TEMP[4], TEMP[2].xxxx, TEMP[5], IMM[0].xxxx 145: MUL TEMP[5].xyz, TEMP[0].xyzz, TEMP[4].xyzz 146: DP3 TEMP[7].x, TEMP[5].xyzz, IMM[14].yzww 147: ADD TEMP[0].x, TEMP[7].xxxx, IMM[15].xxxx 148: MUL TEMP[0].x, TEMP[0].xxxx, IMM[15].yyyy 149: MOV_SAT TEMP[7].x, TEMP[0].xxxx 150: MAD TEMP[8].x, TEMP[7].xxxx, IMM[13].yyyy, IMM[13].zzzz 151: MUL TEMP[0].x, TEMP[7].xxxx, TEMP[7].xxxx 152: MAD TEMP[0].x, TEMP[8].xxxx, -TEMP[0].xxxx, IMM[0].xxxx 153: MAD TEMP[0].xyz, TEMP[0].xxxx, IMM[15].zzzz, TEMP[5].xyzz 154: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[6].zzzz 155: MAD TEMP[5].xyz, TEMP[0].xyzz, IMM[15].wwww, TEMP[5].xyzz 156: MOV_SAT TEMP[5].xyz, TEMP[5].xyzz 157: MUL TEMP[0].xyz, TEMP[6].yyyy, TEMP[5].xyzz 158: MOV TEMP[7].xy, IN[0].xyyy 159: TEX TEMP[7], TEMP[7], SAMP[2], 2D 160: MAD TEMP[5].xyz, TEMP[5].xyzz, -TEMP[6].yyyy, TEMP[7].xyzz 161: MUL TEMP[6].x, TEMP[6].yyyy, CONST[3].zzzz 162: MUL TEMP[4].x, TEMP[4].wwww, TEMP[6].xxxx 163: MAD TEMP[1].x, TEMP[3].xxxx, IMM[13].yyyy, IMM[13].zzzz 164: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx 165: MUL TEMP[6].x, TEMP[3].xxxx, TEMP[1].xxxx 166: MAD TEMP[1].xy, TEMP[1].xxxx, TEMP[3].xxxx, IMM[16].xyyy 167: MUL TEMP[1].xy, TEMP[1].xyyy, IMM[16].zwww 168: MAD TEMP[0].xyz, TEMP[6].xxxx, TEMP[5].xyzz, TEMP[0].xyzz 169: MAX TEMP[3].x, TEMP[1].yyyy, IMM[5].xxxx 170: MOV_SAT TEMP[1].x, TEMP[1].xxxx 171: MAD TEMP[5].x, TEMP[3].xxxx, IMM[13].yyyy, IMM[13].zzzz 172: MUL TEMP[2].x, TEMP[3].xxxx, TEMP[3].xxxx 173: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[5].xxxx 174: MAD TEMP[3].x, TEMP[4].xxxx, -TEMP[2].xxxx, TEMP[7].wwww 175: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx 176: MAD TEMP[4].x, TEMP[1].xxxx, IMM[13].yyyy, IMM[13].zzzz 177: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[1].xxxx 178: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[4].xxxx 179: MAD TEMP[1].x, TEMP[1].xxxx, TEMP[3].xxxx, TEMP[2].xxxx 180: MOV TEMP[0].w, TEMP[1].xxxx 181: MOV OUT[0], TEMP[0] 182: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0) %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 4) %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 8) %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 12) %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 16) %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 20) %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 24) %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 28) %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 32) %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 36) %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 40) %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 44) %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 56) %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 60) %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 160) %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 164) %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 172) %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 176) %43 = call float @llvm.SI.load.const(<16 x i8> %24, i32 180) %44 = call float @llvm.SI.load.const(<16 x i8> %24, i32 188) %45 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %46 = load <8 x i32>, <8 x i32> addrspace(2)* %45, align 32, !tbaa !0 %47 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %48 = load <4 x i32>, <4 x i32> addrspace(2)* %47, align 16, !tbaa !0 %49 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %50 = load <8 x i32>, <8 x i32> addrspace(2)* %49, align 32, !tbaa !0 %51 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %52 = load <4 x i32>, <4 x i32> addrspace(2)* %51, align 16, !tbaa !0 %53 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %54 = load <8 x i32>, <8 x i32> addrspace(2)* %53, align 32, !tbaa !0 %55 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %56 = load <4 x i32>, <4 x i32> addrspace(2)* %55, align 16, !tbaa !0 %57 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %58 = load <8 x i32>, <8 x i32> addrspace(2)* %57, align 32, !tbaa !0 %59 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %60 = load <4 x i32>, <4 x i32> addrspace(2)* %59, align 16, !tbaa !0 %61 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %62 = load <8 x i32>, <8 x i32> addrspace(2)* %61, align 32, !tbaa !0 %63 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %64 = load <4 x i32>, <4 x i32> addrspace(2)* %63, align 16, !tbaa !0 %65 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %66 = load <8 x i32>, <8 x i32> addrspace(2)* %65, align 32, !tbaa !0 %67 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %68 = load <4 x i32>, <4 x i32> addrspace(2)* %67, align 16, !tbaa !0 %69 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 6 %70 = load <8 x i32>, <8 x i32> addrspace(2)* %69, align 32, !tbaa !0 %71 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 6 %72 = load <4 x i32>, <4 x i32> addrspace(2)* %71, align 16, !tbaa !0 %73 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %74 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %75 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %76 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %77 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) %78 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %79 = fadd float %73, 0xBF5191B840000000 %80 = fsub float 0x3FEFDF3720000000, %74 %81 = fadd float %73, 0x3F5FFFFAA0000000 %82 = fsub float 0x3FEFE44980000000, %74 %83 = bitcast float %81 to i32 %84 = bitcast float %82 to i32 %85 = insertelement <2 x i32> undef, i32 %83, i32 0 %86 = insertelement <2 x i32> %85, i32 %84, i32 1 %87 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %86, <8 x i32> %66, <4 x i32> %68, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %88 = extractelement <4 x float> %87, i32 0 %89 = extractelement <4 x float> %87, i32 1 %90 = extractelement <4 x float> %87, i32 2 %91 = extractelement <4 x float> %87, i32 3 %92 = fmul float %88, 0x3FAE1E1E20000000 %93 = fmul float %89, 0x3FAE1E1E20000000 %94 = fmul float %90, 0x3FAE1E1E20000000 %95 = fmul float %91, 0x3FAE1E1E20000000 %96 = bitcast float %79 to i32 %97 = bitcast float %80 to i32 %98 = insertelement <2 x i32> undef, i32 %96, i32 0 %99 = insertelement <2 x i32> %98, i32 %97, i32 1 %100 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %99, <8 x i32> %66, <4 x i32> %68, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %101 = extractelement <4 x float> %100, i32 0 %102 = extractelement <4 x float> %100, i32 1 %103 = extractelement <4 x float> %100, i32 2 %104 = extractelement <4 x float> %100, i32 3 %105 = fmul float %101, 0x3FAE1E1E20000000 %106 = fadd float %105, %92 %107 = fmul float %102, 0x3FAE1E1E20000000 %108 = fadd float %107, %93 %109 = fmul float %103, 0x3FAE1E1E20000000 %110 = fadd float %109, %94 %111 = fmul float %104, 0x3FAE1E1E20000000 %112 = fadd float %111, %95 %113 = fadd float %73, 0x3F70646EC0000000 %114 = fsub float 0x3FEFF73720000000, %74 %115 = fadd float %73, 0xBF476CFB80000000 %116 = fsub float 0x3FEFEA24C0000000, %74 %117 = bitcast float %113 to i32 %118 = bitcast float %114 to i32 %119 = insertelement <2 x i32> undef, i32 %117, i32 0 %120 = insertelement <2 x i32> %119, i32 %118, i32 1 %121 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %120, <8 x i32> %66, <4 x i32> %68, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %122 = extractelement <4 x float> %121, i32 0 %123 = extractelement <4 x float> %121, i32 1 %124 = extractelement <4 x float> %121, i32 2 %125 = extractelement <4 x float> %121, i32 3 %126 = fmul float %122, 0x3FAE1E1E20000000 %127 = fadd float %126, %106 %128 = fmul float %123, 0x3FAE1E1E20000000 %129 = fadd float %128, %108 %130 = fmul float %124, 0x3FAE1E1E20000000 %131 = fadd float %130, %110 %132 = fmul float %125, 0x3FAE1E1E20000000 %133 = fadd float %132, %112 %134 = bitcast float %115 to i32 %135 = bitcast float %116 to i32 %136 = insertelement <2 x i32> undef, i32 %134, i32 0 %137 = insertelement <2 x i32> %136, i32 %135, i32 1 %138 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %137, <8 x i32> %66, <4 x i32> %68, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %139 = extractelement <4 x float> %138, i32 0 %140 = extractelement <4 x float> %138, i32 1 %141 = extractelement <4 x float> %138, i32 2 %142 = extractelement <4 x float> %138, i32 3 %143 = fmul float %139, 0x3FAE1E1E20000000 %144 = fadd float %143, %127 %145 = fmul float %140, 0x3FAE1E1E20000000 %146 = fadd float %145, %129 %147 = fmul float %141, 0x3FAE1E1E20000000 %148 = fadd float %147, %131 %149 = fmul float %142, 0x3FAE1E1E20000000 %150 = fadd float %149, %133 %151 = fadd float %73, 0x3F500002A0000000 %152 = fsub float 0x3FEFF224C0000000, %74 %153 = fadd float %73, 0x3F65DB3E60000000 %154 = fsub float 0x3FEFFA24C0000000, %74 %155 = bitcast float %151 to i32 %156 = bitcast float %152 to i32 %157 = insertelement <2 x i32> undef, i32 %155, i32 0 %158 = insertelement <2 x i32> %157, i32 %156, i32 1 %159 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %158, <8 x i32> %66, <4 x i32> %68, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %160 = extractelement <4 x float> %159, i32 0 %161 = extractelement <4 x float> %159, i32 1 %162 = extractelement <4 x float> %159, i32 2 %163 = extractelement <4 x float> %159, i32 3 %164 = fmul float %160, 0x3FAE1E1E20000000 %165 = fadd float %164, %144 %166 = fmul float %161, 0x3FAE1E1E20000000 %167 = fadd float %166, %146 %168 = fmul float %162, 0x3FAE1E1E20000000 %169 = fadd float %168, %148 %170 = fmul float %163, 0x3FAE1E1E20000000 %171 = fadd float %170, %150 %172 = bitcast float %153 to i32 %173 = bitcast float %154 to i32 %174 = insertelement <2 x i32> undef, i32 %172, i32 0 %175 = insertelement <2 x i32> %174, i32 %173, i32 1 %176 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %175, <8 x i32> %66, <4 x i32> %68, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %177 = extractelement <4 x float> %176, i32 0 %178 = extractelement <4 x float> %176, i32 1 %179 = extractelement <4 x float> %176, i32 2 %180 = extractelement <4 x float> %176, i32 3 %181 = fmul float %177, 0x3FAE1E1E20000000 %182 = fadd float %181, %165 %183 = fmul float %178, 0x3FAE1E1E20000000 %184 = fadd float %183, %167 %185 = fmul float %179, 0x3FAE1E1E20000000 %186 = fadd float %185, %169 %187 = fmul float %180, 0x3FAE1E1E20000000 %188 = fadd float %187, %171 %189 = fadd float %73, 0xBF6BB67A00000000 %190 = fsub float 0x3FEFF00000000000, %74 %191 = fadd float %73, 0xBF5BB67F60000000 %192 = fsub float 0x3FEFF80000000000, %74 %193 = bitcast float %189 to i32 %194 = bitcast float %190 to i32 %195 = insertelement <2 x i32> undef, i32 %193, i32 0 %196 = insertelement <2 x i32> %195, i32 %194, i32 1 %197 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %196, <8 x i32> %66, <4 x i32> %68, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %198 = extractelement <4 x float> %197, i32 0 %199 = extractelement <4 x float> %197, i32 1 %200 = extractelement <4 x float> %197, i32 2 %201 = extractelement <4 x float> %197, i32 3 %202 = fmul float %198, 0x3FAE1E1E20000000 %203 = fadd float %202, %182 %204 = fmul float %199, 0x3FAE1E1E20000000 %205 = fadd float %204, %184 %206 = fmul float %200, 0x3FAE1E1E20000000 %207 = fadd float %206, %186 %208 = fmul float %201, 0x3FAE1E1E20000000 %209 = fadd float %208, %188 %210 = bitcast float %191 to i32 %211 = bitcast float %192 to i32 %212 = insertelement <2 x i32> undef, i32 %210, i32 0 %213 = insertelement <2 x i32> %212, i32 %211, i32 1 %214 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %213, <8 x i32> %66, <4 x i32> %68, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %215 = extractelement <4 x float> %214, i32 0 %216 = extractelement <4 x float> %214, i32 1 %217 = extractelement <4 x float> %214, i32 2 %218 = extractelement <4 x float> %214, i32 3 %219 = fmul float %215, 0x3FAE1E1E20000000 %220 = fadd float %219, %203 %221 = fmul float %216, 0x3FAE1E1E20000000 %222 = fadd float %221, %205 %223 = fmul float %217, 0x3FAE1E1E20000000 %224 = fadd float %223, %207 %225 = fmul float %218, 0x3FAE1E1E20000000 %226 = fadd float %225, %209 %227 = fadd float %73, 0.000000e+00 %228 = fsub float 1.000000e+00, %74 %229 = fadd float %73, 0x3F5BB67F60000000 %230 = fsub float 0x3FF0040000000000, %74 %231 = bitcast float %227 to i32 %232 = bitcast float %228 to i32 %233 = insertelement <2 x i32> undef, i32 %231, i32 0 %234 = insertelement <2 x i32> %233, i32 %232, i32 1 %235 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %234, <8 x i32> %66, <4 x i32> %68, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %236 = extractelement <4 x float> %235, i32 0 %237 = extractelement <4 x float> %235, i32 1 %238 = extractelement <4 x float> %235, i32 2 %239 = extractelement <4 x float> %235, i32 3 %240 = fmul float %236, 0x3FAE1E1E20000000 %241 = fadd float %240, %220 %242 = fmul float %237, 0x3FAE1E1E20000000 %243 = fadd float %242, %222 %244 = fmul float %238, 0x3FAE1E1E20000000 %245 = fadd float %244, %224 %246 = fmul float %239, 0x3FAE1E1E20000000 %247 = fadd float %246, %226 %248 = bitcast float %229 to i32 %249 = bitcast float %230 to i32 %250 = insertelement <2 x i32> undef, i32 %248, i32 0 %251 = insertelement <2 x i32> %250, i32 %249, i32 1 %252 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %251, <8 x i32> %66, <4 x i32> %68, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %253 = extractelement <4 x float> %252, i32 0 %254 = extractelement <4 x float> %252, i32 1 %255 = extractelement <4 x float> %252, i32 2 %256 = extractelement <4 x float> %252, i32 3 %257 = fmul float %253, 0x3FAE1E1E20000000 %258 = fadd float %257, %241 %259 = fmul float %254, 0x3FAE1E1E20000000 %260 = fadd float %259, %243 %261 = fmul float %255, 0x3FAE1E1E20000000 %262 = fadd float %261, %245 %263 = fmul float %256, 0x3FAE1E1E20000000 %264 = fadd float %263, %247 %265 = fadd float %73, 0x3F6BB67A00000000 %266 = fsub float 0x3FF0080000000000, %74 %267 = fadd float %73, 0xBF65DB3E60000000 %268 = fsub float 0x3FF002EDA0000000, %74 %269 = bitcast float %265 to i32 %270 = bitcast float %266 to i32 %271 = insertelement <2 x i32> undef, i32 %269, i32 0 %272 = insertelement <2 x i32> %271, i32 %270, i32 1 %273 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %272, <8 x i32> %66, <4 x i32> %68, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %274 = extractelement <4 x float> %273, i32 0 %275 = extractelement <4 x float> %273, i32 1 %276 = extractelement <4 x float> %273, i32 2 %277 = extractelement <4 x float> %273, i32 3 %278 = fmul float %274, 0x3FAE1E1E20000000 %279 = fadd float %278, %258 %280 = fmul float %275, 0x3FAE1E1E20000000 %281 = fadd float %280, %260 %282 = fmul float %276, 0x3FAE1E1E20000000 %283 = fadd float %282, %262 %284 = fmul float %277, 0x3FAE1E1E20000000 %285 = fadd float %284, %264 %286 = bitcast float %267 to i32 %287 = bitcast float %268 to i32 %288 = insertelement <2 x i32> undef, i32 %286, i32 0 %289 = insertelement <2 x i32> %288, i32 %287, i32 1 %290 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %289, <8 x i32> %66, <4 x i32> %68, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %291 = extractelement <4 x float> %290, i32 0 %292 = extractelement <4 x float> %290, i32 1 %293 = extractelement <4 x float> %290, i32 2 %294 = extractelement <4 x float> %290, i32 3 %295 = fmul float %291, 0x3FAE1E1E20000000 %296 = fadd float %295, %279 %297 = fmul float %292, 0x3FAE1E1E20000000 %298 = fadd float %297, %281 %299 = fmul float %293, 0x3FAE1E1E20000000 %300 = fadd float %299, %283 %301 = fmul float %294, 0x3FAE1E1E20000000 %302 = fadd float %301, %285 %303 = fadd float %73, 0xBF500002A0000000 %304 = fsub float 0x3FF006EDA0000000, %74 %305 = fadd float %73, 0x3F476CFB80000000 %306 = fsub float 0x3FF00AEDA0000000, %74 %307 = bitcast float %303 to i32 %308 = bitcast float %304 to i32 %309 = insertelement <2 x i32> undef, i32 %307, i32 0 %310 = insertelement <2 x i32> %309, i32 %308, i32 1 %311 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %310, <8 x i32> %66, <4 x i32> %68, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %312 = extractelement <4 x float> %311, i32 0 %313 = extractelement <4 x float> %311, i32 1 %314 = extractelement <4 x float> %311, i32 2 %315 = extractelement <4 x float> %311, i32 3 %316 = fmul float %312, 0x3FAE1E1E20000000 %317 = fadd float %316, %296 %318 = fmul float %313, 0x3FAE1E1E20000000 %319 = fadd float %318, %298 %320 = fmul float %314, 0x3FAE1E1E20000000 %321 = fadd float %320, %300 %322 = fmul float %315, 0x3FAE1E1E20000000 %323 = fadd float %322, %302 %324 = bitcast float %305 to i32 %325 = bitcast float %306 to i32 %326 = insertelement <2 x i32> undef, i32 %324, i32 0 %327 = insertelement <2 x i32> %326, i32 %325, i32 1 %328 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %327, <8 x i32> %66, <4 x i32> %68, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %329 = extractelement <4 x float> %328, i32 0 %330 = extractelement <4 x float> %328, i32 1 %331 = extractelement <4 x float> %328, i32 2 %332 = extractelement <4 x float> %328, i32 3 %333 = fmul float %329, 0x3FAE1E1E20000000 %334 = fadd float %333, %317 %335 = fmul float %330, 0x3FAE1E1E20000000 %336 = fadd float %335, %319 %337 = fmul float %331, 0x3FAE1E1E20000000 %338 = fadd float %337, %321 %339 = fmul float %332, 0x3FAE1E1E20000000 %340 = fadd float %339, %323 %341 = fadd float %73, 0xBF70646EC0000000 %342 = fsub float 0x3FF0046460000000, %74 %343 = fadd float %73, 0xBF5FFFFAA0000000 %344 = fsub float 0x3FF00DDB40000000, %74 %345 = bitcast float %341 to i32 %346 = bitcast float %342 to i32 %347 = insertelement <2 x i32> undef, i32 %345, i32 0 %348 = insertelement <2 x i32> %347, i32 %346, i32 1 %349 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %348, <8 x i32> %66, <4 x i32> %68, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %350 = extractelement <4 x float> %349, i32 0 %351 = extractelement <4 x float> %349, i32 1 %352 = extractelement <4 x float> %349, i32 2 %353 = extractelement <4 x float> %349, i32 3 %354 = fmul float %350, 0x3FAE1E1E20000000 %355 = fadd float %354, %334 %356 = fmul float %351, 0x3FAE1E1E20000000 %357 = fadd float %356, %336 %358 = fmul float %352, 0x3FAE1E1E20000000 %359 = fadd float %358, %338 %360 = fmul float %353, 0x3FAE1E1E20000000 %361 = fadd float %360, %340 %362 = bitcast float %343 to i32 %363 = bitcast float %344 to i32 %364 = insertelement <2 x i32> undef, i32 %362, i32 0 %365 = insertelement <2 x i32> %364, i32 %363, i32 1 %366 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %365, <8 x i32> %66, <4 x i32> %68, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %367 = extractelement <4 x float> %366, i32 0 %368 = extractelement <4 x float> %366, i32 1 %369 = extractelement <4 x float> %366, i32 2 %370 = extractelement <4 x float> %366, i32 3 %371 = fmul float %367, 0x3FAE1E1E20000000 %372 = fadd float %371, %355 %373 = fmul float %368, 0x3FAE1E1E20000000 %374 = fadd float %373, %357 %375 = fmul float %369, 0x3FAE1E1E20000000 %376 = fadd float %375, %359 %377 = fmul float %370, 0x3FAE1E1E20000000 %378 = fadd float %377, %361 %379 = fadd float %73, 0x3F5191B840000000 %380 = fsub float 0x3FF0106460000000, %74 %381 = bitcast float %379 to i32 %382 = bitcast float %380 to i32 %383 = insertelement <2 x i32> undef, i32 %381, i32 0 %384 = insertelement <2 x i32> %383, i32 %382, i32 1 %385 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %384, <8 x i32> %66, <4 x i32> %68, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %386 = extractelement <4 x float> %385, i32 0 %387 = extractelement <4 x float> %385, i32 1 %388 = extractelement <4 x float> %385, i32 2 %389 = extractelement <4 x float> %385, i32 3 %390 = fmul float %386, 0x3FAE1E1E20000000 %391 = fadd float %390, %372 %392 = fmul float %387, 0x3FAE1E1E20000000 %393 = fadd float %392, %374 %394 = fmul float %388, 0x3FAE1E1E20000000 %395 = fadd float %394, %376 %396 = fmul float %389, 0x3FAE1E1E20000000 %397 = fadd float %396, %378 %398 = fmul float %397, 1.600000e+01 %399 = fmul float %398, %391 %400 = fmul float %398, %393 %401 = fmul float %398, %395 %402 = fmul float %400, %39 %403 = fmul float %401, %40 %404 = fadd float %402, %403 %405 = fadd float %404, %41 %406 = fmul float %400, %42 %407 = fmul float %401, %43 %408 = fadd float %406, %407 %409 = fadd float %408, %44 %410 = bitcast float %405 to i32 %411 = bitcast float %409 to i32 %412 = insertelement <2 x i32> undef, i32 %410, i32 0 %413 = insertelement <2 x i32> %412, i32 %411, i32 1 %414 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %413, <8 x i32> %70, <4 x i32> %72, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %415 = extractelement <4 x float> %414, i32 0 %416 = extractelement <4 x float> %414, i32 1 %417 = extractelement <4 x float> %414, i32 2 %418 = fmul float %399, %39 %419 = fmul float %401, %40 %420 = fadd float %418, %419 %421 = fadd float %420, %41 %422 = fmul float %399, %42 %423 = fmul float %401, %43 %424 = fadd float %422, %423 %425 = fadd float %424, %44 %426 = bitcast float %73 to i32 %427 = bitcast float %74 to i32 %428 = insertelement <2 x i32> undef, i32 %426, i32 0 %429 = insertelement <2 x i32> %428, i32 %427, i32 1 %430 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %429, <8 x i32> %62, <4 x i32> %64, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %431 = extractelement <4 x float> %430, i32 0 %432 = extractelement <4 x float> %430, i32 1 %433 = extractelement <4 x float> %430, i32 2 %434 = fmul float %431, 2.000000e+00 %435 = fadd float %434, -1.000000e+00 %436 = fmul float %432, 2.000000e+00 %437 = fadd float %436, -1.000000e+00 %438 = fmul float %433, 2.000000e+00 %439 = fadd float %438, -1.000000e+00 %440 = fmul float %435, %435 %441 = fmul float %437, %437 %442 = fadd float %441, %440 %443 = fmul float %439, %439 %444 = fadd float %442, %443 %445 = call float @llvm.AMDGPU.rsq.clamped.f32(float %444) %446 = fmul float %445, %437 %447 = fmul float %445, %439 %448 = call float @llvm.fabs.f32(float %446) %449 = call float @llvm.pow.f32(float %448, float 7.000000e+00) %450 = bitcast float %421 to i32 %451 = bitcast float %425 to i32 %452 = insertelement <2 x i32> undef, i32 %450, i32 0 %453 = insertelement <2 x i32> %452, i32 %451, i32 1 %454 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %453, <8 x i32> %70, <4 x i32> %72, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %455 = extractelement <4 x float> %454, i32 0 %456 = extractelement <4 x float> %454, i32 1 %457 = extractelement <4 x float> %454, i32 2 %458 = fsub float 1.000000e+00, %449 %459 = fmul float %455, %449 %460 = fmul float %415, %458 %461 = fadd float %459, %460 %462 = fsub float 1.000000e+00, %449 %463 = fmul float %456, %449 %464 = fmul float %416, %462 %465 = fadd float %463, %464 %466 = fsub float 1.000000e+00, %449 %467 = fmul float %457, %449 %468 = fmul float %417, %466 %469 = fadd float %467, %468 %470 = fmul float %400, %39 %471 = fmul float %399, %40 %472 = fadd float %470, %471 %473 = fadd float %472, %41 %474 = fmul float %400, %42 %475 = fmul float %399, %43 %476 = fadd float %474, %475 %477 = fadd float %476, %44 %478 = bitcast float %473 to i32 %479 = bitcast float %477 to i32 %480 = insertelement <2 x i32> undef, i32 %478, i32 0 %481 = insertelement <2 x i32> %480, i32 %479, i32 1 %482 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %481, <8 x i32> %70, <4 x i32> %72, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %483 = extractelement <4 x float> %482, i32 0 %484 = extractelement <4 x float> %482, i32 1 %485 = extractelement <4 x float> %482, i32 2 %486 = call float @llvm.fabs.f32(float %447) %487 = call float @llvm.pow.f32(float %486, float 7.000000e+00) %488 = fsub float 1.000000e+00, %487 %489 = fmul float %483, %487 %490 = fmul float %461, %488 %491 = fadd float %489, %490 %492 = fsub float 1.000000e+00, %487 %493 = fmul float %484, %487 %494 = fmul float %465, %492 %495 = fadd float %493, %494 %496 = fsub float 1.000000e+00, %487 %497 = fmul float %485, %487 %498 = fmul float %469, %496 %499 = fadd float %497, %498 %500 = fmul float %38, 0xBFAEB851E0000000 %501 = fadd float %500, 0x3FE1EB8520000000 %502 = fmul float %38, 0xBFAEB851E0000000 %503 = fadd float %502, 0xBF947AE140000000 %504 = fmul float %38, 0xBFBEB851E0000000 %505 = fadd float %504, 0x3FE147AE20000000 %506 = fmul float %38, 0xBFC70A3D80000000 %507 = fadd float %506, 0x3FE0A3D700000000 %508 = fsub float %505, %501 %509 = fsub float %507, %505 %510 = fdiv float 1.000000e+00, %508 %511 = fdiv float 1.000000e+00, %509 %512 = fmul float %38, 6.000000e+00 %513 = fadd float %512, 1.000000e+00 %514 = bitcast float %75 to i32 %515 = bitcast float %76 to i32 %516 = insertelement <2 x i32> undef, i32 %514, i32 0 %517 = insertelement <2 x i32> %516, i32 %515, i32 1 %518 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %517, <8 x i32> %50, <4 x i32> %52, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %519 = extractelement <4 x float> %518, i32 1 %520 = bitcast float %73 to i32 %521 = bitcast float %74 to i32 %522 = insertelement <2 x i32> undef, i32 %520, i32 0 %523 = insertelement <2 x i32> %522, i32 %521, i32 1 %524 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %523, <8 x i32> %46, <4 x i32> %48, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %525 = extractelement <4 x float> %524, i32 0 %526 = extractelement <4 x float> %524, i32 1 %527 = extractelement <4 x float> %524, i32 2 %528 = extractelement <4 x float> %524, i32 3 %529 = fmul float %519, %525 %530 = fadd float %529, %528 %531 = fmul float %530, %513 %532 = fsub float %531, %501 %533 = fmul float %530, %513 %534 = fsub float %533, %505 %535 = fmul float %530, %513 %536 = fadd float %535, 0xBFE28F5C20000000 %537 = fdiv float 1.000000e+00, %503 %538 = fmul float %536, %537 %539 = call float @llvm.AMDIL.clamp.(float %538, float 0.000000e+00, float 1.000000e+00) %540 = fmul float %536, 1.000000e+01 %541 = call float @llvm.AMDIL.clamp.(float %540, float 0.000000e+00, float 1.000000e+00) %542 = fmul float %510, %532 %543 = fmul float %511, %534 %544 = call float @llvm.AMDIL.clamp.(float %542, float 0.000000e+00, float 1.000000e+00) %545 = call float @llvm.AMDIL.clamp.(float %543, float 0.000000e+00, float 1.000000e+00) %546 = fmul float %544, -2.000000e+00 %547 = fadd float %546, 3.000000e+00 %548 = fmul float %544, %544 %549 = fmul float %548, %547 %550 = fmul float %545, -2.000000e+00 %551 = fadd float %550, 3.000000e+00 %552 = fmul float %545, %545 %553 = fmul float %552, %551 %554 = fmul float %539, -2.000000e+00 %555 = fadd float %554, 3.000000e+00 %556 = fmul float %539, %539 %557 = fmul float %556, %555 %558 = fmul float %491, %557 %559 = fmul float %495, %549 %560 = fmul float %499, %553 %561 = fsub float %29, %25 %562 = fsub float %30, %26 %563 = fsub float %31, %27 %564 = fmul float %558, %561 %565 = fadd float %564, %25 %566 = fmul float %558, %562 %567 = fadd float %566, %26 %568 = fmul float %558, %563 %569 = fadd float %568, %27 %570 = fsub float 1.000000e+00, %559 %571 = fmul float %33, %559 %572 = fmul float %565, %570 %573 = fadd float %571, %572 %574 = fsub float 1.000000e+00, %559 %575 = fmul float %34, %559 %576 = fmul float %567, %574 %577 = fadd float %575, %576 %578 = fsub float 1.000000e+00, %559 %579 = fmul float %35, %559 %580 = fmul float %569, %578 %581 = fadd float %579, %580 %582 = fsub float %28, %573 %583 = fsub float %32, %577 %584 = fsub float %36, %581 %585 = fmul float %560, %582 %586 = fadd float %585, %573 %587 = fmul float %560, %583 %588 = fadd float %587, %577 %589 = fmul float %560, %584 %590 = fadd float %589, %581 %591 = fmul float %38, 7.500000e-01 %592 = fsub float 1.000000e+00, %525 %593 = fmul float %592, %592 %594 = fmul float %593, %593 %595 = fmul float %594, 2.500000e-01 %596 = fadd float %595, %591 %597 = bitcast float %77 to i32 %598 = bitcast float %78 to i32 %599 = insertelement <2 x i32> undef, i32 %597, i32 0 %600 = insertelement <2 x i32> %599, i32 %598, i32 1 %601 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %600, <8 x i32> %58, <4 x i32> %60, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %602 = extractelement <4 x float> %601, i32 0 %603 = extractelement <4 x float> %601, i32 1 %604 = extractelement <4 x float> %601, i32 2 %605 = extractelement <4 x float> %601, i32 3 %606 = fadd float %602, -1.000000e+00 %607 = fadd float %603, -1.000000e+00 %608 = fadd float %604, -1.000000e+00 %609 = fadd float %605, -1.000000e+00 %610 = fmul float %596, %606 %611 = fadd float %610, 1.000000e+00 %612 = fmul float %596, %607 %613 = fadd float %612, 1.000000e+00 %614 = fmul float %596, %608 %615 = fadd float %614, 1.000000e+00 %616 = fmul float %596, %609 %617 = fadd float %616, 1.000000e+00 %618 = fmul float %586, %611 %619 = fmul float %588, %613 %620 = fmul float %590, %615 %621 = fmul float %618, 0x3FD3333340000000 %622 = fmul float %619, 0x3FE2E147A0000000 %623 = fadd float %622, %621 %624 = fmul float %620, 0x3FBC28F5C0000000 %625 = fadd float %623, %624 %626 = fadd float %625, 0xBFB47AE140000000 %627 = fmul float %626, 0x402C924920000000 %628 = call float @llvm.AMDIL.clamp.(float %627, float 0.000000e+00, float 1.000000e+00) %629 = fmul float %628, -2.000000e+00 %630 = fadd float %629, 3.000000e+00 %631 = fmul float %628, %628 %632 = fmul float %631, %630 %633 = fsub float 1.000000e+00, %632 %634 = fmul float %633, 0x3F9EB851E0000000 %635 = fadd float %634, %618 %636 = fmul float %633, 0x3F9EB851E0000000 %637 = fadd float %636, %619 %638 = fmul float %633, 0x3F9EB851E0000000 %639 = fadd float %638, %620 %640 = fmul float %635, %527 %641 = fmul float %637, %527 %642 = fmul float %639, %527 %643 = fmul float %640, 5.000000e-01 %644 = fadd float %643, %618 %645 = fmul float %641, 5.000000e-01 %646 = fadd float %645, %619 %647 = fmul float %642, 5.000000e-01 %648 = fadd float %647, %620 %649 = call float @llvm.AMDIL.clamp.(float %644, float 0.000000e+00, float 1.000000e+00) %650 = call float @llvm.AMDIL.clamp.(float %646, float 0.000000e+00, float 1.000000e+00) %651 = call float @llvm.AMDIL.clamp.(float %648, float 0.000000e+00, float 1.000000e+00) %652 = fmul float %526, %649 %653 = fmul float %526, %650 %654 = fmul float %526, %651 %655 = bitcast float %73 to i32 %656 = bitcast float %74 to i32 %657 = insertelement <2 x i32> undef, i32 %655, i32 0 %658 = insertelement <2 x i32> %657, i32 %656, i32 1 %659 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %658, <8 x i32> %54, <4 x i32> %56, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %660 = extractelement <4 x float> %659, i32 0 %661 = extractelement <4 x float> %659, i32 1 %662 = extractelement <4 x float> %659, i32 2 %663 = extractelement <4 x float> %659, i32 3 %664 = fmul float %526, %649 %665 = fsub float %660, %664 %666 = fmul float %526, %650 %667 = fsub float %661, %666 %668 = fmul float %526, %651 %669 = fsub float %662, %668 %670 = fmul float %526, %37 %671 = fmul float %617, %670 %672 = fmul float %541, -2.000000e+00 %673 = fadd float %672, 3.000000e+00 %674 = fmul float %541, %541 %675 = fmul float %674, %673 %676 = fmul float %673, %674 %677 = fadd float %676, 0xBFECCCCCC0000000 %678 = fmul float %673, %674 %679 = fadd float %678, 0xBF847AE140000000 %680 = fmul float %677, 1.000000e+01 %681 = fmul float %679, -1.000000e+02 %682 = fmul float %675, %665 %683 = fadd float %682, %652 %684 = fmul float %675, %667 %685 = fadd float %684, %653 %686 = fmul float %675, %669 %687 = fadd float %686, %654 %688 = call float @llvm.maxnum.f32(float %681, float 0.000000e+00) %689 = call float @llvm.AMDIL.clamp.(float %680, float 0.000000e+00, float 1.000000e+00) %690 = fmul float %688, -2.000000e+00 %691 = fadd float %690, 3.000000e+00 %692 = fmul float %688, %688 %693 = fmul float %692, %691 %694 = fmul float %693, %671 %695 = fsub float %663, %694 %696 = fmul float %693, %671 %697 = fmul float %689, -2.000000e+00 %698 = fadd float %697, 3.000000e+00 %699 = fmul float %689, %689 %700 = fmul float %699, %698 %701 = fmul float %700, %695 %702 = fadd float %701, %696 %703 = call i32 @llvm.SI.packf16(float %683, float %685) %704 = bitcast i32 %703 to float %705 = call i32 @llvm.SI.packf16(float %687, float %702) %706 = bitcast i32 %705 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %704, float %706, float %704, float %706) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s10 ; BEFC030A v_mov_b32_e32 v10, 0xba8c8dc2 ; 7E1402FF BA8C8DC2 v_mov_b32_e32 v11, 0x3d70f0f1 ; 7E1602FF 3D70F0F1 v_mov_b32_e32 v12, 0xba3b67dc ; 7E1802FF BA3B67DC v_mov_b32_e32 v13, 0xbb5db3d0 ; 7E1A02FF BB5DB3D0 v_mov_b32_e32 v14, 0xbaddb3fb ; 7E1C02FF BADDB3FB v_mov_b32_e32 v15, 0xbb2ed9f3 ; 7E1E02FF BB2ED9F3 v_mov_b32_e32 v16, 0xba800015 ; 7E2002FF BA800015 v_mov_b32_e32 v17, 0xbb832376 ; 7E2202FF BB832376 v_mov_b32_e32 v18, 0xbaffffd5 ; 7E2402FF BAFFFFD5 v_mov_b32_e32 v7, 0x7fffffff ; 7E0E02FF 7FFFFFFF v_mov_b32_e32 v6, 0x40e00000 ; 7E0C02FF 40E00000 v_mov_b32_e32 v4, 0x3f0f5c29 ; 7E0802FF 3F0F5C29 v_mov_b32_e32 v5, 0xbd75c28f ; 7E0A02FF BD75C28F s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300 s_load_dwordx4 s[16:19], s[4:5], 0x0 ; C0880500 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[20:23], s[4:5], 0x4 ; C08A0504 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 s_load_dwordx4 s[0:3], s[4:5], 0x8 ; C0800508 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v19, v0, 0, 1, [m0] ; C84C0400 v_interp_p2_f32 v19, [v19], v1, 0, 1, [m0] ; C84D0401 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[12:15], 0x0 ; C2040D00 v_interp_p1_f32 v20, v0, 1, 1, [m0] ; C8500500 v_interp_p2_f32 v20, [v20], v1, 1, 1, [m0] ; C8510501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 s_buffer_load_dword s11, s[12:15], 0x29 ; C2058D29 s_buffer_load_dword s33, s[12:15], 0x2b ; C2108D2B s_buffer_load_dword s10, s[12:15], 0x2c ; C2050D2C s_buffer_load_dword s9, s[12:15], 0x2d ; C2048D2D s_buffer_load_dword s32, s[12:15], 0x2f ; C2100D2F s_load_dwordx8 s[24:31], s[6:7], 0x0 ; C0CC0700 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700 v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701 v_add_f32_e32 v0, v2, v10 ; 06001502 v_sub_f32_e32 v1, 0x3f7ef9b9, v3 ; 080206FF 3F7EF9B9 v_add_f32_e32 v21, 0x3affffd5, v2 ; 062A04FF 3AFFFFD5 v_sub_f32_e32 v22, 0x3f7f224c, v3 ; 082C06FF 3F7F224C v_add_f32_e32 v23, 0x3b832376, v2 ; 062E04FF 3B832376 v_sub_f32_e32 v24, 0x3f7fb9b9, v3 ; 083006FF 3F7FB9B9 v_add_f32_e32 v25, v2, v12 ; 06321902 v_sub_f32_e32 v26, 0x3f7f5126, v3 ; 083406FF 3F7F5126 v_add_f32_e32 v27, 0x3a800015, v2 ; 063604FF 3A800015 v_sub_f32_e32 v28, 0x3f7f9126, v3 ; 083806FF 3F7F9126 v_add_f32_e32 v29, 0x3b2ed9f3, v2 ; 063A04FF 3B2ED9F3 v_sub_f32_e32 v30, 0x3f7fd126, v3 ; 083C06FF 3F7FD126 v_add_f32_e32 v12, v2, v13 ; 06181B02 v_sub_f32_e32 v13, 0x3f7f8000, v3 ; 081A06FF 3F7F8000 v_add_f32_e32 v31, v2, v14 ; 063E1D02 v_sub_f32_e32 v32, 0x3f7fc000, v3 ; 084006FF 3F7FC000 v_add_f32_e32 v33, 0, v2 ; 06420480 v_sub_f32_e32 v34, 1.0, v3 ; 084406F2 v_add_f32_e32 v35, 0x3addb3fb, v2 ; 064604FF 3ADDB3FB v_sub_f32_e32 v36, 0x3f802000, v3 ; 084806FF 3F802000 v_add_f32_e32 v37, 0x3b5db3d0, v2 ; 064A04FF 3B5DB3D0 v_sub_f32_e32 v38, 0x3f804000, v3 ; 084C06FF 3F804000 v_add_f32_e32 v14, v2, v15 ; 061C1F02 v_sub_f32_e32 v15, 0x3f80176d, v3 ; 081E06FF 3F80176D v_add_f32_e32 v39, v2, v16 ; 064E2102 v_sub_f32_e32 v40, 0x3f80376d, v3 ; 085006FF 3F80376D v_add_f32_e32 v41, 0x3a3b67dc, v2 ; 065204FF 3A3B67DC s_load_dwordx8 s[48:55], s[6:7], 0x28 ; C0D80728 s_load_dwordx4 s[56:59], s[4:5], 0x14 ; C09C0514 v_sub_f32_e32 v42, 0x3f80576d, v3 ; 085406FF 3F80576D v_add_f32_e32 v16, v2, v17 ; 06202302 v_sub_f32_e32 v17, 0x3f802323, v3 ; 082206FF 3F802323 v_add_f32_e32 v43, v2, v18 ; 06562502 v_sub_f32_e32 v44, 0x3f806eda, v3 ; 085806FF 3F806EDA v_add_f32_e32 v45, 0x3a8c8dc2, v2 ; 065A04FF 3A8C8DC2 v_sub_f32_e32 v46, 0x3f808323, v3 ; 085C06FF 3F808323 s_load_dwordx4 s[36:39], s[4:5], 0x18 ; C0920518 s_load_dwordx8 s[40:47], s[6:7], 0x30 ; C0D40730 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[47:50], 15, 0, 0, 0, 0, 0, 0, 0, v[21:22], s[48:55], s[56:59] ; F0800F00 01CC2F15 image_sample v[51:54], 15, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[48:55], s[56:59] ; F0800F00 01CC3300 image_sample v[21:24], 15, 0, 0, 0, 0, 0, 0, 0, v[23:24], s[48:55], s[56:59] ; F0800F00 01CC1517 image_sample v[55:58], 15, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[48:55], s[56:59] ; F0800F00 01CC3719 image_sample v[25:28], 15, 0, 0, 0, 0, 0, 0, 0, v[27:28], s[48:55], s[56:59] ; F0800F00 01CC191B image_sample v[59:62], 15, 0, 0, 0, 0, 0, 0, 0, v[29:30], s[48:55], s[56:59] ; F0800F00 01CC3B1D image_sample v[63:66], 15, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[48:55], s[56:59] ; F0800F00 01CC3F0C image_sample v[29:32], 15, 0, 0, 0, 0, 0, 0, 0, v[31:32], s[48:55], s[56:59] ; F0800F00 01CC1D1F image_sample v[67:70], 15, 0, 0, 0, 0, 0, 0, 0, v[33:34], s[48:55], s[56:59] ; F0800F00 01CC4321 image_sample v[33:36], 15, 0, 0, 0, 0, 0, 0, 0, v[35:36], s[48:55], s[56:59] ; F0800F00 01CC2123 image_sample v[71:74], 15, 0, 0, 0, 0, 0, 0, 0, v[37:38], s[48:55], s[56:59] ; F0800F00 01CC4725 image_sample v[12:15], 15, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[48:55], s[56:59] ; F0800F00 01CC0C0E image_sample v[37:40], 15, 0, 0, 0, 0, 0, 0, 0, v[39:40], s[48:55], s[56:59] ; F0800F00 01CC2527 image_sample v[75:78], 15, 0, 0, 0, 0, 0, 0, 0, v[41:42], s[48:55], s[56:59] ; F0800F00 01CC4B29 image_sample v[79:82], 15, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[48:55], s[56:59] ; F0800F00 01CC4F10 image_sample v[41:44], 15, 0, 0, 0, 0, 0, 0, 0, v[43:44], s[48:55], s[56:59] ; F0800F00 01CC292B image_sample v[83:86], 15, 0, 0, 0, 0, 0, 0, 0, v[45:46], s[48:55], s[56:59] ; F0800F00 01CC532D v_mov_b32_e32 v0, s33 ; 7E000221 s_waitcnt ; BF8C077F v_mul_f32_e32 v1, v11, v49 ; 1002630B v_mac_f32_e32 v1, v11, v53 ; 3E026B0B v_mul_f32_e32 v10, v11, v50 ; 1014650B v_mac_f32_e32 v10, v11, v54 ; 3E146D0B s_waitcnt vmcnt(14) ; BF8C077E v_mac_f32_e32 v1, v11, v23 ; 3E022F0B v_mac_f32_e32 v10, v11, v24 ; 3E14310B s_waitcnt vmcnt(13) ; BF8C077D v_mac_f32_e32 v1, v11, v57 ; 3E02730B v_mac_f32_e32 v10, v11, v58 ; 3E14750B s_waitcnt vmcnt(12) ; BF8C077C v_mac_f32_e32 v1, v11, v27 ; 3E02370B v_mac_f32_e32 v10, v11, v28 ; 3E14390B s_waitcnt vmcnt(11) ; BF8C077B v_mac_f32_e32 v1, v11, v61 ; 3E027B0B v_mac_f32_e32 v10, v11, v62 ; 3E147D0B s_waitcnt vmcnt(10) ; BF8C077A v_mac_f32_e32 v1, v11, v65 ; 3E02830B v_mac_f32_e32 v10, v11, v66 ; 3E14850B s_waitcnt vmcnt(9) ; BF8C0779 v_mac_f32_e32 v1, v11, v31 ; 3E023F0B v_mac_f32_e32 v10, v11, v32 ; 3E14410B s_waitcnt vmcnt(8) ; BF8C0778 v_mac_f32_e32 v1, v11, v69 ; 3E028B0B v_mac_f32_e32 v10, v11, v70 ; 3E148D0B s_waitcnt vmcnt(7) ; BF8C0777 v_mac_f32_e32 v1, v11, v35 ; 3E02470B v_mac_f32_e32 v10, v11, v36 ; 3E14490B s_waitcnt vmcnt(6) ; BF8C0776 v_mac_f32_e32 v1, v11, v73 ; 3E02930B v_mac_f32_e32 v10, v11, v74 ; 3E14950B s_waitcnt vmcnt(5) ; BF8C0775 v_mac_f32_e32 v1, v11, v14 ; 3E021D0B v_mac_f32_e32 v10, v11, v15 ; 3E141F0B s_waitcnt vmcnt(4) ; BF8C0774 v_mac_f32_e32 v1, v11, v39 ; 3E024F0B v_mac_f32_e32 v10, v11, v40 ; 3E14510B s_waitcnt vmcnt(3) ; BF8C0773 v_mac_f32_e32 v1, v11, v77 ; 3E029B0B v_mac_f32_e32 v10, v11, v78 ; 3E149D0B s_waitcnt vmcnt(2) ; BF8C0772 v_mac_f32_e32 v1, v11, v81 ; 3E02A30B v_mac_f32_e32 v10, v11, v82 ; 3E14A50B s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v1, v11, v43 ; 3E02570B v_mac_f32_e32 v10, v11, v44 ; 3E14590B s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v1, v11, v85 ; 3E02AB0B v_mac_f32_e32 v10, v11, v86 ; 3E14AD0B v_mul_f32_e32 v10, 0x41800000, v10 ; 101414FF 41800000 v_mul_f32_e32 v14, v1, v10 ; 101C1501 v_mad_f32 v15, s11, v14, v0 ; D282000F 04021C0B v_mov_b32_e32 v1, s32 ; 7E020220 v_mul_f32_e32 v16, v11, v48 ; 1020610B v_mac_f32_e32 v16, v11, v52 ; 3E20690B v_mac_f32_e32 v16, v11, v22 ; 3E202D0B v_mac_f32_e32 v16, v11, v56 ; 3E20710B v_mac_f32_e32 v16, v11, v26 ; 3E20350B v_mac_f32_e32 v16, v11, v60 ; 3E20790B v_mac_f32_e32 v16, v11, v64 ; 3E20810B v_mac_f32_e32 v16, v11, v30 ; 3E203D0B v_mac_f32_e32 v16, v11, v68 ; 3E20890B v_mac_f32_e32 v16, v11, v34 ; 3E20450B v_mac_f32_e32 v16, v11, v72 ; 3E20910B s_buffer_load_dword s32, s[12:15], 0x28 ; C2100D28 v_mac_f32_e32 v16, v11, v13 ; 3E201B0B v_mac_f32_e32 v16, v11, v38 ; 3E204D0B s_load_dwordx4 s[48:51], s[4:5], 0x10 ; C0980510 s_load_dwordx8 s[52:59], s[6:7], 0x20 ; C0DA0720 v_mac_f32_e32 v16, v11, v76 ; 3E20990B v_mac_f32_e32 v16, v11, v80 ; 3E20A10B v_mac_f32_e32 v16, v11, v42 ; 3E20550B v_mac_f32_e32 v16, v11, v84 ; 3E20A90B v_mul_f32_e32 v13, v16, v10 ; 101A1510 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v17, s32, v13, v15 ; D2820011 043E1A20 v_mad_f32 v16, s9, v14, v1 ; D2820010 04061C09 v_mad_f32 v18, s10, v13, v16 ; D2820012 04421A0A image_sample v[22:24], 7, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[40:47], s[36:39] ; F0800700 012A1611 image_sample v[26:28], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[52:59], s[48:51] ; F0800700 018D1A02 v_mul_f32_e32 v14, v11, v47 ; 101C5F0B v_mac_f32_e32 v14, v11, v51 ; 3E1C670B v_mac_f32_e32 v14, v11, v21 ; 3E1C2B0B v_mac_f32_e32 v14, v11, v55 ; 3E1C6F0B v_mac_f32_e32 v14, v11, v25 ; 3E1C330B v_mac_f32_e32 v14, v11, v59 ; 3E1C770B v_mac_f32_e32 v14, v11, v63 ; 3E1C7F0B v_mac_f32_e32 v14, v11, v29 ; 3E1C3B0B v_mac_f32_e32 v14, v11, v67 ; 3E1C870B v_mac_f32_e32 v14, v11, v33 ; 3E1C430B v_mac_f32_e32 v14, v11, v71 ; 3E1C8F0B v_mac_f32_e32 v14, v11, v12 ; 3E1C190B v_mac_f32_e32 v14, v11, v37 ; 3E1C4B0B v_mac_f32_e32 v14, v11, v75 ; 3E1C970B v_mac_f32_e32 v14, v11, v79 ; 3E1C9F0B v_mac_f32_e32 v14, v11, v41 ; 3E1C530B v_mac_f32_e32 v14, v11, v83 ; 3E1CA70B v_mul_f32_e32 v10, v14, v10 ; 1014150E v_mac_f32_e32 v15, s32, v10 ; 3E1E1420 v_mac_f32_e32 v0, s32, v13 ; 3E001A20 v_mac_f32_e32 v0, s11, v10 ; 3E00140B v_mac_f32_e32 v16, s10, v10 ; 3E20140A v_mac_f32_e32 v1, s10, v13 ; 3E021A0A v_mac_f32_e32 v1, s9, v10 ; 3E021409 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v10, v26, v26 ; 0614351A v_mad_f32 v11, 2.0, v26, -1.0 ; D282000B 03CE34F4 v_mad_f32 v10, v10, v11, -v11 ; D282000A 842E170A v_mad_f32 v11, 2.0, v27, -1.0 ; D282000B 03CE36F4 v_mac_f32_e32 v10, v11, v11 ; 3E14170B v_mad_f32 v11, 2.0, v28, -1.0 ; D282000B 03CE38F4 v_mac_f32_e32 v10, v11, v11 ; 3E14170B v_rsq_clamp_f32_e32 v10, v10 ; 7E14590A s_load_dwordx8 s[52:59], s[6:7], 0x8 ; C0DA0708 image_sample v[11:13], 7, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[40:47], s[36:39] ; F0800700 012A0B0F image_sample v[14:16], 7, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[40:47], s[36:39] ; F0800700 012A0E00 v_add_f32_e32 v0, v27, v27 ; 0600371B v_mad_f32 v0, v0, v10, -v10 ; D2820000 842A1500 v_and_b32_e32 v0, v0, v7 ; 36000F00 v_log_f32_e32 v0, v0 ; 7E004F00 s_load_dwordx4 s[40:43], s[4:5], 0xc ; C094050C s_load_dwordx8 s[32:39], s[6:7], 0x10 ; C0D00710 s_load_dwordx8 s[44:51], s[6:7], 0x18 ; C0D60718 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v1, 2, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[52:59], s[20:23] ; F0800200 00AD0113 image_sample v[17:20], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[16:19] ; F0800F00 00861102 v_mul_legacy_f32_e32 v0, v6, v0 ; 0E000106 v_exp_f32_e32 v0, v0 ; 7E004B00 v_mad_f32 v21, -v0, v22, v22 ; D2820015 245A2D00 v_mad_f32 v22, -v0, v23, v23 ; D2820016 245E2F00 v_mad_f32 v23, -v0, v24, v24 ; D2820017 24623100 s_waitcnt vmcnt(3) ; BF8C0773 v_mac_f32_e32 v21, v0, v11 ; 3E2A1700 v_mac_f32_e32 v22, v0, v12 ; 3E2C1900 v_mac_f32_e32 v23, v0, v13 ; 3E2E1B00 v_add_f32_e32 v0, v28, v28 ; 0600391C image_sample v[24:27], 15, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[44:51], s[40:43] ; F0800F00 014B1808 s_buffer_load_dword s4, s[12:15], 0x1 ; C2020D01 s_buffer_load_dword s5, s[12:15], 0x2 ; C2028D02 s_buffer_load_dword s6, s[12:15], 0x3 ; C2030D03 s_buffer_load_dword s7, s[12:15], 0x4 ; C2038D04 s_buffer_load_dword s9, s[12:15], 0x5 ; C2048D05 s_buffer_load_dword s10, s[12:15], 0x6 ; C2050D06 s_buffer_load_dword s11, s[12:15], 0xa ; C2058D0A s_buffer_load_dword s16, s[12:15], 0xb ; C2080D0B s_buffer_load_dword s17, s[12:15], 0xe ; C2088D0E s_buffer_load_dword s18, s[12:15], 0xf ; C2090D0F s_buffer_load_dword s19, s[12:15], 0x7 ; C2098D07 s_buffer_load_dword s20, s[12:15], 0x8 ; C20A0D08 s_buffer_load_dword s12, s[12:15], 0x9 ; C2060D09 v_mad_f32 v0, v0, v10, -v10 ; D2820000 842A1500 v_and_b32_e32 v0, v0, v7 ; 36000F00 v_log_f32_e32 v0, v0 ; 7E004F00 v_mov_b32_e32 v7, s8 ; 7E0E0208 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v7, s7, v7 ; 080E0E07 v_mov_b32_e32 v8, s4 ; 7E100204 v_sub_f32_e32 v8, s9, v8 ; 08101009 v_mov_b32_e32 v9, s5 ; 7E120205 v_sub_f32_e32 v9, s10, v9 ; 0812120A v_mul_legacy_f32_e32 v0, v6, v0 ; 0E000106 v_exp_f32_e32 v0, v0 ; 7E004B00 v_mad_f32 v6, -v0, v21, v21 ; D2820006 24562B00 s_waitcnt vmcnt(3) ; BF8C0773 v_mac_f32_e32 v6, v0, v14 ; 3E0C1D00 v_mad_f32 v10, -v0, v22, v22 ; D282000A 245A2D00 v_mac_f32_e32 v10, v0, v15 ; 3E141F00 v_mad_f32 v11, -v0, v23, v23 ; D282000B 245E2F00 v_mac_f32_e32 v11, v0, v16 ; 3E162100 v_mov_b32_e32 v0, 0xbca3d70a ; 7E0002FF BCA3D70A v_mac_f32_e32 v0, s18, v5 ; 3E000A12 v_mov_b32_e32 v12, 0x40c00000 ; 7E1802FF 40C00000 v_rcp_f32_e32 v0, v0 ; 7E005500 v_mad_f32 v12, v12, s18, 1.0 ; D282000C 03C8250C s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v20, v17, v1 ; 3E280311 v_madak_f32_e32 v1, v20, v12, 0xbf147ae1 ; 42021914 BF147AE1 v_mul_f32_e32 v0, v0, v1 ; 10000300 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mov_b32_e32 v13, 0x40400000 ; 7E1A02FF 40400000 v_mad_f32 v14, -2.0, v0, v13 ; D282000E 043600F5 v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mul_f32_e32 v0, v14, v0 ; 1000010E v_mul_f32_e32 v0, v0, v6 ; 10000D00 v_mad_f32 v6, v0, v7, s8 ; D2820006 00220F00 v_mac_f32_e32 v4, s18, v5 ; 3E080A12 v_mov_b32_e32 v5, 0x3f0a3d71 ; 7E0A02FF 3F0A3D71 v_mov_b32_e32 v7, 0xbdf5c28f ; 7E0E02FF BDF5C28F v_mac_f32_e32 v5, s18, v7 ; 3E0A0E12 v_subrev_f32_e32 v7, v4, v5 ; 0A0E0B04 v_rcp_f32_e32 v7, v7 ; 7E0E5507 v_mad_f32 v8, v0, v8, s4 ; D2820008 00121100 v_mad_f32 v0, v0, v9, s5 ; D2820000 00161300 v_mad_f32 v4, v20, v12, -v4 ; D2820004 84121914 v_mul_f32_e32 v4, v4, v7 ; 10080F04 v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 v_mad_f32 v7, -2.0, v4, v13 ; D2820007 043608F5 v_mul_f32_e32 v4, v4, v4 ; 10080904 v_mul_f32_e32 v4, v7, v4 ; 10080907 v_mad_f32 v7, -v10, v4, 1.0 ; D2820007 23CA090A v_mad_f32 v9, -v6, v7, s6 ; D2820009 201A0F06 v_mul_f32_e32 v4, v4, v10 ; 10081504 v_mad_f32 v6, -v4, v6, v6 ; D2820006 241A0D04 v_mac_f32_e32 v6, s20, v4 ; 3E0C0814 v_mad_f32 v9, -s20, v4, v9 ; D2820009 24260814 v_mad_f32 v10, -v8, v7, s19 ; D282000A 204E0F08 v_mad_f32 v8, -v4, v8, v8 ; D2820008 24221104 v_mac_f32_e32 v8, s12, v4 ; 3E10080C v_mad_f32 v10, -s12, v4, v10 ; D282000A 242A080C v_mad_f32 v7, -v0, v7, s16 ; D2820007 20420F00 v_mad_f32 v0, -v4, v0, v0 ; D2820000 24020104 v_mac_f32_e32 v0, s11, v4 ; 3E00080B v_mad_f32 v4, -s11, v4, v7 ; D2820004 241E080B v_mov_b32_e32 v7, 0x3f051eb8 ; 7E0E02FF 3F051EB8 v_mov_b32_e32 v14, 0xbe3851ec ; 7E1C02FF BE3851EC v_mac_f32_e32 v7, s18, v14 ; 3E0E1C12 image_sample v[28:31], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[32:39], s[0:3] ; F0800F00 00081C02 v_subrev_f32_e32 v2, v5, v7 ; 0A040F05 v_rcp_f32_e32 v2, v2 ; 7E045502 v_mad_f32 v3, v20, v12, -v5 ; D2820003 84161914 v_mov_b32_e32 v5, 0x3f400000 ; 7E0A02FF 3F400000 v_mul_f32_e32 v5, s18, v5 ; 100A0A12 v_mul_f32_e32 v2, v3, v2 ; 10040503 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_mad_f32 v3, -2.0, v2, v13 ; D2820003 043604F5 v_mul_f32_e32 v2, v2, v2 ; 10040502 v_mul_f32_e32 v2, v3, v2 ; 10040503 v_mul_f32_e32 v3, s17, v18 ; 10062411 v_mul_f32_e32 v2, v2, v11 ; 10041702 v_mac_f32_e32 v6, v9, v2 ; 3E0C0509 v_mov_b32_e32 v7, 0x41200000 ; 7E0E02FF 41200000 v_mul_f32_e32 v1, v7, v1 ; 10020307 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_mac_f32_e32 v8, v10, v2 ; 3E10050A v_mac_f32_e32 v0, v4, v2 ; 3E000504 v_sub_f32_e32 v2, 1.0, v17 ; 080422F2 v_mad_f32 v2, -v17, v2, v2 ; D2820002 240A0511 v_mul_f32_e32 v2, v2, v2 ; 10040502 v_madmk_f32_e32 v2, v2, v5, 0x3e800000 ; 40040B02 3E800000 s_waitcnt vmcnt(1) ; BF8C0771 v_mad_f32 v4, v24, v2, -v2 ; D2820004 840A0518 v_mad_f32 v5, v25, v2, -v2 ; D2820005 840A0519 v_mad_f32 v9, v26, v2, -v2 ; D2820009 840A051A v_mad_f32 v2, v27, v2, -v2 ; D2820002 840A051B v_mac_f32_e32 v6, v6, v4 ; 3E0C0906 v_mac_f32_e32 v8, v8, v5 ; 3E100B08 v_mac_f32_e32 v0, v0, v9 ; 3E001300 v_mul_f32_e32 v4, 0x3e99999a, v6 ; 10080CFF 3E99999A v_madmk_f32_e32 v4, v8, v4, 0x3f170a3d ; 40080908 3F170A3D v_madmk_f32_e32 v4, v0, v4, 0x3de147ae ; 40080900 3DE147AE v_mov_b32_e32 v5, 0xbda3d70a ; 7E0A02FF BDA3D70A v_add_f32_e32 v4, v4, v5 ; 06080B04 v_mul_f32_e32 v4, 0x41649249, v4 ; 100808FF 41649249 v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 v_mad_f32 v5, -2.0, v4, v13 ; D2820005 043608F5 v_mul_f32_e32 v4, v4, v4 ; 10080904 v_mad_f32 v4, -v4, v5, 1.0 ; D2820004 23CA0B04 v_mov_b32_e32 v5, 0x3cf5c28f ; 7E0A02FF 3CF5C28F v_mad_f32 v9, v5, v4, v6 ; D2820009 041A0905 v_mad_f32 v10, v5, v4, v8 ; D282000A 04220905 v_mad_f32 v4, v5, v4, v0 ; D2820004 04020905 v_mul_f32_e32 v5, v19, v9 ; 100A1313 v_mul_f32_e32 v9, v19, v10 ; 10121513 v_mul_f32_e32 v4, v19, v4 ; 10080913 v_mac_f32_e32 v6, 0.5, v5 ; 3E0C0AF0 v_mac_f32_e32 v8, 0.5, v9 ; 3E1012F0 v_mac_f32_e32 v0, 0.5, v4 ; 3E0008F0 v_add_f32_e64 v4, 0, v6 clamp ; D2060804 00020C80 v_add_f32_e64 v5, 0, v8 clamp ; D2060805 00021080 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mul_f32_e32 v6, v4, v18 ; 100C2504 v_mul_f32_e32 v8, v5, v18 ; 10102505 v_mul_f32_e32 v9, v0, v18 ; 10122500 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v4, -v18, v4, v28 ; D2820004 24720912 v_mad_f32 v5, -v18, v5, v29 ; D2820005 24760B12 v_mad_f32 v0, -v18, v0, v30 ; D2820000 247A0112 v_mac_f32_e32 v3, v3, v2 ; 3E060503 v_mad_f32 v2, -2.0, v1, v13 ; D2820002 043602F5 v_mul_f32_e32 v1, v1, v1 ; 10020301 v_mul_f32_e32 v10, v2, v1 ; 10140302 v_madak_f32_e32 v11, v1, v2, 0xbf666666 ; 42160501 BF666666 v_madak_f32_e32 v1, v1, v2, 0xbc23d70a ; 42020501 BC23D70A v_mul_f32_e32 v2, v7, v11 ; 10041707 v_mov_b32_e32 v7, 0xc2c80000 ; 7E0E02FF C2C80000 v_mul_f32_e32 v1, v1, v7 ; 10020F01 v_mac_f32_e32 v6, v4, v10 ; 3E0C1504 v_mac_f32_e32 v8, v5, v10 ; 3E101505 v_mac_f32_e32 v9, v0, v10 ; 3E121500 v_max_f32_e32 v0, 0, v1 ; 20000280 v_add_f32_e64 v1, 0, v2 clamp ; D2060801 00020480 v_mad_f32 v2, -2.0, v0, v13 ; D2820002 043600F5 v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mul_f32_e32 v0, v2, v0 ; 10000102 v_mul_f32_e32 v2, v3, v0 ; 10040103 v_mad_f32 v0, -v0, v3, v31 ; D2820000 247E0700 v_mac_f32_e32 v13, -2.0, v1 ; 3E1A02F5 v_mul_f32_e32 v1, v1, v1 ; 10020301 v_mul_f32_e32 v1, v13, v1 ; 1002030D v_mac_f32_e32 v2, v0, v1 ; 3E040300 v_cvt_pkrtz_f16_f32_e32 v0, v6, v8 ; 5E001106 v_cvt_pkrtz_f16_f32_e32 v1, v9, v2 ; 5E020509 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 88 Code Size: 2180 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL CONST[0..5] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[0], CONST[2] 2: DP4 TEMP[1].x, IN[0], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[0], CONST[4] 5: DP4 TEMP[2].x, IN[0], CONST[5] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[1], CONST[0] 9: MOV OUT[2], CONST[1] 10: MOV OUT[0], TEMP[0] 11: MOV OUT[3], TEMP[1] 12: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 12) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 16) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 20) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 24) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 28) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 32) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 36) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 40) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 44) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 48) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 52) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 56) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 60) %30 = call float @llvm.SI.load.const(<16 x i8> %13, i32 64) %31 = call float @llvm.SI.load.const(<16 x i8> %13, i32 68) %32 = call float @llvm.SI.load.const(<16 x i8> %13, i32 72) %33 = call float @llvm.SI.load.const(<16 x i8> %13, i32 76) %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 80) %35 = call float @llvm.SI.load.const(<16 x i8> %13, i32 84) %36 = call float @llvm.SI.load.const(<16 x i8> %13, i32 88) %37 = call float @llvm.SI.load.const(<16 x i8> %13, i32 92) %38 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = add i32 %5, %8 %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %40) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = fmul float %42, %22 %47 = fmul float %43, %23 %48 = fadd float %46, %47 %49 = fmul float %44, %24 %50 = fadd float %48, %49 %51 = fmul float %45, %25 %52 = fadd float %50, %51 %53 = fmul float %42, %26 %54 = fmul float %43, %27 %55 = fadd float %53, %54 %56 = fmul float %44, %28 %57 = fadd float %55, %56 %58 = fmul float %45, %29 %59 = fadd float %57, %58 %60 = fmul float %42, %30 %61 = fmul float %43, %31 %62 = fadd float %60, %61 %63 = fmul float %44, %32 %64 = fadd float %62, %63 %65 = fmul float %45, %33 %66 = fadd float %64, %65 %67 = fmul float %42, %34 %68 = fmul float %43, %35 %69 = fadd float %67, %68 %70 = fmul float %44, %36 %71 = fadd float %69, %70 %72 = fmul float %45, %37 %73 = fadd float %71, %72 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %14, float %15, float %16, float %17) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %18, float %19, float %20, float %21) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %66, float %73, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %52, float %59, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s19, s[0:3], 0xf ; C209810F s_buffer_load_dword s20, s[0:3], 0x10 ; C20A0110 s_buffer_load_dword s21, s[0:3], 0x11 ; C20A8111 s_buffer_load_dword s22, s[0:3], 0x14 ; C20B0114 s_buffer_load_dword s23, s[0:3], 0x15 ; C20B8115 s_buffer_load_dword s24, s[0:3], 0x12 ; C20C0112 s_buffer_load_dword s25, s[0:3], 0x13 ; C20C8113 s_buffer_load_dword s26, s[0:3], 0x16 ; C20D0116 s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s4 ; 7E080204 v_mov_b32_e32 v5, s5 ; 7E0A0205 v_mov_b32_e32 v6, s6 ; 7E0C0206 v_mov_b32_e32 v7, s7 ; 7E0E0207 exp 15, 32, 0, 0, 0, v4, v5, v6, v7 ; F800020F 07060504 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v4, s8 ; 7E080208 v_mov_b32_e32 v5, s9 ; 7E0A0209 v_mov_b32_e32 v6, s10 ; 7E0C020A v_mov_b32_e32 v7, s11 ; 7E0E020B exp 15, 33, 0, 0, 0, v4, v5, v6, v7 ; F800021F 07060504 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mul_f32_e32 v4, s13, v1 ; 1008020D v_mul_f32_e32 v5, s17, v1 ; 100A0211 v_mul_f32_e32 v6, s21, v1 ; 100C0215 v_mul_f32_e32 v1, s23, v1 ; 10020217 v_mac_f32_e32 v4, s12, v0 ; 3E08000C v_mac_f32_e32 v5, s16, v0 ; 3E0A0010 v_mac_f32_e32 v6, s20, v0 ; 3E0C0014 v_mac_f32_e32 v1, s22, v0 ; 3E020016 v_mac_f32_e32 v4, s14, v2 ; 3E08040E v_mac_f32_e32 v5, s18, v2 ; 3E0A0412 v_mac_f32_e32 v6, s24, v2 ; 3E0C0418 v_mac_f32_e32 v1, s26, v2 ; 3E02041A v_mac_f32_e32 v4, s15, v3 ; 3E08060F v_mac_f32_e32 v5, s19, v3 ; 3E0A0613 v_mac_f32_e32 v6, s25, v3 ; 3E0C0619 v_mac_f32_e32 v1, s0, v3 ; 3E020600 v_mov_b32_e32 v0, 1.0 ; 7E0002F2 exp 15, 34, 0, 0, 0, v6, v1, v0, v0 ; F800022F 00000106 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 0 ; 7E020280 exp 15, 12, 0, 1, 0, v4, v5, v1, v0 ; F80008CF 00010504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 8 Code Size: 280 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[0] DCL CONST[2] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[2].xyxx 1: MOV TEMP[1], IMM[0].xxxx 2: MOV TEMP[2], IMM[0].xxxx 3: MOV TEMP[3].x, -CONST[0].xxxx 4: BGNLOOP :0 5: FSLT TEMP[4].x, CONST[0].xxxx, TEMP[3].xxxx 6: UIF TEMP[4].xxxx :0 7: BRK 8: ENDIF 9: MAD TEMP[5].xy, TEMP[3].xxxx, CONST[2].xyyy, TEMP[0].xyyy 10: MOV TEMP[6].xy, TEMP[5].xyyy 11: MOV TEMP[6].w, IMM[0].xxxx 12: TXB TEMP[7], TEMP[6], SAMP[0], 2D 13: ADD TEMP[2], TEMP[2], TEMP[7] 14: ADD TEMP[3].x, TEMP[3].xxxx, IMM[0].yyyy 15: ENDLOOP :0 16: MUL TEMP[1], TEMP[2], CONST[0].wwww 17: MOV TEMP[0].w, IMM[0].yyyy 18: MOV TEMP[0].xyz, IN[1].xyzx 19: MUL TEMP[0], TEMP[1], TEMP[0] 20: MUL TEMP[1], TEMP[0], IN[1].wwww 21: MAD TEMP[1], IN[0], TEMP[1].wwww, TEMP[1] 22: MOV OUT[0], TEMP[1] 23: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0) %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 12) %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 32) %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 36) %29 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %30 = load <8 x i32>, <8 x i32> addrspace(2)* %29, align 32, !tbaa !0 %31 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %32 = load <4 x i32>, <4 x i32> addrspace(2)* %31, align 16, !tbaa !0 %33 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %34 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %35 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %36 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %37 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %38 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %39 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) %40 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %41 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) %42 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) %43 = fsub float -0.000000e+00, %25 br label %LOOP LOOP: ; preds = %ENDIF, %main_body %temp8.0 = phi float [ 0.000000e+00, %main_body ], [ %81, %ENDIF ] %temp9.0 = phi float [ 0.000000e+00, %main_body ], [ %82, %ENDIF ] %temp10.0 = phi float [ 0.000000e+00, %main_body ], [ %83, %ENDIF ] %temp11.0 = phi float [ 0.000000e+00, %main_body ], [ %84, %ENDIF ] %temp12.0 = phi float [ %43, %main_body ], [ %85, %ENDIF ] %44 = fcmp olt float %25, %temp12.0 br i1 %44, label %IF, label %ENDIF IF: ; preds = %LOOP %45 = fmul float %temp8.0, %26 %46 = fmul float %temp9.0, %26 %47 = fmul float %temp10.0, %26 %48 = fmul float %temp11.0, %26 %49 = fmul float %45, %37 %50 = fmul float %46, %38 %51 = fmul float %47, %39 %52 = fmul float %49, %40 %53 = fmul float %50, %40 %54 = fmul float %51, %40 %55 = fmul float %48, %40 %56 = fmul float %33, %55 %57 = fadd float %56, %52 %58 = fmul float %34, %55 %59 = fadd float %58, %53 %60 = fmul float %35, %55 %61 = fadd float %60, %54 %62 = fmul float %36, %55 %63 = fadd float %62, %55 %64 = call i32 @llvm.SI.packf16(float %57, float %59) %65 = bitcast i32 %64 to float %66 = call i32 @llvm.SI.packf16(float %61, float %63) %67 = bitcast i32 %66 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %65, float %67, float %65, float %67) ret void ENDIF: ; preds = %LOOP %68 = fmul float %temp12.0, %27 %69 = fadd float %68, %41 %70 = fmul float %temp12.0, %28 %71 = fadd float %70, %42 %72 = bitcast float %69 to i32 %73 = bitcast float %71 to i32 %74 = insertelement <4 x i32> , i32 %72, i32 1 %75 = insertelement <4 x i32> %74, i32 %73, i32 2 %76 = call <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32> %75, <8 x i32> %30, <4 x i32> %32, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %77 = extractelement <4 x float> %76, i32 0 %78 = extractelement <4 x float> %76, i32 1 %79 = extractelement <4 x float> %76, i32 2 %80 = extractelement <4 x float> %76, i32 3 %81 = fadd float %temp8.0, %77 %82 = fadd float %temp9.0, %78 %83 = fadd float %temp10.0, %79 %84 = fadd float %temp11.0, %80 %85 = fadd float %temp12.0, 1.000000e+00 br label %LOOP } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[16:19], s[2:3], 0x0 ; C0880300 s_mov_b32 m0, s10 ; BEFC030A s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s1, s[16:19], 0x0 ; C2009100 s_buffer_load_dword s0, s[16:19], 0x3 ; C2001103 s_buffer_load_dword s2, s[16:19], 0x8 ; C2011108 s_buffer_load_dword s3, s[16:19], 0x9 ; C2019109 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700 v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701 v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800 s_load_dwordx4 s[4:7], s[4:5], 0x0 ; C0820500 v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801 v_interp_p1_f32 v0, v0, 1, 2, [m0] ; C8000900 v_interp_p2_f32 v0, [v0], v1, 1, 2, [m0] ; C8010901 v_mov_b32_e32 v1, 0x80000000 ; 7E0202FF 80000000 s_waitcnt lgkmcnt(0) ; BF8C007F v_xor_b32_e32 v11, s1, v1 ; 3A160201 v_mov_b32_e32 v15, 0 ; 7E1E0280 s_mov_b64 s[16:17], 0 ; BE900480 v_mov_b32_e32 v16, 0 ; 7E200280 v_mov_b32_e32 v17, 0 ; 7E220280 v_mov_b32_e32 v18, 0 ; 7E240280 v_mov_b32_e32 v1, v18 ; 7E020312 v_mov_b32_e32 v12, v17 ; 7E180311 v_mov_b32_e32 v13, v16 ; 7E1A0310 v_mov_b32_e32 v14, v15 ; 7E1C030F v_cmp_nlt_f32_e32 vcc, s1, v11 ; 7C1C1601 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_mad_f32 v16, s2, v11, v10 ; D2820010 042A1602 v_mad_f32 v17, s3, v11, v0 ; D2820011 04021603 v_mov_b32_e32 v15, 0 ; 7E1E0280 image_sample_b v[18:21], 15, 0, 0, 0, 0, 0, 0, 0, v[15:18], s[8:15], s[4:7] ; F0940F00 0022120F s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v15, v18, v14 ; 061E1D12 v_add_f32_e32 v16, v19, v13 ; 06201B13 v_add_f32_e32 v17, v20, v12 ; 06221914 v_add_f32_e32 v18, v21, v1 ; 06240315 v_add_f32_e32 v11, 1.0, v11 ; 061616F2 s_or_b64 exec, exec, s[18:19] ; 88FE127E s_or_b64 s[16:17], s[18:19], s[16:17] ; 88901012 s_andn2_b64 exec, exec, s[16:17] ; 8AFE107E s_cbranch_execnz BB0_1 ; BF890000 s_or_b64 exec, exec, s[16:17] ; 88FE107E v_mul_f32_e32 v0, s0, v14 ; 10001C00 v_mul_f32_e32 v10, s0, v13 ; 10141A00 v_mul_f32_e32 v11, s0, v12 ; 10161800 v_mul_f32_e32 v1, s0, v1 ; 10020200 v_mul_f32_e32 v0, v6, v0 ; 10000106 v_mul_f32_e32 v6, v7, v10 ; 100C1507 v_mul_f32_e32 v7, v8, v11 ; 100E1708 v_mul_f32_e32 v0, v9, v0 ; 10000109 v_mul_f32_e32 v6, v9, v6 ; 100C0D09 v_mul_f32_e32 v7, v9, v7 ; 100E0F09 v_mul_f32_e32 v1, v9, v1 ; 10020309 v_mac_f32_e32 v0, v1, v2 ; 3E000501 v_mac_f32_e32 v6, v1, v3 ; 3E0C0701 v_mac_f32_e32 v7, v1, v4 ; 3E0E0901 v_mac_f32_e32 v1, v1, v5 ; 3E020B01 v_cvt_pkrtz_f16_f32_e32 v0, v0, v6 ; 5E000D00 v_cvt_pkrtz_f16_f32_e32 v1, v7, v1 ; 5E020307 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 24 Code Size: 336 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL CONST[0..5] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[0], CONST[2] 2: DP4 TEMP[1].x, IN[0], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[0], CONST[4] 5: DP4 TEMP[2].x, IN[0], CONST[5] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[1], CONST[0] 9: MOV OUT[2], CONST[1] 10: MOV OUT[0], TEMP[0] 11: MOV OUT[3], TEMP[1] 12: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 12) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 16) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 20) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 24) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 28) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 32) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 36) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 40) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 44) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 48) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 52) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 56) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 60) %30 = call float @llvm.SI.load.const(<16 x i8> %13, i32 64) %31 = call float @llvm.SI.load.const(<16 x i8> %13, i32 68) %32 = call float @llvm.SI.load.const(<16 x i8> %13, i32 72) %33 = call float @llvm.SI.load.const(<16 x i8> %13, i32 76) %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 80) %35 = call float @llvm.SI.load.const(<16 x i8> %13, i32 84) %36 = call float @llvm.SI.load.const(<16 x i8> %13, i32 88) %37 = call float @llvm.SI.load.const(<16 x i8> %13, i32 92) %38 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = add i32 %5, %8 %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %40) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = fmul float %42, %22 %47 = fmul float %43, %23 %48 = fadd float %46, %47 %49 = fmul float %44, %24 %50 = fadd float %48, %49 %51 = fmul float %45, %25 %52 = fadd float %50, %51 %53 = fmul float %42, %26 %54 = fmul float %43, %27 %55 = fadd float %53, %54 %56 = fmul float %44, %28 %57 = fadd float %55, %56 %58 = fmul float %45, %29 %59 = fadd float %57, %58 %60 = fmul float %42, %30 %61 = fmul float %43, %31 %62 = fadd float %60, %61 %63 = fmul float %44, %32 %64 = fadd float %62, %63 %65 = fmul float %45, %33 %66 = fadd float %64, %65 %67 = fmul float %42, %34 %68 = fmul float %43, %35 %69 = fadd float %67, %68 %70 = fmul float %44, %36 %71 = fadd float %69, %70 %72 = fmul float %45, %37 %73 = fadd float %71, %72 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %14, float %15, float %16, float %17) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %18, float %19, float %20, float %21) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %66, float %73, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %52, float %59, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s19, s[0:3], 0xf ; C209810F s_buffer_load_dword s20, s[0:3], 0x10 ; C20A0110 s_buffer_load_dword s21, s[0:3], 0x11 ; C20A8111 s_buffer_load_dword s22, s[0:3], 0x14 ; C20B0114 s_buffer_load_dword s23, s[0:3], 0x15 ; C20B8115 s_buffer_load_dword s24, s[0:3], 0x12 ; C20C0112 s_buffer_load_dword s25, s[0:3], 0x13 ; C20C8113 s_buffer_load_dword s26, s[0:3], 0x16 ; C20D0116 s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s4 ; 7E080204 v_mov_b32_e32 v5, s5 ; 7E0A0205 v_mov_b32_e32 v6, s6 ; 7E0C0206 v_mov_b32_e32 v7, s7 ; 7E0E0207 exp 15, 32, 0, 0, 0, v4, v5, v6, v7 ; F800020F 07060504 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v4, s8 ; 7E080208 v_mov_b32_e32 v5, s9 ; 7E0A0209 v_mov_b32_e32 v6, s10 ; 7E0C020A v_mov_b32_e32 v7, s11 ; 7E0E020B exp 15, 33, 0, 0, 0, v4, v5, v6, v7 ; F800021F 07060504 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mul_f32_e32 v4, s13, v1 ; 1008020D v_mul_f32_e32 v5, s17, v1 ; 100A0211 v_mul_f32_e32 v6, s21, v1 ; 100C0215 v_mul_f32_e32 v1, s23, v1 ; 10020217 v_mac_f32_e32 v4, s12, v0 ; 3E08000C v_mac_f32_e32 v5, s16, v0 ; 3E0A0010 v_mac_f32_e32 v6, s20, v0 ; 3E0C0014 v_mac_f32_e32 v1, s22, v0 ; 3E020016 v_mac_f32_e32 v4, s14, v2 ; 3E08040E v_mac_f32_e32 v5, s18, v2 ; 3E0A0412 v_mac_f32_e32 v6, s24, v2 ; 3E0C0418 v_mac_f32_e32 v1, s26, v2 ; 3E02041A v_mac_f32_e32 v4, s15, v3 ; 3E08060F v_mac_f32_e32 v5, s19, v3 ; 3E0A0613 v_mac_f32_e32 v6, s25, v3 ; 3E0C0619 v_mac_f32_e32 v1, s0, v3 ; 3E020600 v_mov_b32_e32 v0, 1.0 ; 7E0002F2 exp 15, 34, 0, 0, 0, v6, v1, v0, v0 ; F800022F 00000106 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 0 ; 7E020280 exp 15, 12, 0, 1, 0, v4, v5, v1, v0 ; F80008CF 00010504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 8 Code Size: 280 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[0..2] DCL CONST[4] DCL CONST[6] DCL TEMP[0..10], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[2].xyxx 1: MOV TEMP[1], IMM[0].xxxx 2: MOV TEMP[2], IMM[0].xxxx 3: MOV TEMP[3].y, IMM[0].xxxx 4: MOV TEMP[3].x, -CONST[0].xxxx 5: BGNLOOP :0 6: FSLT TEMP[4].x, CONST[0].xxxx, TEMP[3].xxxx 7: UIF TEMP[4].xxxx :0 8: BRK 9: ENDIF 10: MOV TEMP[3].y, -CONST[0].yyyy 11: BGNLOOP :0 12: FSLT TEMP[5].x, CONST[0].yyyy, TEMP[3].yyyy 13: UIF TEMP[5].xxxx :0 14: BRK 15: ENDIF 16: ADD TEMP[6].xy, CONST[1].xyyy, TEMP[3].xyyy 17: MAD TEMP[7].xy, TEMP[6].xyyy, CONST[6].xyyy, TEMP[0].xyyy 18: MOV TEMP[8].xy, TEMP[7].xyyy 19: MOV TEMP[8].w, IMM[0].xxxx 20: TXB TEMP[9], TEMP[8], SAMP[1], 2D 21: ADD TEMP[2], TEMP[2], TEMP[9] 22: ADD TEMP[10].x, TEMP[3].yyyy, IMM[0].yyyy 23: MOV TEMP[3].y, TEMP[10].xxxx 24: ENDLOOP :0 25: ADD TEMP[3].x, TEMP[3].xxxx, IMM[0].yyyy 26: ENDLOOP :0 27: MUL TEMP[1].w, TEMP[2], CONST[0].wwww 28: MUL TEMP[0].xy, IN[2].xyyy, CONST[4].xyyy 29: MOV TEMP[0].xy, TEMP[0].xyyy 30: MOV TEMP[0].w, IMM[0].xxxx 31: TXB TEMP[0], TEMP[0], SAMP[0], 2D 32: ADD TEMP[2].x, IMM[0].yyyy, -TEMP[0].wwww 33: MUL TEMP[2].x, TEMP[1].wwww, TEMP[2].xxxx 34: MUL TEMP[2].x, TEMP[2].xxxx, CONST[0].zzzz 35: MOV_SAT TEMP[2].x, TEMP[2].xxxx 36: MAD TEMP[1], CONST[2], TEMP[2].xxxx, TEMP[0] 37: MOV TEMP[0].w, IMM[0].yyyy 38: MOV TEMP[0].xyz, IN[1].xyzx 39: MUL TEMP[0], TEMP[1], TEMP[0] 40: MUL TEMP[1], TEMP[0], IN[1].wwww 41: MAD TEMP[1], IN[0], TEMP[1].wwww, TEMP[1] 42: MOV OUT[0], TEMP[1] 43: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0) %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 4) %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 8) %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 12) %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 16) %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 20) %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 32) %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 36) %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 40) %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 44) %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 64) %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 68) %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 96) %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 100) %39 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %40 = load <8 x i32>, <8 x i32> addrspace(2)* %39, align 32, !tbaa !0 %41 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %42 = load <4 x i32>, <4 x i32> addrspace(2)* %41, align 16, !tbaa !0 %43 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %44 = load <8 x i32>, <8 x i32> addrspace(2)* %43, align 32, !tbaa !0 %45 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %46 = load <4 x i32>, <4 x i32> addrspace(2)* %45, align 16, !tbaa !0 %47 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %48 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %49 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %50 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %51 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %52 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %53 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) %54 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %55 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) %56 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) %57 = fsub float -0.000000e+00, %25 %58 = fsub float -0.000000e+00, %26 br label %LOOP LOOP: ; preds = %IF47, %main_body %temp11.0 = phi float [ 0.000000e+00, %main_body ], [ %temp11.1, %IF47 ] %temp12.0 = phi float [ %57, %main_body ], [ %109, %IF47 ] %59 = fcmp olt float %25, %temp12.0 br i1 %59, label %IF, label %ENDIF IF: ; preds = %LOOP %60 = fmul float %temp11.0, %28 %61 = fmul float %55, %35 %62 = fmul float %56, %36 %63 = bitcast float %61 to i32 %64 = bitcast float %62 to i32 %65 = insertelement <4 x i32> , i32 %63, i32 1 %66 = insertelement <4 x i32> %65, i32 %64, i32 2 %67 = call <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32> %66, <8 x i32> %40, <4 x i32> %42, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %68 = extractelement <4 x float> %67, i32 0 %69 = extractelement <4 x float> %67, i32 1 %70 = extractelement <4 x float> %67, i32 2 %71 = extractelement <4 x float> %67, i32 3 %72 = fsub float 1.000000e+00, %71 %73 = fmul float %60, %72 %74 = fmul float %73, %27 %75 = call float @llvm.AMDIL.clamp.(float %74, float 0.000000e+00, float 1.000000e+00) %76 = fmul float %31, %75 %77 = fadd float %76, %68 %78 = fmul float %32, %75 %79 = fadd float %78, %69 %80 = fmul float %33, %75 %81 = fadd float %80, %70 %82 = fmul float %34, %75 %83 = fadd float %82, %71 %84 = fmul float %77, %51 %85 = fmul float %79, %52 %86 = fmul float %81, %53 %87 = fmul float %84, %54 %88 = fmul float %85, %54 %89 = fmul float %86, %54 %90 = fmul float %83, %54 %91 = fmul float %47, %90 %92 = fadd float %91, %87 %93 = fmul float %48, %90 %94 = fadd float %93, %88 %95 = fmul float %49, %90 %96 = fadd float %95, %89 %97 = fmul float %50, %90 %98 = fadd float %97, %90 %99 = call i32 @llvm.SI.packf16(float %92, float %94) %100 = bitcast i32 %99 to float %101 = call i32 @llvm.SI.packf16(float %96, float %98) %102 = bitcast i32 %101 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %100, float %102, float %100, float %102) ret void ENDIF: ; preds = %LOOP %103 = fadd float %29, %temp12.0 %104 = fmul float %103, %37 %105 = fadd float %104, %55 %106 = bitcast float %105 to i32 %107 = insertelement <4 x i32> , i32 %106, i32 1 br label %LOOP45 LOOP45: ; preds = %ENDIF46, %ENDIF %temp11.1 = phi float [ %temp11.0, %ENDIF ], [ %117, %ENDIF46 ] %temp13.0 = phi float [ %58, %ENDIF ], [ %118, %ENDIF46 ] %108 = fcmp olt float %26, %temp13.0 br i1 %108, label %IF47, label %ENDIF46 IF47: ; preds = %LOOP45 %109 = fadd float %temp12.0, 1.000000e+00 br label %LOOP ENDIF46: ; preds = %LOOP45 %110 = fadd float %30, %temp13.0 %111 = fmul float %110, %38 %112 = fadd float %111, %56 %113 = bitcast float %112 to i32 %114 = insertelement <4 x i32> %107, i32 %113, i32 2 %115 = call <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32> %114, <8 x i32> %44, <4 x i32> %46, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %116 = extractelement <4 x float> %115, i32 3 %117 = fadd float %temp11.1, %116 %118 = fadd float %temp13.0, 1.000000e+00 br label %LOOP45 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[32:35], s[2:3], 0x0 ; C0900300 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx8 s[20:27], s[6:7], 0x8 ; C0CA0708 s_mov_b32 m0, s10 ; BEFC030A v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v2, v0, 2, 0, [m0] ; C8080200 v_interp_p2_f32 v2, [v2], v1, 2, 0, [m0] ; C8090201 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[32:35], 0x0 ; C2042100 s_buffer_load_dword s9, s[32:35], 0x1 ; C204A101 s_buffer_load_dword s3, s[32:35], 0x2 ; C201A102 s_buffer_load_dword s7, s[32:35], 0x3 ; C203A103 s_buffer_load_dword s28, s[32:35], 0x4 ; C20E2104 s_buffer_load_dword s29, s[32:35], 0x5 ; C20EA105 s_buffer_load_dword s6, s[32:35], 0x8 ; C2032108 s_buffer_load_dword s2, s[32:35], 0x9 ; C2012109 s_buffer_load_dword s1, s[32:35], 0xa ; C200A10A s_buffer_load_dword s0, s[32:35], 0xb ; C200210B s_buffer_load_dword s10, s[32:35], 0x10 ; C2052110 s_buffer_load_dword s11, s[32:35], 0x11 ; C205A111 s_buffer_load_dword s30, s[32:35], 0x18 ; C20F2118 s_buffer_load_dword s31, s[32:35], 0x19 ; C20FA119 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700 v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701 s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 s_load_dwordx4 s[36:39], s[4:5], 0x4 ; C0920504 v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800 v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801 v_interp_p1_f32 v0, v0, 1, 2, [m0] ; C8000900 v_interp_p2_f32 v0, [v0], v1, 1, 2, [m0] ; C8010901 v_mov_b32_e32 v1, 0x80000000 ; 7E0202FF 80000000 s_waitcnt lgkmcnt(0) ; BF8C007F v_xor_b32_e32 v11, s8, v1 ; 3A160208 v_xor_b32_e32 v12, s9, v1 ; 3A180209 v_mov_b32_e32 v13, 0 ; 7E1A0280 s_mov_b64 s[4:5], 0 ; BE840480 v_mov_b32_e32 v1, v13 ; 7E02030D v_cmp_nlt_f32_e32 vcc, s8, v11 ; 7C1C1608 s_and_saveexec_b64 s[40:41], vcc ; BEA8246A s_xor_b64 s[40:41], exec, s[40:41] ; 89A8287E s_cbranch_execz BB0_4 ; BF880000 v_add_f32_e32 v13, s28, v11 ; 061A161C v_mad_f32 v15, s30, v13, v10 ; D282000F 042A1A1E v_mov_b32_e32 v14, 0 ; 7E1C0280 s_mov_b64 s[42:43], 0 ; BEAA0480 v_mov_b32_e32 v16, v1 ; 7E200301 v_mov_b32_e32 v17, v12 ; 7E22030C v_mov_b32_e32 v13, v16 ; 7E1A0310 v_cmp_nlt_f32_e32 vcc, s9, v17 ; 7C1C2209 s_and_saveexec_b64 s[44:45], vcc ; BEAC246A s_xor_b64 s[44:45], exec, s[44:45] ; 89AC2C7E v_add_f32_e32 v16, s29, v17 ; 0620221D v_mad_f32 v16, s31, v16, v0 ; D2820010 0402201F image_sample_b v16, 8, 0, 0, 0, 0, 0, 0, 0, v[14:17], s[20:27], s[36:39] ; F0940800 0125100E s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v16, v16, v13 ; 06201B10 v_add_f32_e32 v17, 1.0, v17 ; 062222F2 s_or_b64 exec, exec, s[44:45] ; 88FE2C7E s_or_b64 s[42:43], s[44:45], s[42:43] ; 88AA2A2C s_andn2_b64 exec, exec, s[42:43] ; 8AFE2A7E s_cbranch_execnz BB0_5 ; BF890000 s_or_b64 exec, exec, s[42:43] ; 88FE2A7E v_add_f32_e32 v11, 1.0, v11 ; 061616F2 s_or_b64 exec, exec, s[40:41] ; 88FE287E s_or_b64 s[4:5], s[40:41], s[4:5] ; 88840428 s_andn2_b64 exec, exec, s[4:5] ; 8AFE047E s_cbranch_execnz BB0_1 ; BF890000 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_mul_f32_e32 v1, s7, v1 ; 10020207 v_mul_f32_e32 v11, s10, v10 ; 1016140A v_mul_f32_e32 v12, s11, v0 ; 1018000B v_mov_b32_e32 v10, 0 ; 7E140280 image_sample_b v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[12:19], s[32:35] ; F0940F00 01030A0A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, -v13, v1, v1 ; D2820000 2406030D v_mul_f32_e32 v0, s3, v0 ; 10000003 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mad_f32 v1, s6, v0, v10 ; D2820001 042A0006 v_mad_f32 v10, s2, v0, v11 ; D282000A 042E0002 v_mad_f32 v11, s1, v0, v12 ; D282000B 04320001 v_mac_f32_e32 v13, s0, v0 ; 3E1A0000 v_mul_f32_e32 v0, v6, v1 ; 10000306 v_mul_f32_e32 v1, v7, v10 ; 10021507 v_mul_f32_e32 v6, v8, v11 ; 100C1708 v_mul_f32_e32 v0, v9, v0 ; 10000109 v_mul_f32_e32 v1, v9, v1 ; 10020309 v_mul_f32_e32 v6, v9, v6 ; 100C0D09 v_mul_f32_e32 v7, v9, v13 ; 100E1B09 v_mac_f32_e32 v0, v7, v4 ; 3E000907 v_mac_f32_e32 v1, v7, v3 ; 3E020707 v_mac_f32_e32 v6, v7, v2 ; 3E0C0507 v_mac_f32_e32 v7, v7, v5 ; 3E0E0B07 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v6, v7 ; 5E020F06 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 20 Code Size: 476 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL CONST[0..47] DCL TEMP[0..1], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 2.0000, 0.1000} IMM[1] FLT32 { 1.1000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: MAD TEMP[1].x, IN[1].xxxx, IMM[0].zzzz, IMM[0].wwww 2: F2I TEMP[1].x, TEMP[1].xxxx 3: UARL ADDR[0].x, TEMP[1].xxxx 4: UARL ADDR[0].x, TEMP[1].xxxx 5: DP4 TEMP[0].x, IN[0], CONST[ADDR[0].x] 6: MAD TEMP[1].x, IN[1].xxxx, IMM[0].zzzz, IMM[1].xxxx 7: F2I TEMP[1].x, TEMP[1].xxxx 8: UARL ADDR[0].x, TEMP[1].xxxx 9: DP4 TEMP[1].x, IN[0], CONST[ADDR[0].x] 10: MOV TEMP[0].y, TEMP[1].xxxx 11: MOV OUT[0], TEMP[0] 12: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 %16 = add i32 %5, %8 %17 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %15, i32 0, i32 %16) %18 = extractelement <4 x float> %17, i32 0 %19 = extractelement <4 x float> %17, i32 1 %20 = extractelement <4 x float> %17, i32 2 %21 = extractelement <4 x float> %17, i32 3 %22 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = add i32 %5, %8 %25 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %23, i32 0, i32 %24) %26 = extractelement <4 x float> %25, i32 0 %27 = fmul float %26, 2.000000e+00 %28 = fadd float %27, 0x3FB99999A0000000 %29 = fptosi float %28 to i32 %30 = shl i32 %29, 4 %31 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %30) %32 = shl i32 %29, 4 %33 = or i32 %32, 4 %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %33) %35 = shl i32 %29, 4 %36 = or i32 %35, 8 %37 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %36) %38 = shl i32 %29, 4 %39 = or i32 %38, 12 %40 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %39) %41 = fmul float %18, %31 %42 = fmul float %19, %34 %43 = fadd float %41, %42 %44 = fmul float %20, %37 %45 = fadd float %43, %44 %46 = fmul float %21, %40 %47 = fadd float %45, %46 %48 = fmul float %26, 2.000000e+00 %49 = fadd float %48, 0x3FF19999A0000000 %50 = fptosi float %49 to i32 %51 = shl i32 %50, 4 %52 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %51) %53 = shl i32 %50, 4 %54 = or i32 %53, 4 %55 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %54) %56 = shl i32 %50, 4 %57 = or i32 %56, 8 %58 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %57) %59 = shl i32 %50, 4 %60 = or i32 %59, 12 %61 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %60) %62 = fmul float %18, %52 %63 = fmul float %19, %55 %64 = fadd float %62, %63 %65 = fmul float %20, %58 %66 = fadd float %64, %65 %67 = fmul float %21, %61 %68 = fadd float %66, %67 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %47, float %68, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_mov_b32_e32 v1, 1.0 ; 7E0202F2 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 s_waitcnt vmcnt(0) ; BF8C0770 v_madak_f32_e32 v0, 2.0, v6, 0x3dcccccd ; 42000CF4 3DCCCCCD v_madak_f32_e32 v6, 2.0, v6, 0x3f8ccccd ; 420C0CF4 3F8CCCCD v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_cvt_i32_f32_e32 v6, v6 ; 7E0C1106 v_lshlrev_b32_e32 v0, 4, v0 ; 34000084 v_lshlrev_b32_e32 v6, 4, v6 ; 340C0C84 buffer_load_dword v7, v0, s[0:3], 0 offen ; E0301000 80000700 v_or_b32_e32 v8, 4, v0 ; 38100084 v_or_b32_e32 v9, 8, v0 ; 38120088 v_or_b32_e32 v0, 12, v0 ; 3800008C buffer_load_dword v10, v6, s[0:3], 0 offen ; E0301000 80000A06 v_or_b32_e32 v11, 4, v6 ; 38160C84 buffer_load_dword v8, v8, s[0:3], 0 offen ; E0301000 80000808 v_or_b32_e32 v12, 8, v6 ; 38180C88 buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B v_or_b32_e32 v6, 12, v6 ; 380C0C8C buffer_load_dword v9, v9, s[0:3], 0 offen ; E0301000 80000909 buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 buffer_load_dword v6, v6, s[0:3], 0 offen ; E0301000 80000606 s_waitcnt vmcnt(5) ; BF8C0775 v_mul_f32_e32 v8, v8, v3 ; 10100708 v_mac_f32_e32 v8, v7, v2 ; 3E100507 s_waitcnt vmcnt(4) ; BF8C0774 v_mul_f32_e32 v3, v11, v3 ; 1006070B v_mac_f32_e32 v3, v10, v2 ; 3E06050A s_waitcnt vmcnt(3) ; BF8C0773 v_mac_f32_e32 v8, v9, v4 ; 3E100909 s_waitcnt vmcnt(2) ; BF8C0772 v_mac_f32_e32 v3, v12, v4 ; 3E06090C s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v8, v0, v5 ; 3E100B00 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v3, v6, v5 ; 3E060B06 v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 12, 0, 1, 0, v8, v3, v0, v1 ; F80008CF 01000308 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Code Size: 236 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL OUT[0], COLOR DCL CONST[0] 0: MOV OUT[0], CONST[0] 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0) %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 4) %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 8) %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 12) %29 = call i32 @llvm.SI.packf16(float %25, float %26) %30 = bitcast i32 %29 to float %31 = call i32 @llvm.SI.packf16(float %27, float %28) %32 = bitcast i32 %31 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %30, float %32, float %30, float %32) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s0, s[0:3], 0x3 ; C2000103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s5 ; 7E000205 v_cvt_pkrtz_f16_f32_e32 v0, s4, v0 ; 5E000004 v_mov_b32_e32 v1, s0 ; 7E020200 v_cvt_pkrtz_f16_f32_e32 v1, s6, v1 ; 5E020206 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 8 VGPRS: 4 Code Size: 56 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL CONST[0..1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[0], CONST[0] 2: DP4 TEMP[1].x, IN[0], CONST[1] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: MOV OUT[0], TEMP[0] 5: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 12) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 16) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 20) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 24) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 28) %22 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = add i32 %5, %8 %25 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %23, i32 0, i32 %24) %26 = extractelement <4 x float> %25, i32 0 %27 = extractelement <4 x float> %25, i32 1 %28 = extractelement <4 x float> %25, i32 2 %29 = extractelement <4 x float> %25, i32 3 %30 = fmul float %26, %14 %31 = fmul float %27, %15 %32 = fadd float %30, %31 %33 = fmul float %28, %16 %34 = fadd float %32, %33 %35 = fmul float %29, %17 %36 = fadd float %34, %35 %37 = fmul float %26, %18 %38 = fmul float %27, %19 %39 = fadd float %37, %38 %40 = fmul float %28, %20 %41 = fadd float %39, %40 %42 = fmul float %29, %21 %43 = fadd float %41, %42 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %36, float %43, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100 s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104 s_buffer_load_dword s8, s[0:3], 0x2 ; C2040102 s_buffer_load_dword s9, s[0:3], 0x6 ; C2048106 s_buffer_load_dword s10, s[0:3], 0x3 ; C2050103 s_buffer_load_dword s0, s[0:3], 0x7 ; C2000107 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v4, s4, v1 ; 10080204 v_mul_f32_e32 v1, s5, v1 ; 10020205 v_mac_f32_e32 v4, s6, v0 ; 3E080006 v_mac_f32_e32 v1, s7, v0 ; 3E020007 v_mac_f32_e32 v4, s8, v2 ; 3E080408 v_mac_f32_e32 v1, s9, v2 ; 3E020409 v_mac_f32_e32 v4, s10, v3 ; 3E08060A v_mac_f32_e32 v1, s0, v3 ; 3E020600 v_mov_b32_e32 v0, 1.0 ; 7E0002F2 v_mov_b32_e32 v2, 0 ; 7E040280 exp 15, 12, 0, 1, 0, v4, v1, v2, v0 ; F80008CF 00020104 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 116 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL OUT[0], COLOR DCL CONST[0] 0: MOV OUT[0], CONST[0] 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0) %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 4) %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 8) %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 12) %29 = call i32 @llvm.SI.packf16(float %25, float %26) %30 = bitcast i32 %29 to float %31 = call i32 @llvm.SI.packf16(float %27, float %28) %32 = bitcast i32 %31 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %30, float %32, float %30, float %32) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s0, s[0:3], 0x3 ; C2000103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s5 ; 7E000205 v_cvt_pkrtz_f16_f32_e32 v0, s4, v0 ; 5E000004 v_mov_b32_e32 v1, s0 ; 7E020200 v_cvt_pkrtz_f16_f32_e32 v1, s6, v1 ; 5E020206 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 8 VGPRS: 4 Code Size: 56 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[2], CONST[0] 2: DP4 TEMP[1].x, IN[2], CONST[1] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: MOV OUT[2], IN[1] 5: MOV OUT[1], IN[0] 6: MOV OUT[0], TEMP[0] 7: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 12) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 16) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 20) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 24) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 28) %22 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = add i32 %5, %8 %25 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %23, i32 0, i32 %24) %26 = extractelement <4 x float> %25, i32 0 %27 = extractelement <4 x float> %25, i32 1 %28 = extractelement <4 x float> %25, i32 2 %29 = extractelement <4 x float> %25, i32 3 %30 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = add i32 %5, %8 %33 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %31, i32 0, i32 %32) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 %38 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = add i32 %5, %8 %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %40) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = fmul float %42, %14 %47 = fmul float %43, %15 %48 = fadd float %46, %47 %49 = fmul float %44, %16 %50 = fadd float %48, %49 %51 = fmul float %45, %17 %52 = fadd float %50, %51 %53 = fmul float %42, %18 %54 = fmul float %43, %19 %55 = fadd float %53, %54 %56 = fmul float %44, %20 %57 = fadd float %55, %56 %58 = fmul float %45, %21 %59 = fadd float %57, %58 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %26, float %27, float %28, float %29) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %34, float %35, float %36, float %37) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %52, float %59, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A v_mov_b32_e32 v1, 1.0 ; 7E0202F2 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_buffer_load_dword s9, s[0:3], 0x1 ; C2048101 s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102 s_buffer_load_dword s11, s[0:3], 0x3 ; C2058103 s_buffer_load_dword s20, s[0:3], 0x4 ; C20A0104 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 buffer_load_format_xyzw v[10:13], v0, s[16:19], 0 idxen ; E00C2000 80040A00 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_buffer_load_dword s0, s[0:3], 0x7 ; C2000107 s_waitcnt vmcnt(2) ; BF8C0772 exp 15, 32, 0, 0, 0, v2, v3, v4, v5 ; F800020F 05040302 s_waitcnt vmcnt(1) ; BF8C0771 exp 15, 33, 0, 0, 0, v6, v7, v8, v9 ; F800021F 09080706 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s9, v11 ; 10001609 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v2, s4, v11 ; 10041604 v_mac_f32_e32 v0, s8, v10 ; 3E001408 v_mac_f32_e32 v2, s20, v10 ; 3E041414 v_mac_f32_e32 v0, s10, v12 ; 3E00180A v_mac_f32_e32 v2, s5, v12 ; 3E041805 v_mac_f32_e32 v0, s11, v13 ; 3E001A0B v_mac_f32_e32 v2, s0, v13 ; 3E041A00 v_mov_b32_e32 v3, 0 ; 7E060280 exp 15, 12, 0, 1, 0, v0, v2, v3, v1 ; F80008CF 01030200 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Code Size: 164 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xyz, IN[0].xyzx 1: MUL TEMP[1].x, IN[0].wwww, IN[1].wwww 2: MOV TEMP[0].w, TEMP[1].xxxx 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %24 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %25 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %26 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %27 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %28 = fmul float %26, %27 %29 = call i32 @llvm.SI.packf16(float %23, float %24) %30 = bitcast i32 %29 to float %31 = call i32 @llvm.SI.packf16(float %25, float %28) %32 = bitcast i32 %31 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %30, float %32, float %30, float %32) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s10 ; BEFC030A v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v0, v0, 3, 1, [m0] ; C8000700 v_interp_p2_f32 v0, [v0], v1, 3, 1, [m0] ; C8010701 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 68 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..95] DCL TEMP[0..2], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 4.0000, 2.1000} IMM[1] FLT32 { 3.1000, 1.1000, 0.1000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: MAD TEMP[1].x, IN[3].xxxx, IMM[0].zzzz, IMM[0].wwww 2: F2I TEMP[1].x, TEMP[1].xxxx 3: UARL ADDR[0].x, TEMP[1].xxxx 4: UARL ADDR[0].x, TEMP[1].xxxx 5: DP4 TEMP[0].x, IN[2], CONST[ADDR[0].x] 6: MAD TEMP[1].x, IN[3].xxxx, IMM[0].zzzz, IMM[1].xxxx 7: F2I TEMP[1].x, TEMP[1].xxxx 8: UARL ADDR[0].x, TEMP[1].xxxx 9: DP4 TEMP[1].x, IN[2], CONST[ADDR[0].x] 10: MOV TEMP[0].y, TEMP[1].xxxx 11: MAD TEMP[1].x, IN[3].xxxx, IMM[0].zzzz, IMM[1].yyyy 12: F2I TEMP[1].x, TEMP[1].xxxx 13: MAD TEMP[2].x, IN[3].xxxx, IMM[0].zzzz, IMM[1].zzzz 14: F2I TEMP[2].x, TEMP[2].xxxx 15: UARL ADDR[0].x, TEMP[2].xxxx 16: UARL ADDR[0].x, TEMP[2].xxxx 17: MOV TEMP[2], CONST[ADDR[0].x] 18: UARL ADDR[0].x, TEMP[1].xxxx 19: UARL ADDR[0].x, TEMP[1].xxxx 20: MAD TEMP[1], IN[0], CONST[ADDR[0].x], TEMP[2] 21: MOV TEMP[2].xy, IN[1].xyxx 22: MOV OUT[1], TEMP[1] 23: MOV OUT[0], TEMP[0] 24: MOV OUT[2], TEMP[2] 25: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 %16 = add i32 %5, %8 %17 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %15, i32 0, i32 %16) %18 = extractelement <4 x float> %17, i32 0 %19 = extractelement <4 x float> %17, i32 1 %20 = extractelement <4 x float> %17, i32 2 %21 = extractelement <4 x float> %17, i32 3 %22 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = add i32 %5, %8 %25 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %23, i32 0, i32 %24) %26 = extractelement <4 x float> %25, i32 0 %27 = extractelement <4 x float> %25, i32 1 %28 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %29 = load <16 x i8>, <16 x i8> addrspace(2)* %28, align 16, !tbaa !0 %30 = add i32 %5, %8 %31 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %29, i32 0, i32 %30) %32 = extractelement <4 x float> %31, i32 0 %33 = extractelement <4 x float> %31, i32 1 %34 = extractelement <4 x float> %31, i32 2 %35 = extractelement <4 x float> %31, i32 3 %36 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %37 = load <16 x i8>, <16 x i8> addrspace(2)* %36, align 16, !tbaa !0 %38 = add i32 %5, %8 %39 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %37, i32 0, i32 %38) %40 = extractelement <4 x float> %39, i32 0 %41 = fmul float %40, 4.000000e+00 %42 = fadd float %41, 0x4000CCCCC0000000 %43 = fptosi float %42 to i32 %44 = shl i32 %43, 4 %45 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %44) %46 = shl i32 %43, 4 %47 = or i32 %46, 4 %48 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %47) %49 = shl i32 %43, 4 %50 = or i32 %49, 8 %51 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %50) %52 = shl i32 %43, 4 %53 = or i32 %52, 12 %54 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %53) %55 = fmul float %32, %45 %56 = fmul float %33, %48 %57 = fadd float %55, %56 %58 = fmul float %34, %51 %59 = fadd float %57, %58 %60 = fmul float %35, %54 %61 = fadd float %59, %60 %62 = fmul float %40, 4.000000e+00 %63 = fadd float %62, 0x4008CCCCC0000000 %64 = fptosi float %63 to i32 %65 = shl i32 %64, 4 %66 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %65) %67 = shl i32 %64, 4 %68 = or i32 %67, 4 %69 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %68) %70 = shl i32 %64, 4 %71 = or i32 %70, 8 %72 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %71) %73 = shl i32 %64, 4 %74 = or i32 %73, 12 %75 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %74) %76 = fmul float %32, %66 %77 = fmul float %33, %69 %78 = fadd float %76, %77 %79 = fmul float %34, %72 %80 = fadd float %78, %79 %81 = fmul float %35, %75 %82 = fadd float %80, %81 %83 = fmul float %40, 4.000000e+00 %84 = fadd float %83, 0x3FF19999A0000000 %85 = fptosi float %84 to i32 %86 = fmul float %40, 4.000000e+00 %87 = fadd float %86, 0x3FB99999A0000000 %88 = fptosi float %87 to i32 %89 = shl i32 %88, 4 %90 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %89) %91 = shl i32 %88, 4 %92 = or i32 %91, 4 %93 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %92) %94 = shl i32 %88, 4 %95 = or i32 %94, 8 %96 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %95) %97 = shl i32 %88, 4 %98 = or i32 %97, 12 %99 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %98) %100 = shl i32 %85, 4 %101 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %100) %102 = fmul float %18, %101 %103 = fadd float %102, %90 %104 = shl i32 %85, 4 %105 = or i32 %104, 4 %106 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %105) %107 = fmul float %19, %106 %108 = fadd float %107, %93 %109 = shl i32 %85, 4 %110 = or i32 %109, 8 %111 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %110) %112 = fmul float %20, %111 %113 = fadd float %112, %96 %114 = shl i32 %85, 4 %115 = or i32 %114, 12 %116 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %115) %117 = fmul float %21, %116 %118 = fadd float %117, %99 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %103, float %108, float %113, float %118) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %26, float %27, float %96, float %99) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %61, float %82, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[20:23], s[8:9], 0xc ; C08A090C v_mov_b32_e32 v1, 1.0 ; 7E0202F2 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[8:11], v0, s[16:19], 0 idxen ; E00C2000 80040800 buffer_load_format_xyzw v[12:15], v0, s[20:23], 0 idxen ; E00C2000 80050C00 s_waitcnt vmcnt(0) ; BF8C0770 v_madak_f32_e32 v0, 4.0, v12, 0x40066666 ; 420018F6 40066666 v_madak_f32_e32 v13, 4.0, v12, 0x40466666 ; 421A18F6 40466666 v_madak_f32_e32 v14, 4.0, v12, 0x3f8ccccd ; 421C18F6 3F8CCCCD v_madak_f32_e32 v12, 4.0, v12, 0x3dcccccd ; 421818F6 3DCCCCCD v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_cvt_i32_f32_e32 v13, v13 ; 7E1A110D v_cvt_i32_f32_e32 v12, v12 ; 7E18110C v_cvt_i32_f32_e32 v14, v14 ; 7E1C110E v_lshlrev_b32_e32 v0, 4, v0 ; 34000084 v_lshlrev_b32_e32 v13, 4, v13 ; 341A1A84 v_lshlrev_b32_e32 v12, 4, v12 ; 34181884 v_lshlrev_b32_e32 v14, 4, v14 ; 341C1C84 buffer_load_dword v15, v0, s[0:3], 0 offen ; E0301000 80000F00 v_or_b32_e32 v16, 4, v0 ; 38200084 v_or_b32_e32 v17, 8, v0 ; 38220088 v_or_b32_e32 v0, 12, v0 ; 3800008C buffer_load_dword v18, v12, s[0:3], 0 offen ; E0301000 8000120C v_or_b32_e32 v19, 4, v12 ; 38261884 v_or_b32_e32 v20, 8, v12 ; 38281888 v_or_b32_e32 v12, 12, v12 ; 3818188C buffer_load_dword v21, v14, s[0:3], 0 offen ; E0301000 8000150E v_or_b32_e32 v22, 4, v14 ; 382C1C84 v_or_b32_e32 v23, 8, v14 ; 382E1C88 v_or_b32_e32 v14, 12, v14 ; 381C1C8C buffer_load_dword v19, v19, s[0:3], 0 offen ; E0301000 80001313 buffer_load_dword v20, v20, s[0:3], 0 offen ; E0301000 80001414 buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C buffer_load_dword v22, v22, s[0:3], 0 offen ; E0301000 80001616 buffer_load_dword v23, v23, s[0:3], 0 offen ; E0301000 80001717 buffer_load_dword v14, v14, s[0:3], 0 offen ; E0301000 80000E0E buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 v_or_b32_e32 v24, 4, v13 ; 38301A84 buffer_load_dword v24, v24, s[0:3], 0 offen ; E0301000 80001818 buffer_load_dword v25, v13, s[0:3], 0 offen ; E0301000 8000190D buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 v_or_b32_e32 v26, 8, v13 ; 38341A88 buffer_load_dword v26, v26, s[0:3], 0 offen ; E0301000 80001A1A v_or_b32_e32 v13, 12, v13 ; 381A1A8C buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 buffer_load_dword v13, v13, s[0:3], 0 offen ; E0301000 80000D0D s_waitcnt vmcnt(13) ; BF8C077D v_mac_f32_e32 v18, v21, v2 ; 3E240515 s_waitcnt vmcnt(9) ; BF8C0779 v_mac_f32_e32 v19, v22, v3 ; 3E260716 s_waitcnt vmcnt(8) ; BF8C0778 v_mad_f32 v2, v23, v4, v20 ; D2820002 04520917 s_waitcnt vmcnt(7) ; BF8C0777 v_mad_f32 v3, v14, v5, v12 ; D2820003 04320B0E s_waitcnt vmcnt(6) ; BF8C0776 v_mul_f32_e32 v4, v16, v9 ; 10081310 v_mac_f32_e32 v4, v15, v8 ; 3E08110F s_waitcnt vmcnt(5) ; BF8C0775 v_mul_f32_e32 v5, v24, v9 ; 100A1318 s_waitcnt vmcnt(4) ; BF8C0774 v_mac_f32_e32 v5, v25, v8 ; 3E0A1119 exp 15, 32, 0, 0, 0, v18, v19, v2, v3 ; F800020F 03021312 s_waitcnt vmcnt(3) ; BF8C0773 v_mac_f32_e32 v4, v17, v10 ; 3E081511 s_waitcnt vmcnt(2) ; BF8C0772 v_mac_f32_e32 v5, v26, v10 ; 3E0A151A exp 15, 33, 0, 0, 0, v6, v7, v20, v12 ; F800021F 0C140706 s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v4, v0, v11 ; 3E081700 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v5, v13, v11 ; 3E0A170D v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 12, 0, 1, 0, v4, v5, v0, v1 ; F80008CF 01000504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 28 Code Size: 444 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xyz, IN[0].xyzx 1: MOV TEMP[1].xy, IN[1].xyyy 2: TEX TEMP[1].w, TEMP[1], SAMP[0], 2D 3: MUL TEMP[1].x, IN[0].wwww, TEMP[1].wwww 4: MOV TEMP[0].w, TEMP[1].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %29 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %30 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %31 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %32 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %33 = bitcast float %31 to i32 %34 = bitcast float %32 to i32 %35 = insertelement <2 x i32> undef, i32 %33, i32 0 %36 = insertelement <2 x i32> %35, i32 %34, i32 1 %37 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %36, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %38 = extractelement <4 x float> %37, i32 3 %39 = fmul float %30, %38 %40 = call i32 @llvm.SI.packf16(float %27, float %28) %41 = bitcast i32 %40 to float %42 = call i32 @llvm.SI.packf16(float %29, float %39) %43 = bitcast i32 %42 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %41, float %43, float %41, float %43) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s10 ; BEFC030A s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v0, 8, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[8:15], s[0:3] ; F0800800 00020006 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 104 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL CONST[0..5] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[0], CONST[2] 2: DP4 TEMP[1].x, IN[0], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[0], CONST[4] 5: DP4 TEMP[2].x, IN[0], CONST[5] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[1], CONST[0] 9: MOV OUT[2], CONST[1] 10: MOV OUT[0], TEMP[0] 11: MOV OUT[3], TEMP[1] 12: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 12) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 16) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 20) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 24) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 28) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 32) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 36) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 40) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 44) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 48) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 52) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 56) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 60) %30 = call float @llvm.SI.load.const(<16 x i8> %13, i32 64) %31 = call float @llvm.SI.load.const(<16 x i8> %13, i32 68) %32 = call float @llvm.SI.load.const(<16 x i8> %13, i32 72) %33 = call float @llvm.SI.load.const(<16 x i8> %13, i32 76) %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 80) %35 = call float @llvm.SI.load.const(<16 x i8> %13, i32 84) %36 = call float @llvm.SI.load.const(<16 x i8> %13, i32 88) %37 = call float @llvm.SI.load.const(<16 x i8> %13, i32 92) %38 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = add i32 %5, %8 %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %40) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = fmul float %42, %22 %47 = fmul float %43, %23 %48 = fadd float %46, %47 %49 = fmul float %44, %24 %50 = fadd float %48, %49 %51 = fmul float %45, %25 %52 = fadd float %50, %51 %53 = fmul float %42, %26 %54 = fmul float %43, %27 %55 = fadd float %53, %54 %56 = fmul float %44, %28 %57 = fadd float %55, %56 %58 = fmul float %45, %29 %59 = fadd float %57, %58 %60 = fmul float %42, %30 %61 = fmul float %43, %31 %62 = fadd float %60, %61 %63 = fmul float %44, %32 %64 = fadd float %62, %63 %65 = fmul float %45, %33 %66 = fadd float %64, %65 %67 = fmul float %42, %34 %68 = fmul float %43, %35 %69 = fadd float %67, %68 %70 = fmul float %44, %36 %71 = fadd float %69, %70 %72 = fmul float %45, %37 %73 = fadd float %71, %72 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %14, float %15, float %16, float %17) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %18, float %19, float %20, float %21) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %66, float %73, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %52, float %59, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s19, s[0:3], 0xf ; C209810F s_buffer_load_dword s20, s[0:3], 0x10 ; C20A0110 s_buffer_load_dword s21, s[0:3], 0x11 ; C20A8111 s_buffer_load_dword s22, s[0:3], 0x14 ; C20B0114 s_buffer_load_dword s23, s[0:3], 0x15 ; C20B8115 s_buffer_load_dword s24, s[0:3], 0x12 ; C20C0112 s_buffer_load_dword s25, s[0:3], 0x13 ; C20C8113 s_buffer_load_dword s26, s[0:3], 0x16 ; C20D0116 s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s4 ; 7E080204 v_mov_b32_e32 v5, s5 ; 7E0A0205 v_mov_b32_e32 v6, s6 ; 7E0C0206 v_mov_b32_e32 v7, s7 ; 7E0E0207 exp 15, 32, 0, 0, 0, v4, v5, v6, v7 ; F800020F 07060504 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v4, s8 ; 7E080208 v_mov_b32_e32 v5, s9 ; 7E0A0209 v_mov_b32_e32 v6, s10 ; 7E0C020A v_mov_b32_e32 v7, s11 ; 7E0E020B exp 15, 33, 0, 0, 0, v4, v5, v6, v7 ; F800021F 07060504 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mul_f32_e32 v4, s13, v1 ; 1008020D v_mul_f32_e32 v5, s17, v1 ; 100A0211 v_mul_f32_e32 v6, s21, v1 ; 100C0215 v_mul_f32_e32 v1, s23, v1 ; 10020217 v_mac_f32_e32 v4, s12, v0 ; 3E08000C v_mac_f32_e32 v5, s16, v0 ; 3E0A0010 v_mac_f32_e32 v6, s20, v0 ; 3E0C0014 v_mac_f32_e32 v1, s22, v0 ; 3E020016 v_mac_f32_e32 v4, s14, v2 ; 3E08040E v_mac_f32_e32 v5, s18, v2 ; 3E0A0412 v_mac_f32_e32 v6, s24, v2 ; 3E0C0418 v_mac_f32_e32 v1, s26, v2 ; 3E02041A v_mac_f32_e32 v4, s15, v3 ; 3E08060F v_mac_f32_e32 v5, s19, v3 ; 3E0A0613 v_mac_f32_e32 v6, s25, v3 ; 3E0C0619 v_mac_f32_e32 v1, s0, v3 ; 3E020600 v_mov_b32_e32 v0, 1.0 ; 7E0002F2 exp 15, 34, 0, 0, 0, v6, v1, v0, v0 ; F800022F 00000106 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 0 ; 7E020280 exp 15, 12, 0, 1, 0, v4, v5, v1, v0 ; F80008CF 00010504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 8 Code Size: 280 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].w, IMM[0].xxxx 1: MOV TEMP[0].xyz, IN[1].xyzx 2: MOV TEMP[1].xy, IN[2].xyyy 3: TEX TEMP[1], TEMP[1], SAMP[0], 2D 4: MUL TEMP[0], TEMP[1], TEMP[0] 5: MUL TEMP[0], TEMP[0], IN[1].wwww 6: MAD TEMP[0], IN[0], TEMP[0].wwww, TEMP[0] 7: MOV OUT[0], TEMP[0] 8: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %29 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %30 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %31 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %32 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %33 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) %34 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %35 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) %36 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) %37 = bitcast float %35 to i32 %38 = bitcast float %36 to i32 %39 = insertelement <2 x i32> undef, i32 %37, i32 0 %40 = insertelement <2 x i32> %39, i32 %38, i32 1 %41 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %40, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = fmul float %42, %31 %47 = fmul float %43, %32 %48 = fmul float %44, %33 %49 = fmul float %46, %34 %50 = fmul float %47, %34 %51 = fmul float %48, %34 %52 = fmul float %45, %34 %53 = fmul float %27, %52 %54 = fadd float %53, %49 %55 = fmul float %28, %52 %56 = fadd float %55, %50 %57 = fmul float %29, %52 %58 = fadd float %57, %51 %59 = fmul float %30, %52 %60 = fadd float %59, %52 %61 = call i32 @llvm.SI.packf16(float %54, float %56) %62 = bitcast i32 %61 to float %63 = call i32 @llvm.SI.packf16(float %58, float %60) %64 = bitcast i32 %63 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %62, float %64, float %62, float %64) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s10 ; BEFC030A s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700 v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701 v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800 v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801 v_interp_p1_f32 v11, v0, 1, 2, [m0] ; C82C0900 v_interp_p2_f32 v11, [v11], v1, 1, 2, [m0] ; C82D0901 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[8:15], s[0:3] ; F0800F00 00020A0A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v6, v10 ; 10001506 v_mul_f32_e32 v1, v7, v11 ; 10021707 v_mul_f32_e32 v6, v8, v12 ; 100C1908 v_mul_f32_e32 v0, v9, v0 ; 10000109 v_mul_f32_e32 v1, v9, v1 ; 10020309 v_mul_f32_e32 v6, v9, v6 ; 100C0D09 v_mul_f32_e32 v7, v9, v13 ; 100E1B09 v_mac_f32_e32 v0, v7, v2 ; 3E000507 v_mac_f32_e32 v1, v7, v3 ; 3E020707 v_mac_f32_e32 v6, v7, v4 ; 3E0C0907 v_mac_f32_e32 v7, v7, v5 ; 3E0E0B07 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v6, v7 ; 5E020F06 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Code Size: 176 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL CONST[0..143] DCL TEMP[0..3], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 1530.0599, 2.1000} IMM[1] FLT32 { 3.1000, 4.1000, 5.1000, 0.1000} IMM[2] FLT32 { 1.1000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: MAD TEMP[1].x, IMM[0].zzzz, IN[0].zzzz, IMM[0].wwww 2: F2I TEMP[1].x, TEMP[1].xxxx 3: UARL ADDR[0].x, TEMP[1].xxxx 4: UARL ADDR[0].x, TEMP[1].xxxx 5: DP4 TEMP[0].x, IN[1], CONST[ADDR[0].x] 6: MAD TEMP[1].x, IMM[0].zzzz, IN[0].zzzz, IMM[1].xxxx 7: F2I TEMP[1].x, TEMP[1].xxxx 8: UARL ADDR[0].x, TEMP[1].xxxx 9: DP4 TEMP[1].x, IN[1], CONST[ADDR[0].x] 10: MOV TEMP[0].y, TEMP[1].xxxx 11: MAD TEMP[1].x, IMM[0].zzzz, IN[0].zzzz, IMM[1].yyyy 12: F2I TEMP[1].x, TEMP[1].xxxx 13: UARL ADDR[0].x, TEMP[1].xxxx 14: UARL ADDR[0].x, TEMP[1].xxxx 15: DP4 TEMP[1].x, IN[1], CONST[ADDR[0].x] 16: MAD TEMP[2].x, IMM[0].zzzz, IN[0].zzzz, IMM[1].zzzz 17: F2I TEMP[2].x, TEMP[2].xxxx 18: UARL ADDR[0].x, TEMP[2].xxxx 19: DP4 TEMP[2].x, IN[1], CONST[ADDR[0].x] 20: MOV TEMP[1].y, TEMP[2].xxxx 21: MAD TEMP[2].x, IMM[0].zzzz, IN[0].zzzz, IMM[1].wwww 22: F2I TEMP[2].x, TEMP[2].xxxx 23: UARL ADDR[0].x, TEMP[2].xxxx 24: MOV TEMP[2], CONST[ADDR[0].x] 25: MAD TEMP[3].x, IMM[0].zzzz, IN[0].zzzz, IMM[2].xxxx 26: F2I TEMP[3].x, TEMP[3].xxxx 27: UARL ADDR[0].x, TEMP[3].xxxx 28: MOV TEMP[3], CONST[ADDR[0].x] 29: MOV TEMP[1].xy, TEMP[1].xyxx 30: MOV OUT[1], IN[0] 31: MOV OUT[2], TEMP[2] 32: MOV OUT[3], TEMP[3] 33: MOV OUT[0], TEMP[0] 34: MOV OUT[4], TEMP[1] 35: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0 %16 = add i32 %5, %8 %17 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %15, i32 0, i32 %16) %18 = extractelement <4 x float> %17, i32 0 %19 = extractelement <4 x float> %17, i32 1 %20 = extractelement <4 x float> %17, i32 2 %21 = extractelement <4 x float> %17, i32 3 %22 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = add i32 %5, %8 %25 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %23, i32 0, i32 %24) %26 = extractelement <4 x float> %25, i32 0 %27 = extractelement <4 x float> %25, i32 1 %28 = extractelement <4 x float> %25, i32 2 %29 = extractelement <4 x float> %25, i32 3 %30 = fmul float %20, 0x4097E83D60000000 %31 = fadd float %30, 0x4000CCCCC0000000 %32 = fptosi float %31 to i32 %33 = shl i32 %32, 4 %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %33) %35 = shl i32 %32, 4 %36 = or i32 %35, 4 %37 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %36) %38 = shl i32 %32, 4 %39 = or i32 %38, 8 %40 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %39) %41 = shl i32 %32, 4 %42 = or i32 %41, 12 %43 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %42) %44 = fmul float %26, %34 %45 = fmul float %27, %37 %46 = fadd float %44, %45 %47 = fmul float %28, %40 %48 = fadd float %46, %47 %49 = fmul float %29, %43 %50 = fadd float %48, %49 %51 = fmul float %20, 0x4097E83D60000000 %52 = fadd float %51, 0x4008CCCCC0000000 %53 = fptosi float %52 to i32 %54 = shl i32 %53, 4 %55 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %54) %56 = shl i32 %53, 4 %57 = or i32 %56, 4 %58 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %57) %59 = shl i32 %53, 4 %60 = or i32 %59, 8 %61 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %60) %62 = shl i32 %53, 4 %63 = or i32 %62, 12 %64 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %63) %65 = fmul float %26, %55 %66 = fmul float %27, %58 %67 = fadd float %65, %66 %68 = fmul float %28, %61 %69 = fadd float %67, %68 %70 = fmul float %29, %64 %71 = fadd float %69, %70 %72 = fmul float %20, 0x4097E83D60000000 %73 = fadd float %72, 0x4010666660000000 %74 = fptosi float %73 to i32 %75 = shl i32 %74, 4 %76 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %75) %77 = shl i32 %74, 4 %78 = or i32 %77, 4 %79 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %78) %80 = shl i32 %74, 4 %81 = or i32 %80, 8 %82 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %81) %83 = shl i32 %74, 4 %84 = or i32 %83, 12 %85 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %84) %86 = fmul float %26, %76 %87 = fmul float %27, %79 %88 = fadd float %86, %87 %89 = fmul float %28, %82 %90 = fadd float %88, %89 %91 = fmul float %29, %85 %92 = fadd float %90, %91 %93 = fmul float %20, 0x4097E83D60000000 %94 = fadd float %93, 0x4014666660000000 %95 = fptosi float %94 to i32 %96 = shl i32 %95, 4 %97 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %96) %98 = shl i32 %95, 4 %99 = or i32 %98, 4 %100 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %99) %101 = shl i32 %95, 4 %102 = or i32 %101, 8 %103 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %102) %104 = shl i32 %95, 4 %105 = or i32 %104, 12 %106 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %105) %107 = fmul float %26, %97 %108 = fmul float %27, %100 %109 = fadd float %107, %108 %110 = fmul float %28, %103 %111 = fadd float %109, %110 %112 = fmul float %29, %106 %113 = fadd float %111, %112 %114 = fmul float %20, 0x4097E83D60000000 %115 = fadd float %114, 0x3FB99999A0000000 %116 = fptosi float %115 to i32 %117 = shl i32 %116, 4 %118 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %117) %119 = shl i32 %116, 4 %120 = or i32 %119, 4 %121 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %120) %122 = shl i32 %116, 4 %123 = or i32 %122, 8 %124 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %123) %125 = shl i32 %116, 4 %126 = or i32 %125, 12 %127 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %126) %128 = fmul float %20, 0x4097E83D60000000 %129 = fadd float %128, 0x3FF19999A0000000 %130 = fptosi float %129 to i32 %131 = shl i32 %130, 4 %132 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %131) %133 = shl i32 %130, 4 %134 = or i32 %133, 4 %135 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %134) %136 = shl i32 %130, 4 %137 = or i32 %136, 8 %138 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %137) %139 = shl i32 %130, 4 %140 = or i32 %139, 12 %141 = call float @llvm.SI.load.const(<16 x i8> %13, i32 %140) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %18, float %19, float %20, float %21) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %118, float %121, float %124, float %127) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %132, float %135, float %138, float %141) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %92, float %113, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %50, float %71, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_mov_b32_e32 v1, 0x44bf41eb ; 7E0202FF 44BF41EB v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 s_waitcnt vmcnt(1) ; BF8C0771 v_madak_f32_e32 v0, v4, v1, 0x40833333 ; 42000304 40833333 v_madak_f32_e32 v10, v4, v1, 0x40a33333 ; 42140304 40A33333 v_madak_f32_e32 v11, v4, v1, 0x40066666 ; 42160304 40066666 v_madak_f32_e32 v12, v4, v1, 0x40466666 ; 42180304 40466666 v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_cvt_i32_f32_e32 v10, v10 ; 7E14110A v_madak_f32_e32 v13, v4, v1, 0x3dcccccd ; 421A0304 3DCCCCCD v_madak_f32_e32 v1, v4, v1, 0x3f8ccccd ; 42020304 3F8CCCCD v_cvt_i32_f32_e32 v11, v11 ; 7E16110B v_cvt_i32_f32_e32 v12, v12 ; 7E18110C v_cvt_i32_f32_e32 v13, v13 ; 7E1A110D v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_lshlrev_b32_e32 v11, 4, v11 ; 34161684 v_lshlrev_b32_e32 v12, 4, v12 ; 34181884 v_lshlrev_b32_e32 v0, 4, v0 ; 34000084 v_lshlrev_b32_e32 v10, 4, v10 ; 34141484 v_lshlrev_b32_e32 v13, 4, v13 ; 341A1A84 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 buffer_load_dword v14, v11, s[0:3], 0 offen ; E0301000 80000E0B v_or_b32_e32 v15, 4, v11 ; 381E1684 v_or_b32_e32 v16, 8, v11 ; 38201688 v_or_b32_e32 v11, 12, v11 ; 3816168C buffer_load_dword v17, v12, s[0:3], 0 offen ; E0301000 8000110C v_or_b32_e32 v18, 4, v12 ; 38241884 v_or_b32_e32 v19, 8, v12 ; 38261888 v_or_b32_e32 v12, 12, v12 ; 3818188C buffer_load_dword v20, v13, s[0:3], 0 offen ; E0301000 8000140D v_or_b32_e32 v21, 4, v13 ; 382A1A84 v_or_b32_e32 v22, 8, v13 ; 382C1A88 v_or_b32_e32 v13, 12, v13 ; 381A1A8C buffer_load_dword v23, v1, s[0:3], 0 offen ; E0301000 80001701 v_or_b32_e32 v24, 4, v1 ; 38300284 v_or_b32_e32 v25, 8, v1 ; 38320288 v_or_b32_e32 v1, 12, v1 ; 3802028C buffer_load_dword v15, v15, s[0:3], 0 offen ; E0301000 80000F0F buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 buffer_load_dword v22, v22, s[0:3], 0 offen ; E0301000 80001616 buffer_load_dword v13, v13, s[0:3], 0 offen ; E0301000 80000D0D buffer_load_dword v24, v24, s[0:3], 0 offen ; E0301000 80001818 buffer_load_dword v25, v25, s[0:3], 0 offen ; E0301000 80001919 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 buffer_load_dword v26, v0, s[0:3], 0 offen ; E0301000 80001A00 v_or_b32_e32 v27, 4, v0 ; 38360084 v_or_b32_e32 v28, 8, v0 ; 38380088 v_or_b32_e32 v0, 12, v0 ; 3800008C buffer_load_dword v18, v18, s[0:3], 0 offen ; E0301000 80001212 buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 buffer_load_dword v27, v27, s[0:3], 0 offen ; E0301000 80001B1B v_or_b32_e32 v29, 4, v10 ; 383A1484 buffer_load_dword v29, v29, s[0:3], 0 offen ; E0301000 80001D1D buffer_load_dword v30, v10, s[0:3], 0 offen ; E0301000 80001E0A v_or_b32_e32 v31, 8, v10 ; 383E1488 v_or_b32_e32 v10, 12, v10 ; 3814148C buffer_load_dword v19, v19, s[0:3], 0 offen ; E0301000 80001313 buffer_load_dword v28, v28, s[0:3], 0 offen ; E0301000 80001C1C buffer_load_dword v31, v31, s[0:3], 0 offen ; E0301000 80001F1F buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A exp 15, 32, 0, 0, 0, v2, v3, v4, v5 ; F800020F 05040302 s_waitcnt ; BF8C077F exp 15, 33, 0, 0, 0, v20, v21, v22, v13 ; F800021F 0D161514 s_waitcnt vmcnt(13) ; BF8C077D exp 15, 34, 0, 0, 0, v23, v24, v25, v1 ; F800022F 01191817 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v1, v15, v7 ; 10020F0F v_mac_f32_e32 v1, v14, v6 ; 3E020D0E s_waitcnt vmcnt(11) ; BF8C077B v_mul_f32_e32 v2, v18, v7 ; 10040F12 v_mac_f32_e32 v2, v17, v6 ; 3E040D11 s_waitcnt vmcnt(9) ; BF8C0779 v_mul_f32_e32 v3, v27, v7 ; 10060F1B v_mac_f32_e32 v3, v26, v6 ; 3E060D1A s_waitcnt vmcnt(8) ; BF8C0778 v_mul_f32_e32 v4, v29, v7 ; 10080F1D s_waitcnt vmcnt(7) ; BF8C0777 v_mac_f32_e32 v4, v30, v6 ; 3E080D1E v_mac_f32_e32 v1, v16, v8 ; 3E021110 v_mov_b32_e32 v5, 1.0 ; 7E0A02F2 s_waitcnt vmcnt(6) ; BF8C0776 v_mac_f32_e32 v2, v19, v8 ; 3E041113 s_waitcnt vmcnt(5) ; BF8C0775 v_mac_f32_e32 v3, v28, v8 ; 3E06111C s_waitcnt vmcnt(4) ; BF8C0774 v_mac_f32_e32 v4, v31, v8 ; 3E08111F s_waitcnt vmcnt(3) ; BF8C0773 v_mac_f32_e32 v1, v11, v9 ; 3E02130B s_waitcnt vmcnt(2) ; BF8C0772 v_mac_f32_e32 v2, v12, v9 ; 3E04130C s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v3, v0, v9 ; 3E061300 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v4, v10, v9 ; 3E08130A exp 15, 35, 0, 0, 0, v3, v4, v0, v0 ; F800023F 00000403 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 12, 0, 1, 0, v1, v2, v0, v5 ; F80008CF 05000201 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 32 Code Size: 584 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[3].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MAD TEMP[0], TEMP[0], IN[2], IN[1] 3: MUL TEMP[1].x, TEMP[0].wwww, IN[0].wwww 4: MOV TEMP[0].w, TEMP[1].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %28 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %29 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %30 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) %31 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %32 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) %33 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) %34 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) %35 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) %36 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) %37 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) %38 = bitcast float %36 to i32 %39 = bitcast float %37 to i32 %40 = insertelement <2 x i32> undef, i32 %38, i32 0 %41 = insertelement <2 x i32> %40, i32 %39, i32 1 %42 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %41, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = extractelement <4 x float> %42, i32 2 %46 = extractelement <4 x float> %42, i32 3 %47 = fmul float %43, %32 %48 = fadd float %47, %28 %49 = fmul float %44, %33 %50 = fadd float %49, %29 %51 = fmul float %45, %34 %52 = fadd float %51, %30 %53 = fmul float %46, %35 %54 = fadd float %53, %31 %55 = fmul float %54, %27 %56 = call i32 @llvm.SI.packf16(float %48, float %50) %57 = bitcast i32 %56 to float %58 = call i32 @llvm.SI.packf16(float %52, float %55) %59 = bitcast i32 %58 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %57, float %59, float %57, float %59) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s10 ; BEFC030A s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 v_interp_p1_f32 v5, v0, 2, 1, [m0] ; C8140600 v_interp_p2_f32 v5, [v5], v1, 2, 1, [m0] ; C8150601 v_interp_p1_f32 v6, v0, 3, 1, [m0] ; C8180700 v_interp_p2_f32 v6, [v6], v1, 3, 1, [m0] ; C8190701 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v10, v0, 3, 2, [m0] ; C8280B00 v_interp_p2_f32 v10, [v10], v1, 3, 2, [m0] ; C8290B01 v_interp_p1_f32 v11, v0, 0, 3, [m0] ; C82C0C00 v_interp_p2_f32 v11, [v11], v1, 0, 3, [m0] ; C82D0C01 v_interp_p1_f32 v12, v0, 1, 3, [m0] ; C8300D00 v_interp_p2_f32 v12, [v12], v1, 1, 3, [m0] ; C8310D01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[11:14], 15, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[8:15], s[0:3] ; F0800F00 00020B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v3, v7, v11 ; 3E061707 v_mac_f32_e32 v4, v8, v12 ; 3E081908 v_mac_f32_e32 v5, v9, v13 ; 3E0A1B09 v_mac_f32_e32 v6, v10, v14 ; 3E0C1D0A v_mul_f32_e32 v0, v2, v6 ; 10000D02 v_cvt_pkrtz_f16_f32_e32 v1, v3, v4 ; 5E020903 v_cvt_pkrtz_f16_f32_e32 v0, v5, v0 ; 5E000105 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Code Size: 160 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL CONST[0..3] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[2], CONST[0] 2: DP4 TEMP[1].x, IN[2], CONST[1] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[2], CONST[2] 5: DP4 TEMP[2].x, IN[2], CONST[3] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[2], IN[1] 9: MOV OUT[1], IN[0] 10: MOV OUT[0], TEMP[0] 11: MOV OUT[3], TEMP[1] 12: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 12) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 16) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 20) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 24) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 28) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 32) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 36) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 40) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 44) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 48) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 52) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 56) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 60) %30 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = add i32 %5, %8 %33 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %31, i32 0, i32 %32) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 %38 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = add i32 %5, %8 %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %40) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 %48 = add i32 %5, %8 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = extractelement <4 x float> %49, i32 2 %53 = extractelement <4 x float> %49, i32 3 %54 = fmul float %50, %14 %55 = fmul float %51, %15 %56 = fadd float %54, %55 %57 = fmul float %52, %16 %58 = fadd float %56, %57 %59 = fmul float %53, %17 %60 = fadd float %58, %59 %61 = fmul float %50, %18 %62 = fmul float %51, %19 %63 = fadd float %61, %62 %64 = fmul float %52, %20 %65 = fadd float %63, %64 %66 = fmul float %53, %21 %67 = fadd float %65, %66 %68 = fmul float %50, %22 %69 = fmul float %51, %23 %70 = fadd float %68, %69 %71 = fmul float %52, %24 %72 = fadd float %70, %71 %73 = fmul float %53, %25 %74 = fadd float %72, %73 %75 = fmul float %50, %26 %76 = fmul float %51, %27 %77 = fadd float %75, %76 %78 = fmul float %52, %28 %79 = fadd float %77, %78 %80 = fmul float %53, %29 %81 = fadd float %79, %80 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %34, float %35, float %36, float %37) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %42, float %43, float %44, float %45) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %74, float %81, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %60, float %67, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A v_mov_b32_e32 v1, 1.0 ; 7E0202F2 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105 s_buffer_load_dword s9, s[0:3], 0x6 ; C2048106 s_buffer_load_dword s10, s[0:3], 0x7 ; C2050107 s_buffer_load_dword s11, s[0:3], 0x8 ; C2058108 s_buffer_load_dword s20, s[0:3], 0x9 ; C20A0109 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 buffer_load_format_xyzw v[10:13], v0, s[16:19], 0 idxen ; E00C2000 80040A00 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_buffer_load_dword s5, s[0:3], 0xb ; C202810B s_buffer_load_dword s6, s[0:3], 0xc ; C203010C s_buffer_load_dword s7, s[0:3], 0xd ; C203810D s_buffer_load_dword s12, s[0:3], 0xe ; C206010E s_buffer_load_dword s13, s[0:3], 0x0 ; C2068100 s_buffer_load_dword s14, s[0:3], 0x1 ; C2070101 s_buffer_load_dword s15, s[0:3], 0x2 ; C2078102 s_buffer_load_dword s16, s[0:3], 0x3 ; C2080103 s_buffer_load_dword s17, s[0:3], 0x4 ; C2088104 s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(2) ; BF8C0772 exp 15, 32, 0, 0, 0, v2, v3, v4, v5 ; F800020F 05040302 s_waitcnt vmcnt(1) ; BF8C0771 exp 15, 33, 0, 0, 0, v6, v7, v8, v9 ; F800021F 09080706 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s14, v11 ; 1000160E s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v2, s8, v11 ; 10041608 v_mul_f32_e32 v3, s20, v11 ; 10061614 v_mul_f32_e32 v4, s7, v11 ; 10081607 v_mac_f32_e32 v0, s13, v10 ; 3E00140D v_mac_f32_e32 v2, s17, v10 ; 3E041411 v_mac_f32_e32 v3, s11, v10 ; 3E06140B v_mac_f32_e32 v4, s6, v10 ; 3E081406 v_mac_f32_e32 v0, s15, v12 ; 3E00180F v_mac_f32_e32 v2, s9, v12 ; 3E041809 v_mac_f32_e32 v3, s4, v12 ; 3E061804 v_mac_f32_e32 v4, s12, v12 ; 3E08180C v_mac_f32_e32 v0, s16, v13 ; 3E001A10 v_mac_f32_e32 v2, s10, v13 ; 3E041A0A v_mac_f32_e32 v3, s5, v13 ; 3E061A05 v_mac_f32_e32 v4, s0, v13 ; 3E081A00 exp 15, 34, 0, 0, 0, v3, v4, v0, v0 ; F800022F 00000403 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v3, 0 ; 7E060280 exp 15, 12, 0, 1, 0, v0, v2, v3, v1 ; F80008CF 01030200 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Code Size: 240 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[2].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: LRP TEMP[0], IN[1].xxxx, TEMP[0], IN[0] 3: MOV TEMP[1].xyz, TEMP[0].xyzx 4: MUL TEMP[0].x, TEMP[0].wwww, IN[1].wwww 5: MOV TEMP[1].w, TEMP[0].xxxx 6: MOV OUT[0], TEMP[1] 7: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %29 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %30 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %31 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %32 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %33 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) %34 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) %35 = bitcast float %33 to i32 %36 = bitcast float %34 to i32 %37 = insertelement <2 x i32> undef, i32 %35, i32 0 %38 = insertelement <2 x i32> %37, i32 %36, i32 1 %39 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %38, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %40 = extractelement <4 x float> %39, i32 0 %41 = extractelement <4 x float> %39, i32 1 %42 = extractelement <4 x float> %39, i32 2 %43 = extractelement <4 x float> %39, i32 3 %44 = fsub float 1.000000e+00, %31 %45 = fmul float %40, %31 %46 = fmul float %27, %44 %47 = fadd float %45, %46 %48 = fsub float 1.000000e+00, %31 %49 = fmul float %41, %31 %50 = fmul float %28, %48 %51 = fadd float %49, %50 %52 = fsub float 1.000000e+00, %31 %53 = fmul float %42, %31 %54 = fmul float %29, %52 %55 = fadd float %53, %54 %56 = fsub float 1.000000e+00, %31 %57 = fmul float %43, %31 %58 = fmul float %30, %56 %59 = fadd float %57, %58 %60 = fmul float %59, %32 %61 = call i32 @llvm.SI.packf16(float %47, float %51) %62 = bitcast i32 %61 to float %63 = call i32 @llvm.SI.packf16(float %55, float %60) %64 = bitcast i32 %63 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %62, float %64, float %62, float %64) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s10 ; BEFC030A s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 3, 1, [m0] ; C81C0700 v_interp_p2_f32 v7, [v7], v1, 3, 1, [m0] ; C81D0701 v_interp_p1_f32 v8, v0, 0, 2, [m0] ; C8200800 v_interp_p2_f32 v8, [v8], v1, 0, 2, [m0] ; C8210801 v_interp_p1_f32 v9, v0, 1, 2, [m0] ; C8240900 v_interp_p2_f32 v9, [v9], v1, 1, 2, [m0] ; C8250901 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[8:15], s[0:3] ; F0800F00 00020808 v_mad_f32 v0, -v6, v2, v2 ; D2820000 240A0506 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v0, v6, v8 ; 3E001106 v_mad_f32 v1, -v6, v3, v3 ; D2820001 240E0706 v_mac_f32_e32 v1, v6, v9 ; 3E021306 v_mad_f32 v2, -v6, v4, v4 ; D2820002 24120906 v_mac_f32_e32 v2, v6, v10 ; 3E041506 v_mad_f32 v3, -v6, v5, v5 ; D2820003 24160B06 v_mac_f32_e32 v3, v6, v11 ; 3E061706 v_mul_f32_e32 v3, v7, v3 ; 10060707 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 12 Code Size: 168 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL OUT[5], GENERIC[4] DCL CONST[0..5] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[2], CONST[2] 2: DP4 TEMP[1].x, IN[2], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[2], CONST[4] 5: DP4 TEMP[2].x, IN[2], CONST[5] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[2], IN[1] 9: MOV OUT[3], CONST[0] 10: MOV OUT[1], IN[0] 11: MOV OUT[4], CONST[1] 12: MOV OUT[0], TEMP[0] 13: MOV OUT[5], TEMP[1] 14: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 12) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 16) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 20) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 24) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 28) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 32) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 36) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 40) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 44) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 48) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 52) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 56) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 60) %30 = call float @llvm.SI.load.const(<16 x i8> %13, i32 64) %31 = call float @llvm.SI.load.const(<16 x i8> %13, i32 68) %32 = call float @llvm.SI.load.const(<16 x i8> %13, i32 72) %33 = call float @llvm.SI.load.const(<16 x i8> %13, i32 76) %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 80) %35 = call float @llvm.SI.load.const(<16 x i8> %13, i32 84) %36 = call float @llvm.SI.load.const(<16 x i8> %13, i32 88) %37 = call float @llvm.SI.load.const(<16 x i8> %13, i32 92) %38 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = add i32 %5, %8 %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %40) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 %48 = add i32 %5, %8 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = extractelement <4 x float> %49, i32 2 %53 = extractelement <4 x float> %49, i32 3 %54 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0 %56 = add i32 %5, %8 %57 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %56) %58 = extractelement <4 x float> %57, i32 0 %59 = extractelement <4 x float> %57, i32 1 %60 = extractelement <4 x float> %57, i32 2 %61 = extractelement <4 x float> %57, i32 3 %62 = fmul float %58, %22 %63 = fmul float %59, %23 %64 = fadd float %62, %63 %65 = fmul float %60, %24 %66 = fadd float %64, %65 %67 = fmul float %61, %25 %68 = fadd float %66, %67 %69 = fmul float %58, %26 %70 = fmul float %59, %27 %71 = fadd float %69, %70 %72 = fmul float %60, %28 %73 = fadd float %71, %72 %74 = fmul float %61, %29 %75 = fadd float %73, %74 %76 = fmul float %58, %30 %77 = fmul float %59, %31 %78 = fadd float %76, %77 %79 = fmul float %60, %32 %80 = fadd float %78, %79 %81 = fmul float %61, %33 %82 = fadd float %80, %81 %83 = fmul float %58, %34 %84 = fmul float %59, %35 %85 = fadd float %83, %84 %86 = fmul float %60, %36 %87 = fadd float %85, %86 %88 = fmul float %61, %37 %89 = fadd float %87, %88 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %42, float %43, float %44, float %45) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %50, float %51, float %52, float %53) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %14, float %15, float %16, float %17) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %18, float %19, float %20, float %21) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %82, float %89, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %68, float %75, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A v_mov_b32_e32 v1, 1.0 ; 7E0202F2 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0xf ; C204010F s_buffer_load_dword s9, s[0:3], 0x10 ; C2048110 s_buffer_load_dword s10, s[0:3], 0x11 ; C2050111 s_buffer_load_dword s11, s[0:3], 0x12 ; C2058112 s_buffer_load_dword s20, s[0:3], 0x13 ; C20A0113 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 buffer_load_format_xyzw v[10:13], v0, s[16:19], 0 idxen ; E00C2000 80040A00 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_buffer_load_dword s5, s[0:3], 0x15 ; C2028115 s_buffer_load_dword s6, s[0:3], 0x16 ; C2030116 s_buffer_load_dword s7, s[0:3], 0x17 ; C2038117 s_buffer_load_dword s12, s[0:3], 0x5 ; C2060105 s_buffer_load_dword s13, s[0:3], 0x6 ; C2068106 s_buffer_load_dword s14, s[0:3], 0x7 ; C2070107 s_buffer_load_dword s15, s[0:3], 0x8 ; C2078108 s_buffer_load_dword s16, s[0:3], 0x9 ; C2080109 s_buffer_load_dword s17, s[0:3], 0xa ; C208810A s_buffer_load_dword s18, s[0:3], 0xb ; C209010B s_buffer_load_dword s19, s[0:3], 0xc ; C209810C s_buffer_load_dword s21, s[0:3], 0xd ; C20A810D s_buffer_load_dword s22, s[0:3], 0xe ; C20B010E s_buffer_load_dword s23, s[0:3], 0x0 ; C20B8100 s_buffer_load_dword s24, s[0:3], 0x1 ; C20C0101 s_buffer_load_dword s25, s[0:3], 0x2 ; C20C8102 s_buffer_load_dword s26, s[0:3], 0x3 ; C20D0103 s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s12 ; 7E00020C v_mov_b32_e32 v14, s23 ; 7E1C0217 v_mov_b32_e32 v15, s24 ; 7E1E0218 v_mov_b32_e32 v16, s25 ; 7E200219 v_mov_b32_e32 v17, s26 ; 7E22021A v_mov_b32_e32 v18, s0 ; 7E240200 s_waitcnt vmcnt(2) ; BF8C0772 exp 15, 32, 0, 0, 0, v2, v3, v4, v5 ; F800020F 05040302 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v2, s13 ; 7E04020D s_waitcnt vmcnt(1) ; BF8C0771 exp 15, 33, 0, 0, 0, v6, v7, v8, v9 ; F800021F 09080706 v_mov_b32_e32 v3, s14 ; 7E06020E s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v4, s16, v11 ; 10081610 v_mul_f32_e32 v5, s21, v11 ; 100A1615 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v6, s10, v11 ; 100C160A exp 15, 34, 0, 0, 0, v14, v15, v16, v17 ; F800022F 11100F0E v_mul_f32_e32 v7, s5, v11 ; 100E1605 v_mac_f32_e32 v4, s15, v10 ; 3E08140F v_mac_f32_e32 v5, s19, v10 ; 3E0A1413 v_mac_f32_e32 v6, s9, v10 ; 3E0C1409 v_mac_f32_e32 v7, s4, v10 ; 3E0E1404 v_mac_f32_e32 v4, s17, v12 ; 3E081811 v_mac_f32_e32 v5, s22, v12 ; 3E0A1816 v_mac_f32_e32 v6, s11, v12 ; 3E0C180B v_mac_f32_e32 v7, s6, v12 ; 3E0E1806 v_mac_f32_e32 v4, s18, v13 ; 3E081A12 v_mac_f32_e32 v5, s8, v13 ; 3E0A1A08 v_mac_f32_e32 v6, s20, v13 ; 3E0C1A14 v_mac_f32_e32 v7, s7, v13 ; 3E0E1A07 exp 15, 35, 0, 0, 0, v18, v0, v2, v3 ; F800023F 03020012 exp 15, 36, 0, 0, 0, v6, v7, v0, v0 ; F800024F 00000706 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 12, 0, 1, 0, v4, v5, v0, v1 ; F80008CF 01000504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 20 Code Size: 328 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[4], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[4].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: LRP TEMP[0], IN[1].xxxx, TEMP[0], IN[0] 3: MAD TEMP[0], TEMP[0], IN[3], IN[2] 4: MUL TEMP[1].x, TEMP[0].wwww, IN[1].wwww 5: MOV TEMP[0].w, TEMP[1].xxxx 6: MOV OUT[0], TEMP[0] 7: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %29 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %30 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %31 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %32 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %33 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) %34 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) %35 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) %36 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) %37 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) %38 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) %39 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) %40 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %6, <2 x i32> %8) %41 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, <2 x i32> %8) %42 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) %43 = bitcast float %41 to i32 %44 = bitcast float %42 to i32 %45 = insertelement <2 x i32> undef, i32 %43, i32 0 %46 = insertelement <2 x i32> %45, i32 %44, i32 1 %47 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %46, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %48 = extractelement <4 x float> %47, i32 0 %49 = extractelement <4 x float> %47, i32 1 %50 = extractelement <4 x float> %47, i32 2 %51 = extractelement <4 x float> %47, i32 3 %52 = fsub float 1.000000e+00, %31 %53 = fmul float %48, %31 %54 = fmul float %27, %52 %55 = fadd float %53, %54 %56 = fsub float 1.000000e+00, %31 %57 = fmul float %49, %31 %58 = fmul float %28, %56 %59 = fadd float %57, %58 %60 = fsub float 1.000000e+00, %31 %61 = fmul float %50, %31 %62 = fmul float %29, %60 %63 = fadd float %61, %62 %64 = fsub float 1.000000e+00, %31 %65 = fmul float %51, %31 %66 = fmul float %30, %64 %67 = fadd float %65, %66 %68 = fmul float %55, %37 %69 = fadd float %68, %33 %70 = fmul float %59, %38 %71 = fadd float %70, %34 %72 = fmul float %63, %39 %73 = fadd float %72, %35 %74 = fmul float %67, %40 %75 = fadd float %74, %36 %76 = fmul float %75, %32 %77 = call i32 @llvm.SI.packf16(float %69, float %71) %78 = bitcast i32 %77 to float %79 = call i32 @llvm.SI.packf16(float %73, float %76) %80 = bitcast i32 %79 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %78, float %80, float %78, float %80) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s10 ; BEFC030A s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 3, 1, [m0] ; C81C0700 v_interp_p2_f32 v7, [v7], v1, 3, 1, [m0] ; C81D0701 v_interp_p1_f32 v8, v0, 0, 2, [m0] ; C8200800 v_interp_p2_f32 v8, [v8], v1, 0, 2, [m0] ; C8210801 v_interp_p1_f32 v9, v0, 1, 2, [m0] ; C8240900 v_interp_p2_f32 v9, [v9], v1, 1, 2, [m0] ; C8250901 v_interp_p1_f32 v10, v0, 2, 2, [m0] ; C8280A00 v_interp_p2_f32 v10, [v10], v1, 2, 2, [m0] ; C8290A01 v_interp_p1_f32 v11, v0, 3, 2, [m0] ; C82C0B00 v_interp_p2_f32 v11, [v11], v1, 3, 2, [m0] ; C82D0B01 v_interp_p1_f32 v12, v0, 0, 3, [m0] ; C8300C00 v_interp_p2_f32 v12, [v12], v1, 0, 3, [m0] ; C8310C01 v_interp_p1_f32 v13, v0, 1, 3, [m0] ; C8340D00 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v13, [v13], v1, 1, 3, [m0] ; C8350D01 v_interp_p1_f32 v14, v0, 2, 3, [m0] ; C8380E00 v_interp_p2_f32 v14, [v14], v1, 2, 3, [m0] ; C8390E01 v_interp_p1_f32 v15, v0, 3, 3, [m0] ; C83C0F00 v_interp_p2_f32 v15, [v15], v1, 3, 3, [m0] ; C83D0F01 v_interp_p1_f32 v16, v0, 0, 4, [m0] ; C8401000 v_interp_p2_f32 v16, [v16], v1, 0, 4, [m0] ; C8411001 v_interp_p1_f32 v17, v0, 1, 4, [m0] ; C8441100 v_interp_p2_f32 v17, [v17], v1, 1, 4, [m0] ; C8451101 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[16:19], 15, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[8:15], s[0:3] ; F0800F00 00021010 v_mad_f32 v0, -v6, v2, v2 ; D2820000 240A0506 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v0, v6, v16 ; 3E002106 v_mad_f32 v1, -v6, v3, v3 ; D2820001 240E0706 v_mac_f32_e32 v1, v6, v17 ; 3E022306 v_mad_f32 v2, -v6, v4, v4 ; D2820002 24120906 v_mac_f32_e32 v2, v6, v18 ; 3E042506 v_mad_f32 v3, -v6, v5, v5 ; D2820003 24160B06 v_mac_f32_e32 v3, v6, v19 ; 3E062706 v_mac_f32_e32 v8, v12, v0 ; 3E10010C v_mac_f32_e32 v9, v13, v1 ; 3E12030D v_mac_f32_e32 v10, v14, v2 ; 3E14050E v_mac_f32_e32 v11, v15, v3 ; 3E16070F v_mul_f32_e32 v0, v7, v11 ; 10001707 v_cvt_pkrtz_f16_f32_e32 v1, v8, v9 ; 5E021308 v_cvt_pkrtz_f16_f32_e32 v0, v10, v0 ; 5E00010A exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 20 Code Size: 248 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL CONST[0..3] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[1], CONST[2] 2: DP4 TEMP[1].x, IN[1], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: MOV TEMP[1].xy, IN[0].xyxx 5: MOV OUT[1], CONST[0] 6: MOV OUT[2], CONST[1] 7: MOV OUT[0], TEMP[0] 8: MOV OUT[3], TEMP[1] 9: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 12) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 16) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 20) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 24) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 28) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 32) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 36) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 40) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 44) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 48) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 52) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 56) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 60) %30 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = add i32 %5, %8 %33 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %31, i32 0, i32 %32) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %37 = load <16 x i8>, <16 x i8> addrspace(2)* %36, align 16, !tbaa !0 %38 = add i32 %5, %8 %39 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %37, i32 0, i32 %38) %40 = extractelement <4 x float> %39, i32 0 %41 = extractelement <4 x float> %39, i32 1 %42 = extractelement <4 x float> %39, i32 2 %43 = extractelement <4 x float> %39, i32 3 %44 = fmul float %40, %22 %45 = fmul float %41, %23 %46 = fadd float %44, %45 %47 = fmul float %42, %24 %48 = fadd float %46, %47 %49 = fmul float %43, %25 %50 = fadd float %48, %49 %51 = fmul float %40, %26 %52 = fmul float %41, %27 %53 = fadd float %51, %52 %54 = fmul float %42, %28 %55 = fadd float %53, %54 %56 = fmul float %43, %29 %57 = fadd float %55, %56 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %14, float %15, float %16, float %17) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %18, float %19, float %20, float %21) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %34, float %35, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %50, float %57, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A v_mov_b32_e32 v1, 1.0 ; 7E0202F2 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105 s_buffer_load_dword s9, s[0:3], 0x6 ; C2048106 s_buffer_load_dword s10, s[0:3], 0x7 ; C2050107 s_buffer_load_dword s11, s[0:3], 0x8 ; C2058108 s_buffer_load_dword s16, s[0:3], 0x9 ; C2080109 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[4:7], v0, s[12:15], 0 idxen ; E00C2000 80030400 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_buffer_load_dword s5, s[0:3], 0xb ; C202810B s_buffer_load_dword s6, s[0:3], 0xc ; C203010C s_buffer_load_dword s7, s[0:3], 0xd ; C203810D s_buffer_load_dword s12, s[0:3], 0xe ; C206010E s_buffer_load_dword s13, s[0:3], 0x0 ; C2068100 s_buffer_load_dword s14, s[0:3], 0x1 ; C2070101 s_buffer_load_dword s15, s[0:3], 0x2 ; C2078102 s_buffer_load_dword s17, s[0:3], 0x3 ; C2088103 s_buffer_load_dword s18, s[0:3], 0x4 ; C2090104 s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s8 ; 7E000208 v_mov_b32_e32 v8, s13 ; 7E10020D v_mov_b32_e32 v9, s14 ; 7E12020E v_mov_b32_e32 v10, s15 ; 7E14020F v_mov_b32_e32 v11, s17 ; 7E160211 exp 15, 32, 0, 0, 0, v8, v9, v10, v11 ; F800020F 0B0A0908 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v8, s18 ; 7E100212 v_mov_b32_e32 v9, s9 ; 7E120209 v_mov_b32_e32 v10, s10 ; 7E14020A exp 15, 33, 0, 0, 0, v8, v0, v9, v10 ; F800021F 0A090008 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mul_f32_e32 v0, s16, v5 ; 10000A10 exp 15, 34, 0, 0, 0, v2, v3, v0, v0 ; F800022F 00000302 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v2, s7, v5 ; 10040A07 v_mac_f32_e32 v0, s11, v4 ; 3E00080B v_mac_f32_e32 v2, s6, v4 ; 3E040806 v_mac_f32_e32 v0, s4, v6 ; 3E000C04 v_mac_f32_e32 v2, s12, v6 ; 3E040C0C v_mac_f32_e32 v0, s5, v7 ; 3E000E05 v_mac_f32_e32 v2, s0, v7 ; 3E040E00 v_mov_b32_e32 v3, 0 ; 7E060280 exp 15, 12, 0, 1, 0, v0, v2, v3, v1 ; F80008CF 01030200 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 228 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0], LOCAL 0: MOV TEMP[0].xy, IN[2].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MAD TEMP[0], TEMP[0], IN[1], IN[0] 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %29 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %30 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %31 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %32 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %33 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) %34 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %35 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) %36 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) %37 = bitcast float %35 to i32 %38 = bitcast float %36 to i32 %39 = insertelement <2 x i32> undef, i32 %37, i32 0 %40 = insertelement <2 x i32> %39, i32 %38, i32 1 %41 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %40, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = fmul float %42, %31 %47 = fadd float %46, %27 %48 = fmul float %43, %32 %49 = fadd float %48, %28 %50 = fmul float %44, %33 %51 = fadd float %50, %29 %52 = fmul float %45, %34 %53 = fadd float %52, %30 %54 = call i32 @llvm.SI.packf16(float %47, float %49) %55 = bitcast i32 %54 to float %56 = call i32 @llvm.SI.packf16(float %51, float %53) %57 = bitcast i32 %56 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %55, float %57, float %55, float %57) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s10 ; BEFC030A s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700 v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701 v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800 v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801 v_interp_p1_f32 v11, v0, 1, 2, [m0] ; C82C0900 v_interp_p2_f32 v11, [v11], v1, 1, 2, [m0] ; C82D0901 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[8:15], s[0:3] ; F0800F00 00020A0A s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v2, v6, v10 ; 3E041506 v_mac_f32_e32 v3, v7, v11 ; 3E061707 v_mac_f32_e32 v4, v8, v12 ; 3E081908 v_mac_f32_e32 v5, v9, v13 ; 3E0A1B09 v_cvt_pkrtz_f16_f32_e32 v0, v2, v3 ; 5E000702 v_cvt_pkrtz_f16_f32_e32 v1, v4, v5 ; 5E020B04 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Code Size: 148 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x0 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG 0: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: call void @llvm.SI.export(i32 0, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) ret void } declare void @llvm.SI.export(i32, i32, i32, i32, i32, i32, i32, i32, i32) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } Shader Disassembly: v_mov_b32_e32 v0, 0 ; 7E000280 exp 0, 0, 0, 1, 1, v0, v0, v0, v0 ; F8001800 00000000 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 8 VGPRS: 4 Code Size: 16 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** Looking up breakpad interfaces from steamclient Calling BreakpadMiniDumpSystemInit Steam_SetMinidumpSteamID: Caching Steam ID: 76561197960306978 [API loaded yes] Steam_SetMinidumpSteamID: Setting Steam ID: 76561197960306978 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL CONST[0..5] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[0], CONST[2] 2: DP4 TEMP[1].x, IN[0], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[0], CONST[4] 5: DP4 TEMP[2].x, IN[0], CONST[5] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[1], CONST[0] 9: MOV OUT[2], CONST[1] 10: MOV OUT[0], TEMP[0] 11: MOV OUT[3], TEMP[1] 12: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 12) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 16) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 20) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 24) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 28) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 32) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 36) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 40) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 44) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 48) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 52) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 56) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 60) %30 = call float @llvm.SI.load.const(<16 x i8> %13, i32 64) %31 = call float @llvm.SI.load.const(<16 x i8> %13, i32 68) %32 = call float @llvm.SI.load.const(<16 x i8> %13, i32 72) %33 = call float @llvm.SI.load.const(<16 x i8> %13, i32 76) %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 80) %35 = call float @llvm.SI.load.const(<16 x i8> %13, i32 84) %36 = call float @llvm.SI.load.const(<16 x i8> %13, i32 88) %37 = call float @llvm.SI.load.const(<16 x i8> %13, i32 92) %38 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = add i32 %5, %8 %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %40) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = fmul float %42, %22 %47 = fmul float %43, %23 %48 = fadd float %46, %47 %49 = fmul float %44, %24 %50 = fadd float %48, %49 %51 = fmul float %45, %25 %52 = fadd float %50, %51 %53 = fmul float %42, %26 %54 = fmul float %43, %27 %55 = fadd float %53, %54 %56 = fmul float %44, %28 %57 = fadd float %55, %56 %58 = fmul float %45, %29 %59 = fadd float %57, %58 %60 = fmul float %42, %30 %61 = fmul float %43, %31 %62 = fadd float %60, %61 %63 = fmul float %44, %32 %64 = fadd float %62, %63 %65 = fmul float %45, %33 %66 = fadd float %64, %65 %67 = fmul float %42, %34 %68 = fmul float %43, %35 %69 = fadd float %67, %68 %70 = fmul float %44, %36 %71 = fadd float %69, %70 %72 = fmul float %45, %37 %73 = fadd float %71, %72 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %14, float %15, float %16, float %17) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %18, float %19, float %20, float %21) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %66, float %73, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %52, float %59, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s19, s[0:3], 0xf ; C209810F s_buffer_load_dword s20, s[0:3], 0x10 ; C20A0110 s_buffer_load_dword s21, s[0:3], 0x11 ; C20A8111 s_buffer_load_dword s22, s[0:3], 0x14 ; C20B0114 s_buffer_load_dword s23, s[0:3], 0x15 ; C20B8115 s_buffer_load_dword s24, s[0:3], 0x12 ; C20C0112 s_buffer_load_dword s25, s[0:3], 0x13 ; C20C8113 s_buffer_load_dword s26, s[0:3], 0x16 ; C20D0116 s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s4 ; 7E080204 v_mov_b32_e32 v5, s5 ; 7E0A0205 v_mov_b32_e32 v6, s6 ; 7E0C0206 v_mov_b32_e32 v7, s7 ; 7E0E0207 exp 15, 32, 0, 0, 0, v4, v5, v6, v7 ; F800020F 07060504 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v4, s8 ; 7E080208 v_mov_b32_e32 v5, s9 ; 7E0A0209 v_mov_b32_e32 v6, s10 ; 7E0C020A v_mov_b32_e32 v7, s11 ; 7E0E020B exp 15, 33, 0, 0, 0, v4, v5, v6, v7 ; F800021F 07060504 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mul_f32_e32 v4, s13, v1 ; 1008020D v_mul_f32_e32 v5, s17, v1 ; 100A0211 v_mul_f32_e32 v6, s21, v1 ; 100C0215 v_mul_f32_e32 v1, s23, v1 ; 10020217 v_mac_f32_e32 v4, s12, v0 ; 3E08000C v_mac_f32_e32 v5, s16, v0 ; 3E0A0010 v_mac_f32_e32 v6, s20, v0 ; 3E0C0014 v_mac_f32_e32 v1, s22, v0 ; 3E020016 v_mac_f32_e32 v4, s14, v2 ; 3E08040E v_mac_f32_e32 v5, s18, v2 ; 3E0A0412 v_mac_f32_e32 v6, s24, v2 ; 3E0C0418 v_mac_f32_e32 v1, s26, v2 ; 3E02041A v_mac_f32_e32 v4, s15, v3 ; 3E08060F v_mac_f32_e32 v5, s19, v3 ; 3E0A0613 v_mac_f32_e32 v6, s25, v3 ; 3E0C0619 v_mac_f32_e32 v1, s0, v3 ; 3E020600 v_mov_b32_e32 v0, 1.0 ; 7E0002F2 exp 15, 34, 0, 0, 0, v6, v1, v0, v0 ; F800022F 00000106 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 0 ; 7E020280 exp 15, 12, 0, 1, 0, v4, v5, v1, v0 ; F80008CF 00010504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 8 Code Size: 280 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[0] DCL CONST[2] DCL TEMP[0..9], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[2].xyxx 1: MOV TEMP[1], IMM[0].xxxx 2: MOV TEMP[2], IMM[0].xxxx 3: MOV TEMP[3].y, IMM[0].xxxx 4: MOV TEMP[3].x, -CONST[0].xxxx 5: BGNLOOP :0 6: FSLT TEMP[4].x, CONST[0].xxxx, TEMP[3].xxxx 7: UIF TEMP[4].xxxx :0 8: BRK 9: ENDIF 10: MOV TEMP[3].y, -CONST[0].yyyy 11: BGNLOOP :0 12: FSLT TEMP[5].x, CONST[0].yyyy, TEMP[3].yyyy 13: UIF TEMP[5].xxxx :0 14: BRK 15: ENDIF 16: MAD TEMP[6].xy, TEMP[3].xyyy, CONST[2].xyyy, TEMP[0].xyyy 17: MOV TEMP[7].xy, TEMP[6].xyyy 18: MOV TEMP[7].w, IMM[0].xxxx 19: TXB TEMP[8], TEMP[7], SAMP[0], 2D 20: ADD TEMP[2], TEMP[2], TEMP[8] 21: ADD TEMP[9].x, TEMP[3].yyyy, IMM[0].yyyy 22: MOV TEMP[3].y, TEMP[9].xxxx 23: ENDLOOP :0 24: ADD TEMP[3].x, TEMP[3].xxxx, IMM[0].yyyy 25: ENDLOOP :0 26: MUL TEMP[1], TEMP[2], CONST[0].wwww 27: MOV TEMP[0].w, IMM[0].yyyy 28: MOV TEMP[0].xyz, IN[1].xyzx 29: MUL TEMP[0], TEMP[1], TEMP[0] 30: MUL TEMP[1], TEMP[0], IN[1].wwww 31: MAD TEMP[1], IN[0], TEMP[1].wwww, TEMP[1] 32: MOV OUT[0], TEMP[1] 33: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0) %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 4) %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 12) %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 32) %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 36) %30 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %31 = load <8 x i32>, <8 x i32> addrspace(2)* %30, align 32, !tbaa !0 %32 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %33 = load <4 x i32>, <4 x i32> addrspace(2)* %32, align 16, !tbaa !0 %34 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %35 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %36 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %37 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %38 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %39 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %40 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) %41 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %42 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) %43 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) %44 = fsub float -0.000000e+00, %25 %45 = fsub float -0.000000e+00, %26 br label %LOOP LOOP: ; preds = %IF43, %main_body %temp9.0 = phi float [ 0.000000e+00, %main_body ], [ %temp9.1, %IF43 ] %temp10.0 = phi float [ 0.000000e+00, %main_body ], [ %temp10.1, %IF43 ] %temp11.0 = phi float [ 0.000000e+00, %main_body ], [ %temp11.1, %IF43 ] %temp12.0 = phi float [ %44, %main_body ], [ %75, %IF43 ] %temp8.0 = phi float [ 0.000000e+00, %main_body ], [ %temp8.1, %IF43 ] %46 = fcmp olt float %25, %temp12.0 br i1 %46, label %IF, label %ENDIF IF: ; preds = %LOOP %47 = fmul float %temp8.0, %27 %48 = fmul float %temp9.0, %27 %49 = fmul float %temp10.0, %27 %50 = fmul float %temp11.0, %27 %51 = fmul float %47, %38 %52 = fmul float %48, %39 %53 = fmul float %49, %40 %54 = fmul float %51, %41 %55 = fmul float %52, %41 %56 = fmul float %53, %41 %57 = fmul float %50, %41 %58 = fmul float %34, %57 %59 = fadd float %58, %54 %60 = fmul float %35, %57 %61 = fadd float %60, %55 %62 = fmul float %36, %57 %63 = fadd float %62, %56 %64 = fmul float %37, %57 %65 = fadd float %64, %57 %66 = call i32 @llvm.SI.packf16(float %59, float %61) %67 = bitcast i32 %66 to float %68 = call i32 @llvm.SI.packf16(float %63, float %65) %69 = bitcast i32 %68 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %67, float %69, float %67, float %69) ret void ENDIF: ; preds = %LOOP %70 = fmul float %temp12.0, %28 %71 = fadd float %70, %42 %72 = bitcast float %71 to i32 %73 = insertelement <4 x i32> , i32 %72, i32 1 br label %LOOP41 LOOP41: ; preds = %ENDIF42, %ENDIF %temp9.1 = phi float [ %temp9.0, %ENDIF ], [ %86, %ENDIF42 ] %temp10.1 = phi float [ %temp10.0, %ENDIF ], [ %87, %ENDIF42 ] %temp11.1 = phi float [ %temp11.0, %ENDIF ], [ %88, %ENDIF42 ] %temp13.0 = phi float [ %45, %ENDIF ], [ %89, %ENDIF42 ] %temp8.1 = phi float [ %temp8.0, %ENDIF ], [ %85, %ENDIF42 ] %74 = fcmp olt float %26, %temp13.0 br i1 %74, label %IF43, label %ENDIF42 IF43: ; preds = %LOOP41 %75 = fadd float %temp12.0, 1.000000e+00 br label %LOOP ENDIF42: ; preds = %LOOP41 %76 = fmul float %temp13.0, %29 %77 = fadd float %76, %43 %78 = bitcast float %77 to i32 %79 = insertelement <4 x i32> %73, i32 %78, i32 2 %80 = call <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32> %79, <8 x i32> %31, <4 x i32> %33, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %81 = extractelement <4 x float> %80, i32 0 %82 = extractelement <4 x float> %80, i32 1 %83 = extractelement <4 x float> %80, i32 2 %84 = extractelement <4 x float> %80, i32 3 %85 = fadd float %temp8.1, %81 %86 = fadd float %temp9.1, %82 %87 = fadd float %temp10.1, %83 %88 = fadd float %temp11.1, %84 %89 = fadd float %temp13.0, 1.000000e+00 br label %LOOP41 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[16:19], s[2:3], 0x0 ; C0880300 s_mov_b32 m0, s10 ; BEFC030A s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s1, s[16:19], 0x0 ; C2009100 s_buffer_load_dword s2, s[16:19], 0x1 ; C2011101 s_buffer_load_dword s0, s[16:19], 0x3 ; C2001103 s_buffer_load_dword s3, s[16:19], 0x8 ; C2019108 s_buffer_load_dword s6, s[16:19], 0x9 ; C2031109 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700 v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701 v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800 s_load_dwordx4 s[16:19], s[4:5], 0x0 ; C0880500 v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801 v_interp_p1_f32 v0, v0, 1, 2, [m0] ; C8000900 v_interp_p2_f32 v0, [v0], v1, 1, 2, [m0] ; C8010901 v_mov_b32_e32 v11, 0x80000000 ; 7E1602FF 80000000 s_waitcnt lgkmcnt(0) ; BF8C007F v_xor_b32_e32 v1, s1, v11 ; 3A021601 v_xor_b32_e32 v12, s2, v11 ; 3A181602 v_mov_b32_e32 v16, 0 ; 7E200280 s_mov_b64 s[4:5], 0 ; BE840480 v_mov_b32_e32 v17, 0 ; 7E220280 v_mov_b32_e32 v18, 0 ; 7E240280 v_mov_b32_e32 v19, 0 ; 7E260280 v_mov_b32_e32 v13, v19 ; 7E1A0313 v_mov_b32_e32 v11, v18 ; 7E160312 v_mov_b32_e32 v14, v17 ; 7E1C0311 v_mov_b32_e32 v15, v16 ; 7E1E0310 v_cmp_nlt_f32_e32 vcc, s1, v1 ; 7C1C0201 s_and_saveexec_b64 s[20:21], vcc ; BE94246A s_xor_b64 s[20:21], exec, s[20:21] ; 8994147E s_cbranch_execz BB0_4 ; BF880000 v_mad_f32 v21, s3, v1, v10 ; D2820015 042A0203 v_mov_b32_e32 v20, 0 ; 7E280280 s_mov_b64 s[22:23], 0 ; BE960480 v_mov_b32_e32 v22, v15 ; 7E2C030F v_mov_b32_e32 v24, v14 ; 7E30030E v_mov_b32_e32 v26, v11 ; 7E34030B v_mov_b32_e32 v23, v12 ; 7E2E030C v_mov_b32_e32 v25, v13 ; 7E32030D v_mov_b32_e32 v19, v25 ; 7E260319 v_mov_b32_e32 v18, v26 ; 7E24031A v_mov_b32_e32 v17, v24 ; 7E220318 v_mov_b32_e32 v16, v22 ; 7E200316 v_cmp_nlt_f32_e32 vcc, s2, v23 ; 7C1C2E02 s_and_saveexec_b64 s[24:25], vcc ; BE98246A s_xor_b64 s[24:25], exec, s[24:25] ; 8998187E v_mad_f32 v22, s6, v23, v0 ; D2820016 04022E06 image_sample_b v[26:29], 15, 0, 0, 0, 0, 0, 0, 0, v[20:23], s[8:15], s[16:19] ; F0940F00 00821A14 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v25, v26, v19 ; 0632271A v_add_f32_e32 v22, v27, v16 ; 062C211B v_add_f32_e32 v24, v28, v17 ; 0630231C v_add_f32_e32 v26, v29, v18 ; 0634251D v_add_f32_e32 v23, 1.0, v23 ; 062E2EF2 s_or_b64 exec, exec, s[24:25] ; 88FE187E s_or_b64 s[22:23], s[24:25], s[22:23] ; 88961618 s_andn2_b64 exec, exec, s[22:23] ; 8AFE167E s_cbranch_execnz BB0_5 ; BF890000 s_or_b64 exec, exec, s[22:23] ; 88FE167E v_add_f32_e32 v1, 1.0, v1 ; 060202F2 s_or_b64 exec, exec, s[20:21] ; 88FE147E s_or_b64 s[4:5], s[20:21], s[4:5] ; 88840414 s_andn2_b64 exec, exec, s[4:5] ; 8AFE047E s_cbranch_execnz BB0_1 ; BF890000 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_mul_f32_e32 v0, s0, v13 ; 10001A00 v_mul_f32_e32 v1, s0, v15 ; 10021E00 v_mul_f32_e32 v10, s0, v14 ; 10141C00 v_mul_f32_e32 v11, s0, v11 ; 10161600 v_mul_f32_e32 v0, v6, v0 ; 10000106 v_mul_f32_e32 v1, v7, v1 ; 10020307 v_mul_f32_e32 v6, v8, v10 ; 100C1508 v_mul_f32_e32 v0, v9, v0 ; 10000109 v_mul_f32_e32 v1, v9, v1 ; 10020309 v_mul_f32_e32 v6, v9, v6 ; 100C0D09 v_mul_f32_e32 v7, v9, v11 ; 100E1709 v_mac_f32_e32 v0, v7, v2 ; 3E000507 v_mac_f32_e32 v1, v7, v3 ; 3E020707 v_mac_f32_e32 v6, v7, v4 ; 3E0C0907 v_mac_f32_e32 v7, v7, v5 ; 3E0E0B07 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v6, v7 ; 5E020F06 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 32 Code Size: 424 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x0 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT 0: TEX OUT[0], IN[0], SAMP[0], 2D 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %12) %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %12) %29 = bitcast float %27 to i32 %30 = bitcast float %28 to i32 %31 = insertelement <2 x i32> undef, i32 %29, i32 0 %32 = insertelement <2 x i32> %31, i32 %30, i32 1 %33 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %32, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %34, float %35, float %36, float %37) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s10 ; BEFC030A v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030002 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 0, 0, 1, 1, v0, v1, v2, v3 ; F800180F 03020100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 4 Code Size: 60 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x0 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], CUBE, UINT 0: TEX OUT[0], IN[0], SAMP[0], CUBE 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %12) %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %12) %29 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %12) %30 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %12) %31 = insertelement <4 x float> undef, float %27, i32 0 %32 = insertelement <4 x float> %31, float %28, i32 1 %33 = insertelement <4 x float> %32, float %29, i32 2 %34 = insertelement <4 x float> %33, float %30, i32 3 %35 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %34) %36 = extractelement <4 x float> %35, i32 0 %37 = extractelement <4 x float> %35, i32 1 %38 = extractelement <4 x float> %35, i32 2 %39 = extractelement <4 x float> %35, i32 3 %40 = call float @llvm.fabs.f32(float %38) %41 = fdiv float 1.000000e+00, %40 %42 = fmul float %36, %41 %43 = fadd float %42, 1.500000e+00 %44 = fmul float %37, %41 %45 = fadd float %44, 1.500000e+00 %46 = bitcast float %45 to i32 %47 = bitcast float %43 to i32 %48 = bitcast float %39 to i32 %49 = insertelement <4 x i32> undef, i32 %46, i32 0 %50 = insertelement <4 x i32> %49, i32 %47, i32 1 %51 = insertelement <4 x i32> %50, i32 %48, i32 2 %52 = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %51, <8 x i32> %24, <4 x i32> %26, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %53 = extractelement <4 x float> %52, i32 0 %54 = extractelement <4 x float> %52, i32 1 %55 = extractelement <4 x float> %52, i32 2 %56 = extractelement <4 x float> %52, i32 3 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %53, float %54, float %55, float %56) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s10 ; BEFC030A s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v0, v0, 3, 0, [m0] ; C8000300 v_interp_p2_f32 v0, [v0], v1, 3, 0, [m0] ; C8010301 v_cubeid_f32 v7, v2, v3, v4 ; D2880007 04120702 v_cubema_f32 v0, v2, v3, v4 ; D28E0000 04120702 v_rcp_f32_e64 v0, |v0| ; D3540100 00000100 v_cubesc_f32 v1, v2, v3, v4 ; D28A0001 04120702 v_cubetc_f32 v2, v2, v3, v4 ; D28C0002 04120702 v_mov_b32_e32 v5, 0x3fc00000 ; 7E0A02FF 3FC00000 v_mad_f32 v6, v0, v2, v5 ; D2820006 04160500 v_mac_f32_e32 v5, v0, v1 ; 3E0A0300 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[5:8], s[8:15], s[0:3] ; F0800F00 00020005 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 0, 0, 1, 1, v0, v1, v2, v3 ; F800180F 03020100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 12 Code Size: 136 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL IN[5] DCL OUT[0], POSITION DCL OUT[1], CLIPVERTEX DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL OUT[4], GENERIC[2] DCL OUT[5], GENERIC[3] DCL OUT[6], GENERIC[4] DCL OUT[7], GENERIC[5] DCL CONST[0..57] DCL TEMP[0..10], LOCAL IMM[0] FLT32 { -128.0000, 1.0000, -2.0000, -64.0000} IMM[1] FLT32 { -0.0159, 0.0159, 2.2000, 0.2125} IMM[2] FLT32 { 0.2125, 0.7154, 0.0721, 0.0000} 0: ADD TEMP[0].xy, IMM[0].xxxx, IN[1].xyyy 1: FSLT TEMP[1].xy, TEMP[0].xyyy, CONST[0].xxxx 2: AND TEMP[1].xy, TEMP[1].xyyy, IMM[0].yyyy 3: ABS TEMP[2].xy, TEMP[0].xyyy 4: ADD TEMP[0].xy, -TEMP[1].xyyy, TEMP[2].xyyy 5: MAD TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[0].yyyy 6: ADD TEMP[0].xy, TEMP[0].xyyy, IMM[0].wwww 7: FSLT TEMP[2].xy, TEMP[0].xyyy, CONST[0].xxxx 8: AND TEMP[2].xy, TEMP[2].xyyy, IMM[0].yyyy 9: ABS TEMP[3].xy, TEMP[0].xyyy 10: ADD TEMP[0].xy, TEMP[3].xyyy, -TEMP[2].xyyy 11: MAD TEMP[2].xy, TEMP[2].xyyy, IMM[0].zzzz, IMM[0].yyyy 12: MAD TEMP[3].x, TEMP[0].xxxx, IMM[1].xxxx, IMM[0].yyyy 13: MAD TEMP[3].x, TEMP[0].yyyy, IMM[1].xxxx, TEMP[3].xxxx 14: MOV TEMP[3].z, TEMP[3].xxxx 15: MUL TEMP[3].xy, TEMP[0].xyyy, IMM[1].yyyy 16: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 17: RSQ TEMP[4].x, TEMP[4].xxxx 18: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 19: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[3].xyyy 20: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[3].zzzz 21: MOV TEMP[2].z, TEMP[1].xxxx 22: MAD TEMP[0].xyz, IN[5].xyzz, CONST[13].xxxx, TEMP[2].xyzz 23: DP3 TEMP[2].x, TEMP[0].xyzz, CONST[54].xyzz 24: DP3 TEMP[1].x, TEMP[0].xyzz, CONST[55].xyzz 25: MOV TEMP[2].y, TEMP[1].xxxx 26: DP3 TEMP[1].x, TEMP[0].xyzz, CONST[56].xyzz 27: MOV TEMP[2].z, TEMP[1].xxxx 28: DP3 TEMP[0].x, TEMP[2].xyzz, TEMP[2].xyzz 29: RSQ TEMP[1].x, TEMP[0].xxxx 30: MUL TEMP[1].xyz, TEMP[1].xxxx, TEMP[2].xyzz 31: ADD TEMP[0].xyz, IN[2].xyzz, IN[2].xyzz 32: LG2 TEMP[2].x, TEMP[0].xxxx 33: LG2 TEMP[3].x, TEMP[0].yyyy 34: MOV TEMP[2].y, TEMP[3].xxxx 35: LG2 TEMP[3].x, TEMP[0].zzzz 36: MOV TEMP[2].z, TEMP[3].xxxx 37: MUL TEMP[0].xyz, TEMP[2].xyzz, IMM[1].zzzz 38: EX2 TEMP[2].x, TEMP[0].xxxx 39: EX2 TEMP[3].x, TEMP[0].yyyy 40: MOV TEMP[2].y, TEMP[3].xxxx 41: EX2 TEMP[3].x, TEMP[0].zzzz 42: MOV TEMP[2].z, TEMP[3].xxxx 43: ABS TEMP[3].x, CONST[50].xxxx 44: FSLT TEMP[3].x, -TEMP[3].xxxx, TEMP[3].xxxx 45: AND TEMP[0].x, TEMP[3].xxxx, IMM[0].yyyy 46: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[0].xxxx 47: DP3 TEMP[4].x, TEMP[2].xyzz, IMM[2].xyzz 48: ADD TEMP[5].x, CONST[0].yyyy, -IN[2].wwww 49: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 50: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[0].xxxx 51: MOV TEMP[4].w, TEMP[4].xxxx 52: DP4 TEMP[5].x, IN[3], CONST[48] 53: DP4 TEMP[6].x, IN[3], CONST[49] 54: MOV TEMP[5].y, TEMP[6].xxxx 55: MOV TEMP[0].w, IN[0].wwww 56: MAD TEMP[0].xyz, IN[4].xyzz, CONST[13].xxxx, IN[0].xyzz 57: DP4 TEMP[2].x, TEMP[0], CONST[54] 58: DP4 TEMP[6].x, TEMP[0], CONST[55] 59: MOV TEMP[2].y, TEMP[6].xxxx 60: DP4 TEMP[0].x, TEMP[0], CONST[56] 61: MOV TEMP[2].z, TEMP[0].xxxx 62: ADD TEMP[0].xyz, -TEMP[2].xyzz, CONST[2].xyzz 63: MOV TEMP[2].w, CONST[0].yyyy 64: DP4 TEMP[6].x, TEMP[2], CONST[8] 65: DP4 TEMP[7].x, TEMP[2], CONST[9] 66: MOV TEMP[6].y, TEMP[7].xxxx 67: DP4 TEMP[8].x, TEMP[2], CONST[11] 68: MOV TEMP[6].w, TEMP[8].xxxx 69: DP4 TEMP[9].x, TEMP[2], CONST[10] 70: MOV TEMP[2].xyz, TEMP[2].xyzx 71: MOV TEMP[6].z, TEMP[9].xxxx 72: MOV TEMP[2].w, TEMP[9].xxxx 73: MOV TEMP[5].zw, CONST[0].xxxx 74: MOV TEMP[0].w, CONST[0].xxxx 75: MOV TEMP[1].w, CONST[0].xxxx 76: MOV TEMP[3].w, CONST[0].xxxx 77: MOV TEMP[4].xyz, CONST[0].xxxx 78: MOV TEMP[10], TEMP[6] 79: MAD TEMP[9].x, TEMP[9].xxxx, CONST[0].zzzz, -TEMP[8].xxxx 80: MOV TEMP[6].z, TEMP[9].xxxx 81: MOV TEMP[6].y, -TEMP[7].xxxx 82: MAD TEMP[6].xy, CONST[57].xyyy, TEMP[8].xxxx, TEMP[6].xyyy 83: MOV OUT[2], TEMP[5] 84: MOV OUT[3], TEMP[4] 85: MOV OUT[4], TEMP[0] 86: MOV OUT[5], TEMP[1] 87: MOV OUT[0], TEMP[6] 88: MOV OUT[1], TEMP[10] 89: MOV OUT[6], TEMP[3] 90: MOV OUT[7], TEMP[2] 91: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %12 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0 %14 = call float @llvm.SI.load.const(<16 x i8> %13, i32 0) %15 = call float @llvm.SI.load.const(<16 x i8> %13, i32 4) %16 = call float @llvm.SI.load.const(<16 x i8> %13, i32 8) %17 = call float @llvm.SI.load.const(<16 x i8> %13, i32 32) %18 = call float @llvm.SI.load.const(<16 x i8> %13, i32 36) %19 = call float @llvm.SI.load.const(<16 x i8> %13, i32 40) %20 = call float @llvm.SI.load.const(<16 x i8> %13, i32 128) %21 = call float @llvm.SI.load.const(<16 x i8> %13, i32 132) %22 = call float @llvm.SI.load.const(<16 x i8> %13, i32 136) %23 = call float @llvm.SI.load.const(<16 x i8> %13, i32 140) %24 = call float @llvm.SI.load.const(<16 x i8> %13, i32 144) %25 = call float @llvm.SI.load.const(<16 x i8> %13, i32 148) %26 = call float @llvm.SI.load.const(<16 x i8> %13, i32 152) %27 = call float @llvm.SI.load.const(<16 x i8> %13, i32 156) %28 = call float @llvm.SI.load.const(<16 x i8> %13, i32 160) %29 = call float @llvm.SI.load.const(<16 x i8> %13, i32 164) %30 = call float @llvm.SI.load.const(<16 x i8> %13, i32 168) %31 = call float @llvm.SI.load.const(<16 x i8> %13, i32 172) %32 = call float @llvm.SI.load.const(<16 x i8> %13, i32 176) %33 = call float @llvm.SI.load.const(<16 x i8> %13, i32 180) %34 = call float @llvm.SI.load.const(<16 x i8> %13, i32 184) %35 = call float @llvm.SI.load.const(<16 x i8> %13, i32 188) %36 = call float @llvm.SI.load.const(<16 x i8> %13, i32 208) %37 = call float @llvm.SI.load.const(<16 x i8> %13, i32 768) %38 = call float @llvm.SI.load.const(<16 x i8> %13, i32 772) %39 = call float @llvm.SI.load.const(<16 x i8> %13, i32 776) %40 = call float @llvm.SI.load.const(<16 x i8> %13, i32 780) %41 = call float @llvm.SI.load.const(<16 x i8> %13, i32 784) %42 = call float @llvm.SI.load.const(<16 x i8> %13, i32 788) %43 = call float @llvm.SI.load.const(<16 x i8> %13, i32 792) %44 = call float @llvm.SI.load.const(<16 x i8> %13, i32 796) %45 = call float @llvm.SI.load.const(<16 x i8> %13, i32 800) %46 = call float @llvm.SI.load.const(<16 x i8> %13, i32 864) %47 = call float @llvm.SI.load.const(<16 x i8> %13, i32 868) %48 = call float @llvm.SI.load.const(<16 x i8> %13, i32 872) %49 = call float @llvm.SI.load.const(<16 x i8> %13, i32 876) %50 = call float @llvm.SI.load.const(<16 x i8> %13, i32 880) %51 = call float @llvm.SI.load.const(<16 x i8> %13, i32 884) %52 = call float @llvm.SI.load.const(<16 x i8> %13, i32 888) %53 = call float @llvm.SI.load.const(<16 x i8> %13, i32 892) %54 = call float @llvm.SI.load.const(<16 x i8> %13, i32 896) %55 = call float @llvm.SI.load.const(<16 x i8> %13, i32 900) %56 = call float @llvm.SI.load.const(<16 x i8> %13, i32 904) %57 = call float @llvm.SI.load.const(<16 x i8> %13, i32 908) %58 = call float @llvm.SI.load.const(<16 x i8> %13, i32 912) %59 = call float @llvm.SI.load.const(<16 x i8> %13, i32 916) %60 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %61 = load <16 x i8>, <16 x i8> addrspace(2)* %60, align 16, !tbaa !0 %62 = add i32 %5, %8 %63 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %61, i32 0, i32 %62) %64 = extractelement <4 x float> %63, i32 0 %65 = extractelement <4 x float> %63, i32 1 %66 = extractelement <4 x float> %63, i32 2 %67 = extractelement <4 x float> %63, i32 3 %68 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %69 = load <16 x i8>, <16 x i8> addrspace(2)* %68, align 16, !tbaa !0 %70 = add i32 %5, %8 %71 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %69, i32 0, i32 %70) %72 = extractelement <4 x float> %71, i32 0 %73 = extractelement <4 x float> %71, i32 1 %74 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %75 = load <16 x i8>, <16 x i8> addrspace(2)* %74, align 16, !tbaa !0 %76 = add i32 %5, %8 %77 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %75, i32 0, i32 %76) %78 = extractelement <4 x float> %77, i32 0 %79 = extractelement <4 x float> %77, i32 1 %80 = extractelement <4 x float> %77, i32 2 %81 = extractelement <4 x float> %77, i32 3 %82 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %83 = load <16 x i8>, <16 x i8> addrspace(2)* %82, align 16, !tbaa !0 %84 = add i32 %5, %8 %85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %83, i32 0, i32 %84) %86 = extractelement <4 x float> %85, i32 0 %87 = extractelement <4 x float> %85, i32 1 %88 = extractelement <4 x float> %85, i32 2 %89 = extractelement <4 x float> %85, i32 3 %90 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0 %92 = add i32 %5, %8 %93 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %91, i32 0, i32 %92) %94 = extractelement <4 x float> %93, i32 0 %95 = extractelement <4 x float> %93, i32 1 %96 = extractelement <4 x float> %93, i32 2 %97 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 5 %98 = load <16 x i8>, <16 x i8> addrspace(2)* %97, align 16, !tbaa !0 %99 = add i32 %5, %8 %100 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %98, i32 0, i32 %99) %101 = extractelement <4 x float> %100, i32 0 %102 = extractelement <4 x float> %100, i32 1 %103 = extractelement <4 x float> %100, i32 2 %104 = fadd float %72, -1.280000e+02 %105 = fadd float %73, -1.280000e+02 %106 = fcmp olt float %104, %14 %107 = fcmp olt float %105, %14 %108 = select i1 %106, float 1.000000e+00, float 0.000000e+00 %109 = call float @llvm.fabs.f32(float %104) %110 = call float @llvm.fabs.f32(float %105) %111 = fsub float %109, %108 %112 = select i1 %107, float -1.000000e+00, float -0.000000e+00 %113 = fadd float %112, %110 %114 = fmul float %108, -2.000000e+00 %115 = fadd float %114, 1.000000e+00 %116 = fadd float %111, -6.400000e+01 %117 = fadd float %113, -6.400000e+01 %118 = fcmp olt float %116, %14 %119 = fcmp olt float %117, %14 %120 = select i1 %118, float 1.000000e+00, float 0.000000e+00 %121 = select i1 %119, float 1.000000e+00, float 0.000000e+00 %122 = call float @llvm.fabs.f32(float %116) %123 = call float @llvm.fabs.f32(float %117) %124 = fsub float %122, %120 %125 = fsub float %123, %121 %126 = fmul float %120, -2.000000e+00 %127 = fadd float %126, 1.000000e+00 %128 = fmul float %121, -2.000000e+00 %129 = fadd float %128, 1.000000e+00 %130 = fmul float %124, 0xBF90410420000000 %131 = fadd float %130, 1.000000e+00 %132 = fmul float %125, 0xBF90410420000000 %133 = fadd float %132, %131 %134 = fmul float %124, 0x3F90410420000000 %135 = fmul float %125, 0x3F90410420000000 %136 = fmul float %134, %134 %137 = fmul float %135, %135 %138 = fadd float %137, %136 %139 = fmul float %133, %133 %140 = fadd float %138, %139 %141 = call float @llvm.AMDGPU.rsq.clamped.f32(float %140) %142 = fmul float %134, %141 %143 = fmul float %135, %141 %144 = fmul float %133, %141 %145 = fmul float %127, %142 %146 = fmul float %129, %143 %147 = fmul float %115, %144 %148 = fmul float %101, %36 %149 = fadd float %148, %145 %150 = fmul float %102, %36 %151 = fadd float %150, %146 %152 = fmul float %103, %36 %153 = fadd float %152, %147 %154 = fmul float %149, %46 %155 = fmul float %151, %47 %156 = fadd float %155, %154 %157 = fmul float %153, %48 %158 = fadd float %156, %157 %159 = fmul float %149, %50 %160 = fmul float %151, %51 %161 = fadd float %160, %159 %162 = fmul float %153, %52 %163 = fadd float %161, %162 %164 = fmul float %149, %54 %165 = fmul float %151, %55 %166 = fadd float %165, %164 %167 = fmul float %153, %56 %168 = fadd float %166, %167 %169 = fmul float %158, %158 %170 = fmul float %163, %163 %171 = fadd float %170, %169 %172 = fmul float %168, %168 %173 = fadd float %171, %172 %174 = call float @llvm.AMDGPU.rsq.clamped.f32(float %173) %175 = fmul float %174, %158 %176 = fmul float %174, %163 %177 = fmul float %174, %168 %178 = fadd float %78, %78 %179 = fadd float %79, %79 %180 = fadd float %80, %80 %181 = call float @llvm.log2.f32(float %178) %182 = call float @llvm.log2.f32(float %179) %183 = call float @llvm.log2.f32(float %180) %184 = fmul float %181, 0x40019999A0000000 %185 = fmul float %182, 0x40019999A0000000 %186 = fmul float %183, 0x40019999A0000000 %187 = call float @llvm.AMDIL.exp.(float %184) %188 = call float @llvm.AMDIL.exp.(float %185) %189 = call float @llvm.AMDIL.exp.(float %186) %190 = call float @llvm.fabs.f32(float %45) %191 = fsub float -0.000000e+00, %190 %192 = fcmp ogt float %190, %191 %193 = select i1 %192, float 1.000000e+00, float 0.000000e+00 %194 = fmul float %187, %193 %195 = fmul float %188, %193 %196 = fmul float %189, %193 %197 = fmul float %187, 0x3FCB333340000000 %198 = fmul float %188, 0x3FE6E48E80000000 %199 = fadd float %198, %197 %200 = fmul float %189, 0x3FB2752540000000 %201 = fadd float %199, %200 %202 = fsub float %15, %81 %203 = fmul float %201, %202 %204 = fmul float %203, %193 %205 = fmul float %86, %37 %206 = fmul float %87, %38 %207 = fadd float %205, %206 %208 = fmul float %88, %39 %209 = fadd float %207, %208 %210 = fmul float %89, %40 %211 = fadd float %209, %210 %212 = fmul float %86, %41 %213 = fmul float %87, %42 %214 = fadd float %212, %213 %215 = fmul float %88, %43 %216 = fadd float %214, %215 %217 = fmul float %89, %44 %218 = fadd float %216, %217 %219 = fmul float %94, %36 %220 = fadd float %219, %64 %221 = fmul float %95, %36 %222 = fadd float %221, %65 %223 = fmul float %96, %36 %224 = fadd float %223, %66 %225 = fmul float %220, %46 %226 = fmul float %222, %47 %227 = fadd float %225, %226 %228 = fmul float %224, %48 %229 = fadd float %227, %228 %230 = fmul float %67, %49 %231 = fadd float %229, %230 %232 = fmul float %220, %50 %233 = fmul float %222, %51 %234 = fadd float %232, %233 %235 = fmul float %224, %52 %236 = fadd float %234, %235 %237 = fmul float %67, %53 %238 = fadd float %236, %237 %239 = fmul float %220, %54 %240 = fmul float %222, %55 %241 = fadd float %239, %240 %242 = fmul float %224, %56 %243 = fadd float %241, %242 %244 = fmul float %67, %57 %245 = fadd float %243, %244 %246 = fsub float %17, %231 %247 = fsub float %18, %238 %248 = fsub float %19, %245 %249 = fmul float %231, %20 %250 = fmul float %238, %21 %251 = fadd float %249, %250 %252 = fmul float %245, %22 %253 = fadd float %251, %252 %254 = fmul float %15, %23 %255 = fadd float %253, %254 %256 = fmul float %231, %24 %257 = fmul float %238, %25 %258 = fadd float %256, %257 %259 = fmul float %245, %26 %260 = fadd float %258, %259 %261 = fmul float %15, %27 %262 = fadd float %260, %261 %263 = fmul float %231, %32 %264 = fmul float %238, %33 %265 = fadd float %263, %264 %266 = fmul float %245, %34 %267 = fadd float %265, %266 %268 = fmul float %15, %35 %269 = fadd float %267, %268 %270 = fmul float %231, %28 %271 = fmul float %238, %29 %272 = fadd float %270, %271 %273 = fmul float %245, %30 %274 = fadd float %272, %273 %275 = fmul float %15, %31 %276 = fadd float %274, %275 %277 = fmul float %276, %16 %278 = fsub float %277, %269 %279 = fmul float %58, %269 %280 = fadd float %279, %255 %281 = fmul float %59, %269 %282 = fsub float %281, %262 %283 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 16 %284 = load <16 x i8>, <16 x i8> addrspace(2)* %283, align 16, !tbaa !0 %285 = call float @llvm.SI.load.const(<16 x i8> %284, i32 0) %286 = fmul float %285, %255 %287 = call float @llvm.SI.load.const(<16 x i8> %284, i32 4) %288 = fmul float %287, %262 %289 = fadd float %286, %288 %290 = call float @llvm.SI.load.const(<16 x i8> %284, i32 8) %291 = fmul float %290, %276 %292 = fadd float %289, %291 %293 = call float @llvm.SI.load.const(<16 x i8> %284, i32 12) %294 = fmul float %293, %269 %295 = fadd float %292, %294 %296 = call float @llvm.SI.load.const(<16 x i8> %284, i32 16) %297 = fmul float %296, %255 %298 = call float @llvm.SI.load.const(<16 x i8> %284, i32 20) %299 = fmul float %298, %262 %300 = fadd float %297, %299 %301 = call float @llvm.SI.load.const(<16 x i8> %284, i32 24) %302 = fmul float %301, %276 %303 = fadd float %300, %302 %304 = call float @llvm.SI.load.const(<16 x i8> %284, i32 28) %305 = fmul float %304, %269 %306 = fadd float %303, %305 %307 = call float @llvm.SI.load.const(<16 x i8> %284, i32 32) %308 = fmul float %307, %255 %309 = call float @llvm.SI.load.const(<16 x i8> %284, i32 36) %310 = fmul float %309, %262 %311 = fadd float %308, %310 %312 = call float @llvm.SI.load.const(<16 x i8> %284, i32 40) %313 = fmul float %312, %276 %314 = fadd float %311, %313 %315 = call float @llvm.SI.load.const(<16 x i8> %284, i32 44) %316 = fmul float %315, %269 %317 = fadd float %314, %316 %318 = call float @llvm.SI.load.const(<16 x i8> %284, i32 48) %319 = fmul float %318, %255 %320 = call float @llvm.SI.load.const(<16 x i8> %284, i32 52) %321 = fmul float %320, %262 %322 = fadd float %319, %321 %323 = call float @llvm.SI.load.const(<16 x i8> %284, i32 56) %324 = fmul float %323, %276 %325 = fadd float %322, %324 %326 = call float @llvm.SI.load.const(<16 x i8> %284, i32 60) %327 = fmul float %326, %269 %328 = fadd float %325, %327 %329 = call float @llvm.SI.load.const(<16 x i8> %284, i32 64) %330 = fmul float %329, %255 %331 = call float @llvm.SI.load.const(<16 x i8> %284, i32 68) %332 = fmul float %331, %262 %333 = fadd float %330, %332 %334 = call float @llvm.SI.load.const(<16 x i8> %284, i32 72) %335 = fmul float %334, %276 %336 = fadd float %333, %335 %337 = call float @llvm.SI.load.const(<16 x i8> %284, i32 76) %338 = fmul float %337, %269 %339 = fadd float %336, %338 %340 = call float @llvm.SI.load.const(<16 x i8> %284, i32 80) %341 = fmul float %340, %255 %342 = call float @llvm.SI.load.const(<16 x i8> %284, i32 84) %343 = fmul float %342, %262 %344 = fadd float %341, %343 %345 = call float @llvm.SI.load.const(<16 x i8> %284, i32 88) %346 = fmul float %345, %276 %347 = fadd float %344, %346 %348 = call float @llvm.SI.load.const(<16 x i8> %284, i32 92) %349 = fmul float %348, %269 %350 = fadd float %347, %349 %351 = call float @llvm.SI.load.const(<16 x i8> %284, i32 96) %352 = fmul float %351, %255 %353 = call float @llvm.SI.load.const(<16 x i8> %284, i32 100) %354 = fmul float %353, %262 %355 = fadd float %352, %354 %356 = call float @llvm.SI.load.const(<16 x i8> %284, i32 104) %357 = fmul float %356, %276 %358 = fadd float %355, %357 %359 = call float @llvm.SI.load.const(<16 x i8> %284, i32 108) %360 = fmul float %359, %269 %361 = fadd float %358, %360 %362 = call float @llvm.SI.load.const(<16 x i8> %284, i32 112) %363 = fmul float %362, %255 %364 = call float @llvm.SI.load.const(<16 x i8> %284, i32 116) %365 = fmul float %364, %262 %366 = fadd float %363, %365 %367 = call float @llvm.SI.load.const(<16 x i8> %284, i32 120) %368 = fmul float %367, %276 %369 = fadd float %366, %368 %370 = call float @llvm.SI.load.const(<16 x i8> %284, i32 124) %371 = fmul float %370, %269 %372 = fadd float %369, %371 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %211, float %218, float %14, float %14) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %14, float %14, float %14, float %204) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %246, float %247, float %248, float %14) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %175, float %176, float %177, float %14) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %194, float %195, float %196, float %14) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %231, float %238, float %245, float %276) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %280, float %282, float %278, float %269) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 13, i32 0, float %295, float %306, float %317, float %328) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 14, i32 0, float %339, float %350, float %361, float %372) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0xc3000000 ; 7E0202FF C3000000 v_mov_b32_e32 v2, 0x80000000 ; 7E0402FF 80000000 v_mov_b32_e32 v3, 0xc2800000 ; 7E0602FF C2800000 v_mov_b32_e32 v4, 0x3c820821 ; 7E0802FF 3C820821 v_mov_b32_e32 v5, 0xbc820821 ; 7E0A02FF BC820821 v_mov_b32_e32 v6, 0x400ccccd ; 7E0C02FF 400CCCCD v_add_i32_e32 v0, vcc, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[20:23], s[8:9], 0xc ; C08A090C s_load_dwordx4 s[24:27], s[8:9], 0x10 ; C08C0910 s_load_dwordx4 s[8:11], s[8:9], 0x14 ; C0840914 s_load_dwordx4 s[28:31], s[2:3], 0x0 ; C08E0300 s_load_dwordx4 s[60:63], s[2:3], 0x40 ; C09E0340 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[7:10], v0, s[4:7], 0 idxen ; E00C2000 80010700 buffer_load_format_xyzw v[11:14], v0, s[12:15], 0 idxen ; E00C2000 80030B00 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[13:16], v0, s[16:19], 0 idxen ; E00C2000 80040D00 buffer_load_format_xyzw v[17:20], v0, s[20:23], 0 idxen ; E00C2000 80051100 buffer_load_format_xyzw v[21:24], v0, s[24:27], 0 idxen ; E00C2000 80061500 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[24:27], v0, s[8:11], 0 idxen ; E00C2000 80021800 s_buffer_load_dword s0, s[28:31], 0x0 ; C2001D00 s_buffer_load_dword s43, s[28:31], 0x1 ; C2159D01 s_buffer_load_dword s2, s[28:31], 0x2 ; C2011D02 s_buffer_load_dword s44, s[28:31], 0x8 ; C2161D08 s_buffer_load_dword s45, s[28:31], 0x9 ; C2169D09 s_buffer_load_dword s47, s[28:31], 0x2e ; C2179D2E s_buffer_load_dword s1, s[28:31], 0x2f ; C2009D2F s_buffer_load_dword s38, s[28:31], 0x34 ; C2131D34 s_buffer_load_dword s58, s[28:31], 0xc0 ; C21D1DC0 s_buffer_load_dword s59, s[28:31], 0xc1 ; C21D9DC1 s_buffer_load_dword s64, s[28:31], 0xc7 ; C2201DC7 s_buffer_load_dword s6, s[28:31], 0xc8 ; C2031DC8 s_buffer_load_dword s9, s[28:31], 0xd8 ; C2049DD8 s_buffer_load_dword s7, s[28:31], 0xd9 ; C2039DD9 s_buffer_load_dword s3, s[28:31], 0xda ; C2019DDA s_buffer_load_dword s65, s[28:31], 0xdb ; C2209DDB s_buffer_load_dword s11, s[28:31], 0xdc ; C2059DDC s_buffer_load_dword s8, s[28:31], 0xdd ; C2041DDD s_buffer_load_dword s4, s[28:31], 0xde ; C2021DDE s_buffer_load_dword s66, s[28:31], 0xdf ; C2211DDF s_buffer_load_dword s12, s[28:31], 0xe0 ; C2061DE0 s_buffer_load_dword s10, s[28:31], 0xe1 ; C2051DE1 s_buffer_load_dword s5, s[28:31], 0xe2 ; C2029DE2 s_buffer_load_dword s67, s[28:31], 0xe3 ; C2219DE3 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_gt_f32_e64 s[14:15], |s6|, -|s6| ; D008030E 40000C06 s_buffer_load_dword s6, s[28:31], 0xe4 ; C2031DE4 s_buffer_load_dword s68, s[28:31], 0xc2 ; C2221DC2 s_buffer_load_dword s69, s[28:31], 0xc3 ; C2229DC3 s_buffer_load_dword s70, s[28:31], 0xc4 ; C2231DC4 v_cndmask_b32_e64 v0, 0, 1.0, s[14:15] ; D2000000 0039E480 s_buffer_load_dword s71, s[28:31], 0xc5 ; C2239DC5 s_buffer_load_dword s72, s[28:31], 0xc6 ; C2241DC6 s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v27, s1 ; 7E360201 s_buffer_load_dword s57, s[28:31], 0x24 ; C21C9D24 v_mov_b32_e32 v28, s0 ; 7E380200 s_buffer_load_dword s73, s[28:31], 0x25 ; C2249D25 s_buffer_load_dword s50, s[28:31], 0x26 ; C2191D26 s_buffer_load_dword s1, s[28:31], 0x27 ; C2009D27 s_buffer_load_dword s51, s[28:31], 0x28 ; C2199D28 s_buffer_load_dword s48, s[28:31], 0xa ; C2181D0A s_buffer_load_dword s74, s[28:31], 0x20 ; C2251D20 s_buffer_load_dword s75, s[28:31], 0x21 ; C2259D21 s_buffer_load_dword s52, s[28:31], 0x22 ; C21A1D22 s_buffer_load_dword s13, s[28:31], 0x23 ; C2069D23 s_buffer_load_dword s55, s[28:31], 0x29 ; C21B9D29 s_buffer_load_dword s53, s[28:31], 0x2a ; C21A9D2A s_buffer_load_dword s14, s[28:31], 0x2b ; C2071D2B s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v29, s1 ; 7E3A0201 s_buffer_load_dword s76, s[28:31], 0x2c ; C2261D2C s_buffer_load_dword s77, s[28:31], 0x2d ; C2269D2D s_buffer_load_dword s21, s[28:31], 0xe5 ; C20A9DE5 s_buffer_load_dword s26, s[60:63], 0x0 ; C20D3D00 s_buffer_load_dword s56, s[60:63], 0x1 ; C21C3D01 s_buffer_load_dword s22, s[60:63], 0x2 ; C20B3D02 v_mov_b32_e32 v30, s13 ; 7E3C020D s_buffer_load_dword s18, s[60:63], 0x3 ; C2093D03 s_buffer_load_dword s28, s[60:63], 0x4 ; C20E3D04 s_buffer_load_dword s54, s[60:63], 0x5 ; C21B3D05 v_mov_b32_e32 v31, s14 ; 7E3E020E s_buffer_load_dword s23, s[60:63], 0x6 ; C20BBD06 s_buffer_load_dword s13, s[60:63], 0x7 ; C206BD07 s_buffer_load_dword s31, s[60:63], 0x8 ; C20FBD08 s_buffer_load_dword s49, s[60:63], 0x9 ; C218BD09 s_buffer_load_dword s24, s[60:63], 0xa ; C20C3D0A s_buffer_load_dword s14, s[60:63], 0xb ; C2073D0B s_buffer_load_dword s33, s[60:63], 0xc ; C210BD0C s_buffer_load_dword s46, s[60:63], 0xd ; C2173D0D s_buffer_load_dword s25, s[60:63], 0xe ; C20CBD0E s_buffer_load_dword s16, s[60:63], 0xf ; C2083D0F s_buffer_load_dword s34, s[60:63], 0x10 ; C2113D10 s_buffer_load_dword s42, s[60:63], 0x11 ; C2153D11 s_buffer_load_dword s27, s[60:63], 0x12 ; C20DBD12 s_buffer_load_dword s15, s[60:63], 0x13 ; C207BD13 s_buffer_load_dword s35, s[60:63], 0x14 ; C211BD14 s_buffer_load_dword s41, s[60:63], 0x15 ; C214BD15 s_buffer_load_dword s29, s[60:63], 0x16 ; C20EBD16 s_buffer_load_dword s17, s[60:63], 0x17 ; C208BD17 s_buffer_load_dword s36, s[60:63], 0x18 ; C2123D18 s_buffer_load_dword s40, s[60:63], 0x19 ; C2143D19 s_buffer_load_dword s30, s[60:63], 0x1a ; C20F3D1A s_buffer_load_dword s19, s[60:63], 0x1b ; C209BD1B s_buffer_load_dword s37, s[60:63], 0x1c ; C212BD1C s_buffer_load_dword s39, s[60:63], 0x1d ; C213BD1D s_buffer_load_dword s32, s[60:63], 0x1e ; C2103D1E s_buffer_load_dword s20, s[60:63], 0x1f ; C20A3D1F v_add_f32_e32 v12, v1, v12 ; 06181901 v_cmp_gt_f32_e32 vcc, s0, v12 ; 7C081800 v_cndmask_b32_e64 v2, v2, -1.0, vcc ; D2000002 01A9E702 v_add_f32_e32 v1, v1, v11 ; 06021701 v_cmp_gt_f32_e32 vcc, s0, v1 ; 7C080200 v_cndmask_b32_e64 v11, 0, 1.0, vcc ; D200000B 01A9E480 v_add_f32_e64 v2, v2, |v12| ; D2060202 00021902 v_sub_f32_e64 v1, |v1|, v11 ; D2080101 00021701 v_add_f32_e32 v1, v3, v1 ; 06020303 v_add_f32_e32 v2, v3, v2 ; 06040503 v_cmp_gt_f32_e32 vcc, s0, v1 ; 7C080200 v_cmp_gt_f32_e64 s[0:1], s0, v2 ; D0080000 00020400 v_mul_f32_e32 v3, s59, v18 ; 1006243B v_mul_f32_e32 v12, s71, v18 ; 10182447 v_mac_f32_e32 v3, s58, v17 ; 3E06223A v_mac_f32_e32 v12, s70, v17 ; 3E182246 v_mac_f32_e32 v3, s68, v19 ; 3E062644 v_mac_f32_e32 v12, s72, v19 ; 3E182648 v_mac_f32_e32 v3, s69, v20 ; 3E062845 v_mac_f32_e32 v12, s64, v20 ; 3E182840 v_mad_f32 v7, s38, v21, v7 ; D2820007 041E2A26 v_mad_f32 v8, s38, v22, v8 ; D2820008 04222C26 v_mad_f32 v9, s38, v23, v9 ; D2820009 04262E26 v_mul_f32_e32 v17, s7, v8 ; 10221007 v_mac_f32_e32 v17, s9, v7 ; 3E220E09 v_mac_f32_e32 v17, s3, v9 ; 3E221203 v_mac_f32_e32 v17, s65, v10 ; 3E221441 v_mul_f32_e32 v18, s8, v8 ; 10241008 v_mac_f32_e32 v18, s11, v7 ; 3E240E0B v_mac_f32_e32 v18, s4, v9 ; 3E241204 v_mac_f32_e32 v18, s66, v10 ; 3E241442 v_mul_f32_e32 v8, s10, v8 ; 1010100A v_mac_f32_e32 v8, s12, v7 ; 3E100E0C v_mac_f32_e32 v8, s5, v9 ; 3E101205 v_mac_f32_e32 v8, s67, v10 ; 3E101443 v_mul_f32_e32 v7, s75, v18 ; 100E244B v_mac_f32_e32 v7, s74, v17 ; 3E0E224A v_mul_f32_e32 v9, s73, v18 ; 10122449 v_mac_f32_e32 v9, s57, v17 ; 3E122239 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v10, s77, v18 ; 1014244D v_mac_f32_e32 v10, s76, v17 ; 3E14224C v_mul_f32_e32 v19, s55, v18 ; 10262437 v_mac_f32_e32 v19, s51, v17 ; 3E262233 v_mac_f32_e32 v7, s52, v8 ; 3E0E1034 v_mac_f32_e32 v9, s50, v8 ; 3E121032 v_mac_f32_e32 v10, s47, v8 ; 3E14102F v_mac_f32_e32 v19, s53, v8 ; 3E261035 v_mac_f32_e32 v7, s43, v30 ; 3E0E3C2B v_mac_f32_e32 v9, s43, v29 ; 3E123A2B v_mac_f32_e32 v10, s43, v27 ; 3E14362B v_mac_f32_e32 v19, s43, v31 ; 3E263E2B v_sub_f32_e32 v16, s43, v16 ; 0820202B v_sub_f32_e32 v20, s44, v17 ; 0828222C v_sub_f32_e32 v21, s45, v18 ; 082A242D v_sub_f32_e32 v22, s48, v8 ; 082C1030 v_mul_f32_e32 v23, s56, v9 ; 102E1238 v_mul_f32_e32 v27, s54, v9 ; 10361236 v_cndmask_b32_e64 v29, 0, 1.0, vcc ; D200001D 01A9E480 v_cndmask_b32_e64 v30, 0, 1.0, s[0:1] ; D200001E 0001E480 v_sub_f32_e64 v1, |v1|, v29 ; D2080101 00023B01 v_sub_f32_e64 v2, |v2|, v30 ; D2080102 00023D02 v_mad_f32 v31, v1, v5, 1.0 ; D282001F 03CA0B01 v_mac_f32_e32 v31, v5, v2 ; 3E3E0505 v_mul_f32_e32 v1, v4, v1 ; 10020304 v_mul_f32_e32 v2, v4, v2 ; 10040504 v_mul_f32_e32 v4, v1, v1 ; 10080301 v_mac_f32_e32 v4, v2, v2 ; 3E080502 v_mac_f32_e32 v4, v31, v31 ; 3E083F1F v_rsq_clamp_f32_e32 v4, v4 ; 7E085904 v_mul_f32_e32 v5, s49, v9 ; 100A1231 v_mul_f32_e32 v32, s46, v9 ; 1040122E v_mul_f32_e32 v33, s42, v9 ; 1042122A v_mul_f32_e32 v1, v4, v1 ; 10020304 v_mul_f32_e32 v2, v4, v2 ; 10040504 v_mul_f32_e32 v4, v4, v31 ; 10083F04 v_mul_f32_e32 v29, -2.0, v29 ; 103A3AF5 v_mac_f32_e32 v1, v1, v29 ; 3E023B01 v_mul_f32_e32 v29, -2.0, v30 ; 103A3CF5 v_mac_f32_e32 v2, v2, v29 ; 3E043B02 v_mul_f32_e32 v11, -2.0, v11 ; 101616F5 v_mac_f32_e32 v4, v4, v11 ; 3E081704 v_mac_f32_e32 v1, s38, v24 ; 3E023026 v_mac_f32_e32 v2, s38, v25 ; 3E043226 v_mac_f32_e32 v4, s38, v26 ; 3E083426 v_mul_f32_e32 v11, s41, v9 ; 10161229 v_mul_f32_e32 v24, s40, v9 ; 10301228 v_add_f32_e32 v13, v13, v13 ; 061A1B0D v_add_f32_e32 v14, v14, v14 ; 061C1D0E v_add_f32_e32 v15, v15, v15 ; 061E1F0F v_log_f32_e32 v13, v13 ; 7E1A4F0D v_log_f32_e32 v14, v14 ; 7E1C4F0E v_log_f32_e32 v15, v15 ; 7E1E4F0F exp 15, 32, 0, 0, 0, v3, v12, v28, v28 ; F800020F 1C1C0C03 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v3, v6, v13 ; 10061B06 v_mul_f32_e32 v12, v6, v14 ; 10181D06 v_mul_f32_e32 v6, v6, v15 ; 100C1F06 v_exp_f32_e32 v3, v3 ; 7E064B03 v_exp_f32_e32 v12, v12 ; 7E184B0C v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_mul_f32_e32 v13, 0x3e59999a, v3 ; 101A06FF 3E59999A v_madmk_f32_e32 v13, v12, v13, 0x3f372474 ; 401A1B0C 3F372474 v_madmk_f32_e32 v13, v6, v13, 0x3d93a92a ; 401A1B06 3D93A92A v_mul_f32_e32 v13, v16, v13 ; 101A1B10 v_mul_f32_e32 v13, v0, v13 ; 101A1B00 exp 15, 33, 0, 0, 0, v28, v28, v28, v13 ; F800021F 0D1C1C1C exp 15, 34, 0, 0, 0, v20, v21, v22, v28 ; F800022F 1C161514 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v13, s9, v1 ; 101A0209 v_mul_f32_e32 v14, s11, v1 ; 101C020B v_mul_f32_e32 v1, s12, v1 ; 1002020C v_mac_f32_e32 v13, s7, v2 ; 3E1A0407 v_mac_f32_e32 v14, s8, v2 ; 3E1C0408 v_mac_f32_e32 v1, s10, v2 ; 3E02040A v_mul_f32_e32 v2, s39, v9 ; 10041227 v_mac_f32_e32 v23, s26, v7 ; 3E2E0E1A v_mac_f32_e32 v27, s28, v7 ; 3E360E1C v_mac_f32_e32 v5, s31, v7 ; 3E0A0E1F v_mac_f32_e32 v32, s33, v7 ; 3E400E21 v_mac_f32_e32 v33, s34, v7 ; 3E420E22 v_mac_f32_e32 v11, s35, v7 ; 3E160E23 v_mac_f32_e32 v24, s36, v7 ; 3E300E24 v_mac_f32_e32 v2, s37, v7 ; 3E040E25 v_mad_f32 v7, s6, v10, v7 ; D2820007 041E1406 v_mad_f32 v9, s21, v10, -v9 ; D2820009 84261415 v_mac_f32_e32 v23, s22, v19 ; 3E2E2616 v_mac_f32_e32 v27, s23, v19 ; 3E362617 v_mac_f32_e32 v5, s24, v19 ; 3E0A2618 v_mac_f32_e32 v32, s25, v19 ; 3E402619 v_mac_f32_e32 v33, s27, v19 ; 3E42261B v_mac_f32_e32 v11, s29, v19 ; 3E16261D v_mac_f32_e32 v24, s30, v19 ; 3E30261E v_mac_f32_e32 v2, s32, v19 ; 3E042620 v_mac_f32_e32 v23, s18, v10 ; 3E2E1412 v_mac_f32_e32 v13, s3, v4 ; 3E1A0803 v_mac_f32_e32 v14, s4, v4 ; 3E1C0804 v_mac_f32_e32 v1, s5, v4 ; 3E020805 v_mul_f32_e32 v4, v13, v13 ; 10081B0D v_mac_f32_e32 v4, v14, v14 ; 3E081D0E v_mac_f32_e32 v4, v1, v1 ; 3E080301 v_rsq_clamp_f32_e32 v4, v4 ; 7E085904 v_mac_f32_e32 v27, s13, v10 ; 3E36140D v_mac_f32_e32 v5, s14, v10 ; 3E0A140E v_mac_f32_e32 v32, s16, v10 ; 3E401410 v_mul_f32_e32 v13, v13, v4 ; 101A090D v_mul_f32_e32 v14, v14, v4 ; 101C090E v_mul_f32_e32 v1, v1, v4 ; 10020901 exp 15, 35, 0, 0, 0, v13, v14, v1, v28 ; F800023F 1C010E0D v_mac_f32_e32 v33, s15, v10 ; 3E42140F v_mac_f32_e32 v11, s17, v10 ; 3E161411 v_mac_f32_e32 v24, s19, v10 ; 3E301413 v_mac_f32_e32 v2, s20, v10 ; 3E041414 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v1, v0, v3 ; 10020700 v_mul_f32_e32 v3, v0, v12 ; 10061900 v_mad_f32 v4, v19, s2, -v10 ; D2820004 84280513 v_mul_f32_e32 v0, v0, v6 ; 10000D00 exp 15, 36, 0, 0, 0, v1, v3, v0, v28 ; F800024F 1C000301 exp 15, 37, 0, 0, 0, v17, v18, v8, v19 ; F800025F 13081211 exp 15, 12, 0, 0, 0, v7, v9, v4, v10 ; F80000CF 0A040907 exp 15, 13, 0, 0, 0, v23, v27, v5, v32 ; F80000DF 20051B17 exp 15, 14, 0, 1, 0, v33, v11, v24, v2 ; F80008EF 02180B21 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 80 VGPRS: 36 Code Size: 1272 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 clamp_color = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[4], PERSPECTIVE DCL IN[5], GENERIC[5], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], CUBE, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], SHADOW2D, FLOAT DCL CONST[0..90] DCL TEMP[0..13], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 2.0000, -0.5000} IMM[1] FLT32 { -0.0000, -1.0000, -2.0000, 0.0625} IMM[2] FLT32 { 0.0005, 0.0000, -0.0005, 0.1250} IMM[3] FLT32 { 0.2500, 0.0000, -1.0000, -2.0000} IMM[4] FLT32 { 0.2125, 0.7154, 0.0721, 0.5000} IMM[5] FLT32 { 0.2990, 0.5870, 0.1140, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].xy, IN[0].xyyy 3: TEX TEMP[1], TEMP[1], SAMP[2], 2D 4: MOV TEMP[1], TEMP[1] 5: UIF CONST[90].xxxx :0 6: MAD TEMP[2], IN[5].xyzx, IMM[0].xxxy, IMM[0].yyyx 7: DP4 TEMP[3].x, TEMP[2], CONST[69] 8: DP4 TEMP[4].x, TEMP[2], CONST[70] 9: MOV TEMP[3].y, TEMP[4].xxxx 10: MOV_SAT TEMP[5].xy, TEMP[3].xyyy 11: ADD TEMP[5].xy, -TEMP[3].xyyy, TEMP[5].xyyy 12: MOV TEMP[1].yz, TEMP[5].yxyy 13: DP2 TEMP[5].x, TEMP[5].xyyy, IMM[0].xxxx 14: MOV TEMP[1].y, TEMP[5].xxxx 15: DP4 TEMP[6].x, TEMP[2], CONST[73] 16: DP4 TEMP[7].x, TEMP[2], CONST[74] 17: MOV TEMP[6].y, TEMP[7].xxxx 18: MOV_SAT TEMP[8].xy, TEMP[6].xyyy 19: ADD TEMP[8].xy, -TEMP[6].xyyy, TEMP[8].xyyy 20: MOV TEMP[1].zw, TEMP[8].yyxy 21: DP2 TEMP[8].x, TEMP[8].xyyy, IMM[0].xxxx 22: MOV TEMP[1].z, TEMP[8].xxxx 23: DP4 TEMP[9].x, TEMP[2], CONST[77] 24: DP4 TEMP[10].x, TEMP[2], CONST[78] 25: MOV TEMP[6].z, IMM[0].xxxx 26: MOV TEMP[9].z, IMM[0].zzzz 27: MOV TEMP[11].w, TEMP[6] 28: ABS TEMP[12].x, TEMP[8].xxxx 29: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 30: UIF TEMP[12].xxxx :0 31: MOV TEMP[12].x, TEMP[6].xxxx 32: ELSE :0 33: MOV TEMP[12].x, TEMP[9].xxxx 34: ENDIF 35: MOV TEMP[11].x, TEMP[12].xxxx 36: ABS TEMP[12].x, TEMP[8].xxxx 37: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 38: UIF TEMP[12].xxxx :0 39: MOV TEMP[7].x, TEMP[7].xxxx 40: ELSE :0 41: MOV TEMP[7].x, TEMP[10].xxxx 42: ENDIF 43: MOV TEMP[11].y, TEMP[7].xxxx 44: ABS TEMP[7].x, TEMP[8].xxxx 45: FSGE TEMP[7].x, -TEMP[7].xxxx, IMM[0].yyyy 46: UIF TEMP[7].xxxx :0 47: MOV TEMP[7].x, IMM[0].xxxx 48: ELSE :0 49: MOV TEMP[7].x, IMM[0].zzzz 50: ENDIF 51: MOV TEMP[11].z, TEMP[7].xxxx 52: MOV TEMP[6].xyz, TEMP[11] 53: MOV TEMP[3].w, IMM[0].yyyy 54: MOV TEMP[11].x, TEMP[1] 55: ABS TEMP[7].x, TEMP[5].xxxx 56: FSGE TEMP[7].x, -TEMP[7].xxxx, IMM[0].yyyy 57: UIF TEMP[7].xxxx :0 58: MOV TEMP[7].x, TEMP[3].xxxx 59: ELSE :0 60: MOV TEMP[7].x, TEMP[6].xxxx 61: ENDIF 62: MOV TEMP[11].y, TEMP[7].xxxx 63: ABS TEMP[7].x, TEMP[5].xxxx 64: FSGE TEMP[7].x, -TEMP[7].xxxx, IMM[0].yyyy 65: UIF TEMP[7].xxxx :0 66: MOV TEMP[4].x, TEMP[4].xxxx 67: ELSE :0 68: MOV TEMP[4].x, TEMP[6].yyyy 69: ENDIF 70: MOV TEMP[11].z, TEMP[4].xxxx 71: ABS TEMP[4].x, TEMP[5].xxxx 72: FSGE TEMP[4].x, -TEMP[4].xxxx, IMM[0].yyyy 73: UIF TEMP[4].xxxx :0 74: MOV TEMP[4].x, IMM[0].yyyy 75: ELSE :0 76: MOV TEMP[4].x, TEMP[6].zzzz 77: ENDIF 78: MOV TEMP[11].w, TEMP[4].xxxx 79: MOV TEMP[1].xw, TEMP[11].xxxw 80: DP4 TEMP[5].x, TEMP[2], CONST[71] 81: MOV TEMP[3].z, TEMP[5].xxxx 82: ADD TEMP[6].xy, TEMP[11].yzzz, IMM[0].wwww 83: ABS TEMP[7].xy, TEMP[6].xyyy 84: ADD TEMP[6].xy, TEMP[7].xyyy, -CONST[67].zzzz 85: MUL TEMP[6].xy, TEMP[6].xyyy, CONST[67].wwww 86: MOV_SAT TEMP[7].xy, TEMP[6].xyyy 87: ADD TEMP[6].xy, -TEMP[7].xyyy, IMM[0].xxxx 88: MUL TEMP[6].x, TEMP[6].yyyy, TEMP[6].xxxx 89: MOV_SAT TEMP[7].xy, TEMP[11].yzzz 90: ADD TEMP[4].xyz, TEMP[4].xxxx, IMM[1].xyzz 91: MOV TEMP[9].y, IMM[0].yyyy 92: ABS TEMP[8].x, TEMP[4].xxxx 93: FSGE TEMP[8].x, -TEMP[8].xxxx, IMM[0].yyyy 94: UIF TEMP[8].xxxx :0 95: MOV TEMP[8].x, CONST[85].zzzz 96: ELSE :0 97: MOV TEMP[8].x, IMM[0].yyyy 98: ENDIF 99: ABS TEMP[10].x, TEMP[4].xxxx 100: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 101: UIF TEMP[10].xxxx :0 102: MOV TEMP[10].x, CONST[85].wwww 103: ELSE :0 104: MOV TEMP[10].x, IMM[0].yyyy 105: ENDIF 106: MOV TEMP[11].y, TEMP[10].xxxx 107: ABS TEMP[10].x, TEMP[4].xxxx 108: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 109: UIF TEMP[10].xxxx :0 110: MOV TEMP[10].x, CONST[85].xxxx 111: ELSE :0 112: MOV TEMP[10].x, IMM[0].yyyy 113: ENDIF 114: MOV TEMP[11].z, TEMP[10].xxxx 115: ABS TEMP[10].x, TEMP[4].xxxx 116: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 117: UIF TEMP[10].xxxx :0 118: MOV TEMP[10].x, CONST[85].yyyy 119: ELSE :0 120: MOV TEMP[10].x, IMM[0].yyyy 121: ENDIF 122: MOV TEMP[11].w, TEMP[10].xxxx 123: ABS TEMP[10].x, TEMP[4].yyyy 124: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 125: UIF TEMP[10].xxxx :0 126: MOV TEMP[10].x, CONST[86].zzzz 127: ELSE :0 128: MOV TEMP[10].x, TEMP[8].xxxx 129: ENDIF 130: ABS TEMP[8].x, TEMP[4].yyyy 131: FSGE TEMP[8].x, -TEMP[8].xxxx, IMM[0].yyyy 132: UIF TEMP[8].xxxx :0 133: MOV TEMP[8].x, CONST[86].wwww 134: ELSE :0 135: MOV TEMP[8].x, TEMP[11].yyyy 136: ENDIF 137: MOV TEMP[11].y, TEMP[8].xxxx 138: ABS TEMP[8].x, TEMP[4].yyyy 139: FSGE TEMP[8].x, -TEMP[8].xxxx, IMM[0].yyyy 140: UIF TEMP[8].xxxx :0 141: MOV TEMP[8].x, CONST[86].xxxx 142: ELSE :0 143: MOV TEMP[8].x, TEMP[11].zzzz 144: ENDIF 145: MOV TEMP[11].z, TEMP[8].xxxx 146: ABS TEMP[8].x, TEMP[4].yyyy 147: FSGE TEMP[8].x, -TEMP[8].xxxx, IMM[0].yyyy 148: UIF TEMP[8].xxxx :0 149: MOV TEMP[8].x, CONST[86].yyyy 150: ELSE :0 151: MOV TEMP[8].x, TEMP[11].wwww 152: ENDIF 153: MOV TEMP[11].w, TEMP[8].xxxx 154: ABS TEMP[8].x, TEMP[4].zzzz 155: FSGE TEMP[8].x, -TEMP[8].xxxx, IMM[0].yyyy 156: UIF TEMP[8].xxxx :0 157: MOV TEMP[8].x, CONST[87].zzzz 158: ELSE :0 159: MOV TEMP[8].x, TEMP[10].xxxx 160: ENDIF 161: MOV TEMP[11].x, TEMP[8].xxxx 162: ABS TEMP[8].x, TEMP[4].zzzz 163: FSGE TEMP[8].x, -TEMP[8].xxxx, IMM[0].yyyy 164: UIF TEMP[8].xxxx :0 165: MOV TEMP[8].x, CONST[87].wwww 166: ELSE :0 167: MOV TEMP[8].x, TEMP[11].yyyy 168: ENDIF 169: MOV TEMP[11].y, TEMP[8].xxxx 170: ABS TEMP[8].x, TEMP[4].zzzz 171: FSGE TEMP[8].x, -TEMP[8].xxxx, IMM[0].yyyy 172: UIF TEMP[8].xxxx :0 173: MOV TEMP[8].x, CONST[87].xxxx 174: ELSE :0 175: MOV TEMP[8].x, TEMP[11].zzzz 176: ENDIF 177: MOV TEMP[11].z, TEMP[8].xxxx 178: ABS TEMP[8].x, TEMP[4].zzzz 179: FSGE TEMP[8].x, -TEMP[8].xxxx, IMM[0].yyyy 180: UIF TEMP[8].xxxx :0 181: MOV TEMP[8].x, CONST[87].yyyy 182: ELSE :0 183: MOV TEMP[8].x, TEMP[11].wwww 184: ENDIF 185: MOV TEMP[11].w, TEMP[8].xxxx 186: MAD TEMP[3].xy, TEMP[7].xyyy, TEMP[11].xyyy, TEMP[11].zwww 187: ADD TEMP[4], TEMP[3], IMM[2].xxyy 188: TXL TEMP[7].x, TEMP[4], SAMP[3], SHADOW2D 189: MOV TEMP[4].x, TEMP[7].xxxx 190: ADD TEMP[7], TEMP[3], IMM[2].zxyy 191: ADD TEMP[8], TEMP[3], IMM[2].xzyy 192: ADD TEMP[10], TEMP[3], IMM[2].zzyy 193: TXL TEMP[12].x, TEMP[7], SAMP[3], SHADOW2D 194: MOV TEMP[4].y, TEMP[12].xxxx 195: TXL TEMP[12].x, TEMP[8], SAMP[3], SHADOW2D 196: MOV TEMP[4].z, TEMP[12].xxxx 197: TXL TEMP[12].x, TEMP[10], SAMP[3], SHADOW2D 198: MOV TEMP[4].w, TEMP[12].xxxx 199: DP4 TEMP[12].x, TEMP[4], IMM[1].wwww 200: ADD TEMP[4], TEMP[3], IMM[2].xyyy 201: TXL TEMP[13].x, TEMP[4], SAMP[3], SHADOW2D 202: MOV TEMP[4].x, TEMP[13].xxxx 203: ADD TEMP[7], TEMP[3], IMM[2].zyyy 204: TXL TEMP[13], TEMP[7], SAMP[3], SHADOW2D 205: MOV TEMP[7], TEMP[13] 206: ADD TEMP[8], TEMP[3], IMM[2].yzyy 207: TXL TEMP[8].x, TEMP[8], SAMP[3], SHADOW2D 208: ADD TEMP[10], TEMP[3], IMM[2].yxyy 209: TXL TEMP[10].x, TEMP[10], SAMP[3], SHADOW2D 210: MOV TEMP[4].y, TEMP[13].xxxx 211: MOV TEMP[4].z, TEMP[8].xxxx 212: MOV TEMP[4].w, TEMP[10].xxxx 213: DP4 TEMP[8].x, TEMP[4], IMM[2].wwww 214: MOV TEMP[1].z, TEMP[8].xxxx 215: MOV TEMP[10].xy, TEMP[3].xyyy 216: MOV TEMP[10].z, TEMP[5].xxxx 217: MOV TEMP[10].w, IMM[0].yyyy 218: TXL TEMP[10], TEMP[10], SAMP[3], SHADOW2D 219: MOV TEMP[4], TEMP[10] 220: ADD TEMP[8].x, TEMP[8].xxxx, TEMP[12].xxxx 221: MAD TEMP[8].x, TEMP[10].xxxx, IMM[3].xxxx, TEMP[8].xxxx 222: MOV TEMP[1].y, TEMP[8].xxxx 223: FSLT TEMP[8].x, TEMP[6].xxxx, IMM[0].xxxx 224: UIF TEMP[8].xxxx :0 225: ADD TEMP[8].xyz, TEMP[1].wwww, IMM[3].yzww 226: ABS TEMP[10].x, TEMP[8].xxxx 227: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 228: UIF TEMP[10].xxxx :0 229: MOV TEMP[10].x, CONST[73].xxxx 230: ELSE :0 231: MOV TEMP[10].x, IMM[0].yyyy 232: ENDIF 233: MOV TEMP[11].x, TEMP[10].xxxx 234: ABS TEMP[10].x, TEMP[8].xxxx 235: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 236: UIF TEMP[10].xxxx :0 237: MOV TEMP[10].x, CONST[73].yyyy 238: ELSE :0 239: MOV TEMP[10].x, IMM[0].yyyy 240: ENDIF 241: MOV TEMP[11].y, TEMP[10].xxxx 242: ABS TEMP[10].x, TEMP[8].xxxx 243: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 244: UIF TEMP[10].xxxx :0 245: MOV TEMP[10].x, CONST[73].zzzz 246: ELSE :0 247: MOV TEMP[10].x, IMM[0].yyyy 248: ENDIF 249: MOV TEMP[11].z, TEMP[10].xxxx 250: ABS TEMP[10].x, TEMP[8].xxxx 251: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 252: UIF TEMP[10].xxxx :0 253: MOV TEMP[10].x, CONST[73].wwww 254: ELSE :0 255: MOV TEMP[10].x, IMM[0].yyyy 256: ENDIF 257: MOV TEMP[11].w, TEMP[10].xxxx 258: MOV TEMP[4], TEMP[11] 259: ABS TEMP[10].x, TEMP[8].xxxx 260: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 261: UIF TEMP[10].xxxx :0 262: MOV TEMP[10].x, CONST[74].xxxx 263: ELSE :0 264: MOV TEMP[10].x, IMM[0].yyyy 265: ENDIF 266: MOV TEMP[11].x, TEMP[10].xxxx 267: ABS TEMP[10].x, TEMP[8].xxxx 268: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 269: UIF TEMP[10].xxxx :0 270: MOV TEMP[10].x, CONST[74].yyyy 271: ELSE :0 272: MOV TEMP[10].x, IMM[0].yyyy 273: ENDIF 274: MOV TEMP[11].y, TEMP[10].xxxx 275: ABS TEMP[10].x, TEMP[8].xxxx 276: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 277: UIF TEMP[10].xxxx :0 278: MOV TEMP[10].x, CONST[74].zzzz 279: ELSE :0 280: MOV TEMP[10].x, IMM[0].yyyy 281: ENDIF 282: MOV TEMP[11].z, TEMP[10].xxxx 283: ABS TEMP[10].x, TEMP[8].xxxx 284: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 285: UIF TEMP[10].xxxx :0 286: MOV TEMP[10].x, CONST[74].wwww 287: ELSE :0 288: MOV TEMP[10].x, IMM[0].yyyy 289: ENDIF 290: MOV TEMP[11].w, TEMP[10].xxxx 291: MOV TEMP[7], TEMP[11] 292: ABS TEMP[10].x, TEMP[8].yyyy 293: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 294: UIF TEMP[10].xxxx :0 295: MOV TEMP[10].x, CONST[77].xxxx 296: ELSE :0 297: MOV TEMP[10].x, TEMP[4].xxxx 298: ENDIF 299: MOV TEMP[11].x, TEMP[10].xxxx 300: ABS TEMP[10].x, TEMP[8].yyyy 301: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 302: UIF TEMP[10].xxxx :0 303: MOV TEMP[10].x, CONST[77].yyyy 304: ELSE :0 305: MOV TEMP[10].x, TEMP[4].yyyy 306: ENDIF 307: MOV TEMP[11].y, TEMP[10].xxxx 308: ABS TEMP[10].x, TEMP[8].yyyy 309: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 310: UIF TEMP[10].xxxx :0 311: MOV TEMP[10].x, CONST[77].zzzz 312: ELSE :0 313: MOV TEMP[10].x, TEMP[4].zzzz 314: ENDIF 315: MOV TEMP[11].z, TEMP[10].xxxx 316: ABS TEMP[10].x, TEMP[8].yyyy 317: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 318: UIF TEMP[10].xxxx :0 319: MOV TEMP[10].x, CONST[77].wwww 320: ELSE :0 321: MOV TEMP[10].x, TEMP[4].wwww 322: ENDIF 323: MOV TEMP[11].w, TEMP[10].xxxx 324: MOV TEMP[4], TEMP[11] 325: ABS TEMP[10].x, TEMP[8].yyyy 326: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 327: UIF TEMP[10].xxxx :0 328: MOV TEMP[10].x, CONST[78].xxxx 329: ELSE :0 330: MOV TEMP[10].x, TEMP[7].xxxx 331: ENDIF 332: MOV TEMP[11].x, TEMP[10].xxxx 333: ABS TEMP[10].x, TEMP[8].yyyy 334: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 335: UIF TEMP[10].xxxx :0 336: MOV TEMP[10].x, CONST[78].yyyy 337: ELSE :0 338: MOV TEMP[10].x, TEMP[7].yyyy 339: ENDIF 340: MOV TEMP[11].y, TEMP[10].xxxx 341: ABS TEMP[10].x, TEMP[8].yyyy 342: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 343: UIF TEMP[10].xxxx :0 344: MOV TEMP[10].x, CONST[78].zzzz 345: ELSE :0 346: MOV TEMP[10].x, TEMP[7].zzzz 347: ENDIF 348: MOV TEMP[11].z, TEMP[10].xxxx 349: ABS TEMP[10].x, TEMP[8].yyyy 350: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 351: UIF TEMP[10].xxxx :0 352: MOV TEMP[10].x, CONST[78].wwww 353: ELSE :0 354: MOV TEMP[10].x, TEMP[7].wwww 355: ENDIF 356: MOV TEMP[11].w, TEMP[10].xxxx 357: MOV TEMP[7], TEMP[11] 358: ABS TEMP[10].x, TEMP[8].zzzz 359: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 360: UIF TEMP[10].xxxx :0 361: MOV TEMP[10].x, CONST[81].xxxx 362: ELSE :0 363: MOV TEMP[10].x, TEMP[4].xxxx 364: ENDIF 365: MOV TEMP[11].x, TEMP[10].xxxx 366: ABS TEMP[10].x, TEMP[8].zzzz 367: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 368: UIF TEMP[10].xxxx :0 369: MOV TEMP[10].x, CONST[81].yyyy 370: ELSE :0 371: MOV TEMP[10].x, TEMP[4].yyyy 372: ENDIF 373: MOV TEMP[11].y, TEMP[10].xxxx 374: ABS TEMP[10].x, TEMP[8].zzzz 375: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 376: UIF TEMP[10].xxxx :0 377: MOV TEMP[10].x, CONST[81].zzzz 378: ELSE :0 379: MOV TEMP[10].x, TEMP[4].zzzz 380: ENDIF 381: MOV TEMP[11].z, TEMP[10].xxxx 382: ABS TEMP[10].x, TEMP[8].zzzz 383: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 384: UIF TEMP[10].xxxx :0 385: MOV TEMP[10].x, CONST[81].wwww 386: ELSE :0 387: MOV TEMP[10].x, TEMP[4].wwww 388: ENDIF 389: MOV TEMP[11].w, TEMP[10].xxxx 390: MOV TEMP[4], TEMP[11] 391: ABS TEMP[10].x, TEMP[8].zzzz 392: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 393: UIF TEMP[10].xxxx :0 394: MOV TEMP[10].x, CONST[82].xxxx 395: ELSE :0 396: MOV TEMP[10].x, TEMP[7].xxxx 397: ENDIF 398: MOV TEMP[11].x, TEMP[10].xxxx 399: ABS TEMP[10].x, TEMP[8].zzzz 400: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 401: UIF TEMP[10].xxxx :0 402: MOV TEMP[10].x, CONST[82].yyyy 403: ELSE :0 404: MOV TEMP[10].x, TEMP[7].yyyy 405: ENDIF 406: MOV TEMP[11].y, TEMP[10].xxxx 407: ABS TEMP[10].x, TEMP[8].zzzz 408: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 409: UIF TEMP[10].xxxx :0 410: MOV TEMP[10].x, CONST[82].zzzz 411: ELSE :0 412: MOV TEMP[10].x, TEMP[7].zzzz 413: ENDIF 414: MOV TEMP[11].z, TEMP[10].xxxx 415: ABS TEMP[10].x, TEMP[8].zzzz 416: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 417: UIF TEMP[10].xxxx :0 418: MOV TEMP[10].x, CONST[82].wwww 419: ELSE :0 420: MOV TEMP[10].x, TEMP[7].wwww 421: ENDIF 422: MOV TEMP[11].w, TEMP[10].xxxx 423: DP4 TEMP[4].x, TEMP[2], TEMP[4] 424: MOV_SAT TEMP[4].x, TEMP[4].xxxx 425: DP4 TEMP[10].x, TEMP[2], TEMP[11] 426: MOV_SAT TEMP[10].x, TEMP[10].xxxx 427: MOV TEMP[4].y, TEMP[10].xxxx 428: ABS TEMP[10].x, TEMP[8].xxxx 429: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 430: UIF TEMP[10].xxxx :0 431: MOV TEMP[10].x, CONST[86].zzzz 432: ELSE :0 433: MOV TEMP[10].x, IMM[0].yyyy 434: ENDIF 435: ABS TEMP[12].x, TEMP[8].xxxx 436: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 437: UIF TEMP[12].xxxx :0 438: MOV TEMP[12].x, CONST[86].wwww 439: ELSE :0 440: MOV TEMP[12].x, IMM[0].yyyy 441: ENDIF 442: MOV TEMP[11].y, TEMP[12].xxxx 443: ABS TEMP[12].x, TEMP[8].xxxx 444: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 445: UIF TEMP[12].xxxx :0 446: MOV TEMP[12].x, CONST[86].xxxx 447: ELSE :0 448: MOV TEMP[12].x, IMM[0].yyyy 449: ENDIF 450: MOV TEMP[11].z, TEMP[12].xxxx 451: ABS TEMP[12].x, TEMP[8].xxxx 452: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 453: UIF TEMP[12].xxxx :0 454: MOV TEMP[12].x, CONST[86].yyyy 455: ELSE :0 456: MOV TEMP[12].x, IMM[0].yyyy 457: ENDIF 458: MOV TEMP[11].w, TEMP[12].xxxx 459: ABS TEMP[12].x, TEMP[8].yyyy 460: FSGE TEMP[12].x, -TEMP[12].xxxx, IMM[0].yyyy 461: UIF TEMP[12].xxxx :0 462: MOV TEMP[12].x, CONST[87].zzzz 463: ELSE :0 464: MOV TEMP[12].x, TEMP[10].xxxx 465: ENDIF 466: ABS TEMP[10].x, TEMP[8].yyyy 467: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 468: UIF TEMP[10].xxxx :0 469: MOV TEMP[10].x, CONST[87].wwww 470: ELSE :0 471: MOV TEMP[10].x, TEMP[11].yyyy 472: ENDIF 473: MOV TEMP[11].y, TEMP[10].xxxx 474: ABS TEMP[10].x, TEMP[8].yyyy 475: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 476: UIF TEMP[10].xxxx :0 477: MOV TEMP[10].x, CONST[87].xxxx 478: ELSE :0 479: MOV TEMP[10].x, TEMP[11].zzzz 480: ENDIF 481: MOV TEMP[11].z, TEMP[10].xxxx 482: ABS TEMP[10].x, TEMP[8].yyyy 483: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 484: UIF TEMP[10].xxxx :0 485: MOV TEMP[10].x, CONST[87].yyyy 486: ELSE :0 487: MOV TEMP[10].x, TEMP[11].wwww 488: ENDIF 489: MOV TEMP[11].w, TEMP[10].xxxx 490: ABS TEMP[10].x, TEMP[8].zzzz 491: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 492: UIF TEMP[10].xxxx :0 493: MOV TEMP[10].x, CONST[88].zzzz 494: ELSE :0 495: MOV TEMP[10].x, TEMP[12].xxxx 496: ENDIF 497: MOV TEMP[11].x, TEMP[10].xxxx 498: ABS TEMP[10].x, TEMP[8].zzzz 499: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 500: UIF TEMP[10].xxxx :0 501: MOV TEMP[10].x, CONST[88].wwww 502: ELSE :0 503: MOV TEMP[10].x, TEMP[11].yyyy 504: ENDIF 505: MOV TEMP[11].y, TEMP[10].xxxx 506: ABS TEMP[10].x, TEMP[8].zzzz 507: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 508: UIF TEMP[10].xxxx :0 509: MOV TEMP[10].x, CONST[88].xxxx 510: ELSE :0 511: MOV TEMP[10].x, TEMP[11].zzzz 512: ENDIF 513: MOV TEMP[11].z, TEMP[10].xxxx 514: ABS TEMP[10].x, TEMP[8].zzzz 515: FSGE TEMP[10].x, -TEMP[10].xxxx, IMM[0].yyyy 516: UIF TEMP[10].xxxx :0 517: MOV TEMP[10].x, CONST[88].yyyy 518: ELSE :0 519: MOV TEMP[10].x, TEMP[11].wwww 520: ENDIF 521: MOV TEMP[11].w, TEMP[10].xxxx 522: MAD TEMP[3].xy, TEMP[4].xyyy, TEMP[11].xyyy, TEMP[11].zwww 523: ADD TEMP[2], TEMP[3], IMM[2].xxyy 524: TXL TEMP[10].x, TEMP[2], SAMP[3], SHADOW2D 525: MOV TEMP[2].x, TEMP[10].xxxx 526: ADD TEMP[9], TEMP[3], IMM[2].zxyy 527: ADD TEMP[4], TEMP[3], IMM[2].xzyy 528: ADD TEMP[7], TEMP[3], IMM[2].zzyy 529: TXL TEMP[10].x, TEMP[9], SAMP[3], SHADOW2D 530: MOV TEMP[2].y, TEMP[10].xxxx 531: TXL TEMP[10].x, TEMP[4], SAMP[3], SHADOW2D 532: MOV TEMP[2].z, TEMP[10].xxxx 533: TXL TEMP[10].x, TEMP[7], SAMP[3], SHADOW2D 534: MOV TEMP[2].w, TEMP[10].xxxx 535: DP4 TEMP[10].x, TEMP[2], IMM[1].wwww 536: ADD TEMP[2], TEMP[3], IMM[2].xyyy 537: TXL TEMP[12].x, TEMP[2], SAMP[3], SHADOW2D 538: MOV TEMP[2].x, TEMP[12].xxxx 539: ADD TEMP[9], TEMP[3], IMM[2].zyyy 540: TXL TEMP[9].x, TEMP[9], SAMP[3], SHADOW2D 541: ADD TEMP[4], TEMP[3], IMM[2].yzyy 542: TXL TEMP[4].x, TEMP[4], SAMP[3], SHADOW2D 543: ADD TEMP[7], TEMP[3], IMM[2].yxyy 544: TXL TEMP[7].x, TEMP[7], SAMP[3], SHADOW2D 545: MOV TEMP[2].y, TEMP[9].xxxx 546: MOV TEMP[2].z, TEMP[4].xxxx 547: MOV TEMP[2].w, TEMP[7].xxxx 548: DP4 TEMP[4].x, TEMP[2], IMM[2].wwww 549: MOV TEMP[1].w, TEMP[4].xxxx 550: MOV TEMP[7].xy, TEMP[3].xyyy 551: MOV TEMP[7].z, TEMP[5].xxxx 552: MOV TEMP[7].w, IMM[0].yyyy 553: TXL TEMP[5].x, TEMP[7], SAMP[3], SHADOW2D 554: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[10].xxxx 555: MOV TEMP[1].z, TEMP[4].xxxx 556: MAD TEMP[4].x, TEMP[5].xxxx, IMM[3].xxxx, TEMP[4].xxxx 557: MOV TEMP[1].z, TEMP[4].xxxx 558: MOV TEMP[11].xw, TEMP[1] 559: FSGE TEMP[5].x, TEMP[8].zzzz, IMM[0].yyyy 560: UIF TEMP[5].xxxx :0 561: MOV TEMP[5].x, IMM[0].xxxx 562: ELSE :0 563: MOV TEMP[5].x, TEMP[4].xxxx 564: ENDIF 565: MOV TEMP[11].z, TEMP[5].xxxx 566: MOV TEMP[1].x, TEMP[11].xxzw 567: LRP TEMP[2].x, TEMP[6].xxxx, TEMP[1].yyyy, TEMP[5].xxxx 568: MOV TEMP[1].y, TEMP[2].xxxx 569: ENDIF 570: ADD TEMP[2].xyz, -CONST[89].xyzz, IN[5].xyzz 571: DP3 TEMP[4].x, TEMP[2].xyzz, TEMP[2].xyzz 572: MAD TEMP[4].x, TEMP[4].xxxx, CONST[68].yyyy, CONST[68].xxxx 573: MOV_SAT TEMP[4].x, TEMP[4].xxxx 574: LRP TEMP[2].x, TEMP[4].xxxx, IMM[0].xxxx, TEMP[1].yyyy 575: ELSE :0 576: MOV TEMP[2].x, IMM[0].xxxx 577: ENDIF 578: MAD TEMP[4].xyz, IN[1].xyzz, TEMP[2].xxxx, IN[4].xyzz 579: MOV TEMP[1].yzw, TEMP[4].yxyz 580: UIF CONST[90].xxxx :0 581: DP3 TEMP[5].x, TEMP[4].xyzz, IMM[4].xyzz 582: RCP TEMP[5].x, TEMP[5].xxxx 583: MUL TEMP[5].x, TEMP[5].xxxx, IN[1].wwww 584: ADD TEMP[2].x, -TEMP[2].xxxx, IMM[0].xxxx 585: MAD TEMP[2].x, TEMP[5].xxxx, -TEMP[2].xxxx, IMM[0].xxxx 586: MUL TEMP[4].xyz, TEMP[4].zyxx, TEMP[2].xxxx 587: MAD TEMP[2].x, TEMP[2].xxxx, IMM[4].wwww, IMM[4].wwww 588: LRP TEMP[4].xyz, TEMP[2].xxxx, TEMP[4].zyxx, TEMP[4].xyzz 589: MOV TEMP[1].yzw, TEMP[4].yxyz 590: ENDIF 591: ADD TEMP[2].x, TEMP[0].wwww, IMM[1].yyyy 592: MAD TEMP[2].x, CONST[20].wwww, TEMP[2].xxxx, IMM[0].xxxx 593: ADD TEMP[4].x, TEMP[0].wwww, CONST[12].xxxx 594: ADD TEMP[5].xyz, IMM[1].yyyy, CONST[1].xyzz 595: MOV_SAT TEMP[4].x, TEMP[4].xxxx 596: MAD TEMP[4].xyz, TEMP[4].xxxx, TEMP[5].xyzz, IMM[0].xxxx 597: MUL TEMP[3].xyz, TEMP[1].yzww, TEMP[4].xyzz 598: MUL TEMP[5].x, TEMP[2].xxxx, CONST[1].wwww 599: MAD TEMP[2].x, TEMP[5].xxxx, IN[4].wwww, -TEMP[5].xxxx 600: MAD TEMP[5].x, CONST[12].wwww, TEMP[2].xxxx, TEMP[5].xxxx 601: DP3 TEMP[7].x, IN[3].xyzz, IN[2].xyzz 602: ADD TEMP[2].x, TEMP[7].xxxx, TEMP[7].xxxx 603: DP3 TEMP[7].x, IN[3].xyzz, IN[3].xyzz 604: MUL TEMP[6].xyz, TEMP[7].xxxx, IN[2].xyzz 605: MAD TEMP[6].xyz, TEMP[2].xxxx, IN[3].xyzz, -TEMP[6].xyzz 606: MOV TEMP[7].xyz, TEMP[6].xyzz 607: TEX TEMP[7].xyz, TEMP[7], SAMP[1], CUBE 608: MUL TEMP[6].xyz, TEMP[7].xyzz, CONST[30].zzzz 609: MUL TEMP[6].xyz, TEMP[1].xxxx, TEMP[6].xyzz 610: MUL TEMP[6].xyz, TEMP[6].xyzz, CONST[0].xyzz 611: MAD TEMP[1].xyz, TEMP[1].yzww, TEMP[4].xyzz, -CONST[19].zzzz 612: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[19].wwww 613: MOV_SAT TEMP[4].xyz, TEMP[1].xyzz 614: MAD TEMP[1].xyz, TEMP[6].xyzz, TEMP[4].xyzz, -TEMP[6].xyzz 615: MAD TEMP[1].xyz, CONST[19].yyyy, TEMP[1].xyzz, TEMP[6].xyzz 616: MAD TEMP[2].xyz, TEMP[1].xyzz, TEMP[1].xyzz, -TEMP[1].xyzz 617: MAD TEMP[1].xyz, CONST[19].xxxx, TEMP[2].xyzz, TEMP[1].xyzz 618: DP3 TEMP[4].x, TEMP[1].xyzz, IMM[5].xyzz 619: LRP TEMP[2].xyz, CONST[3].xyzz, TEMP[1].xyzz, TEMP[4].xxxx 620: MAD TEMP[0].xyz, TEMP[0].xyzz, TEMP[3].xyzz, TEMP[2].xyzz 621: ADD TEMP[1].xyz, CONST[20].xyzz, -IN[5].xyzz 622: DP3 TEMP[1].x, TEMP[1].xyzz, TEMP[1].xyzz 623: SQRT TEMP[1].x, TEMP[1].xxxx 624: MAD TEMP[1].x, TEMP[1].xxxx, CONST[21].wwww, CONST[21].xxxx 625: MOV_SAT TEMP[1].x, TEMP[1].xxxx 626: MIN TEMP[1].x, TEMP[1].xxxx, CONST[21].zzzz 627: ABS TEMP[2].x, CONST[12].yyyy 628: MUL TEMP[3].xyz, TEMP[0].xyzz, CONST[30].xxxx 629: MUL TEMP[4].x, CONST[29].wwww, IN[5].wwww 630: FSGE TEMP[6].x, -TEMP[2].xxxx, IMM[0].yyyy 631: UIF TEMP[6].xxxx :0 632: MOV TEMP[5].x, TEMP[5].xxxx 633: ELSE :0 634: MOV TEMP[5].x, TEMP[4].xxxx 635: ENDIF 636: MOV TEMP[2].w, TEMP[5].xxxx 637: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[1].xxxx 638: MAD TEMP[0].xyz, TEMP[0].xyzz, -CONST[30].xxxx, CONST[29].xyzz 639: MAD TEMP[2].xyz, TEMP[1].xxxx, TEMP[0].xyzz, TEMP[3].xyzz 640: MOV OUT[0], TEMP[2] 641: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %23 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !0 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0) %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 4) %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 8) %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 16) %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 20) %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 24) %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 28) %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 48) %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 52) %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 56) %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 192) %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 196) %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 204) %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 304) %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 308) %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 312) %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 316) %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 320) %43 = call float @llvm.SI.load.const(<16 x i8> %24, i32 324) %44 = call float @llvm.SI.load.const(<16 x i8> %24, i32 328) %45 = call float @llvm.SI.load.const(<16 x i8> %24, i32 332) %46 = call float @llvm.SI.load.const(<16 x i8> %24, i32 336) %47 = call float @llvm.SI.load.const(<16 x i8> %24, i32 344) %48 = call float @llvm.SI.load.const(<16 x i8> %24, i32 348) %49 = call float @llvm.SI.load.const(<16 x i8> %24, i32 464) %50 = call float @llvm.SI.load.const(<16 x i8> %24, i32 468) %51 = call float @llvm.SI.load.const(<16 x i8> %24, i32 472) %52 = call float @llvm.SI.load.const(<16 x i8> %24, i32 476) %53 = call float @llvm.SI.load.const(<16 x i8> %24, i32 480) %54 = call float @llvm.SI.load.const(<16 x i8> %24, i32 488) %55 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1080) %56 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1084) %57 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1088) %58 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1092) %59 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1168) %60 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1172) %61 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1176) %62 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1180) %63 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1184) %64 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1188) %65 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1192) %66 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1196) %67 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1232) %68 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1236) %69 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1240) %70 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1244) %71 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1248) %72 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1252) %73 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1256) %74 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1260) %75 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1296) %76 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1300) %77 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1304) %78 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1308) %79 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1312) %80 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1316) %81 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1320) %82 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1324) %83 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1376) %84 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1380) %85 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1384) %86 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1388) %87 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1392) %88 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1396) %89 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1400) %90 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1404) %91 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1408) %92 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1412) %93 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1416) %94 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1420) %95 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1424) %96 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1428) %97 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1432) %98 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1440) %99 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %100 = load <8 x i32>, <8 x i32> addrspace(2)* %99, align 32, !tbaa !0 %101 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %102 = load <4 x i32>, <4 x i32> addrspace(2)* %101, align 16, !tbaa !0 %103 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %104 = load <8 x i32>, <8 x i32> addrspace(2)* %103, align 32, !tbaa !0 %105 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %106 = load <4 x i32>, <4 x i32> addrspace(2)* %105, align 16, !tbaa !0 %107 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %108 = load <8 x i32>, <8 x i32> addrspace(2)* %107, align 32, !tbaa !0 %109 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %110 = load <4 x i32>, <4 x i32> addrspace(2)* %109, align 16, !tbaa !0 %111 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %112 = load <8 x i32>, <8 x i32> addrspace(2)* %111, align 32, !tbaa !0 %113 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %114 = load <4 x i32>, <4 x i32> addrspace(2)* %113, align 16, !tbaa !0 %115 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %116 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %117 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %118 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %119 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) %120 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %121 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) %122 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) %123 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %6, <2 x i32> %8) %124 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) %125 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) %126 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) %127 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %6, <2 x i32> %8) %128 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %6, <2 x i32> %8) %129 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %6, <2 x i32> %8) %130 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %6, <2 x i32> %8) %131 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %6, <2 x i32> %8) %132 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %6, <2 x i32> %8) %133 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %6, <2 x i32> %8) %134 = call float @llvm.SI.fs.interp(i32 3, i32 5, i32 %6, <2 x i32> %8) %135 = bitcast float %115 to i32 %136 = bitcast float %116 to i32 %137 = insertelement <2 x i32> undef, i32 %135, i32 0 %138 = insertelement <2 x i32> %137, i32 %136, i32 1 %139 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %138, <8 x i32> %100, <4 x i32> %102, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %140 = extractelement <4 x float> %139, i32 0 %141 = extractelement <4 x float> %139, i32 1 %142 = extractelement <4 x float> %139, i32 2 %143 = extractelement <4 x float> %139, i32 3 %144 = bitcast float %115 to i32 %145 = bitcast float %116 to i32 %146 = insertelement <2 x i32> undef, i32 %144, i32 0 %147 = insertelement <2 x i32> %146, i32 %145, i32 1 %148 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %147, <8 x i32> %108, <4 x i32> %110, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %149 = extractelement <4 x float> %148, i32 0 %150 = bitcast float %98 to i32 %151 = icmp eq i32 %150, 0 br i1 %151, label %ENDIF, label %IF IF: ; preds = %main_body %152 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1372) %153 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1368) %154 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1364) %155 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1360) %156 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1148) %157 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1144) %158 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1140) %159 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1136) %160 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1132) %161 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1128) %162 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1124) %163 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1120) %164 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1116) %165 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1112) %166 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1108) %167 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1104) %168 = fadd float %131, 0.000000e+00 %169 = fadd float %132, 0.000000e+00 %170 = fadd float %133, 0.000000e+00 %171 = fmul float %131, 0.000000e+00 %172 = fadd float %171, 1.000000e+00 %173 = fmul float %168, %167 %174 = fmul float %169, %166 %175 = fadd float %173, %174 %176 = fmul float %170, %165 %177 = fadd float %175, %176 %178 = fmul float %172, %164 %179 = fadd float %177, %178 %180 = fmul float %168, %163 %181 = fmul float %169, %162 %182 = fadd float %180, %181 %183 = fmul float %170, %161 %184 = fadd float %182, %183 %185 = fmul float %172, %160 %186 = fadd float %184, %185 %187 = call float @llvm.AMDIL.clamp.(float %179, float 0.000000e+00, float 1.000000e+00) %188 = call float @llvm.AMDIL.clamp.(float %186, float 0.000000e+00, float 1.000000e+00) %189 = fsub float %187, %179 %190 = fsub float %188, %186 %191 = fadd float %189, %190 %192 = fmul float %168, %59 %193 = fmul float %169, %60 %194 = fadd float %192, %193 %195 = fmul float %170, %61 %196 = fadd float %194, %195 %197 = fmul float %172, %62 %198 = fadd float %196, %197 %199 = fmul float %168, %63 %200 = fmul float %169, %64 %201 = fadd float %199, %200 %202 = fmul float %170, %65 %203 = fadd float %201, %202 %204 = fmul float %172, %66 %205 = fadd float %203, %204 %206 = call float @llvm.AMDIL.clamp.(float %198, float 0.000000e+00, float 1.000000e+00) %207 = call float @llvm.AMDIL.clamp.(float %205, float 0.000000e+00, float 1.000000e+00) %208 = fsub float %206, %198 %209 = fsub float %207, %205 %210 = fadd float %208, %209 %211 = fmul float %168, %67 %212 = fmul float %169, %68 %213 = fadd float %211, %212 %214 = fmul float %170, %69 %215 = fadd float %213, %214 %216 = fmul float %172, %70 %217 = fadd float %215, %216 %218 = fmul float %168, %71 %219 = fmul float %169, %72 %220 = fadd float %218, %219 %221 = fmul float %170, %73 %222 = fadd float %220, %221 %223 = fmul float %172, %74 %224 = fadd float %222, %223 %225 = call float @llvm.fabs.f32(float %210) %226 = fcmp ole float %225, -0.000000e+00 %. = select i1 %226, float %198, float %217 %227 = call float @llvm.fabs.f32(float %210) %228 = fcmp ole float %227, -0.000000e+00 %temp28.0 = select i1 %228, float %205, float %224 %229 = call float @llvm.fabs.f32(float %210) %230 = fcmp ole float %229, -0.000000e+00 %.230 = select i1 %230, float 1.000000e+00, float 2.000000e+00 %231 = call float @llvm.fabs.f32(float %191) %232 = fcmp ole float %231, -0.000000e+00 %temp28.2 = select i1 %232, float %179, float %. %233 = call float @llvm.fabs.f32(float %191) %234 = fcmp ole float %233, -0.000000e+00 %.temp28.0 = select i1 %234, float %186, float %temp28.0 %235 = call float @llvm.fabs.f32(float %191) %236 = fcmp ole float %235, -0.000000e+00 %temp16.1 = select i1 %236, float 0.000000e+00, float %.230 %237 = fmul float %168, %159 %238 = fmul float %169, %158 %239 = fadd float %237, %238 %240 = fmul float %170, %157 %241 = fadd float %239, %240 %242 = fmul float %172, %156 %243 = fadd float %241, %242 %244 = fadd float %temp28.2, -5.000000e-01 %245 = fadd float %.temp28.0, -5.000000e-01 %246 = call float @llvm.fabs.f32(float %244) %247 = call float @llvm.fabs.f32(float %245) %248 = fsub float %246, %55 %249 = fsub float %247, %55 %250 = fmul float %248, %56 %251 = fmul float %249, %56 %252 = call float @llvm.AMDIL.clamp.(float %250, float 0.000000e+00, float 1.000000e+00) %253 = call float @llvm.AMDIL.clamp.(float %251, float 0.000000e+00, float 1.000000e+00) %254 = fsub float 1.000000e+00, %252 %255 = fsub float 1.000000e+00, %253 %256 = fmul float %255, %254 %257 = call float @llvm.AMDIL.clamp.(float %temp28.2, float 0.000000e+00, float 1.000000e+00) %258 = call float @llvm.AMDIL.clamp.(float %.temp28.0, float 0.000000e+00, float 1.000000e+00) %259 = fadd float %temp16.1, -1.000000e+00 %260 = fadd float %temp16.1, -2.000000e+00 %261 = call float @llvm.fabs.f32(float %temp16.1) %262 = fcmp ole float %261, -0.000000e+00 %.231 = select i1 %262, float %153, float 0.000000e+00 %263 = call float @llvm.fabs.f32(float %temp16.1) %264 = fcmp ole float %263, -0.000000e+00 %temp40.0 = select i1 %264, float %152, float 0.000000e+00 %265 = call float @llvm.fabs.f32(float %temp16.1) %266 = fcmp ole float %265, -0.000000e+00 %.232 = select i1 %266, float %155, float 0.000000e+00 %267 = call float @llvm.fabs.f32(float %temp16.1) %268 = fcmp ole float %267, -0.000000e+00 %temp40.2 = select i1 %268, float %154, float 0.000000e+00 %269 = call float @llvm.fabs.f32(float %259) %270 = fcmp ole float %269, -0.000000e+00 %..231 = select i1 %270, float %85, float %.231 %271 = call float @llvm.fabs.f32(float %259) %272 = fcmp ole float %271, -0.000000e+00 %temp32.1 = select i1 %272, float %86, float %temp40.0 %273 = call float @llvm.fabs.f32(float %259) %274 = fcmp ole float %273, -0.000000e+00 %..232 = select i1 %274, float %83, float %.232 %275 = call float @llvm.fabs.f32(float %259) %276 = fcmp ole float %275, -0.000000e+00 %temp32.3 = select i1 %276, float %84, float %temp40.2 %277 = call float @llvm.fabs.f32(float %260) %278 = fcmp ole float %277, -0.000000e+00 %...231 = select i1 %278, float %89, float %..231 %279 = call float @llvm.fabs.f32(float %260) %280 = fcmp ole float %279, -0.000000e+00 %temp32.5 = select i1 %280, float %90, float %temp32.1 %281 = call float @llvm.fabs.f32(float %260) %282 = fcmp ole float %281, -0.000000e+00 %...232 = select i1 %282, float %87, float %..232 %283 = call float @llvm.fabs.f32(float %260) %284 = fcmp ole float %283, -0.000000e+00 %temp32.7 = select i1 %284, float %88, float %temp32.3 %285 = fmul float %257, %...231 %286 = fadd float %285, %...232 %287 = fmul float %258, %temp32.5 %288 = fadd float %287, %temp32.7 %289 = fadd float %286, 0x3F40000000000000 %290 = fadd float %288, 0x3F40000000000000 %291 = fadd float %243, 0.000000e+00 %292 = bitcast float %291 to i32 %293 = bitcast float %289 to i32 %294 = bitcast float %290 to i32 %295 = insertelement <4 x i32> undef, i32 %292, i32 0 %296 = insertelement <4 x i32> %295, i32 %293, i32 1 %297 = insertelement <4 x i32> %296, i32 %294, i32 2 %298 = insertelement <4 x i32> %297, i32 0, i32 3 %299 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %298, <8 x i32> %112, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %300 = extractelement <4 x float> %299, i32 0 %301 = fadd float %286, 0xBF40000000000000 %302 = fadd float %288, 0x3F40000000000000 %303 = fadd float %243, 0.000000e+00 %304 = fadd float %286, 0x3F40000000000000 %305 = fadd float %288, 0xBF40000000000000 %306 = fadd float %243, 0.000000e+00 %307 = fadd float %286, 0xBF40000000000000 %308 = fadd float %288, 0xBF40000000000000 %309 = fadd float %243, 0.000000e+00 %310 = bitcast float %303 to i32 %311 = bitcast float %301 to i32 %312 = bitcast float %302 to i32 %313 = insertelement <4 x i32> undef, i32 %310, i32 0 %314 = insertelement <4 x i32> %313, i32 %311, i32 1 %315 = insertelement <4 x i32> %314, i32 %312, i32 2 %316 = insertelement <4 x i32> %315, i32 0, i32 3 %317 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %316, <8 x i32> %112, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %318 = extractelement <4 x float> %317, i32 0 %319 = bitcast float %306 to i32 %320 = bitcast float %304 to i32 %321 = bitcast float %305 to i32 %322 = insertelement <4 x i32> undef, i32 %319, i32 0 %323 = insertelement <4 x i32> %322, i32 %320, i32 1 %324 = insertelement <4 x i32> %323, i32 %321, i32 2 %325 = insertelement <4 x i32> %324, i32 0, i32 3 %326 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %325, <8 x i32> %112, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %327 = extractelement <4 x float> %326, i32 0 %328 = bitcast float %309 to i32 %329 = bitcast float %307 to i32 %330 = bitcast float %308 to i32 %331 = insertelement <4 x i32> undef, i32 %328, i32 0 %332 = insertelement <4 x i32> %331, i32 %329, i32 1 %333 = insertelement <4 x i32> %332, i32 %330, i32 2 %334 = insertelement <4 x i32> %333, i32 0, i32 3 %335 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %334, <8 x i32> %112, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %336 = extractelement <4 x float> %335, i32 0 %337 = fmul float %300, 6.250000e-02 %338 = fmul float %318, 6.250000e-02 %339 = fadd float %337, %338 %340 = fmul float %327, 6.250000e-02 %341 = fadd float %339, %340 %342 = fmul float %336, 6.250000e-02 %343 = fadd float %341, %342 %344 = fadd float %286, 0x3F40000000000000 %345 = fadd float %288, 0.000000e+00 %346 = fadd float %243, 0.000000e+00 %347 = bitcast float %346 to i32 %348 = bitcast float %344 to i32 %349 = bitcast float %345 to i32 %350 = insertelement <4 x i32> undef, i32 %347, i32 0 %351 = insertelement <4 x i32> %350, i32 %348, i32 1 %352 = insertelement <4 x i32> %351, i32 %349, i32 2 %353 = insertelement <4 x i32> %352, i32 0, i32 3 %354 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %353, <8 x i32> %112, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %355 = extractelement <4 x float> %354, i32 0 %356 = fadd float %286, 0xBF40000000000000 %357 = fadd float %288, 0.000000e+00 %358 = fadd float %243, 0.000000e+00 %359 = bitcast float %358 to i32 %360 = bitcast float %356 to i32 %361 = bitcast float %357 to i32 %362 = insertelement <4 x i32> undef, i32 %359, i32 0 %363 = insertelement <4 x i32> %362, i32 %360, i32 1 %364 = insertelement <4 x i32> %363, i32 %361, i32 2 %365 = insertelement <4 x i32> %364, i32 0, i32 3 %366 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %365, <8 x i32> %112, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %367 = extractelement <4 x float> %366, i32 0 %368 = extractelement <4 x float> %366, i32 3 %369 = fadd float %286, 0.000000e+00 %370 = fadd float %288, 0xBF40000000000000 %371 = fadd float %243, 0.000000e+00 %372 = bitcast float %371 to i32 %373 = bitcast float %369 to i32 %374 = bitcast float %370 to i32 %375 = insertelement <4 x i32> undef, i32 %372, i32 0 %376 = insertelement <4 x i32> %375, i32 %373, i32 1 %377 = insertelement <4 x i32> %376, i32 %374, i32 2 %378 = insertelement <4 x i32> %377, i32 0, i32 3 %379 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %378, <8 x i32> %112, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %380 = extractelement <4 x float> %379, i32 0 %381 = fadd float %286, 0.000000e+00 %382 = fadd float %288, 0x3F40000000000000 %383 = fadd float %243, 0.000000e+00 %384 = bitcast float %383 to i32 %385 = bitcast float %381 to i32 %386 = bitcast float %382 to i32 %387 = insertelement <4 x i32> undef, i32 %384, i32 0 %388 = insertelement <4 x i32> %387, i32 %385, i32 1 %389 = insertelement <4 x i32> %388, i32 %386, i32 2 %390 = insertelement <4 x i32> %389, i32 0, i32 3 %391 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %390, <8 x i32> %112, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %392 = extractelement <4 x float> %391, i32 0 %393 = fmul float %355, 1.250000e-01 %394 = fmul float %367, 1.250000e-01 %395 = fadd float %393, %394 %396 = fmul float %380, 1.250000e-01 %397 = fadd float %395, %396 %398 = fmul float %392, 1.250000e-01 %399 = fadd float %397, %398 %400 = bitcast float %243 to i32 %401 = bitcast float %286 to i32 %402 = bitcast float %288 to i32 %403 = insertelement <4 x i32> undef, i32 %400, i32 0 %404 = insertelement <4 x i32> %403, i32 %401, i32 1 %405 = insertelement <4 x i32> %404, i32 %402, i32 2 %406 = insertelement <4 x i32> %405, i32 0, i32 3 %407 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %406, <8 x i32> %112, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %408 = extractelement <4 x float> %407, i32 0 %409 = fadd float %399, %343 %410 = fmul float %408, 2.500000e-01 %411 = fadd float %410, %409 %412 = fcmp olt float %256, 1.000000e+00 br i1 %412, label %IF111, label %ENDIF110 ENDIF: ; preds = %main_body, %ENDIF110 %temp8.0 = phi float [ %656, %ENDIF110 ], [ 1.000000e+00, %main_body ] %temp31.0 = phi float [ %temp31.1, %ENDIF110 ], [ undef, %main_body ] %413 = fmul float %117, %temp8.0 %414 = fadd float %413, %127 %415 = fmul float %118, %temp8.0 %416 = fadd float %415, %128 %417 = fmul float %119, %temp8.0 %418 = fadd float %417, %129 %419 = bitcast float %98 to i32 %420 = icmp eq i32 %419, 0 br i1 %420, label %ENDIF224, label %IF225 IF111: ; preds = %IF %421 = fadd float %temp16.1, 0.000000e+00 %422 = fadd float %temp16.1, -1.000000e+00 %423 = fadd float %temp16.1, -2.000000e+00 %424 = call float @llvm.fabs.f32(float %421) %425 = fcmp ole float %424, -0.000000e+00 %.233 = select i1 %425, float %59, float 0.000000e+00 %426 = call float @llvm.fabs.f32(float %421) %427 = fcmp ole float %426, -0.000000e+00 %temp40.5 = select i1 %427, float %60, float 0.000000e+00 %428 = call float @llvm.fabs.f32(float %421) %429 = fcmp ole float %428, -0.000000e+00 %.234 = select i1 %429, float %61, float 0.000000e+00 %430 = call float @llvm.fabs.f32(float %421) %431 = fcmp ole float %430, -0.000000e+00 %temp40.7 = select i1 %431, float %62, float 0.000000e+00 %432 = call float @llvm.fabs.f32(float %421) %433 = fcmp ole float %432, -0.000000e+00 %.235 = select i1 %433, float %63, float 0.000000e+00 %434 = call float @llvm.fabs.f32(float %421) %435 = fcmp ole float %434, -0.000000e+00 %temp40.9 = select i1 %435, float %64, float 0.000000e+00 %436 = call float @llvm.fabs.f32(float %421) %437 = fcmp ole float %436, -0.000000e+00 %.236 = select i1 %437, float %65, float 0.000000e+00 %438 = call float @llvm.fabs.f32(float %421) %439 = fcmp ole float %438, -0.000000e+00 %temp40.11 = select i1 %439, float %66, float 0.000000e+00 %440 = call float @llvm.fabs.f32(float %422) %441 = fcmp ole float %440, -0.000000e+00 %..233 = select i1 %441, float %67, float %.233 %442 = call float @llvm.fabs.f32(float %422) %443 = fcmp ole float %442, -0.000000e+00 %temp40.13 = select i1 %443, float %68, float %temp40.5 %444 = call float @llvm.fabs.f32(float %422) %445 = fcmp ole float %444, -0.000000e+00 %..234 = select i1 %445, float %69, float %.234 %446 = call float @llvm.fabs.f32(float %422) %447 = fcmp ole float %446, -0.000000e+00 %temp40.15 = select i1 %447, float %70, float %temp40.7 %448 = call float @llvm.fabs.f32(float %422) %449 = fcmp ole float %448, -0.000000e+00 %..235 = select i1 %449, float %71, float %.235 %450 = call float @llvm.fabs.f32(float %422) %451 = fcmp ole float %450, -0.000000e+00 %temp40.17 = select i1 %451, float %72, float %temp40.9 %452 = call float @llvm.fabs.f32(float %422) %453 = fcmp ole float %452, -0.000000e+00 %..236 = select i1 %453, float %73, float %.236 %454 = call float @llvm.fabs.f32(float %422) %455 = fcmp ole float %454, -0.000000e+00 %temp40.19 = select i1 %455, float %74, float %temp40.11 %456 = call float @llvm.fabs.f32(float %423) %457 = fcmp ole float %456, -0.000000e+00 %...233 = select i1 %457, float %75, float %..233 %458 = call float @llvm.fabs.f32(float %423) %459 = fcmp ole float %458, -0.000000e+00 %temp40.21 = select i1 %459, float %76, float %temp40.13 %460 = call float @llvm.fabs.f32(float %423) %461 = fcmp ole float %460, -0.000000e+00 %...234 = select i1 %461, float %77, float %..234 %462 = call float @llvm.fabs.f32(float %423) %463 = fcmp ole float %462, -0.000000e+00 %temp40.23 = select i1 %463, float %78, float %temp40.15 %464 = call float @llvm.fabs.f32(float %423) %465 = fcmp ole float %464, -0.000000e+00 %...235 = select i1 %465, float %79, float %..235 %466 = call float @llvm.fabs.f32(float %423) %467 = fcmp ole float %466, -0.000000e+00 %temp40.25 = select i1 %467, float %80, float %temp40.17 %468 = call float @llvm.fabs.f32(float %423) %469 = fcmp ole float %468, -0.000000e+00 %...236 = select i1 %469, float %81, float %..236 %470 = call float @llvm.fabs.f32(float %423) %471 = fcmp ole float %470, -0.000000e+00 %temp40.27 = select i1 %471, float %82, float %temp40.19 %472 = fmul float %168, %...233 %473 = fmul float %169, %temp40.21 %474 = fadd float %472, %473 %475 = fmul float %170, %...234 %476 = fadd float %474, %475 %477 = fmul float %172, %temp40.23 %478 = fadd float %476, %477 %479 = call float @llvm.AMDIL.clamp.(float %478, float 0.000000e+00, float 1.000000e+00) %480 = fmul float %168, %...235 %481 = fmul float %169, %temp40.25 %482 = fadd float %480, %481 %483 = fmul float %170, %...236 %484 = fadd float %482, %483 %485 = fmul float %172, %temp40.27 %486 = fadd float %484, %485 %487 = call float @llvm.AMDIL.clamp.(float %486, float 0.000000e+00, float 1.000000e+00) %488 = call float @llvm.fabs.f32(float %421) %489 = fcmp ole float %488, -0.000000e+00 %.237 = select i1 %489, float %85, float 0.000000e+00 %490 = call float @llvm.fabs.f32(float %421) %491 = fcmp ole float %490, -0.000000e+00 %temp48.1 = select i1 %491, float %86, float 0.000000e+00 %492 = call float @llvm.fabs.f32(float %421) %493 = fcmp ole float %492, -0.000000e+00 %.238 = select i1 %493, float %83, float 0.000000e+00 %494 = call float @llvm.fabs.f32(float %421) %495 = fcmp ole float %494, -0.000000e+00 %temp48.3 = select i1 %495, float %84, float 0.000000e+00 %496 = call float @llvm.fabs.f32(float %422) %497 = fcmp ole float %496, -0.000000e+00 %..237 = select i1 %497, float %89, float %.237 %498 = call float @llvm.fabs.f32(float %422) %499 = fcmp ole float %498, -0.000000e+00 %temp40.29 = select i1 %499, float %90, float %temp48.1 %500 = call float @llvm.fabs.f32(float %422) %501 = fcmp ole float %500, -0.000000e+00 %..238 = select i1 %501, float %87, float %.238 %502 = call float @llvm.fabs.f32(float %422) %503 = fcmp ole float %502, -0.000000e+00 %temp40.31 = select i1 %503, float %88, float %temp48.3 %504 = call float @llvm.fabs.f32(float %423) %505 = fcmp ole float %504, -0.000000e+00 %...237 = select i1 %505, float %93, float %..237 %506 = call float @llvm.fabs.f32(float %423) %507 = fcmp ole float %506, -0.000000e+00 %temp40.33 = select i1 %507, float %94, float %temp40.29 %508 = call float @llvm.fabs.f32(float %423) %509 = fcmp ole float %508, -0.000000e+00 %...238 = select i1 %509, float %91, float %..238 %510 = call float @llvm.fabs.f32(float %423) %511 = fcmp ole float %510, -0.000000e+00 %temp40.35 = select i1 %511, float %92, float %temp40.31 %512 = fmul float %479, %...237 %513 = fadd float %512, %...238 %514 = fmul float %487, %temp40.33 %515 = fadd float %514, %temp40.35 %516 = fadd float %513, 0x3F40000000000000 %517 = fadd float %515, 0x3F40000000000000 %518 = fadd float %243, 0.000000e+00 %519 = bitcast float %518 to i32 %520 = bitcast float %516 to i32 %521 = bitcast float %517 to i32 %522 = insertelement <4 x i32> undef, i32 %519, i32 0 %523 = insertelement <4 x i32> %522, i32 %520, i32 1 %524 = insertelement <4 x i32> %523, i32 %521, i32 2 %525 = insertelement <4 x i32> %524, i32 0, i32 3 %526 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %525, <8 x i32> %112, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %527 = extractelement <4 x float> %526, i32 0 %528 = fadd float %513, 0xBF40000000000000 %529 = fadd float %515, 0x3F40000000000000 %530 = fadd float %243, 0.000000e+00 %531 = fadd float %513, 0x3F40000000000000 %532 = fadd float %515, 0xBF40000000000000 %533 = fadd float %243, 0.000000e+00 %534 = fadd float %513, 0xBF40000000000000 %535 = fadd float %515, 0xBF40000000000000 %536 = fadd float %243, 0.000000e+00 %537 = bitcast float %530 to i32 %538 = bitcast float %528 to i32 %539 = bitcast float %529 to i32 %540 = insertelement <4 x i32> undef, i32 %537, i32 0 %541 = insertelement <4 x i32> %540, i32 %538, i32 1 %542 = insertelement <4 x i32> %541, i32 %539, i32 2 %543 = insertelement <4 x i32> %542, i32 0, i32 3 %544 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %543, <8 x i32> %112, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %545 = extractelement <4 x float> %544, i32 0 %546 = bitcast float %533 to i32 %547 = bitcast float %531 to i32 %548 = bitcast float %532 to i32 %549 = insertelement <4 x i32> undef, i32 %546, i32 0 %550 = insertelement <4 x i32> %549, i32 %547, i32 1 %551 = insertelement <4 x i32> %550, i32 %548, i32 2 %552 = insertelement <4 x i32> %551, i32 0, i32 3 %553 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %552, <8 x i32> %112, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %554 = extractelement <4 x float> %553, i32 0 %555 = bitcast float %536 to i32 %556 = bitcast float %534 to i32 %557 = bitcast float %535 to i32 %558 = insertelement <4 x i32> undef, i32 %555, i32 0 %559 = insertelement <4 x i32> %558, i32 %556, i32 1 %560 = insertelement <4 x i32> %559, i32 %557, i32 2 %561 = insertelement <4 x i32> %560, i32 0, i32 3 %562 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %561, <8 x i32> %112, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %563 = extractelement <4 x float> %562, i32 0 %564 = fmul float %527, 6.250000e-02 %565 = fmul float %545, 6.250000e-02 %566 = fadd float %564, %565 %567 = fmul float %554, 6.250000e-02 %568 = fadd float %566, %567 %569 = fmul float %563, 6.250000e-02 %570 = fadd float %568, %569 %571 = fadd float %513, 0x3F40000000000000 %572 = fadd float %515, 0.000000e+00 %573 = fadd float %243, 0.000000e+00 %574 = bitcast float %573 to i32 %575 = bitcast float %571 to i32 %576 = bitcast float %572 to i32 %577 = insertelement <4 x i32> undef, i32 %574, i32 0 %578 = insertelement <4 x i32> %577, i32 %575, i32 1 %579 = insertelement <4 x i32> %578, i32 %576, i32 2 %580 = insertelement <4 x i32> %579, i32 0, i32 3 %581 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %580, <8 x i32> %112, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %582 = extractelement <4 x float> %581, i32 0 %583 = fadd float %513, 0xBF40000000000000 %584 = fadd float %515, 0.000000e+00 %585 = fadd float %243, 0.000000e+00 %586 = bitcast float %585 to i32 %587 = bitcast float %583 to i32 %588 = bitcast float %584 to i32 %589 = insertelement <4 x i32> undef, i32 %586, i32 0 %590 = insertelement <4 x i32> %589, i32 %587, i32 1 %591 = insertelement <4 x i32> %590, i32 %588, i32 2 %592 = insertelement <4 x i32> %591, i32 0, i32 3 %593 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %592, <8 x i32> %112, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %594 = extractelement <4 x float> %593, i32 0 %595 = fadd float %513, 0.000000e+00 %596 = fadd float %515, 0xBF40000000000000 %597 = fadd float %243, 0.000000e+00 %598 = bitcast float %597 to i32 %599 = bitcast float %595 to i32 %600 = bitcast float %596 to i32 %601 = insertelement <4 x i32> undef, i32 %598, i32 0 %602 = insertelement <4 x i32> %601, i32 %599, i32 1 %603 = insertelement <4 x i32> %602, i32 %600, i32 2 %604 = insertelement <4 x i32> %603, i32 0, i32 3 %605 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %604, <8 x i32> %112, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %606 = extractelement <4 x float> %605, i32 0 %607 = fadd float %513, 0.000000e+00 %608 = fadd float %515, 0x3F40000000000000 %609 = fadd float %243, 0.000000e+00 %610 = bitcast float %609 to i32 %611 = bitcast float %607 to i32 %612 = bitcast float %608 to i32 %613 = insertelement <4 x i32> undef, i32 %610, i32 0 %614 = insertelement <4 x i32> %613, i32 %611, i32 1 %615 = insertelement <4 x i32> %614, i32 %612, i32 2 %616 = insertelement <4 x i32> %615, i32 0, i32 3 %617 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %616, <8 x i32> %112, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %618 = extractelement <4 x float> %617, i32 0 %619 = fmul float %582, 1.250000e-01 %620 = fmul float %594, 1.250000e-01 %621 = fadd float %619, %620 %622 = fmul float %606, 1.250000e-01 %623 = fadd float %621, %622 %624 = fmul float %618, 1.250000e-01 %625 = fadd float %623, %624 %626 = bitcast float %243 to i32 %627 = bitcast float %513 to i32 %628 = bitcast float %515 to i32 %629 = insertelement <4 x i32> undef, i32 %626, i32 0 %630 = insertelement <4 x i32> %629, i32 %627, i32 1 %631 = insertelement <4 x i32> %630, i32 %628, i32 2 %632 = insertelement <4 x i32> %631, i32 0, i32 3 %633 = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> %632, <8 x i32> %112, <4 x i32> %114, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %634 = extractelement <4 x float> %633, i32 0 %635 = fadd float %625, %570 %636 = fmul float %634, 2.500000e-01 %637 = fadd float %636, %635 %638 = fcmp oge float %423, 0.000000e+00 %.239 = select i1 %638, float 1.000000e+00, float %637 %639 = fsub float 1.000000e+00, %256 %640 = fmul float %411, %256 %641 = fmul float %.239, %639 %642 = fadd float %640, %641 br label %ENDIF110 ENDIF110: ; preds = %IF, %IF111 %temp5.0 = phi float [ %642, %IF111 ], [ %411, %IF ] %temp31.1 = phi float [ 0.000000e+00, %IF111 ], [ %368, %IF ] %643 = fsub float %131, %95 %644 = fsub float %132, %96 %645 = fsub float %133, %97 %646 = fmul float %643, %643 %647 = fmul float %644, %644 %648 = fadd float %647, %646 %649 = fmul float %645, %645 %650 = fadd float %648, %649 %651 = fmul float %650, %58 %652 = fadd float %651, %57 %653 = call float @llvm.AMDIL.clamp.(float %652, float 0.000000e+00, float 1.000000e+00) %654 = fsub float 1.000000e+00, %653 %655 = fmul float %temp5.0, %654 %656 = fadd float %653, %655 br label %ENDIF IF225: ; preds = %ENDIF %657 = fmul float %414, 0x3FCB333340000000 %658 = fmul float %416, 0x3FE6E48E80000000 %659 = fadd float %658, %657 %660 = fmul float %418, 0x3FB2752540000000 %661 = fadd float %659, %660 %662 = fdiv float 1.000000e+00, %661 %663 = fmul float %662, %120 %664 = fsub float 1.000000e+00, %temp8.0 %665 = fmul float %664, %663 %666 = fsub float 1.000000e+00, %665 %667 = fmul float %418, %666 %668 = fmul float %416, %666 %669 = fmul float %414, %666 %670 = fmul float %666, 5.000000e-01 %671 = fadd float %670, 5.000000e-01 %672 = fsub float 1.000000e+00, %671 %673 = fmul float %669, %671 %674 = fmul float %667, %672 %675 = fadd float %673, %674 %676 = fsub float 1.000000e+00, %671 %677 = fmul float %668, %671 %678 = fmul float %668, %676 %679 = fadd float %677, %678 %680 = fsub float 1.000000e+00, %671 %681 = fmul float %667, %671 %682 = fmul float %669, %680 %683 = fadd float %681, %682 br label %ENDIF224 ENDIF224: ; preds = %ENDIF, %IF225 %temp5.1 = phi float [ %675, %IF225 ], [ %414, %ENDIF ] %temp6.0 = phi float [ %679, %IF225 ], [ %416, %ENDIF ] %temp7.0 = phi float [ %683, %IF225 ], [ %418, %ENDIF ] %684 = fadd float %143, -1.000000e+00 %685 = fmul float %45, %684 %686 = fadd float %685, 1.000000e+00 %687 = fadd float %143, %35 %688 = fadd float %28, -1.000000e+00 %689 = fadd float %29, -1.000000e+00 %690 = fadd float %30, -1.000000e+00 %691 = call float @llvm.AMDIL.clamp.(float %687, float 0.000000e+00, float 1.000000e+00) %692 = fmul float %691, %688 %693 = fadd float %692, 1.000000e+00 %694 = fmul float %691, %689 %695 = fadd float %694, 1.000000e+00 %696 = fmul float %691, %690 %697 = fadd float %696, 1.000000e+00 %698 = fmul float %temp5.1, %693 %699 = fmul float %temp6.0, %695 %700 = fmul float %temp7.0, %697 %701 = fmul float %686, %31 %702 = fmul float %701, %130 %703 = fsub float %702, %701 %704 = fmul float %37, %703 %705 = fadd float %704, %701 %706 = fmul float %124, %121 %707 = fmul float %125, %122 %708 = fadd float %707, %706 %709 = fmul float %126, %123 %710 = fadd float %708, %709 %711 = fadd float %710, %710 %712 = fmul float %124, %124 %713 = fmul float %125, %125 %714 = fadd float %713, %712 %715 = fmul float %126, %126 %716 = fadd float %714, %715 %717 = fmul float %716, %121 %718 = fmul float %716, %122 %719 = fmul float %716, %123 %720 = fmul float %711, %124 %721 = fsub float %720, %717 %722 = fmul float %711, %125 %723 = fsub float %722, %718 %724 = fmul float %711, %126 %725 = fsub float %724, %719 %726 = insertelement <4 x float> undef, float %721, i32 0 %727 = insertelement <4 x float> %726, float %723, i32 1 %728 = insertelement <4 x float> %727, float %725, i32 2 %729 = insertelement <4 x float> %728, float %temp31.0, i32 3 %730 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %729) %731 = extractelement <4 x float> %730, i32 0 %732 = extractelement <4 x float> %730, i32 1 %733 = extractelement <4 x float> %730, i32 2 %734 = extractelement <4 x float> %730, i32 3 %735 = call float @llvm.fabs.f32(float %733) %736 = fdiv float 1.000000e+00, %735 %737 = fmul float %731, %736 %738 = fadd float %737, 1.500000e+00 %739 = fmul float %732, %736 %740 = fadd float %739, 1.500000e+00 %741 = bitcast float %740 to i32 %742 = bitcast float %738 to i32 %743 = bitcast float %734 to i32 %744 = insertelement <4 x i32> undef, i32 %741, i32 0 %745 = insertelement <4 x i32> %744, i32 %742, i32 1 %746 = insertelement <4 x i32> %745, i32 %743, i32 2 %747 = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %746, <8 x i32> %104, <4 x i32> %106, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %748 = extractelement <4 x float> %747, i32 0 %749 = extractelement <4 x float> %747, i32 1 %750 = extractelement <4 x float> %747, i32 2 %751 = fmul float %748, %54 %752 = fmul float %749, %54 %753 = fmul float %750, %54 %754 = fmul float %149, %751 %755 = fmul float %149, %752 %756 = fmul float %149, %753 %757 = fmul float %754, %25 %758 = fmul float %755, %26 %759 = fmul float %756, %27 %760 = fmul float %temp5.1, %693 %761 = fsub float %760, %40 %762 = fmul float %temp6.0, %695 %763 = fsub float %762, %40 %764 = fmul float %temp7.0, %697 %765 = fsub float %764, %40 %766 = fmul float %761, %41 %767 = fmul float %763, %41 %768 = fmul float %765, %41 %769 = call float @llvm.AMDIL.clamp.(float %766, float 0.000000e+00, float 1.000000e+00) %770 = call float @llvm.AMDIL.clamp.(float %767, float 0.000000e+00, float 1.000000e+00) %771 = call float @llvm.AMDIL.clamp.(float %768, float 0.000000e+00, float 1.000000e+00) %772 = fmul float %757, %769 %773 = fsub float %772, %757 %774 = fmul float %758, %770 %775 = fsub float %774, %758 %776 = fmul float %759, %771 %777 = fsub float %776, %759 %778 = fmul float %39, %773 %779 = fadd float %778, %757 %780 = fmul float %39, %775 %781 = fadd float %780, %758 %782 = fmul float %39, %777 %783 = fadd float %782, %759 %784 = fmul float %779, %779 %785 = fsub float %784, %779 %786 = fmul float %781, %781 %787 = fsub float %786, %781 %788 = fmul float %783, %783 %789 = fsub float %788, %783 %790 = fmul float %38, %785 %791 = fadd float %790, %779 %792 = fmul float %38, %787 %793 = fadd float %792, %781 %794 = fmul float %38, %789 %795 = fadd float %794, %783 %796 = fmul float %791, 0x3FD322D0E0000000 %797 = fmul float %793, 0x3FE2C8B440000000 %798 = fadd float %797, %796 %799 = fmul float %795, 0x3FBD2F1AA0000000 %800 = fadd float %798, %799 %801 = fsub float 1.000000e+00, %32 %802 = fmul float %791, %32 %803 = fmul float %800, %801 %804 = fadd float %802, %803 %805 = fsub float 1.000000e+00, %33 %806 = fmul float %793, %33 %807 = fmul float %800, %805 %808 = fadd float %806, %807 %809 = fsub float 1.000000e+00, %34 %810 = fmul float %795, %34 %811 = fmul float %800, %809 %812 = fadd float %810, %811 %813 = fmul float %140, %698 %814 = fadd float %813, %804 %815 = fmul float %141, %699 %816 = fadd float %815, %808 %817 = fmul float %142, %700 %818 = fadd float %817, %812 %819 = fsub float %42, %131 %820 = fsub float %43, %132 %821 = fsub float %44, %133 %822 = fmul float %819, %819 %823 = fmul float %820, %820 %824 = fadd float %823, %822 %825 = fmul float %821, %821 %826 = fadd float %824, %825 %827 = call float @llvm.sqrt.f32(float %826) %828 = fmul float %827, %48 %829 = fadd float %828, %46 %830 = call float @llvm.AMDIL.clamp.(float %829, float 0.000000e+00, float 1.000000e+00) %831 = call float @llvm.minnum.f32(float %830, float %47) %832 = call float @llvm.fabs.f32(float %36) %833 = fmul float %814, %53 %834 = fmul float %816, %53 %835 = fmul float %818, %53 %836 = fmul float %52, %134 %837 = fcmp ole float %832, -0.000000e+00 %.240 = select i1 %837, float %705, float %836 %838 = fmul float %831, %831 %839 = fmul float %53, %814 %840 = fsub float %49, %839 %841 = fmul float %53, %816 %842 = fsub float %50, %841 %843 = fmul float %53, %818 %844 = fsub float %51, %843 %845 = fmul float %838, %840 %846 = fadd float %845, %833 %847 = fmul float %838, %842 %848 = fadd float %847, %834 %849 = fmul float %838, %844 %850 = fadd float %849, %835 %851 = call i32 @llvm.SI.packf16(float %846, float %848) %852 = bitcast i32 %851 to float %853 = call i32 @llvm.SI.packf16(float %850, float %.240) %854 = bitcast i32 %853 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %852, float %854, float %852, float %854) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} crash_20151212041947_1.dmp[10518]: Uploading dump (out-of-process) /tmp/dumps/crash_20151212041947_1.dmp /mnt/multimedia2/Games/Steam/SteamApps/common/Counter-Strike Global Offensive/csgo.sh: Zeile 57: 9966 Speicherzugriffsfehler ${DEBUGGER} "${GAMEROOT}"/${GAMEEXE} "$@" crash_20151212041947_1.dmp[10518]: Finished uploading minidump (out-of-process): success = yes crash_20151212041947_1.dmp[10518]: response: Discarded=1 crash_20151212041947_1.dmp[10518]: file ''/tmp/dumps/crash_20151212041947_1.dmp'', upload yes: ''Discarded=1''