; ModuleID = 'input.cl' source_filename = "input.cl" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn-mesa-mesa3d" ; Function Attrs: nounwind sspstrong define amdgpu_kernel void @PulseWave(float addrspace(1)* nocapture %buffer, i64 %num_harmonics, float %duty_cycle) local_unnamed_addr #0 !kernel_arg_addr_space !1380 !kernel_arg_access_qual !1381 !kernel_arg_type !1382 !kernel_arg_base_type !1383 !kernel_arg_type_qual !1384 { tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !1385, metadata !1391) #3, !dbg !1392 tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !1393, metadata !1391) #3, !dbg !1396 %1 = tail call i32 @llvm.amdgcn.workgroup.id.x() #3, !dbg !1398 %2 = zext i32 %1 to i64 %dispatch_ptr.i1.i = tail call noalias nonnull dereferenceable(64) i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #3, !dbg !1400 %xy_size_ptr.i2.i = getelementptr inbounds i8, i8 addrspace(2)* %dispatch_ptr.i1.i, i64 4, !dbg !1400 %3 = bitcast i8 addrspace(2)* %xy_size_ptr.i2.i to i32 addrspace(2)*, !dbg !1400 %xy_size.i3.i = load i32, i32 addrspace(2)* %3, align 4, !dbg !1400, !invariant.load !6 %x_size.i.i = and i32 %xy_size.i3.i, 65535, !dbg !1400 %x_size.ext.i.i = zext i32 %x_size.i.i to i64, !dbg !1400 %4 = mul nuw nsw i64 %x_size.ext.i.i, %2, !dbg !1401 tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !1402, metadata !1391) #3, !dbg !1405 %5 = tail call i32 @llvm.amdgcn.workitem.id.x() #3, !dbg !1407, !range !1409 %6 = zext i32 %5 to i64 tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !1410, metadata !1391) #3, !dbg !1414 %7 = tail call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr() #3, !dbg !1416 %8 = getelementptr inbounds i8, i8 addrspace(2)* %7, i64 4, !dbg !1417 %9 = bitcast i8 addrspace(2)* %8 to i32 addrspace(2)*, !dbg !1417 %10 = load i32, i32 addrspace(2)* %9, align 4, !dbg !1417, !tbaa !1419 %11 = zext i32 %10 to i64, !dbg !1417 %12 = add nuw nsw i64 %11, %6, !dbg !1423 %13 = add nuw nsw i64 %12, %4, !dbg !1424 %arrayidx = getelementptr inbounds float, float addrspace(1)* %buffer, i64 %13 %14 = load float, float addrspace(1)* %arrayidx, align 4, !tbaa !1425 %div = fmul float %duty_cycle, 5.000000e-01 %sub = fsub float %14, %div %mul = fmul float %sub, 2.000000e+00 %mul1 = fmul float %mul, 0x400921FB60000000 %cmp34 = icmp eq i64 %num_harmonics, 0 br i1 %cmp34, label %._crit_edge, label %.lr.ph.preheader .lr.ph.preheader: ; preds = %0 br label %.lr.ph ._crit_edge: ; preds = %_Z3cosf.exit, %0 %sample.0.lcssa = phi float [ %duty_cycle, %0 ], [ %add, %_Z3cosf.exit ] %15 = tail call float @llvm.fmuladd.f32(float %sample.0.lcssa, float 2.000000e+00, float -1.000000e+00) store float %15, float addrspace(1)* %arrayidx, align 4, !tbaa !1425 ret void .lr.ph: ; preds = %.lr.ph.preheader, %_Z3cosf.exit %sample.036 = phi float [ %add, %_Z3cosf.exit ], [ %duty_cycle, %.lr.ph.preheader ] %i.035 = phi i64 [ %inc, %_Z3cosf.exit ], [ 1, %.lr.ph.preheader ] %conv = uitofp i64 %i.035 to float %mul2 = fmul float %conv, 0x400921FB60000000 %div3 = fdiv float 2.000000e+00, %mul2, !fpmath !1427 %mul6 = fmul float %mul2, %duty_cycle tail call void @llvm.dbg.value(metadata float %mul6, i64 0, metadata !1428, metadata !1391) #3, !dbg !1442 %16 = bitcast float %mul6 to i32, !dbg !1443 tail call void @llvm.dbg.value(metadata i32 %16, i64 0, metadata !1433, metadata !1391) #3, !dbg !1444 %17 = and i32 %16, 2147483647, !dbg !1445 tail call void @llvm.dbg.value(metadata i32 %17, i64 0, metadata !1434, metadata !1391) #3, !dbg !1446 %18 = bitcast i32 %17 to float, !dbg !1447 tail call void @llvm.dbg.value(metadata float %18, i64 0, metadata !1435, metadata !1391) #3, !dbg !1448 tail call void @llvm.dbg.value(metadata float %18, i64 0, metadata !1449, metadata !1391) #3, !dbg !1457 %19 = fcmp olt float %18, 0x4160000000000000, !dbg !1459 br i1 %19, label %20, label %48, !dbg !1461 ;