Index: include/llvm/CodeGen/AsmPrinter.h
===================================================================
--- include/llvm/CodeGen/AsmPrinter.h
+++ include/llvm/CodeGen/AsmPrinter.h
@@ -197,6 +197,7 @@
     FUNCTION_ENTER = 0,
     FUNCTION_EXIT = 1,
     TAIL_CALL = 2,
+    CUSTOM_LOG = 3,
   };
 
   // The table will contain these structs that point to the sled, the function
Index: include/llvm/IR/Intrinsics.td
===================================================================
--- include/llvm/IR/Intrinsics.td
+++ include/llvm/IR/Intrinsics.td
@@ -759,6 +759,10 @@
 def int_load_relative: Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_anyint_ty],
                                  [IntrReadMem, IntrArgMemOnly]>;
 
+//===-------------------------- XRay Intrinsics ---------------------------===//
+// Custom log call: takes a single pointer argument (the log entry).
+def int_xray_customlog : Intrinsic<[], [llvm_ptr_ty]>;
+
 //===----------------------------------------------------------------------===//
 // Target-specific intrinsics
 //===----------------------------------------------------------------------===//
Index: include/llvm/Target/Target.td
===================================================================
--- include/llvm/Target/Target.td
+++ include/llvm/Target/Target.td
@@ -998,6 +998,13 @@
   let hasSideEffects = 1;
   let isReturn = 1;
 }
+def PATCHABLE_LOG_CALL : Instruction {
+  let OutOperandList = (outs);
+  let InOperandList = (ins unknown:$entry);
+  let AsmString = "# XRay Custom Log.";
+  let usesCustomInserter = 1;
+  let hasSideEffects = 1;
+}
 
 // Generic opcodes used in GlobalISel.
 include "llvm/Target/GenericOpcodes.td"
Index: include/llvm/Target/TargetOpcodes.def
===================================================================
--- include/llvm/Target/TargetOpcodes.def
+++ include/llvm/Target/TargetOpcodes.def
@@ -177,6 +177,10 @@
 /// PATCHABLE_RET which specifically only works for return instructions.
HANDLE_TARGET_OPCODE(PATCHABLE_TAIL_CALL)
 
+/// Wraps a logging call and its arguments with nop sleds. At runtime, this
+/// can be patched to insert instrumentation instructions.
+HANDLE_TARGET_OPCODE(PATCHABLE_LOG_CALL)
+
 /// The following generic opcodes are not supposed to appear after ISel.
 /// This is something we might want to relax, but for now, this is convenient
 /// to produce diagnostics.
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5729,7 +5729,25 @@
     setValue(&I, N);
     return nullptr;
   }
+  case Intrinsic::xray_customlog: {
+    // Lower the intrinsic to a PATCHABLE_LOG_CALL machine node that takes the
+    // log entry pointer and the current chain as operands.
+    Value *LogEntry = I.getArgOperand(0);
+    assert(LogEntry->getType()->isPointerTy() &&
+           "Log entry type to xray_customlog must be pointer");
+    SDValue LogEntryVal = getValue(LogEntry);
+    SDValue Chain = getRoot();
+    SDValue PatchableNode = SDValue(
+        DAG.getMachineNode(TargetOpcode::PATCHABLE_LOG_CALL, getCurSDLoc(),
+                           MVT::Other, MVT::Glue, LogEntryVal, Chain),
+        0);
+    // The intrinsic produces no value, so nothing else references this node;
+    // make it the new root to keep it in the DAG and ordered on the chain.
+    DAG.setRoot(PatchableNode);
+    setValue(&I, PatchableNode);
+    return nullptr;
+  }
 
   case Intrinsic::experimental_deoptimize:
     LowerDeoptimizeCall(&I);
     return nullptr;
Index: lib/Target/X86/X86AsmPrinter.h
===================================================================
--- lib/Target/X86/X86AsmPrinter.h
+++ lib/Target/X86/X86AsmPrinter.h
@@ -91,6 +91,7 @@
                                      X86MCInstLower &MCIL);
   void LowerPATCHABLE_RET(const MachineInstr &MI, X86MCInstLower &MCIL);
   void
LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, X86MCInstLower &MCIL);
+  void LowerPATCHABLE_LOG_CALL(const MachineInstr &MI, X86MCInstLower &MCIL);
 
   // Helper function that emits the XRay sleds we've collected for a particular
   // function.
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -25614,6 +25614,10 @@
   case TargetOpcode::STACKMAP:
   case TargetOpcode::PATCHPOINT:
     return emitPatchPoint(MI, BB);
+
+  case TargetOpcode::PATCHABLE_LOG_CALL:
+    // Nothing to expand here; the sled is emitted by the asm printer.
+    return BB;
 
   case X86::LCMPXCHG8B: {
     const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
Index: lib/Target/X86/X86MCInstLower.cpp
===================================================================
--- lib/Target/X86/X86MCInstLower.cpp
+++ lib/Target/X86/X86MCInstLower.cpp
@@ -1021,6 +1021,59 @@
            getSubtargetInfo());
 }
 
+void X86AsmPrinter::LowerPATCHABLE_LOG_CALL(const MachineInstr &MI,
+                                            X86MCInstLower &MCIL) {
+  // We want to emit the following 16-byte pattern:
+  //
+  //   .p2align 1, ...
+  // .Lxray_sled_N:
+  //   jmp .tmpN                # 2 bytes
+  //   <nops>                   # 4 bytes
+  //   push <reg>               # 1 byte (2 with a REX prefix)
+  //   <nops>                   # pads the sled up to 16 bytes
+  // .tmpN
+  //
+  // We need the full 16 bytes because at runtime, we'd be patching over the
+  // whole sled with the following pattern:
+  //
+  //   mov %r10, <...>          // 6 bytes
+  //   push <...>               // 5 bytes
+  //   call <...>               // 5 bytes
+  //
+  const MachineOperand &EntryOp = MI.getOperand(0);
+  assert(EntryOp.isReg() && "Log entry must be passed in a register");
+  // PUSH64r is a single opcode byte, plus a REX prefix for %r8-%r15.
+  const unsigned PushSize =
+      X86II::isX86_64ExtendedReg(EntryOp.getReg()) ? 2 : 1;
+  const unsigned LeadingNops = 4;
+  // Bytes skipped by the jmp: the 16-byte sled minus the jmp itself.
+  const unsigned JmpDisp = 14;
+  const unsigned TrailingNops = JmpDisp - LeadingNops - PushSize;
+
+  auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
+  OutStreamer->EmitCodeAlignment(2);
+  OutStreamer->EmitLabel(CurSled);
+  auto Target = OutContext.createTempSymbol();
+
+  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
+  // an operand (computed as an offset from the end of the jmp instruction), so
+  // it has to match the number of bytes emitted before .tmpN below.
+  // FIXME: Find another less hacky way to force the relative jump.
+  const char JmpBytes[] = {'\xeb', static_cast<char>(JmpDisp)};
+  OutStreamer->EmitBytes(StringRef(JmpBytes, sizeof(JmpBytes)));
+  EmitNops(*OutStreamer, LeadingNops, Subtarget->is64Bit(), getSubtargetInfo());
+  // Push the register containing the log entry onto the stack.
+  MCInst Push;
+  Push.setOpcode(X86::PUSH64r);
+  Push.addOperand(MCIL.LowerMachineOperand(&MI, EntryOp).getValue());
+  OutStreamer->EmitInstruction(Push, getSubtargetInfo());
+
+  EmitNops(*OutStreamer, TrailingNops, Subtarget->is64Bit(),
+           getSubtargetInfo());
+  OutStreamer->EmitLabel(Target);
+  recordSled(CurSled, MI, SledKind::CUSTOM_LOG);
+}
+
 void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
                                                   X86MCInstLower &MCIL) {
   // We want to emit the following pattern:
@@ -1436,6 +1489,9 @@
 
   case TargetOpcode::PATCHABLE_TAIL_CALL:
     return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering);
+
+  case TargetOpcode::PATCHABLE_LOG_CALL:
+    return LowerPATCHABLE_LOG_CALL(*MI, MCInstLowering);
 
   case X86::MORESTACK_RET:
     EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
Index: test/CodeGen/X86/xray-custom-log.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/xray-custom-log.ll
@@ -0,0 +1,30 @@
+; RUN: llc -filetype=asm -o - -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define i32 @caller() nounwind noinline
uwtable {
+  %logentryptr = alloca i8
+  call void @llvm.xray.customlog(i8* %logentryptr)
+  ret i32 0
+}
+
+declare void @llvm.xray.customlog(i8*)
+declare i32 @callee()
+define i32 @tail_caller() nounwind noinline uwtable {
+; CHECK: .p2align 1, 0x90
+; CHECK-LABEL: Lxray_sled_0:
+; CHECK-NEXT: .ascii "\353\t"
+; CHECK-NEXT: nopw 512(%rax,%rax)
+; CHECK-LABEL: Ltmp1:
+; CHECK: .p2align 1, 0x90
+; CHECK-LABEL: Lxray_sled_3:
+; CHECK-NEXT: .ascii "\353\t"
+; CHECK-NEXT: nopw 512(%rax,%rax)
+; CHECK-LABEL: Ltmp2:
+  %retval = tail call i32 @callee()
+; CHECK: jmp callee # TAILCALL
+  ret i32 %retval
+}
+; CHECK: .p2align 4, 0x90
+; CHECK-NEXT: .quad .Lxray_synthetic_1
+; CHECK-LABEL: Lxray_synthetic_1:
+; CHECK: .quad .Lxray_sled_2
+; CHECK: .quad .Lxray_sled_3