diff --git a/base/partr.jl b/base/partr.jl index c5bb6603d53af..8ec81321bfac0 100644 --- a/base/partr.jl +++ b/base/partr.jl @@ -68,6 +68,8 @@ function multiq_size(tpid::Int8) heap_c = UInt32(2) heap_p = UInt32(length(tpheaps)) + ccall("multiq_size", probecall, Cvoid, (Int8,), tpid) + if heap_c * nt <= heap_p return heap_p end diff --git a/src/Makefile b/src/Makefile index 6b914cd53da5b..7dbd865d68559 100644 --- a/src/Makefile +++ b/src/Makefile @@ -46,7 +46,8 @@ SRCS := \ simplevector runtime_intrinsics precompile jloptions \ threading partr stackwalk gc gc-debug gc-pages gc-stacks gc-alloc-profiler method \ jlapi signal-handling safepoint timing subtype rtutils gc-heap-snapshot \ - crc32c APInt-C processor ircode opaque_closure codegen-stubs coverage runtime_ccall + crc32c APInt-C processor ircode opaque_closure codegen-stubs coverage runtime_ccall \ + probes RT_LLVMLINK := CG_LLVMLINK := diff --git a/src/ccall.cpp b/src/ccall.cpp index ed3992a78091c..9d3e77a9636bc 100644 --- a/src/ccall.cpp +++ b/src/ccall.cpp @@ -11,6 +11,7 @@ STATISTIC(PLTThunks, "Number of PLT Thunks emitted"); STATISTIC(PLT, "Number of direct PLT entries emitted"); STATISTIC(EmittedCGlobals, "Number of C globals emitted"); STATISTIC(EmittedLLVMCalls, "Number of llvmcall intrinsics emitted"); +STATISTIC(EmittedProbeCalls, "Number of probecall intrinsics emitted"); #define _CCALL_STAT(name) jl_transformed_ccall__##name #define CCALL_STAT(name) _CCALL_STAT(name) @@ -568,7 +569,7 @@ typedef struct { } native_sym_arg_t; // --- parse :sym or (:sym, :lib) argument into address info --- -static void interpret_symbol_arg(jl_codectx_t &ctx, native_sym_arg_t &out, jl_value_t *arg, const char *fname, bool llvmcall) +static void interpret_symbol_arg(jl_codectx_t &ctx, native_sym_arg_t &out, jl_value_t *arg, const char *fname, bool llvmcall, bool probecall) { Value *&jl_ptr = out.jl_ptr; void (*&fptr)(void) = out.fptr; @@ -620,7 +621,7 @@ static void interpret_symbol_arg(jl_codectx_t &ctx, 
native_sym_arg_t &out, jl_va if (f_name != NULL) { // just symbol, default to JuliaDLHandle // will look in process symbol table - if (!llvmcall) { + if (!llvmcall && !probecall) { void *symaddr; std::string iname("i"); iname += f_name; @@ -695,7 +696,7 @@ static jl_cgval_t emit_cglobal(jl_codectx_t &ctx, jl_value_t **args, size_t narg Type *lrt = getSizeTy(ctx.builder.getContext()); assert(lrt == julia_type_to_llvm(ctx, rt)); - interpret_symbol_arg(ctx, sym, args[1], "cglobal", false); + interpret_symbol_arg(ctx, sym, args[1], "cglobal", false, false); if (sym.jl_ptr != NULL) { res = ctx.builder.CreateBitCast(sym.jl_ptr, lrt); @@ -1041,6 +1042,7 @@ class function_sig_t { std::string err_msg; CallingConv::ID cc; // calling convention ABI bool llvmcall; + bool probecall; jl_svec_t *at; // svec of julia argument types jl_value_t *rt; // julia return type jl_unionall_t *unionall_env; // UnionAll environment for `at` and `rt` @@ -1048,9 +1050,9 @@ class function_sig_t { size_t nreqargs; // number of required arguments in ccall function definition jl_codegen_params_t *ctx; - function_sig_t(const char *fname, Type *lrt, jl_value_t *rt, bool retboxed, jl_svec_t *at, jl_unionall_t *unionall_env, size_t nreqargs, CallingConv::ID cc, bool llvmcall, jl_codegen_params_t *ctx) + function_sig_t(const char *fname, Type *lrt, jl_value_t *rt, bool retboxed, jl_svec_t *at, jl_unionall_t *unionall_env, size_t nreqargs, CallingConv::ID cc, bool llvmcall, bool probecall, jl_codegen_params_t *ctx) : lrt(lrt), retboxed(retboxed), - prt(NULL), sret(0), cc(cc), llvmcall(llvmcall), + prt(NULL), sret(0), cc(cc), llvmcall(llvmcall), probecall(probecall), at(at), rt(rt), unionall_env(unionall_env), nccallargs(jl_svec_len(at)), nreqargs(nreqargs), ctx(ctx) @@ -1210,26 +1212,29 @@ std::string generate_func_sig(const char *fname) } }; -static std::pair<CallingConv::ID, bool> convert_cconv(jl_sym_t *lhd) +static std::tuple<CallingConv::ID, bool, bool> convert_cconv(jl_sym_t *lhd) { // check for calling convention specifier if (lhd == 
jl_symbol("stdcall")) { - return std::make_pair(CallingConv::X86_StdCall, false); + return std::make_tuple(CallingConv::X86_StdCall, false, false); } else if (lhd == jl_symbol("cdecl") || lhd == jl_symbol("ccall")) { // `ccall` calling convention is a placeholder for when there isn't one provided // it is not by itself a valid calling convention name to be specified in the surface // syntax. - return std::make_pair(CallingConv::C, false); + return std::make_tuple(CallingConv::C, false, false); } else if (lhd == jl_symbol("fastcall")) { - return std::make_pair(CallingConv::X86_FastCall, false); + return std::make_tuple(CallingConv::X86_FastCall, false, false); } else if (lhd == jl_symbol("thiscall")) { - return std::make_pair(CallingConv::X86_ThisCall, false); + return std::make_tuple(CallingConv::X86_ThisCall, false, false); } else if (lhd == jl_symbol("llvmcall")) { - return std::make_pair(CallingConv::C, true); + return std::make_tuple(CallingConv::C, true, false); + } + else if (lhd == jl_symbol("probecall")) { + return std::make_tuple(CallingConv::C, false, true); } jl_errorf("ccall: invalid calling convention %s", jl_symbol_name(lhd)); } @@ -1344,9 +1349,10 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) CallingConv::ID cc = CallingConv::C; bool llvmcall = false; - std::tie(cc, llvmcall) = convert_cconv(cc_sym); + bool probecall = false; + std::tie(cc, llvmcall, probecall) = convert_cconv(cc_sym); - interpret_symbol_arg(ctx, symarg, args[1], "ccall", llvmcall); + interpret_symbol_arg(ctx, symarg, args[1], "ccall", llvmcall, probecall); Value *&jl_ptr = symarg.jl_ptr; void (*&fptr)(void) = symarg.fptr; const char *&f_name = symarg.f_name; @@ -1452,7 +1458,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) rt = jl_ensure_rooted(ctx, rt); function_sig_t sig("ccall", lrt, rt, retboxed, (jl_svec_t*)at, unionall, nreqargs, - cc, llvmcall, &ctx.emission_context); + cc, llvmcall, probecall, 
&ctx.emission_context); for (size_t i = 0; i < nccallargs; i++) { jl_value_t *tti = jl_svecref(at, i); if (jl_is_abstract_ref_type(tti)) { @@ -1885,6 +1891,8 @@ jl_cgval_t function_sig_t::emit_a_ccall( return jl_cgval_t(); } + assert(!(llvmcall && probecall)); + FunctionType *functype = this->functype(ctx.builder.getContext()); Value **argvals = (Value**) alloca((nccallargs + sret) * sizeof(Value*)); @@ -2018,6 +2026,185 @@ jl_cgval_t function_sig_t::emit_a_ccall( } } } + else if (probecall) { + ++EmittedProbeCalls; + if (symarg.jl_ptr != NULL) { + emit_error(ctx, "probecall doesn't support dynamic pointers"); + return jl_cgval_t(); + } + else if (symarg.fptr != NULL) { + emit_error(ctx, "probecall doesn't support static pointers"); + return jl_cgval_t(); + } + else if (symarg.f_lib != NULL) { + emit_error(ctx, "probecall doesn't support dynamic libraries"); + return jl_cgval_t(); + } + else { + assert(symarg.f_name != NULL); + std::string fn_name = "jlprobe_"; + fn_name += symarg.f_name; + std::string fn_name_unique = fn_name + std::to_string(globalUniqueGeneratedNames++); + std::string probe_name = fn_name + "_probe"; + std::string sema_name = fn_name + "_semaphore"; + + Type *T_void = getVoidTy(ctx.builder.getContext()); + ArrayRef<Type*> T_probe_args = makeArrayRef(fargt); + FunctionType *FT_call_probe = FunctionType::get(T_void, T_probe_args, false); + PointerType *T_pcall_probe = FT_call_probe->getPointerTo(); + + // Generate probe or locate existing + jl_probe_spec_t *spec = jl_probe_register(symarg.f_name); + + Module *probemod = jl_Module; + + // TODO: always_inline + llvmf = Function::Create(FT_call_probe, GlobalValue::InternalLinkage, + fn_name_unique, probemod); + Function *F_llvmf = dyn_cast<Function>(llvmf); + assert(F_llvmf); + F_llvmf->addFnAttr(Attribute::AlwaysInline); + BasicBlock *probe_entry_bb = BasicBlock::Create(ctx.builder.getContext(), "probe_entry", F_llvmf); + BasicBlock *sema_loaded_bb = BasicBlock::Create(ctx.builder.getContext(), "semaphore_loaded", 
F_llvmf); + BasicBlock *probe_loaded_bb = BasicBlock::Create(ctx.builder.getContext(), "probe_loaded", F_llvmf); + BasicBlock *probe_done_bb = BasicBlock::Create(ctx.builder.getContext(), "probe_done", F_llvmf); + BasicBlock *probe_skip_bb = BasicBlock::Create(ctx.builder.getContext(), "probe_skip", F_llvmf); + IRBuilder<> irbuilder(probe_entry_bb); + + // Generate the probe slot + //jl_value_t *probe_init; + void **probe_init; + if (!spec->probe_addr) { + //probe_init = jl_eval_string("Base.RefValue{Ptr{Cvoid}}(0)"); + probe_init = (void **)calloc(1, sizeof(void *)); + spec->probe_addr = (void *)probe_init; + } else { + probe_init = (void **)spec->probe_addr; + } + //Value *probegv = julia_pgv(ctx, probe_name.c_str(), (void *)probe_init); + //Value *probegv = literal_pointer_val(ctx, (jl_value_t *)probe_init); + Type *T_i64 = getInt64Ty(irbuilder.getContext()); + Value *probegv = new GlobalVariable(*jl_Module, T_i64, false, + GlobalVariable::LinkOnceODRLinkage, + ConstantInt::get(T_i64, (uint64_t)probe_init), probe_name); + Value *probegv_ptr = irbuilder.CreateBitCast(probegv, T_pcall_probe->getPointerTo()); + LoadInst *probe_val = irbuilder.CreateAlignedLoad(T_pcall_probe, probegv_ptr, Align(sizeof(void*))); + probe_val->setOrdering(AtomicOrdering::Unordered); + probe_val->setVolatile(true); + + // Generate the semaphore slot + //jl_value_t *sema_init; + int64_t *sema_init; + if (!spec->semaphore_addr) { + //sema_init = jl_eval_string("Base.RefValue{UInt64}(0)"); + sema_init = (int64_t *)calloc(1, sizeof(int64_t)); + spec->semaphore_addr = (void *)sema_init; + } else { + sema_init = (int64_t *)spec->semaphore_addr; + } + //Value *semagv = julia_pgv(ctx, sema_name.c_str(), (void *)sema_init); + //Value *semagv = literal_pointer_val(ctx, (jl_value_t *)sema_init); + Value *semagv = new GlobalVariable(*jl_Module, T_i64, false, + GlobalVariable::LinkOnceODRLinkage, + ConstantInt::get(T_i64, (uint64_t)sema_init), sema_name); + Value *semagv_ptr = irbuilder.CreateBitCast(semagv, T_i64->getPointerTo()); + LoadInst *sema_val = irbuilder.CreateAlignedLoad(T_i64, 
semagv_ptr, Align(sizeof(void*))); + sema_val->setOrdering(AtomicOrdering::Unordered); + sema_val->setVolatile(true); + + // Only enter the probe when the semaphore is positive; otherwise leave it untouched + irbuilder.CreateCondBr(irbuilder.CreateICmpSGT(sema_val, ConstantInt::get(T_i64, 0)), sema_loaded_bb, probe_skip_bb); + + // Increment semaphore + irbuilder.SetInsertPoint(sema_loaded_bb); + irbuilder.CreateAtomicRMW(AtomicRMWInst::Add, semagv_ptr, ConstantInt::get(T_i64, 1), Align(sizeof(void*)), AtomicOrdering::AcquireRelease); + + // Check that probe is non-NULL + Value *probe_val_int = irbuilder.CreatePtrToInt(probe_val, T_i64); + irbuilder.CreateCondBr(irbuilder.CreateICmpEQ(probe_val_int, ConstantInt::get(T_i64, 0)), probe_done_bb, probe_loaded_bb); + + // Call probe + irbuilder.SetInsertPoint(probe_loaded_bb); + assert(!sret); + std::vector<Value*> probe_args; + for (unsigned i = 0; i < nccallargs; i++) { + probe_args.push_back(&F_llvmf->arg_begin()[i]); + } + irbuilder.CreateCall(FT_call_probe, probe_val, probe_args); + + // Emit STAPSDT note + // Ported from UProbes.jl/src/UProbes.jl + const char *usdt_provider = "jlprobe"; + const char *usdt_name = symarg.f_name; + char *usdt_argstr = (char *)calloc(256, 1); + char *usdt_constr = (char *)calloc(256, 1); + int argstr_pos = 0; + int constr_pos = 0; + for (unsigned int i = 0; i < nccallargs; i++) { + // FIXME: Determine type and signedness + int issigned = 0; + argstr_pos += snprintf(usdt_argstr+argstr_pos, 256-argstr_pos, "%s%zu@$%u%s", issigned ? "-" : "", sizeof(/*T*/int), i, i == nccallargs-1 ? "" : " "); + assert(argstr_pos < 256); + constr_pos += snprintf(usdt_constr+constr_pos, 256-constr_pos, "nor%s", i == nccallargs-1 ? 
"" : ","); + assert(constr_pos < 256); + } + const char *note = "990: nop\n" + " .pushsection .note.stapsdt,\"?\",\"note\"\n" + " .balign 4\n" + " .4byte 992f-991f, 994f-993f, 3\n" + "991: .asciz \"stapsdt\"\n" + "992: .balign 4\n" + "993: .%dbyte 990b\n" + " .%dbyte _.stapsdt.base\n" + " .%dbyte %s_%s_semaphore\n" + " .asciz \"%s\"\n" + " .asciz \"%s\"\n" + " .asciz \"%s\"\n" + "994: .balign 4\n" + " .popsection\n"; + char *usdt_note = (char *)malloc(1024); + // Format outside of assert() so the note is still built when NDEBUG is defined + int usdt_note_len = snprintf(usdt_note, 1024, note, + (int)sizeof(void *), + (int)sizeof(void *), + (int)sizeof(void *), usdt_provider, usdt_name, + usdt_provider, + usdt_name, + usdt_argstr); + assert(usdt_note_len >= 0 && usdt_note_len < 1024); + (void)usdt_note_len; + //Function *F_note = Function::Create(FT_call_probe, GlobalValue::InternalLinkage, fn_name + "_note", probemod); + //BasicBlock *note_entry_bb = BasicBlock::Create(ctx.builder.getContext(), "top", F_note); + //IRBuilder<> note_irbuilder(note_entry_bb); + auto iasm = InlineAsm::get(FT_call_probe, usdt_note, usdt_constr, true); + // InlineAsm keeps its own copies of both strings, so the scratch buffers can be released + free(usdt_note); + free(usdt_argstr); + free(usdt_constr); + irbuilder.CreateCall(iasm, probe_args); + Value *stapsdt_basegv = new GlobalVariable(*jl_Module, getInt8Ty(irbuilder.getContext()), false, + GlobalVariable::LinkOnceODRLinkage, + Constant::getNullValue(getInt8Ty(irbuilder.getContext())), "_.stapsdt.base"); + cast<GlobalVariable>(stapsdt_basegv)->setSection(".stapsdt.base"); + + irbuilder.CreateBr(probe_done_bb); + + // Decrement semaphore and return + irbuilder.SetInsertPoint(probe_done_bb); + irbuilder.CreateAtomicRMW(AtomicRMWInst::Sub, semagv_ptr, ConstantInt::get(T_i64, 1), Align(sizeof(void*)), AtomicOrdering::AcquireRelease); + irbuilder.CreateRetVoid(); + + // Disabled probe: return without decrementing a semaphore that was never incremented + irbuilder.SetInsertPoint(probe_skip_bb); + irbuilder.CreateRetVoid(); + + // Load probe and semaphore addresses into object + if (!jl_ExecutionEngine->getGlobalValueAddress(probe_name)) { + //jl_ExecutionEngine->addGlobalMapping(probe_name, (JITTargetAddress)spec->probe_addr); + //jl_ExecutionEngine->addGlobalMapping(sema_name, (JITTargetAddress)spec->semaphore_addr); + } + } + } else if (symarg.jl_ptr != NULL) { ++LiteralCCalls; null_pointer_check(ctx, symarg.jl_ptr); diff --git 
a/src/codegen.cpp b/src/codegen.cpp index 1caafd8e64330..388593393567b 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -6140,7 +6140,7 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con if (rt != declrt && rt != (jl_value_t*)jl_any_type) rt = jl_ensure_rooted(ctx, rt); - function_sig_t sig("cfunction", lrt, rt, retboxed, argt, unionall_env, false, CallingConv::C, false, &ctx.emission_context); + function_sig_t sig("cfunction", lrt, rt, retboxed, argt, unionall_env, false, CallingConv::C, false, false, &ctx.emission_context); assert(sig.fargt.size() + sig.sret == sig.fargt_sig.size()); if (!sig.err_msg.empty()) { emit_error(ctx, sig.err_msg); @@ -6280,7 +6280,8 @@ const char *jl_generate_ccallable(LLVMOrcThreadSafeModuleRef llvmmod, void *sysi jl_value_t *err; { // scope block for sig function_sig_t sig("cfunction", lcrt, crt, toboxed, - argtypes, NULL, false, CallingConv::C, false, &params); + argtypes, NULL, false, CallingConv::C, false, false, + &params); if (sig.err_msg.empty()) { size_t world = jl_atomic_load_acquire(&jl_world_counter); size_t min_valid = 0; diff --git a/src/init.c b/src/init.c index 18e4d41eb6d79..8455e919258cd 100644 --- a/src/init.c +++ b/src/init.c @@ -736,6 +736,7 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel) jl_prep_sanitizers(); void *stack_lo, *stack_hi; jl_init_stack_limits(1, &stack_lo, &stack_hi); + jl_init_probes(); jl_libjulia_internal_handle = jl_load_dynamic_library(NULL, JL_RTLD_DEFAULT, 1); #ifdef _OS_WINDOWS_ diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index 8ea77bfe12be3..eb4f3be2d1bcc 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -379,6 +379,8 @@ XX(jl_prepend_cwd) \ XX(jl_printf) \ XX(jl_print_backtrace) \ + XX(jl_probe_lookup) \ + XX(jl_probe_register) \ XX(jl_process_events) \ XX(jl_profile_clear_data) \ XX(jl_profile_delay_nsec) \ diff --git a/src/julia-syntax.scm b/src/julia-syntax.scm index 4a0407e019432..e92a607529894 100644 --- 
a/src/julia-syntax.scm +++ b/src/julia-syntax.scm @@ -2613,7 +2613,7 @@ ((eq? f 'ccall) (if (not (length> e 4)) (error "too few arguments to ccall")) (let* ((cconv (cadddr e)) - (have-cconv (memq cconv '(cdecl stdcall fastcall thiscall llvmcall))) + (have-cconv (memq cconv '(cdecl stdcall fastcall thiscall llvmcall probecall))) (after-cconv (if have-cconv (cddddr e) (cdddr e))) (name (caddr e)) (RT (car after-cconv)) @@ -4444,7 +4444,7 @@ f(x) = yt(x) ;; from the current function. (define (compile e break-labels value tail) (if (or (not (pair? e)) (memq (car e) '(null true false ssavalue quote inert top core copyast the_exception $ - globalref outerref thismodule cdecl stdcall fastcall thiscall llvmcall))) + globalref outerref thismodule cdecl stdcall fastcall thiscall llvmcall probecall))) (let ((e1 (if (and arg-map (symbol? e)) (get arg-map e e) e))) diff --git a/src/julia.h b/src/julia.h index 1395df4501329..ab1d652a89194 100644 --- a/src/julia.h +++ b/src/julia.h @@ -2241,6 +2241,16 @@ typedef struct { } jl_cgparams_t; extern JL_DLLEXPORT int jl_default_debug_info_kind; +// probes interface +typedef struct { + const char *name; + void *probe_addr; + void *semaphore_addr; +} jl_probe_spec_t; +JL_DLLEXPORT void jl_init_probes(void); +JL_DLLEXPORT jl_probe_spec_t *jl_probe_register(const char *); +JL_DLLEXPORT jl_probe_spec_t *jl_probe_lookup(const char *); + #ifdef __cplusplus } #endif diff --git a/src/probes.c b/src/probes.c new file mode 100644 index 0000000000000..323f9e57ceab3 --- /dev/null +++ b/src/probes.c @@ -0,0 +1,46 @@ +#include <string.h> +#include "julia.h" +#include "julia_internal.h" + +// FIXME: htable +arraylist_t *jl_probes; +jl_mutex_t jl_probes_mutex; + +void jl_init_probes(void) { + jl_probes = arraylist_new((arraylist_t *)malloc(sizeof(arraylist_t)), 1); + JL_MUTEX_INIT(&jl_probes_mutex); +} + +JL_DLLEXPORT jl_probe_spec_t *jl_probe_register(const char *name) { + JL_LOCK(&jl_probes_mutex); + + jl_probe_spec_t *spec; + for (size_t i = 0; i < 
jl_probes->len; i++) { + spec = (jl_probe_spec_t *)jl_probes->items[i]; + if (strcmp(spec->name, name) == 0) { + JL_UNLOCK(&jl_probes_mutex); + return spec; + } + } + spec = (jl_probe_spec_t *)calloc(1, sizeof(jl_probe_spec_t)); + spec->name = strdup(name); + //spec->probe_addr = calloc(1, sizeof(void*)); + //spec->semaphore_addr = calloc(1, sizeof(int64_t)); + arraylist_push(jl_probes, (void *)spec); + + JL_UNLOCK(&jl_probes_mutex); + return spec; +} + +JL_DLLEXPORT jl_probe_spec_t *jl_probe_lookup(const char *name) { + JL_LOCK(&jl_probes_mutex); + for (size_t i = 0; i < jl_probes->len; i++) { + jl_probe_spec_t *spec = (jl_probe_spec_t *)jl_probes->items[i]; + if (strcmp(spec->name, name) == 0) { + JL_UNLOCK(&jl_probes_mutex); + return spec; + } + } + JL_UNLOCK(&jl_probes_mutex); + return (jl_probe_spec_t *)NULL; +}