The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#define PERL_NO_GET_CONTEXT
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"

/* define DBG_SUB to cause a warning on each subroutine entry. */
/*#define DBG_SUB 1      */

/* define DBG_TIMER to cause a warning when the timer is turned on and off. */
/*#define DBG_TIMER 1  */

#ifdef DEBUGGING
#define ASSERT(x) assert(x)
#else
#define ASSERT(x)
#endif

static CV *
db_get_cv(pTHX_ SV *sv)
{
	CV *cv;

	if (SvIOK(sv)) {			/* if (PERLDB_SUB_NN) { */
	    cv = INT2PTR(CV*,SvIVX(sv));
	} else {
	    if (SvPOK(sv)) {
		STRLEN len;
		const char *const name = SvPV(sv, len);
		cv = get_cvn_flags(name, len, GV_ADD | SvUTF8(sv));
	    } else if (SvROK(sv)) {
		cv = (CV*)SvRV(sv);
	    } else {
		croak("DProf: don't know what subroutine to profile");
	    }
	}
	return cv;
}

#ifdef DBG_SUB
#  define DBG_SUB_NOTIFY(A) dprof_dbg_sub_notify(aTHX_ A)
void
dprof_dbg_sub_notify(pTHX_ SV *Sub) {
    CV * const cv = db_get_cv(aTHX_ Sub);
    GV * const gv = cv ? CvGV(cv) : NULL;
    if (cv && gv) {
	warn("XS DBsub(%s::%s)\n",
	     ((GvSTASH(gv) && HvNAME_get(GvSTASH(gv))) ?
	      HvNAME_get(GvSTASH(gv)) : "(null)"),
	     GvNAME(gv));
    } else {
	warn("XS DBsub(unknown) at %x", Sub);
    }
}
#else
#  define DBG_SUB_NOTIFY(A)  /* nothing */
#endif


#ifdef DBG_TIMER
#  define DBG_TIMER_NOTIFY(A) warn(A)
#else
#  define DBG_TIMER_NOTIFY(A)  /* nothing */
#endif

/* HZ == clock ticks per second */
#ifdef VMS
#  define HZ ((I32)CLK_TCK)
#  define DPROF_HZ HZ
#  include <starlet.h>  /* prototype for sys$gettim() */
#  include <lib$routines.h>
#  define Times(ptr) (dprof_times(aTHX_ ptr))
#  define NEEDS_DPROF_TIMES
#else
#  ifdef BSDish
#    define Times(ptr) (dprof_times(aTHX_ ptr))
#    define NEEDS_DPROF_TIMES
#    define HZ 1000000
#    define DPROF_HZ HZ
#  else
#    ifndef HZ
#      ifdef CLK_TCK
#        define HZ ((I32)CLK_TCK)
#      else
#        define HZ 60
#      endif
#    endif
#    ifdef OS2				/* times() has significant overhead */
#      define Times(ptr) (dprof_times(aTHX_ ptr))
#      define NEEDS_DPROF_TIMES
#      define INCL_DOSPROFILE
#      define INCL_DOSERRORS
#      include <os2.h>
#      define toLongLong(arg) (*(long long*)&(arg))
#      define DPROF_HZ g_dprof_ticks
#    else
#      define Times(ptr) (times(ptr))
#      define DPROF_HZ HZ
#    endif 
#  endif
#endif

XS(XS_Devel__DProf_END);        /* used by prof_mark() */

/* Everything is built on times(2).  See its manpage for a description
 * of the timings.
 */

union prof_any {
        clock_t tms_utime;  /* cpu time spent in user space */
        clock_t tms_stime;  /* cpu time spent in system */
        clock_t realtime;   /* elapsed real time, in ticks */
        const char *name;
        U32 id;
        opcode ptype;
};

typedef union prof_any PROFANY;

typedef struct {
    U32		dprof_ticks;
    const char*	out_file_name;	/* output file (defaults to tmon.out) */
    PerlIO*	fp;		/* pointer to tmon.out file */
    Off_t	TIMES_LOCATION;	/* Where in the file to store the time totals */
    int		SAVE_STACK;	/* How much data to buffer until end of run */
    int		prof_pid;	/* pid of profiled process */
    struct tms	prof_start;
    struct tms	prof_end;
    clock_t	rprof_start;	/* elapsed real time ticks */
    clock_t	rprof_end;
    clock_t	wprof_u;
    clock_t	wprof_s;
    clock_t	wprof_r;
    clock_t	otms_utime;
    clock_t	otms_stime;
    clock_t	orealtime;
    PROFANY*	profstack;
    int		profstack_max;
    int		profstack_ix;
    HV*		cv_hash;	/* cache of CV to identifier mappings */
    SV*		key_hash;	/* key for cv_hash */
    U32		total;
    U32		lastid;
    U32		default_perldb;
    UV		depth;
#ifdef OS2
    ULONG	frequ;
    long long	start_cnt;
#endif
#ifdef PERL_IMPLICIT_CONTEXT
    PerlInterpreter *my_perl;
#endif
} prof_state_t;

prof_state_t g_prof_state;

#define g_dprof_ticks		g_prof_state.dprof_ticks
#define g_out_file_name		g_prof_state.out_file_name
#define g_fp			g_prof_state.fp
#define g_TIMES_LOCATION	g_prof_state.TIMES_LOCATION
#define g_SAVE_STACK		g_prof_state.SAVE_STACK
#define g_prof_pid		g_prof_state.prof_pid
#define g_prof_start		g_prof_state.prof_start
#define g_prof_end		g_prof_state.prof_end
#define g_rprof_start		g_prof_state.rprof_start
#define g_rprof_end		g_prof_state.rprof_end
#define g_wprof_u		g_prof_state.wprof_u
#define g_wprof_s		g_prof_state.wprof_s
#define g_wprof_r		g_prof_state.wprof_r
#define g_otms_utime		g_prof_state.otms_utime
#define g_otms_stime		g_prof_state.otms_stime
#define g_orealtime		g_prof_state.orealtime
#define g_profstack		g_prof_state.profstack
#define g_profstack_max		g_prof_state.profstack_max
#define g_profstack_ix		g_prof_state.profstack_ix
#define g_cv_hash		g_prof_state.cv_hash
#define g_key_hash		g_prof_state.key_hash
#define g_total			g_prof_state.total
#define g_lastid		g_prof_state.lastid
#define g_default_perldb	g_prof_state.default_perldb
#define g_depth			g_prof_state.depth
#ifdef PERL_IMPLICIT_CONTEXT
#  define g_THX			g_prof_state.my_perl
#endif
#ifdef OS2
#  define g_frequ		g_prof_state.frequ
#  define g_start_cnt		g_prof_state.start_cnt
#endif

#ifdef NEEDS_DPROF_TIMES
static clock_t
dprof_times(pTHX_ struct tms *t)
{
#ifdef OS2
    ULONG rc;
    QWORD cnt;
    
    if (!g_frequ) {
	if (CheckOSError(DosTmrQueryFreq(&g_frequ)))
	    croak("DosTmrQueryFreq: %s", SvPV_nolen(perl_get_sv("!",GV_ADD)));
	else
	    g_frequ = g_frequ/DPROF_HZ;	/* count per tick */
	if (CheckOSError(DosTmrQueryTime(&cnt)))
	    croak("DosTmrQueryTime: %s",
		  SvPV_nolen_const(perl_get_sv("!",GV_ADD)));
	g_start_cnt = toLongLong(cnt);
    }

    if (CheckOSError(DosTmrQueryTime(&cnt)))
	    croak("DosTmrQueryTime: %s", SvPV_nolen(perl_get_sv("!",GV_ADD)));
    t->tms_stime = 0;
    return (t->tms_utime = (toLongLong(cnt) - g_start_cnt)/g_frequ);
#else		/* !OS2 */
#  ifdef VMS
    clock_t retval;
    /* Get wall time and convert to 10 ms intervals to
     * produce the return value dprof expects */
#    if defined(__DECC) && defined (__ALPHA)
#      include <ints.h>
    uint64 vmstime;
    _ckvmssts(sys$gettim(&vmstime));
    vmstime /= 100000;
    retval = vmstime & 0x7fffffff;
#    else
    /* (Older hw or ccs don't have an atomic 64-bit type, so we
     * juggle 32-bit ints (and a float) to produce a time_t result
     * with minimal loss of information.) */
    long int vmstime[2],remainder,divisor = 100000;
    _ckvmssts(sys$gettim((unsigned long int *)vmstime));
    vmstime[1] &= 0x7fff;  /* prevent overflow in EDIV */
    _ckvmssts(lib$ediv(&divisor,vmstime,(long int *)&retval,&remainder));
#    endif
    /* Fill in the struct tms using the CRTL routine . . .*/
    times((tbuffer_t *)t);
    return (clock_t) retval;
#  else		/* !VMS && !OS2 */
#    ifdef BSDish
#      include <sys/resource.h>
    struct rusage ru;
    struct timeval tv;
    /* Measure offset from start time to avoid overflow  */
    static struct timeval tv0 = { 0, 0 };

    if (!tv0.tv_sec)
        if (gettimeofday(&tv0, NULL) < 0)
            croak("gettimeofday: %s", SvPV_nolen_const(perl_get_sv("!",GV_ADD)));
    
    if (getrusage(0, &ru) < 0)
        croak("getrusage: %s", SvPV_nolen_const(perl_get_sv("!",GV_ADD)));

    if (gettimeofday(&tv, NULL) < 0)
        croak("gettimeofday: %s", SvPV_nolen_const(perl_get_sv("!",GV_ADD)));

    t->tms_stime = DPROF_HZ * ru.ru_stime.tv_sec + ru.ru_stime.tv_usec;
    t->tms_utime = DPROF_HZ * ru.ru_utime.tv_sec + ru.ru_utime.tv_usec;

    if (tv.tv_usec < tv0.tv_usec)
        tv.tv_sec--, tv.tv_usec += DPROF_HZ;

    return DPROF_HZ * (tv.tv_sec - tv0.tv_sec) + tv.tv_usec - tv0.tv_usec;
#    else  /* !VMS && !OS2 && !BSD! */
    return times(t);
#    endif
#  endif
#endif
}
#endif

static void
prof_dumpa(pTHX_ opcode ptype, U32 id)
{
    if (ptype == OP_LEAVESUB) {
	PerlIO_printf(g_fp,"- %"UVxf"\n", (UV)id);
    }
    else if(ptype == OP_ENTERSUB) {
	PerlIO_printf(g_fp,"+ %"UVxf"\n", (UV)id);
    }
    else if(ptype == OP_GOTO) {
	PerlIO_printf(g_fp,"* %"UVxf"\n", (UV)id);
    }
    else if(ptype == OP_DIE) {
	PerlIO_printf(g_fp,"/ %"UVxf"\n", (UV)id);
    }
    else {
	PerlIO_printf(g_fp,"Profiler unknown prof code %d\n", ptype);
    }
}   

static void
prof_dumps(pTHX_ U32 id, const char *pname, const char *gname)
{
    PerlIO_printf(g_fp,"& %"UVxf" %s %s\n", (UV)id, pname, gname);
}   

static void
prof_dumpt(pTHX_ long tms_utime, long tms_stime, long realtime)
{
    PerlIO_printf(g_fp,"@ %ld %ld %ld\n", tms_utime, tms_stime, realtime);
}   

static void
prof_dump_until(pTHX_ long ix)
{
    long base = 0;
    struct tms t1, t2;
    clock_t realtime2;

    const clock_t realtime1 = Times(&t1);

    while (base < ix) {
	const opcode ptype = g_profstack[base++].ptype;
	if (ptype == OP_TIME) {
	    const long tms_utime = g_profstack[base++].tms_utime;
	    const long tms_stime = g_profstack[base++].tms_stime;
	    const long realtime = g_profstack[base++].realtime;

	    prof_dumpt(aTHX_ tms_utime, tms_stime, realtime);
	}
	else if (ptype == OP_GV) {
	    const U32 id = g_profstack[base++].id;
	    const char * const pname = g_profstack[base++].name;
	    const char * const gname = g_profstack[base++].name;

	    prof_dumps(aTHX_ id, pname, gname);
	}
	else {
	    const U32 id = g_profstack[base++].id;
	    prof_dumpa(aTHX_ ptype, id);
	}
    }
    PerlIO_flush(g_fp);
    realtime2 = Times(&t2);
    if (realtime2 != realtime1 || t1.tms_utime != t2.tms_utime
	|| t1.tms_stime != t2.tms_stime) {
	g_wprof_r += realtime2 - realtime1;
	g_wprof_u += t2.tms_utime - t1.tms_utime;
	g_wprof_s += t2.tms_stime - t1.tms_stime;

	PerlIO_printf(g_fp,"+ & Devel::DProf::write\n");
	PerlIO_printf(g_fp,"@ %"IVdf" %"IVdf" %"IVdf"\n", 
		      /* The (IV) casts are one possibility:
		       * the Painfully Correct Way would be to
		       * have Clock_t_f. */
		      (IV)(t2.tms_utime - t1.tms_utime),
		      (IV)(t2.tms_stime - t1.tms_stime), 
		      (IV)(realtime2 - realtime1));
	PerlIO_printf(g_fp,"- & Devel::DProf::write\n");
	g_otms_utime = t2.tms_utime;
	g_otms_stime = t2.tms_stime;
	g_orealtime = realtime2;
	PerlIO_flush(g_fp);
    }
}

static void
set_cv_key(pTHX_ CV *cv, const char *pname, const char *gname)
{
	SvGROW(g_key_hash, sizeof(CV**) + strlen(pname) + strlen(gname) + 3);
	sv_setpvn(g_key_hash, (char*)&cv, sizeof(CV**));
	sv_catpv(g_key_hash, pname);
	sv_catpv(g_key_hash, "::");
	sv_catpv(g_key_hash, gname);
}

static void
prof_mark(pTHX_ opcode ptype)
{
    struct tms t;
    clock_t realtime, rdelta, udelta, sdelta;
    U32 id;
    SV * const Sub = GvSV(PL_DBsub);	/* name of current sub */

    if (g_SAVE_STACK) {
	if (g_profstack_ix + 10 > g_profstack_max) {
		g_profstack_max = g_profstack_max * 3 / 2;
		Renew(g_profstack, g_profstack_max, PROFANY);
	}
    }

    realtime = Times(&t);
    rdelta = realtime - g_orealtime;
    udelta = t.tms_utime - g_otms_utime;
    sdelta = t.tms_stime - g_otms_stime;
    if (rdelta || udelta || sdelta) {
	if (g_SAVE_STACK) {
	    ASSERT(g_profstack_ix + 4 <= g_profstack_max);
	    g_profstack[g_profstack_ix++].ptype = OP_TIME;
	    g_profstack[g_profstack_ix++].tms_utime = udelta;
	    g_profstack[g_profstack_ix++].tms_stime = sdelta;
	    g_profstack[g_profstack_ix++].realtime = rdelta;
	}
	else { /* Write it to disk now so's not to eat up core */
	    if (g_prof_pid == (int)getpid()) {
		prof_dumpt(aTHX_ udelta, sdelta, rdelta);
		PerlIO_flush(g_fp);
	    }
	}
	g_orealtime = realtime;
	g_otms_stime = t.tms_stime;
	g_otms_utime = t.tms_utime;
    }

    {
	SV **svp;
	char *gname, *pname;

	CV * const cv = db_get_cv(aTHX_ Sub);
	GV * const gv = CvGV(cv);
	if (isGV_with_GP(gv)) {
	    pname = GvSTASH(gv) ? HvNAME_get(GvSTASH(gv)) : NULL;
	    pname = pname ? pname : (char *) "(null)";
	    gname = GvNAME(gv);
	} else {
	    gname = pname = (char *) "(null)";
	}

	set_cv_key(aTHX_ cv, pname, gname);
	svp = hv_fetch(g_cv_hash, SvPVX_const(g_key_hash), SvCUR(g_key_hash), TRUE);
	if (!SvOK(*svp)) {
	    sv_setiv(*svp, id = ++g_lastid);
	    if (CvXSUB(cv) == XS_Devel__DProf_END)
		return;
	    if (g_SAVE_STACK) { /* Store it for later recording  -JH */
		ASSERT(g_profstack_ix + 4 <= g_profstack_max);
		g_profstack[g_profstack_ix++].ptype = OP_GV;
		g_profstack[g_profstack_ix++].id = id;
		g_profstack[g_profstack_ix++].name = pname;
		g_profstack[g_profstack_ix++].name = gname;
	    }
	    else { /* Write it to disk now so's not to eat up core */
		/* Only record the parent's info */
		if (g_prof_pid == (int)getpid()) {
		    prof_dumps(aTHX_ id, pname, gname);
		    PerlIO_flush(g_fp);
		}
		else
		    PL_perldb = 0;		/* Do not debug the kid. */
	    }
	}
	else {
	    id = SvIV(*svp);
	}
    }

    g_total++;
    if (g_SAVE_STACK) { /* Store it for later recording  -JH */
	ASSERT(g_profstack_ix + 2 <= g_profstack_max);
	g_profstack[g_profstack_ix++].ptype = ptype;
	g_profstack[g_profstack_ix++].id = id;

	/* Only record the parent's info */
	if (g_SAVE_STACK < g_profstack_ix) {
	    if (g_prof_pid == (int)getpid())
		prof_dump_until(aTHX_ g_profstack_ix);
	    else
		PL_perldb = 0;		/* Do not debug the kid. */
	    g_profstack_ix = 0;
	}
    }
    else { /* Write it to disk now so's not to eat up core */

	/* Only record the parent's info */
	if (g_prof_pid == (int)getpid()) {
	    prof_dumpa(aTHX_ ptype, id);
	    PerlIO_flush(g_fp);
	}
	else
	    PL_perldb = 0;		/* Do not debug the kid. */
    }
}

/* Counts overhead of prof_mark and extra XS call. */
static void
test_time(pTHX_ clock_t *r, clock_t *u, clock_t *s)
{
    CV * const cv = get_cvs("Devel::DProf::NONESUCH_noxs", 0);
    HV * const oldstash = PL_curstash;
    struct tms t1, t2;
    const U32 ototal = g_total;
    const U32 ostack = g_SAVE_STACK;
    const U32 operldb = PL_perldb;
    int k = 0;

    clock_t realtime1 = Times(&t1);
    clock_t realtime2 = 0;

    g_SAVE_STACK = 1000000;

    while (k < 2) {
	int i = 0;
	    /* Disable debugging of perl_call_sv on second pass: */
	PL_curstash = (k == 0 ? PL_defstash : PL_debstash);
	PL_perldb = g_default_perldb;
	while (++i <= 100) {
	    int j = 0;
	    g_profstack_ix = 0;		/* Do not let the stack grow */
	    while (++j <= 100) {
/* 		prof_mark(aTHX_ OP_ENTERSUB); */

		PUSHMARK(PL_stack_sp);
		perl_call_sv((SV*)cv, G_SCALAR);
		PL_stack_sp--;
/* 		prof_mark(aTHX_ OP_LEAVESUB); */
	    }
	}
	PL_curstash = oldstash;
	if (k == 0) {			/* Put time with debugging */
	    realtime2 = Times(&t2);
	    *r = realtime2 - realtime1;
	    *u = t2.tms_utime - t1.tms_utime;
	    *s = t2.tms_stime - t1.tms_stime;
	}
	else {				/* Subtract time without debug */
	    realtime1 = Times(&t1);
	    *r -= realtime1 - realtime2;
	    *u -= t1.tms_utime - t2.tms_utime;
	    *s -= t1.tms_stime - t2.tms_stime;	    
	}
	k++;
    }
    g_total = ototal;
    g_SAVE_STACK = ostack;
    PL_perldb = operldb;
}

static void
prof_recordheader(pTHX)
{
    clock_t r, u, s;

    /* g_fp is opened in the BOOT section */
    PerlIO_printf(g_fp, "#fOrTyTwO\n");
    PerlIO_printf(g_fp, "$hz=%"IVdf";\n", (IV)DPROF_HZ);
    PerlIO_printf(g_fp, "$XS_VERSION='DProf %s';\n", XS_VERSION);
    PerlIO_printf(g_fp, "# All values are given in HZ\n");
    test_time(aTHX_ &r, &u, &s);
    PerlIO_printf(g_fp,
		  "$over_utime=%"IVdf"; $over_stime=%"IVdf"; $over_rtime=%"IVdf";\n",
		  /* The (IV) casts are one possibility:
		   * the Painfully Correct Way would be to
		   * have Clock_t_f. */
		  (IV)u, (IV)s, (IV)r);
    PerlIO_printf(g_fp, "$over_tests=10000;\n");

    g_TIMES_LOCATION = PerlIO_tell(g_fp);

    /* Pad with whitespace. */
    /* This should be enough even for very large numbers. */
    PerlIO_printf(g_fp, "%*s\n", 240 , "");

    PerlIO_printf(g_fp, "\n");
    PerlIO_printf(g_fp, "PART2\n");

    PerlIO_flush(g_fp);
}

static void
prof_record(pTHX)
{
    /* g_fp is opened in the BOOT section */

    /* Now that we know the runtimes, fill them in at the recorded
       location -JH */

    if (g_SAVE_STACK) {
	prof_dump_until(aTHX_ g_profstack_ix);
    }
    PerlIO_seek(g_fp, g_TIMES_LOCATION, SEEK_SET);
    /* Write into reserved 240 bytes: */
    PerlIO_printf(g_fp,
		  "$rrun_utime=%"IVdf"; $rrun_stime=%"IVdf"; $rrun_rtime=%"IVdf";",
		  /* The (IV) casts are one possibility:
		   * the Painfully Correct Way would be to
		   * have Clock_t_f. */
		  (IV)(g_prof_end.tms_utime-g_prof_start.tms_utime-g_wprof_u),
		  (IV)(g_prof_end.tms_stime-g_prof_start.tms_stime-g_wprof_s),
		  (IV)(g_rprof_end-g_rprof_start-g_wprof_r));
    PerlIO_printf(g_fp, "\n$total_marks=%"IVdf, (IV)g_total);
    
    PerlIO_close(g_fp);
}

#define NONESUCH()

static void
check_depth(pTHX_ void *foo)
{
    const U32 need_depth = PTR2UV(foo);
    if (need_depth != g_depth) {
	if (need_depth > g_depth) {
	    warn("garbled call depth when profiling");
	}
	else {
	    IV marks = g_depth - need_depth;

/* 	    warn("Check_depth: got %d, expected %d\n", g_depth, need_depth); */
	    while (marks--) {
		prof_mark(aTHX_ OP_DIE);
	    }
	    g_depth = need_depth;
	}
    }
}

#define for_real
#ifdef for_real

XS(XS_DB_sub);
XS(XS_DB_sub)
{
    dMARK;
    dORIGMARK;
    SV * const Sub = GvSV(PL_DBsub);		/* name of current sub */

#ifdef PERL_IMPLICIT_CONTEXT
    /* profile only the interpreter that loaded us */
    if (g_THX != aTHX) {
        PUSHMARK(ORIGMARK);
        perl_call_sv((SV*)db_get_cv(aTHX_ Sub), GIMME_V | G_NODEBUG);
    }
    else
#endif
    {
	HV * const oldstash = PL_curstash;
	const I32 old_scopestack_ix = PL_scopestack_ix;
	const I32 old_cxstack_ix = cxstack_ix;

        DBG_SUB_NOTIFY(Sub);

	SAVEDESTRUCTOR_X(check_depth, INT2PTR(void*,g_depth));
	g_depth++;

        prof_mark(aTHX_ OP_ENTERSUB);
        PUSHMARK(ORIGMARK);
        perl_call_sv((SV*)db_get_cv(aTHX_ Sub), GIMME_V | G_NODEBUG);
        PL_curstash = oldstash;

	/* Make sure we are on the same context and scope as before the call
	 * to the sub. If the called sub was exited via a goto, next or
	 * last then this will try to croak(), however perl may still crash
	 * with a segfault. */
	if (PL_scopestack_ix != old_scopestack_ix || cxstack_ix != old_cxstack_ix)
	    croak("panic: Devel::DProf inconsistent subroutine return");

        prof_mark(aTHX_ OP_LEAVESUB);
	g_depth--;
    }
    return;
}

XS(XS_DB_goto);
XS(XS_DB_goto)
{
#ifdef PERL_IMPLICIT_CONTEXT
    if (g_THX == aTHX)
#endif
    {
        prof_mark(aTHX_ OP_GOTO);
        return;
    }
}

#endif /* for_real */

#ifdef testing

        MODULE = Devel::DProf           PACKAGE = DB

        void
        sub(...)
	PPCODE:
	    {
                dORIGMARK;
		HV * const oldstash = PL_curstash;
		SV * const Sub = GvSV(PL_DBsub);	/* name of current sub */
                /* SP -= items;  added by xsubpp */
                DBG_SUB_NOTIFY(Sub);

                sv_setiv(PL_DBsingle, 0);	/* disable DB single-stepping */

                prof_mark(aTHX_ OP_ENTERSUB);
                PUSHMARK(ORIGMARK);

                PL_curstash = PL_debstash;	/* To disable debugging of perl_call_sv */
                perl_call_sv(Sub, GIMME_V);
                PL_curstash = oldstash;

                prof_mark(aTHX_ OP_LEAVESUB);
                SPAGAIN;
                /* PUTBACK;  added by xsubpp */
	    }

#endif /* testing */

MODULE = Devel::DProf           PACKAGE = Devel::DProf

void
END()
PPCODE:
    {
        if (PL_DBsub) {
	    /* maybe the process forked--we want only
	     * the parent's profile.
	     */
	    if (
#ifdef PERL_IMPLICIT_CONTEXT
		g_THX == aTHX &&
#endif
		g_prof_pid == (int)getpid())
	    {
		g_rprof_end = Times(&g_prof_end);
		DBG_TIMER_NOTIFY("Profiler timer is off.\n");
		prof_record(aTHX);
	    }
	}
    }

void
NONESUCH()

BOOT:
    {
	g_TIMES_LOCATION = 42;
	g_SAVE_STACK = 1<<14;
    	g_profstack_max = 128;
#ifdef PERL_IMPLICIT_CONTEXT
	g_THX = aTHX;
#endif

        /* Before we go anywhere make sure we were invoked
         * properly, else we'll dump core.
         */
        if (!PL_DBsub)
	    croak("DProf: run perl with -d to use DProf.\n");

        /* When we hook up the XS DB::sub we'll be redefining
         * the DB::sub from the PM file.  Turn off warnings
         * while we do this.
         */
        {
	    const bool warn_tmp = PL_dowarn;
	    PL_dowarn = 0;
	    newXS("DB::sub", XS_DB_sub, file);
	    newXS("DB::goto", XS_DB_goto, file);
	    PL_dowarn = warn_tmp;
        }

        sv_setiv(PL_DBsingle, 0);	/* disable DB single-stepping */

	{
	    const char *buffer = getenv("PERL_DPROF_BUFFER");

	    if (buffer) {
		g_SAVE_STACK = atoi(buffer);
	    }

	    buffer = getenv("PERL_DPROF_TICKS");

	    if (buffer) {
		g_dprof_ticks = atoi(buffer); /* Used under OS/2 only */
	    }
	    else {
		g_dprof_ticks = HZ;
	    }

	    buffer = getenv("PERL_DPROF_OUT_FILE_NAME");
	    g_out_file_name = savepv(buffer ? buffer : "tmon.out");
	}

        if ((g_fp = PerlIO_open(g_out_file_name, "w")) == NULL)
	    croak("DProf: unable to write '%s', errno = %d\n",
		  g_out_file_name, errno);

	g_default_perldb = PERLDBf_NONAME | PERLDBf_SUB | PERLDBf_GOTO;
	g_cv_hash = newHV();
	g_key_hash = newSV(256);
        g_prof_pid = (int)getpid();

	Newx(g_profstack, g_profstack_max, PROFANY);
        prof_recordheader(aTHX);
        DBG_TIMER_NOTIFY("Profiler timer is on.\n");
	g_orealtime = g_rprof_start = Times(&g_prof_start);
	g_otms_utime = g_prof_start.tms_utime;
	g_otms_stime = g_prof_start.tms_stime;
	PL_perldb = g_default_perldb;
    }