Skip to content

Commit

Permalink
Merge pull request #650 from fjtrujy/gprof
Browse files Browse the repository at this point in the history
Implementing profglue
  • Loading branch information
fjtrujy authored Jul 17, 2024
2 parents 2ef91b7 + b580972 commit bd0ca72
Show file tree
Hide file tree
Showing 8 changed files with 309 additions and 5 deletions.
1 change: 1 addition & 0 deletions .github/workflows/compilation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ jobs:
# Create symbolic links using relative paths
(cd $PS2DEV/ee/mips64r5900el-ps2-elf/lib && ln -sf ../../../ps2sdk/ee/lib/libcglue.a libcglue.a && cd -)
(cd $PS2DEV/ee/mips64r5900el-ps2-elf/lib && ln -sf ../../../ps2sdk/ee/lib/libpthreadglue.a libpthreadglue.a && cd -)
(cd $PS2DEV/ee/mips64r5900el-ps2-elf/lib && ln -sf ../../../ps2sdk/ee/lib/libprofglue.a libprofglue.a && cd -)
(cd $PS2DEV/ee/mips64r5900el-ps2-elf/lib && ln -sf ../../../ps2sdk/ee/lib/libkernel.a libkernel.a && cd -)
(cd $PS2DEV/ee/mips64r5900el-ps2-elf/lib && ln -sf ../../../ps2sdk/ee/lib/libcdvd.a libcdvd.a && cd -)
Expand Down
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ RUN cd /src && \
# Create symbolic links using relative paths
RUN (cd $PS2DEV/ee/mips64r5900el-ps2-elf/lib && ln -sf ../../../ps2sdk/ee/lib/libcglue.a libcglue.a && cd -)
RUN (cd $PS2DEV/ee/mips64r5900el-ps2-elf/lib && ln -sf ../../../ps2sdk/ee/lib/libpthreadglue.a libpthreadglue.a && cd -)
RUN (cd $PS2DEV/ee/mips64r5900el-ps2-elf/lib && ln -sf ../../../ps2sdk/ee/lib/libprofglue.a libprofglue.a && cd -)
RUN (cd $PS2DEV/ee/mips64r5900el-ps2-elf/lib && ln -sf ../../../ps2sdk/ee/lib/libkernel.a libkernel.a && cd -)
RUN (cd $PS2DEV/ee/mips64r5900el-ps2-elf/lib && ln -sf ../../../ps2sdk/ee/lib/libcdvd.a libcdvd.a && cd -)

Expand Down
4 changes: 2 additions & 2 deletions ee/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
# Licenced under Academic Free License version 2.0
# Review ps2sdk README & LICENSE files for further details.

SUBDIRS = startup erl kernel libcglue libpthreadglue rpc debug \
SUBDIRS = startup erl kernel libcglue libpthreadglue libprofglue rpc debug \
eedebug sbv dma graph math3d \
packet packet2 draw libgs \
libvux font input inputx network iopreboot \
mpeg \
elf-loader elf-loader-nocolour
elf-loader elf-loader-nocolour \

include $(PS2SDKSRC)/Defs.make
include $(PS2SDKSRC)/Rules.make
Expand Down
9 changes: 8 additions & 1 deletion ee/libcglue/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,14 @@ FDMAN_OBJS = \
__fdman_get_dup2_descriptor.o \
__fdman_release_descriptor.o

INIT_OBJS = __libpthreadglue_init.o __libpthreadglue_deinit.o _libcglue_init.o _libcglue_deinit.o _libcglue_args_parse.o
INIT_OBJS = \
__gprof_init.o \
__gprof_cleanup.o \
__libpthreadglue_init.o \
__libpthreadglue_deinit.o \
_libcglue_init.o \
_libcglue_deinit.o \
_libcglue_args_parse.o

SLEEP_OBJS = nanosleep.o

Expand Down
23 changes: 21 additions & 2 deletions ee/libcglue/src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,22 @@ void __locks_deinit();

int chdir(const char *path);

#ifdef F___gprof_init
/* Note: Default no-op profiling start hook, called from _libcglue_init().
 * It is weak so that a strong __gprof_init (such as the one provided by
 * libprofglue) overrides it; presumably that happens when the program is
 * built with -pg -- confirm against the toolchain link specs.
 */
__attribute__((weak))
void __gprof_init() {}
#else
void __gprof_init();
#endif

#ifdef F___gprof_cleanup
/* Note: Default no-op profiling stop hook, called from _libcglue_deinit().
 * It is weak so that a strong __gprof_cleanup (such as the one provided by
 * libprofglue) overrides it; presumably that happens when the program is
 * built with -pg -- confirm against the toolchain link specs.
 */
__attribute__((weak))
void __gprof_cleanup() {}
#else
void __gprof_cleanup();
#endif

#ifdef F___libpthreadglue_init
/* Note: This function is being called from __libcglue_init.
* It is a weak function because can be override by user program
Expand All @@ -46,8 +62,6 @@ __attribute__((weak))
void __libpthreadglue_deinit()
{
pthread_terminate();
__fdman_deinit();
__locks_deinit();
}
#else
void __libpthreadglue_deinit();
Expand All @@ -68,14 +82,19 @@ void _libcglue_init()

_libcglue_timezone_update();
_libcglue_rtc_update();

__gprof_init();
}
#endif

#ifdef F__libcglue_deinit
/* Note: Weak default teardown entry point of libcglue; a user program may
 * provide its own strong definition.
 * The calls below run in this exact order: profiling hook, pthread glue,
 * file-descriptor manager, locks.
 */
__attribute__((weak))
void _libcglue_deinit()
{
/* Profiling hook first. NOTE(review): the libprofglue implementation writes
 * its dump through stdio, so it presumably has to run before the fd manager
 * is torn down -- confirm. */
__gprof_cleanup();
__libpthreadglue_deinit();
__fdman_deinit();
__locks_deinit();
}
#endif

Expand Down
16 changes: 16 additions & 0 deletions ee/libprofglue/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# _____ ___ ____ ___ ____
# ____| | ____| | | |____|
# | ___| |____ ___| ____| | \ PS2DEV Open Source Project.
#-----------------------------------------------------------------------
# Copyright 2001-2004, ps2dev - http://www.ps2dev.org
# Licenced under Academic Free License version 2.0
# Review ps2sdk README & LICENSE files for further details.

# Name of the static library produced by this directory.
EE_LIB = libprofglue.a

# Objects archived into the library: prof.o (profiling runtime, src/prof.c)
# and mcount.o (the _mcount entry stub, src/mcount.S).
EE_OBJS = prof.o mcount.o

# Build definitions and rules are pulled in from the ps2sdk source tree
# pointed to by PS2SDKSRC.
include $(PS2SDKSRC)/Defs.make
include $(PS2SDKSRC)/ee/Rules.lib.make
include $(PS2SDKSRC)/ee/Rules.make
include $(PS2SDKSRC)/ee/Rules.release
39 changes: 39 additions & 0 deletions ee/libprofglue/src/mcount.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# _mcount: assembly entry stub reached from instrumented functions.
# It preserves ra, at and a0-a3, forwards (at, ra) to the C handler
# __mcount(frompc, selfpc), then returns to the instrumented function with
# ra restored to the value that was passed in at.
#
# noreorder: the instruction after each jal/jr below is its delay slot.
# noat: $at is used explicitly, so the assembler must not clobber it.
.set noreorder
.set noat

.global _mcount
.ent _mcount

_mcount:

# Per the original author, the compiler-generated call sequence has already
# subtracted 8 bytes from sp before jumping here.
# NOTE(review): confirm this for the ABI in use; 56 alone is not a multiple
# of 16, so the stack alignment seen by __mcount depends on it.
# We store our ra, at and a0-a3
# NOTE(review): only a0-a3 are preserved; if the ABI passes further
# arguments in other registers (integer or floating point), __mcount may
# clobber them -- confirm.
daddiu $sp, $sp, -56 # Reserve 56 bytes = 7 eight-byte slots; only the first 6 are used below
sd $ra, 0($sp) # ra = return address into the instrumented function
sd $at, 8($sp) # at = ra of caller
sd $a0, 16($sp)
sd $a1, 24($sp)
sd $a2, 32($sp)
sd $a3, 40($sp)

# Call internal C handler: __mcount(frompc = at, selfpc = ra)
move $a0, $at
move $a1, $ra
jal __mcount
nop

# Restore registers
ld $ra, 0($sp)
ld $at, 8($sp)
ld $a0, 16($sp)
ld $a1, 24($sp)
ld $a2, 32($sp)
ld $a3, 40($sp)
daddiu $sp, $sp, 56 # Release the 56 bytes reserved on entry
jr $ra
move $ra, $at # delay slot: restore caller's ra

.end _mcount

.set reorder
.set at
221 changes: 221 additions & 0 deletions ee/libprofglue/src/prof.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
/*
# _____ ___ ____ ___ ____
# ____| | ____| | | |____|
# | ___| |____ ___| ____| | \ PS2DEV Open Source Project.
#-----------------------------------------------------------------------
# Copyright 2001-2004, ps2dev - http://www.ps2dev.org
# Licenced under Academic Free License version 2.0
# Review ps2sdk README & LICENSE files for further details.
*/

#include <stdlib.h>
#include <malloc.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Values of gmonparam.state. Samples and arcs are only recorded while the
 * state is GMON_PROF_ON; every other value makes the handlers do nothing. */
#define GMON_PROF_ON 0    /* profiling active */
#define GMON_PROF_BUSY 1  /* profiler busy; recording suppressed */
#define GMON_PROF_ERROR 2 /* initialization failed (allocation error) */
#define GMON_PROF_OFF 3   /* profiling stopped by __gprof_cleanup() */

/* Version stamp stored in the 'version' field of the gmon.out header. */
#define GMONVERSION 0x00051879

#include <kernel.h>
#include <timer_alarm.h>

/** gmon.out file header
 *  Written verbatim at the start of the dump file by __gprof_cleanup(),
 *  immediately followed by the sample histogram and then the arc records.
 */
struct gmonhdr
{
int lpc; /* lowest pc address (start of profiled text) */
int hpc; /* highest pc address (end of profiled text) */
int ncnt; /* size of samples + size of header, in bytes */
int version; /* version number (GMONVERSION) */
int profrate; /* profiling clock rate (SAMPLE_FREQ) */
int resv[3]; /* reserved, written as zero */
};

/** frompc -> selfpc graph
 *  One call-graph arc. The arc table holds one slot per hashfraction bytes
 *  of text, indexed by the call site (frompc); records with a non-zero
 *  count are written verbatim to gmon.out.
 */
struct rawarc
{
unsigned int frompc; /* address in the caller (call site) */
unsigned int selfpc; /* address in the called function */
unsigned int count; /* number of times this slot was hit */
};

/** context
 *  Complete runtime state of the profiler; a single static instance (gp)
 *  is shared by the init/cleanup hooks, the timer handler and __mcount().
 */
struct gmonparam
{
/* one of the GMON_PROF_* values */
int state;
/* start address of the profiled text (&_ftext) */
unsigned int lowpc;
/* end address of the profiled text (&_etext) */
unsigned int highpc;
/* highpc - lowpc, in bytes */
unsigned int textsize;
/* bytes of text covered by one arc/sample slot (HISTFRACTION) */
unsigned int hashfraction;

/* number of entries in arcs */
int narcs;
/* call-graph table, indexed by (frompc - lowpc) / hashfraction */
struct rawarc *arcs;

/* number of entries in samples */
int nsamples;
/* histogram counters, indexed by (pc - lowpc) / hashfraction */
unsigned int *samples;

/* id of the currently armed timer alarm */
int timerId;

/* selfpc of the most recent in-range __mcount() call; this is the address
 * the timer handler attributes its sample to */
unsigned int pc;
};

/// holds context statistics (the single profiler instance used by this file)
static struct gmonparam gp;

/// one histogram/arc slot per four bytes of text space
#define HISTFRACTION 4

/// sampling interval: used as a microsecond period when arming the timer
/// (1000 us = 1 ms) and also written as the profiling rate in the gmon.out
/// header (1000 Hz). The two uses only agree because 1000 us == 1/1000 Hz;
/// changing this value requires splitting it into separate constants.
#define SAMPLE_FREQ 1000

/// defined by linker: only their addresses are used, as the start and end
/// of the profiled text range
extern int _ftext;
extern int _etext;

/** Internal timer handler
    Periodic alarm callback that feeds the sample histogram.

    The sampled address is the selfpc most recently recorded by __mcount()
    (gp.pc), not the interrupted program counter: pc_value is not used.
    The handler schedules the next alarm itself on every invocation.

    @param id              alarm id (unused)
    @param scheduled_time  time the alarm was scheduled for (unused)
    @param actual_time     time the alarm actually fired (unused)
    @param arg             pointer to the struct gmonparam being updated
    @param pc_value        unused
    @return always 0
*/
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
static uint64_t timer_handler(int id, uint64_t scheduled_time, uint64_t actual_time, void *arg, void *pc_value)
{
    struct gmonparam *current_gp = (struct gmonparam *)arg;
    unsigned int frompc = current_gp->pc;

    (void)id;
    (void)scheduled_time;
    (void)actual_time;
    (void)pc_value;

    if (current_gp->state == GMON_PROF_ON) {
        /* The range is half-open: highpc itself maps to slot
           textsize / hashfraction, which equals nsamples whenever textsize
           is a multiple of hashfraction, i.e. one past the end of samples. */
        if (frompc >= current_gp->lowpc && frompc < current_gp->highpc) {
            unsigned int e = (frompc - current_gp->lowpc) / current_gp->hashfraction;
            current_gp->samples[e]++;
        }
    }

    /* Arm the next alarm, one sampling period from now. */
    current_gp->timerId = iSetTimerAlarm(USec2TimerBusClock(SAMPLE_FREQ), &timer_handler, arg);
    return 0;
}

/** Initializes pg library
After calculating the text size, __gprof_initialize() allocates enough
memory to allow fastest access to arc structures, and some more
for sampling statistics. Note that this also installs a timer that
runs at 1000 hert.
*/
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
void __gprof_init()
{
memset(&gp, '\0', sizeof(gp));
gp.state = GMON_PROF_ON;
gp.lowpc = (unsigned int)&_ftext;
gp.highpc = (unsigned int)&_etext;
gp.textsize = gp.highpc - gp.lowpc;
gp.hashfraction = HISTFRACTION;

gp.narcs = (gp.textsize + gp.hashfraction - 1) / gp.hashfraction;
gp.arcs = (struct rawarc *)malloc(sizeof(struct rawarc) * gp.narcs);
if (gp.arcs == NULL) {
gp.state = GMON_PROF_ERROR;
return;
}

gp.nsamples = (gp.textsize + gp.hashfraction - 1) / gp.hashfraction;
gp.samples = (unsigned int *)malloc(sizeof(unsigned int) * gp.nsamples);
if (gp.samples == NULL) {
free(gp.arcs);
gp.arcs = 0;
gp.state = GMON_PROF_ERROR;
return;
}

memset((void *)gp.arcs, '\0', gp.narcs * (sizeof(struct rawarc)));
memset((void *)gp.samples, '\0', gp.nsamples * (sizeof(unsigned int)));


gp.state = GMON_PROF_ON;
gp.timerId = SetTimerAlarm(USec2TimerBusClock(SAMPLE_FREQ), &timer_handler, &gp);
}

/** Writes gmon.out dump file and stops profiling
    Called from _libcglue_deinit(). Does nothing unless profiling is
    currently on. Otherwise it turns profiling off, releases the timer
    alarm and writes "gmon.out" (opened relative to the current directory):
    the header, then the sample histogram, then every arc record whose
    count is non-zero.

    If the file cannot be opened the dump is skipped; the profiling buffers
    are released in either case.
*/
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
void __gprof_cleanup()
{
    FILE *fp;
    int i;
    struct gmonhdr hdr;

    if (gp.state != GMON_PROF_ON) {
        /* profiling was disabled anyway */
        return;
    }

    /* disable profiling before we make plenty of libc calls */
    gp.state = GMON_PROF_OFF;

    ReleaseTimerAlarm(gp.timerId);

    fp = fopen("gmon.out", "wb");
    if (fp != NULL) {
        hdr.lpc = gp.lowpc;
        hdr.hpc = gp.highpc;
        hdr.ncnt = sizeof(hdr) + (sizeof(unsigned int) * gp.nsamples);
        hdr.version = GMONVERSION;
        hdr.profrate = SAMPLE_FREQ;
        hdr.resv[0] = 0;
        hdr.resv[1] = 0;
        hdr.resv[2] = 0;
        fwrite(&hdr, 1, sizeof(hdr), fp);
        fwrite(gp.samples, sizeof(unsigned int), gp.nsamples, fp);

        /* Only slots that were actually hit are emitted. */
        for (i = 0; i < gp.narcs; i++) {
            if (gp.arcs[i].count > 0) {
                fwrite(gp.arcs + i, sizeof(struct rawarc), 1, fp);
            }
        }

        fclose(fp);
    }

    /* free memory; clear the pointers so nothing can reuse them */
    free(gp.arcs);
    gp.arcs = NULL;
    free(gp.samples);
    gp.samples = NULL;
}

/** Internal C handler for _mcount()
    @param frompc pc address of caller
    @param selfpc pc address of current function
    Called from mcount.S to make life a bit easier. __mcount is called
    right before a function starts. GCC generates a tiny stub at the very
    beginning of each compiled routine, which eventually brings the
    control to here.

    Both addresses are masked to their low 28 bits. If frompc lies inside
    the profiled text, selfpc is remembered for the timer handler and the
    arc slot of the call site is updated. There is a single slot per call
    site: it always holds the most recent selfpc, while count accumulates
    every call made from that site.
*/
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
void __mcount(unsigned int frompc, unsigned int selfpc)
{
    unsigned int e;
    struct rawarc *arc;

    if (gp.state != GMON_PROF_ON) {
        /* turned off for some reason */
        return;
    }

    frompc = frompc & 0x0FFFFFFF;
    selfpc = selfpc & 0x0FFFFFFF;

    /* call might come from stack. The range is half-open: highpc itself
       maps to slot textsize / hashfraction, which equals narcs whenever
       textsize is a multiple of hashfraction, i.e. one past the end of
       arcs. */
    if (frompc >= gp.lowpc && frompc < gp.highpc) {
        gp.pc = selfpc;
        e = (frompc - gp.lowpc) / gp.hashfraction;
        arc = gp.arcs + e;
        arc->frompc = frompc;
        arc->selfpc = selfpc;
        arc->count++;
    }
}

0 comments on commit bd0ca72

Please sign in to comment.