Some notes on ptrace for IA64

Sun 13 March 2005

As background, you need to understand the instruction format of IA64. Itanium groups instructions into groups of three called "bundles". Each of the three instructions is in a slot (slot0-2). Each instruction is 41 bits, and there are 5 bits of template information (3 x 41 + 5 = 128, making for 128-bit bundles). There are rules about what instructions can be bundled together and in what order they come (the templates). This allows the compiler to determine optimal bundling ... the theory being that the compiler has more information about what is happening (having access to the source code) so it can make best use of the processor resources rather than the processor having to guess what is happening at runtime. This is why it is important to use a good compiler to get good results out of the Itanium processor.

Using the Linux ptrace() call we can step through the instructions a program is executing.

#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <sys/ptrace.h>
#include <sys/wait.h>

/* Name this tracer was invoked as (argv[0], set in main); prefixes diagnostics. */
char *prog_name;

#include <asm/ptrace.h>
#include <asm/ptrace_offsets.h>

/* NOTE(review): not referenced by any code visible in this listing. */
static int lastslot = 0;

/*
 * One 128-bit IA64 instruction bundle: a 5-bit template followed by three
 * 41-bit instruction slots.  The bundle is held as two words; slot 1 does
 * not fit in either word and is split into 18 + 23 bits across them.
 *
 * The same storage can be viewed either field by field (bitfield) or as
 * two raw words (array).
 *
 * NOTE(review): assumes unsigned long is 64 bits and that the compiler
 * allocates bit-fields starting from the least significant bit -- both are
 * implementation-defined; confirm for the target ABI.
 */
union bundle_t {
        struct {
                /* First word: 5 + 41 + 18 = 64 bits. */
                struct {
                        unsigned long template  : 5 ;
                        unsigned long slot0     : 41;
                        unsigned long bot_slot1 : 18;   /* one part of slot 1 */
                } word0;
                /* Second word: 23 + 41 = 64 bits. */
                struct  {
                        unsigned long top_slot1 : 23;   /* remaining part of slot 1 */
                        unsigned long slot2     : 41;
                } word1;
        } bitfield;
        unsigned long array[2];         /* raw two-word view of the bundle */
};

/*
 * Print the traced child's current instruction address and slot number.
 *
 * child_pid: pid of the stopped child being traced with ptrace.
 * state:     unused; kept so existing callers keep working.
 *
 * Writes one line to stdout: the value read at PT_CR_IIP in hex, followed
 * by the two-bit slot field taken from bits 41-42 of the value read at
 * PT_CR_IPSR.  If either register cannot be read, a message is written to
 * stderr and nothing is printed to stdout.
 */
void
print_instruction (int child_pid, int state)
{
        enum { IPSR_SLOT_SHIFT = 41, IPSR_SLOT_MASK = 0x3 };
        long ip, ipsr, slot;

        (void) state;

        /*
         * PTRACE_PEEKUSER returns the register value itself, so -1 can be a
         * legitimate result; it is only an error if errno was set as well.
         */
        errno = 0;
        ip = ptrace (PTRACE_PEEKUSER, child_pid, PT_CR_IIP, 0);
        if (ip == -1 && errno != 0)
        {
                fprintf (stderr, "ptrace(PTRACE_PEEKUSER, PT_CR_IIP) failed (errno=%d)\n",
                         errno);
                return;
        }

        errno = 0;
        ipsr = ptrace (PTRACE_PEEKUSER, child_pid, PT_CR_IPSR, 0);
        if (ipsr == -1 && errno != 0)
        {
                fprintf (stderr, "ptrace(PTRACE_PEEKUSER, PT_CR_IPSR) failed (errno=%d)\n",
                         errno);
                return;
        }

        /* Shift as unsigned so the result does not depend on sign extension. */
        slot = (long) (((unsigned long) ipsr >> IPSR_SLOT_SHIFT) & IPSR_SLOT_MASK);

        /* slot is a long: it must be printed with %ld, not %d. */
        printf ("%lx %ld\n", (unsigned long) ip, slot);
}

/*
 * Run the program named on the command line under ptrace, single-stepping
 * it and calling print_instruction() after every stop.
 *
 * Usage: <tracer> program [args...]
 *
 * The child requests tracing with PTRACE_TRACEME and then execs the target
 * with the remaining arguments and the tracer's environment.  The parent
 * loops on wait4(): it reports and stops when the child exits or is killed
 * by a signal, and otherwise prints the current instruction and resumes the
 * child for one more step.
 *
 * Returns 0 once the child has exited or been killed; returns 1 on a usage
 * error or if fork, wait4 or the single-step request fails.
 */
int
main (int argc, char **argv, char **envp)
{
        int status, pid, child_pid, state = 1, arg = 1;

        prog_name = argv[0];

        /* Without a program to run, argv[arg] would be NULL in execve. */
        if (argc <= arg)
        {
                printf ("usage: %s program [args...]\n", prog_name);
                return 1;
        }

        child_pid = fork ();
        if (child_pid == -1)
        {
                printf ("%s: fork failed (errno=%d)\n", prog_name, errno);
                return 1;
        }
        if (child_pid == 0)
        {
                ptrace (PTRACE_TRACEME, 0, 0, 0);
                execve (argv[arg], argv + arg, envp);
                /* Only reached if execve failed. */
                printf ("%s: execve failed (errno=%d)\n", prog_name, errno);
                exit(-2);
        }

        while (1)
        {
                /* Signal to deliver to the child when it is resumed (0 = none). */
                int sig = 0;

                pid = wait4 (-1, &status, 0, 0);
                if (pid == -1)
                {
                        if (errno == EINTR)
                                continue;

                        /* status is not valid here, so do not go on to inspect it. */
                        printf ("%s: wait4() failed (errno=%d)\n", prog_name, errno);
                        return 1;
                }

                if (WIFEXITED (status))
                {
                        printf ("%s: exit status %d\n", prog_name, WEXITSTATUS (status));
                        break;
                }

                if (WIFSIGNALED (status))
                {
                        /* The child is gone: stop instead of stepping a dead process. */
                        printf ("%s: terminated by signal %d\n",
                                prog_name, WTERMSIG (status));
                        break;
                }

                if (WIFSTOPPED (status) && WSTOPSIG (status) != SIGTRAP)
                {
                        /*
                         * A stop that is not the single-step trap: remember the
                         * signal so it is passed on rather than silently dropped.
                         */
                        sig = WSTOPSIG (status);
                        printf ("%s: got signal %d\n", prog_name, sig);
                }

                print_instruction (child_pid, state);
                if (ptrace (PTRACE_SINGLESTEP, child_pid, 0, (void *) (long) sig) == -1)
                {
                        printf ("%s: ptrace(PTRACE_SINGLESTEP) failed (errno=%d)\n",
                                prog_name, errno);
                        return 1;
                }
        }
        return 0;
}

This will produce a lot of output which should show you increasing instruction pointer values and slot values. Each exception happens after the instruction, so to see what you just executed you have to go back one slot.

...
20000000000043d0 0
20000000000043d0 1
20000000000043d0 2
20000000000043e0 0
20000000000043e0 1
20000000000043e0 2
...

So far, that's all quite straightforward. The only tricky bit comes around system calls. To make a system call on IA64 you put the system call number into r15 and execute a break 0x100000 instruction (well, you used to, until fast system calls were introduced).

But you will have issues single stepping around the break system call, because it has a higher priority than the single step exception. This means that the system call will be handled, the instruction pointer updated, the next instruction executed and then you'll get your fault.

To illustrate with an example, imagine a function like

400000000000e500 :
400000000000e500:       01 10 24 02 80 05       [MII]       alloc r2=ar.pfs,9,1,0
400000000000e506:       f0 00 80 00 42 00                   mov r15=r32
400000000000e50c:       00 00 00 08                         break.i 0x100000;;
400000000000e510:       13 00 fc 15 06 bb       [MBB]       cmp.eq p0,p6=-1,r10
400000000000e516:       41 00 00 42 00 00             (p06) br.ret.sptk.few b0
400000000000e51c:       30 51 00 41                         br.cond.spnt.few 4000000000013640 <__syscall_error>;;

You're going to see output like

400000000000e500 0 <--- after call into function
400000000000e500 1 <--- slot 0 of syscall first bundle
400000000000e500 2 <--- slot 1 of syscall first bundle
                   <--- system call handled (nothing printed)
400000000000e510 1 <--- slot 0 of syscall second bundle
400000000000e510 2 <--- etc

Of course, you can change the ptrace argument to PTRACE_SYSCALL and you will get two faults at 0x400000000000e50c ... one on entry and one on exit.