Search code examples
c linux ptrace

Stopping a child process at start


I'm trying to implement a checkpoint function for a process. I do it by forking a child process, but I need the child to be paused as soon as it starts. Later on, we can restart from the checkpoint by unpausing the child process and making the parent kill itself.

Here is the code I've written for checkpoint and restart_from_checkpoint along with an example of how to call them.

#include <stdio.h>
#include <unistd.h>
#include <sys/ptrace.h>
#include <sys/wait.h>

/* Forks a child that stops itself under ptrace; the parent receives the child's pid. */
pid_t checkpoint();
/* Resumes the stopped child identified by pid. */
void restart_from_checkpoint( pid_t pid );

/*
 * Demo driver: counts i from 0 to 9, takes a checkpoint when i reaches 4 and
 * restarts from it when i reaches 7. Every message is tagged "parent" or
 * "child" by comparing the current pid with the pid recorded at startup.
 */
int main( int argc, char *argv[] )
{
  const pid_t parent_pid = getpid();
  pid_t child_pid;
  int i = 0;

  while ( i < 10 )
  {
    switch ( i )
    {
    case 4:
      printf( "%6s: Checkpointing!\n", (getpid() != parent_pid)? "child":"parent" );
      child_pid = checkpoint();
      break;

    case 7:
      printf( "%6s: Restarting!\n", (getpid() != parent_pid)? "child":"parent" );
      restart_from_checkpoint( child_pid );
      break;

    default:
      break;
    }

    /* getpid() is queried again: after checkpoint() this may be the child. */
    printf( "%6s: i = %d\n", (getpid() != parent_pid)? "child":"parent", i );
    i++;
  }

  return 0;
}

/*
 * Create a checkpoint of the calling process by forking a child that stops
 * itself right away.
 *
 * The child asks to be traced by its parent and then raises SIGTRAP. Because
 * it is traced, the signal stops it instead of being delivered, and it stays
 * stopped until the parent resumes it through ptrace. (Raising the signal is
 * what stops the child at start; PTRACE_TRACEME alone does not.)
 *
 * Returns the child's pid in the parent once the child is stopped, 0 in the
 * child (only reached after it has been resumed), or -1 if the checkpoint
 * could not be created.
 */
pid_t checkpoint()
{
    pid_t pid;
    int wait_val;

    /* Flush stdio first so pending output is not duplicated in the child. */
    fflush(NULL);

    pid = fork();
    if (pid == -1)
    {
        perror("fork");
        return -1;
    }

    if (pid == 0)
    {
        /* Child: without tracing, SIGTRAP would kill us instead of stopping. */
        if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) == -1)
        {
            perror("ptrace(PTRACE_TRACEME)");
            _exit(1);
        }
        raise(SIGTRAP);   /* stops here until the parent resumes us */
        return 0;
    }

    /* Parent: wait for this particular child, not just any child. */
    if (waitpid(pid, &wait_val, 0) == -1)
    {
        perror("waitpid");
        return -1;
    }
    if (!WIFSTOPPED(wait_val))
    {
        /* The child exited or was killed instead of stopping. */
        fprintf(stderr, "checkpoint: child %ld did not stop\n", (long)pid);
        return -1;
    }

    return pid;
}

/*
 * Resume the stopped child identified by pid, then block until a child of the
 * caller changes state. The results of both calls are ignored.
 */
void restart_from_checkpoint( pid_t pid )
{
    ptrace(PTRACE_CONT, pid, NULL, NULL);
    wait(NULL); // Placeholder: this only waits, whereas the goal is for
                //  the calling process to terminate at this point.
}

I don't know how to stop the child process after calling ptrace(PTRACE_TRACEME,0,0,0). Secondly, I don't know how to kill the parent process while letting the child continue at restart_from_checkpoint.

Ideally, ptrace would offer a request that stops the forked process right at the start so that it can later be resumed with PTRACE_CONT. Unfortunately, PTRACE_TRACEME only causes a stop at an exec function call.


Solution

  • OK, I've found the solution. It's working perfectly now. Here is the code for those of you who are interested.

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/ptrace.h>
    #include <sys/wait.h>
    #include <stdlib.h>
    
    /* Forks a child that stops itself under ptrace; the parent receives the child's pid. */
    pid_t checkpoint();
    /* Lets the stopped child identified by pid run again and terminates the caller. */
    void restart_from_checkpoint( pid_t pid );
    
    /*
     * Demo driver: counts i from 0 to 9 and takes a checkpoint when i reaches 4.
     * When i reaches 7, the original process (and only it) restarts from the
     * checkpoint. Every message is tagged "parent" or "child" by comparing the
     * current pid with the pid recorded at startup.
     */
    int main( int argc, char *argv[] )
    {
      const pid_t parent_pid = getpid();
      pid_t child_pid;
      int i;

      for( i = 0; i != 10; ++i )
      {
        if ( 4 == i )
        {
          printf( "%6s: Checkpointing!\n", (getpid() != parent_pid)? "child":"parent" );
          child_pid = checkpoint();
        }
        else if ( 7 == i && parent_pid == getpid() )
        {
          /* Only the original process hands over to the checkpoint. */
          printf( "%6s: Restarting!\n", (getpid() != parent_pid)? "child":"parent" );
          restart_from_checkpoint( child_pid );
        }

        /* getpid() is queried again: after checkpoint() this may be the child. */
        printf( "%6s: i = %d\n", (getpid() != parent_pid)? "child":"parent", i );
      }

      return 0;
    }
    
    /*
     * Create a checkpoint of the calling process by forking a child that stops
     * itself right away.
     *
     * The child asks to be traced by its parent and then raises SIGTRAP. Because
     * it is traced, the signal stops it instead of being delivered, and it stays
     * stopped until the parent resumes it through ptrace.
     *
     * Returns the child's pid in the parent once the child is stopped, 0 in the
     * child (only reached after it has been resumed), or -1 if the checkpoint
     * could not be created.
     */
    pid_t checkpoint()
    {
        pid_t pid;
        int wait_val;

        /* Flush stdio first so pending output is not duplicated in the child. */
        fflush(NULL);

        pid = fork();
        if (pid == -1)
        {
            perror("fork");
            return -1;
        }

        if (pid == 0)
        {
            /* Child: without tracing, SIGTRAP would kill us instead of stopping. */
            if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) == -1)
            {
                perror("ptrace(PTRACE_TRACEME)");
                _exit(EXIT_FAILURE);
            }
            raise(SIGTRAP);   /* stops here until the parent resumes us */
            return 0;
        }

        /* Parent: wait for this particular child, not just any child. */
        if (waitpid(pid, &wait_val, 0) == -1)
        {
            perror("waitpid");
            return -1;
        }
        if (!WIFSTOPPED(wait_val))
        {
            /* The child exited or was killed instead of stopping. */
            fprintf(stderr, "checkpoint: child %ld did not stop\n", (long)pid);
            return -1;
        }

        return pid;
    }
    
    /*
     * Restart from a checkpoint: let the stopped child identified by pid run
     * again and terminate the calling process. This function never returns.
     *
     * The process exits with EXIT_SUCCESS once the child has been released, or
     * with EXIT_FAILURE (after printing a diagnostic) if it could not be.
     */
    void restart_from_checkpoint( pid_t pid )
    {
        /*
         * PTRACE_DETACH both detaches from the stopped tracee and resumes it,
         * so no separate PTRACE_CONT is needed. Issuing PTRACE_CONT first would
         * leave the tracee running, and the detach request would then fail
         * because it requires a stopped tracee. The NULL data argument means no
         * signal is delivered, which discards the SIGTRAP the child stopped on.
         */
        if (ptrace(PTRACE_DETACH, pid, NULL, NULL) == -1)
        {
            perror("ptrace(PTRACE_DETACH)");
            exit( EXIT_FAILURE );
        }

        /* exit() also flushes stdio, so pending output is not lost. */
        exit( EXIT_SUCCESS );
    }