I'm trying to implement a checkpoint function for a process. I do it by forking a child process, which needs to be paused as soon as it starts. Later on, we can restart from the checkpoint by unpausing the child process and having the parent kill itself.
Here is the code I've written for `checkpoint` and `restart_from_checkpoint`, along with an example of how to call them.
#include <stdio.h>
#include <unistd.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
/* Forks a checkpoint child; the parent receives the child's pid. */
pid_t checkpoint( void );
/* Resumes the checkpoint child identified by pid. */
void restart_from_checkpoint( pid_t pid );
/*
 * Demo driver: counts i from 0 to 9, takes a checkpoint when i == 4 and
 * requests a restart from that checkpoint when i == 7.
 *
 * Every output line is tagged "parent" or "child" by comparing getpid()
 * with the pid recorded before any fork took place.
 */
int main( int argc, char *argv[] )
{
    int i;
    pid_t child_pid = -1;           /* no checkpoint taken yet */
    pid_t parent_pid = getpid();

    (void)argc;
    (void)argv;

    for( i = 0; i < 10; i++ )
    {
        if ( i == 4 )
        {
            printf( "%6s: Checkpointing!\n", (getpid() == parent_pid)? "parent":"child" );
            child_pid = checkpoint();
        }
        /*
         * The resumed child runs this same loop from i == 4 onwards. Only
         * the original process holds the checkpoint's pid, so only it may
         * request the restart, and only if a checkpoint actually exists.
         */
        if ( i == 7 && getpid() == parent_pid && child_pid > 0 )
        {
            printf( "%6s: Restarting!\n", (getpid() == parent_pid)? "parent":"child" );
            restart_from_checkpoint( child_pid );
        }
        printf( "%6s: i = %d\n", (getpid() == parent_pid)? "parent":"child", i );
    }
    return 0;
}
/*
 * Take a checkpoint of the calling process by forking a copy of it that
 * stops itself under ptrace as soon as it starts.
 *
 * Returns:
 *   > 0  in the caller: pid of the stopped checkpoint child;
 *     0  in the checkpoint child, once its tracer has resumed it;
 *    -1  in the caller when no stopped child could be created.
 */
pid_t checkpoint( void )
{
    pid_t pid;
    int wait_val;

    /* Flush pending stdio output so the child does not inherit a copy of
     * it and print it a second time. */
    fflush( NULL );

    pid = fork();
    if ( pid == -1 )
    {
        perror( "fork" );
        return -1;
    }

    if ( pid == 0 )
    {
        /* Child: become a tracee of the parent. */
        if ( ptrace( PTRACE_TRACEME, (pid_t)0, NULL, NULL ) == -1 )
        {
            perror( "ptrace(PTRACE_TRACEME)" );
            _exit( 1 );     /* never continue as an untraced duplicate */
        }
        /* PTRACE_TRACEME alone does not stop the child here, so raise a
         * signal to stop it at this point until the tracer resumes it. */
        raise( SIGTRAP );
        return 0;
    }

    /* Parent: wait for this particular child and make sure it really is
     * stopped rather than terminated. */
    if ( waitpid( pid, &wait_val, 0 ) == -1 )
    {
        perror( "waitpid" );
        return -1;
    }
    if ( !WIFSTOPPED( wait_val ) )
    {
        fprintf( stderr, "checkpoint: child %ld did not stop\n", (long)pid );
        return -1;
    }
    return pid;
}
/*
 * Resume the stopped checkpoint child identified by pid, then wait for
 * the next state change of that child.
 *
 * Still open: instead of waiting, the calling process is meant to
 * terminate itself here.
 */
void restart_from_checkpoint( pid_t pid )
{
    if ( ptrace( PTRACE_CONT, pid, NULL, NULL ) == -1 )
    {
        /* Nothing was resumed; waiting now could block on a child that
         * is still stopped. */
        perror( "ptrace(PTRACE_CONT)" );
        return;
    }

    if ( waitpid( pid, NULL, 0 ) == -1 )
    {
        perror( "waitpid" );
    }
}
I don't know how to stop the child process after calling `ptrace(PTRACE_TRACEME,0,0,0)`. Secondly, I don't know how to kill the parent process while letting the child continue in `restart_from_checkpoint`.
Ideally, `ptrace` would accept a parameter that stops the forked process right at the beginning, so that it could later be resumed with `PTRACE_CONT`. Unfortunately, `PTRACE_TRACEME` only stops the process at an `exec` function call.
OK, I've found the solution. It's working perfectly now. Here is the code for those of you who are interested.
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
/* Forks a checkpoint child; the parent receives the child's pid. */
pid_t checkpoint( void );
/* Resumes the checkpoint child identified by pid and ends the caller. */
void restart_from_checkpoint( pid_t pid );
/*
 * Demo driver for the checkpoint/restart pair.
 *
 * Walks i through 0..9. On reaching 4 it takes a checkpoint; on reaching 7
 * the original process (and only it) restarts from that checkpoint. Each
 * line printed is labelled "parent" or "child" depending on whether the
 * current pid equals the pid recorded at start-up.
 */
int main( int argc, char *argv[] )
{
    const pid_t parent_pid = getpid();
    pid_t child_pid;
    const char *who;
    int i = 0;

    while ( i < 10 )
    {
        switch ( i )
        {
        case 4:
            who = ( getpid() == parent_pid ) ? "parent" : "child";
            printf( "%6s: Checkpointing!\n", who );
            child_pid = checkpoint();
            break;

        case 7:
            /* The resumed child skips the restart request. */
            if ( getpid() != parent_pid )
            {
                break;
            }
            who = "parent";
            printf( "%6s: Restarting!\n", who );
            restart_from_checkpoint( child_pid );
            break;

        default:
            break;
        }

        /* Re-evaluate the label: after a checkpoint this may be the child. */
        who = ( getpid() == parent_pid ) ? "parent" : "child";
        printf( "%6s: i = %d\n", who, i );
        i++;
    }

    return 0;
}
/*
 * Create a checkpoint of the calling process: fork a copy that makes
 * itself a ptrace tracee and stops on a self-raised SIGTRAP.
 *
 * Returns:
 *   > 0  in the caller: pid of the stopped checkpoint child;
 *     0  in the checkpoint child, after its tracer has resumed it;
 *    -1  in the caller when no stopped child could be created.
 */
pid_t checkpoint( void )
{
    pid_t pid;
    int wait_val;

    /* Anything still sitting in stdio buffers would be copied into the
     * child and emitted twice, so write it out before forking. */
    fflush( NULL );

    pid = fork();
    if ( pid == -1 )
    {
        perror( "fork" );
        return -1;
    }

    if ( pid == 0 )
    {
        /* Child side. */
        if ( ptrace( PTRACE_TRACEME, (pid_t)0, NULL, NULL ) == -1 )
        {
            perror( "ptrace(PTRACE_TRACEME)" );
            _exit( 1 );     /* do not keep running as an untraced copy */
        }
        raise( SIGTRAP );   /* stops here until the tracer resumes us */
        return 0;
    }

    /* Parent side: confirm that this child is stopped, not dead. */
    if ( waitpid( pid, &wait_val, 0 ) == -1 )
    {
        perror( "waitpid" );
        return -1;
    }
    if ( !WIFSTOPPED( wait_val ) )
    {
        fprintf( stderr, "checkpoint: child %ld did not stop\n", (long)pid );
        return -1;
    }
    return pid;
}
/*
 * Restart from a checkpoint: let the stopped checkpoint child identified
 * by pid run on its own, then terminate the calling process.
 *
 * This function does not return.
 */
void restart_from_checkpoint( pid_t pid )
{
    /*
     * A single PTRACE_DETACH both restarts the stopped tracee and releases
     * it. Issuing PTRACE_CONT first would leave the tracee running, and a
     * detach request needs it to be stopped.
     */
    if ( ptrace( PTRACE_DETACH, pid, NULL, NULL ) == -1 )
    {
        perror( "ptrace(PTRACE_DETACH)" );
    }

    /* exit() rather than _exit() so this process's buffered output is
     * flushed; the exit status is 1 in every case. */
    exit( 1 );
}