I implemented in C a shared memory to let forked child comunicate eachother, here's a Minimal, Complete, and Verificable example:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/sem.h>
#define SHMEMORY
#define NUM_SEMS 2
#define LOCK \
sops.sem_num = 1; \
sops.sem_op = -1; \
semop(sem_Id, &sops, 1);
#define UNLOCK \
sops.sem_num = 1; \
sops.sem_op = 1; \
semop(sem_Id, &sops, 1);
#define TEST_ERROR if (errno) {dprintf(STDERR_FILENO, \
"%s:%d: PID=%5d: Error %d (%s)\n", \
__FILE__, \
__LINE__, \
getpid(), \
errno, \
strerror(errno));}
#define POP_SIZE 100 //number of child
#define TRUE 1
struct shared_data {
/* index where next write will happen */
unsigned long cur_idx;
int invite_sent[POP_SIZE][POP_SIZE];
};
static void init();
static int invite_sent_check(int stud);
int maxMin_rand(int max,int min);
void handle_signal(int sig);
int sim_time = 10;
unsigned long next_num;
struct sembuf sops;
pid_t *kid_pids;
int mem_Id, sem_Id;
int main() {
int i = 0;
int j = 0;
int status, cur_i;
struct shared_data* corso;
pid_t child_pid, my_pid;
int stud = 0;
int exit_loop = 0;
/*********************************************************/
struct sigaction sa;
sigset_t my_mask;
/* handler SIGALRM
*/
sa.sa_handler = handle_signal;
sa.sa_flags = 0;
sigemptyset(&my_mask);
sa.sa_mask = my_mask;
sigaction(SIGALRM, &sa, NULL);
/**********************************************************/
mem_Id = shmget(IPC_PRIVATE, sizeof(*corso), 0600);
TEST_ERROR;
/* Attach the shared memory to a pointer */
corso = shmat(mem_Id, NULL, 0);
TEST_ERROR;
corso->cur_idx = 0; /* init first counter */
/*********************************************************/
sem_Id = semget(IPC_PRIVATE, NUM_SEMS, 0600);
TEST_ERROR;
/* Sem 0 to syncronize the start of child processes */
semctl(sem_Id, 0, SETVAL, 0);
#ifdef SHMEMORY
semctl(sem_Id, 1, SETVAL, 1);
#endif
TEST_ERROR;
sops.sem_num = 0; /* check the 0-th semaphore */
sops.sem_flg = 0; /* no flag */
init();
kid_pids = malloc(POP_SIZE*sizeof(*kid_pids));
for (i = 0; i < POP_SIZE; i++) {
switch (kid_pids[i] = fork()) {
case -1:
/* Handle error */
TEST_ERROR;
break;
case 0:
/* Wait for the green light */
sops.sem_op = -1;
semop(sem_Id, &sops, 1);
while(exit_loop==0 || exit_loop==1){
LOCK;
if(exit_loop == 0){
stud = corso->cur_idx;
printf("%d %d\n",stud,getpid());
corso->cur_idx++;
exit_loop = 1;
}
if(invite_sent_check(stud) == 1){
}
UNLOCK;
}
exit(0);
break;
default:
break;
}
}
alarm(sim_time);
while (shmctl(mem_Id, IPC_RMID, NULL)) { TEST_ERROR; }
sops.sem_op = POP_SIZE;
semop(sem_Id, &sops, 1);
/* Waiting for all child POP_SIZEesses to terminate */
while ((child_pid = wait(&status)) != -1) {
dprintf(2,"PID=%d. Sender (PID=%d) terminated with status 0x%04X\n",
getpid(),
child_pid,
status);
}
/* Now the semaphore may be deallocated */
semctl(sem_Id, 0, IPC_RMID);
exit(0);
}
static void init(){
printf("INIT\n");
struct shared_data * corso;
corso = shmat(mem_Id, NULL, 0);
corso->cur_idx=0;
int r, q, j;
j = 0;
q = 0;
while(j < POP_SIZE){
q = 0;
while(q < POP_SIZE){
corso->invite_sent[j][q] = -1;
q++;
}
j++;
}
}
int maxMin_rand(int max, int min){
int reset;
int randomics=12345;
int w=0;
while(w<reset) {
randomics++;
w++;
}
next_num = next_num+randomics;
next_num = next_num*1103515245 +12345;
unsigned int result=(unsigned int) ((next_num*65536)%(max+1))+min;
int reload;
w=0;
while(w<reload) {
next_num++;
w++;
}
return result;
}
static int invite_sent_check(int stud){
struct shared_data * corso;
corso = shmat(mem_Id, NULL, 0);
int i = 0;
int q = 0;
while(i < POP_SIZE){
if(i == stud){
q = 0;
while(q < POP_SIZE){
if(corso->invite_sent[i][q] != -1){
return 1;
}
q++;
}
}
i++;
}
return 0;
}
void handle_signal(int signal){
int child_pid;
int status;
struct shared_data * corso;
corso = shmat(mem_Id, NULL, 0);
switch (signal) {
case SIGALRM:
for(int i = 0; i < POP_SIZE; i++){
kill(kid_pids[i], SIGKILL);
}
while (shmctl(mem_Id, IPC_RMID, NULL)) {
TEST_ERROR;
}
while ((child_pid = wait(&status)) != -1) {
dprintf(2,"PID=%d. Sender (PID=%d) terminated with status 0x%04X\n",
getpid(),
child_pid,
status);
}
semctl(sem_Id, 0, IPC_RMID);
exit(0);
break;
}
}
The forked children keep LOCK and UNLOCK as long as the timer goes (sim_time = 10). Then the SIGNAL_HANDLER kills all the children, and terminate. I keep getting a SIGSEGV error from a RANDOM child that terminate with status 008B and stops his "brothers" until the handler kills all the other processes. As long as I know, this error concern pointer in the shared memory, is that right? Or I'm missing/I wrote something really wrong? Even this little method that checks if in the INVITE_SENT matrix, at least 1 value is different from -1 cause a crash, instead of just returning 0. Thank you for your time.
I have been unable to reproduce the segfault locally, and Valgrind does not detect any invalid memory accesses. There are other possible causes for a segfault, but they are uncommon. Not being able to reproduce the issue locally, I cannot be confident about identifying its source, but there are a number of issues in the code, mostly minor.
The only issue that seems to have scope to be related to your problem is your redundant attachment of the shared memory segment in function invite_sent_check()
, especially in view of the fact that you use the return value of shmat()
without checking it ((void*)-1
is returned on failure). Redundant attachments such as that are explicitly allowed, but it would be cleaner and more efficient for the caller to just pass its existing pointer to the original attachment point of the segment. Moreover, if you form a local attachment in that function, then you must be certain to detach again before the function returns. Failing to do so could plausibly be the source of the problem, as the metadata and address-space reservations for the resulting many, many attachments could exhaust available resources.
Other issues include
dprintf()
is not async-signal-safe, but is called from a signal handler (both explicitly and via macro TEST_ERROR
).
shmat()
is not async-signal-safe, but it is called from a signal handler. Moreover, this seems unnecessary, because the new segment attachment is not used in the handler. Furthermore, it is not detached, either.
semctl()
is not async-signal-safe, but it is called from a signal handler.
exit()
is not async-signal-safe, but it is called from a signal handler. You may use _Exit()
or _exit()
instead, but it seems off that this handler should exit the program at all, since the main process seems to have other work that it wants to do to clean up.
Considering all the things you seem to want to do when you receive the SIGALRM
, many of them not async-signal-safe, you should consider using sigsupend()
to receive the signal synchronously, and afterward calling a regular function to do that work. If you go that direction, then the safest, most reliable approach would involve first blocking SIGALRM
before stting your alarm, then passing a signal mask to sigsuspend()
that allows that signal. That will prevent any chance of the signal being delivered before the process is ready for it.
Function init()
redundantly attaches the shared memory segment. This is allowed, but it would be better form for the caller to just pass a pointer to the struct shared_data
that is to be initialized. This function, too, fails to detach.
If you want to check for errors by examining errno
, you must be certain to set it to 0 before the call you want to check (and test it immediately after, before doing anything else). However, it is better practice to use functions' return values to detect whether an error has occurred, and to rely on errno
only to discern which one.
The name and signature of function maxMin_rand()
suggest it is intended to return a number between max
and min
, but it looks like it can return a number as large as max + min
.