Search code examples
csegmentation-faultansi-c

Segfault 11 on long string, PRIOR to accessing string, only when string > 14


ANSI c on OSX 10.13.6
Apple LLVM version 9.1.0 (clang-902.0.39.2) Target: x86_64-apple-darwin17.7.0 Thread model: posix

I'm learning c

This is a function that manually (character-by-character) adds two character strings representing large numbers (that exceed the unsigned long long or double size).

It functions fine with any two strings 14 or less characters long, but segmentation fault 11 with any strings greater than 14 chars.

Changing the string's memory allocation method seems to have no effect (I.e. from
char[15] addend1; // not a ptr
to
char *addend1 = (char *) malloc(sizeof(char) * (16) ); // pointer

One things that's curious, is that it seems to segfault on the ...
for (int j = maxlength - 1 ; j >= 0; j--)
... prior to accessing either of addend1 or addend2, but I'm not able to find an error there or change it to prevent the segfault.

Am I misreading where the error arises, or could it be related to the for loop?

Successful run (less than 15 chars)

maxlength = 14
char *sum = (char *) malloc(sizeof(char) * (maxlength + 1) ) ... DONE
for (int i = 0; i < (maxlength); i++) { sum[i] = '0'; } ... DONE
Start adding individual ints from end (right side) ...
13 ...12 ...11 ...10 ...9 ...8 ...7 ...6 ...5 ...4 ...3 ...2 ...1 ...0 ...main.sum = 28147497671064

UNSuccessful run (15 chars)

maxlength = 15
char *sum = (char *) malloc(sizeof(char) * (maxlength + 1) ) ... DONE
for (int i = 0; i < (maxlength); i++) { sum[i] = '0'; } ... DONE
Start adding individual ints from end (right side) ...
Segmentation fault: 11

MAIN.c

#include <stdio.h>
#include <stdlib.h>
#include "../../c-library/include/addViaStrings.h"

int main(void) {
    //  s[0] = 72; s[1] = 101; s[2] = 108; s[3] = 108; s[4] = 111; s[5] = 32; s[6] = 87; s[7] = 111; s[8] = 114; s[9] = 108; s[10] = 100; s[11] = 0;

    // WORKS
    // char s1[] = "14073748835532";
    // char s2[] = "14073748835532";

    // FAILS
    char s1[] = "140737488355328";
    char s2[] = "140737488355328";

    char *sum = addNumericStrings(&s1, &s2);
    printf("main.sum = %s\n", sum);
}

addViaStrings.h

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

char* addNumericStrings(char *s1, char *s2);
char leftPad(char *result, char *s, int maxlength);
int  findMaxLength(char *s1, char *s2);

char* addNumericStrings(char *s1, char *s2){
    // Find the length of the greater of the two
    int maxlength = findMaxLength(s1, s2);
    printf("maxlength = %d\n", maxlength); //333

    ///////////////////////////////////////////////
    // Using malloc instead of char[maxlength] seems to have NO EFFECT on the issue
    // char addend1[maxlength]; // not a pointer
    char *addend1 = (char *) malloc(sizeof(char) * (maxlength + 1) );
    addend1[maxlength + 1] = 0; // end flag

    // char addend2[maxlength]; // not a pointer
    char *addend2 = (char *) malloc(sizeof(char) * (maxlength + 1) );
    addend2[maxlength + 1] = 0; // end flag

    // Allocate sum pointer
    printf("char *sum = (char *) malloc(sizeof(char) * (maxlength + 1) ) ... "); //333
    char *sum = (char *) malloc(sizeof(char) * (maxlength + 1) );
    printf("DONE\n"); //333

    // General use vars
    int a1, a2, total;
    int carry = 0;

    // Prepare the strings for manual addition. Pad the left with char 0s
    leftPad(addend1, s1, maxlength);
    leftPad(addend2, s2, maxlength);

    // Buffer sum with zeros
    sum[maxlength + 1] = 0; // end flag
    printf("for (int i = 0; i < (maxlength); i++) { sum[i] = '0'; } ... "); //333
    for (int i = 0; i < (maxlength); i++) { sum[i] = '0'; } // Fill w/ 0s
    printf("DONE\n"); //333

    // Run the manual addition
    // Start adding individual ints from end (right side)
    printf("Start adding individual ints from end (right side) ...\n"); //333

    // maxlength -1 because(I think) the termination char takes 2 bytes
    // If I use (maxlength) instead of (maxlength -1) I get a weird 
    //  question mark char at the end of returnsum
    for (int j = maxlength - 1 ; j >= 0; j--) {

        ///////////////////////////////////////////
        // The segfault seems to happen BEFORE accessing addend1 or addend2
        printf("%d ...", j); // 333 This DOES NOT print
        ///////////////////////////////////////////
        a1 = addend1[j] - '0'; // Convert to int
        a2 = addend2[j] - '0'; // Convert to int
        total = (a1 + a2 + carry);
        carry = 0;
        if ( total >= 10){
            carry += 1;
            total -= 10;
        }
        sum[j + 1] = '0'+total; // convert to ascii value for numbers (adding 48)
    }
    sum[0] = '0' + carry; // add last carry to start of num always, even if 0
    // Before returning, truncate leading zeros
    char *returnsum = (char *) malloc(sizeof(char) * (strlen(sum) + 1) );
    int sum_i = 0;
    int returnsm_i = 0;
//    bool truncate = true; // Find out why this wont compile
    int truncate = 1; // true
    while (1){
        // if order is important here
        if (sum[sum_i] == '\0') { break; } // we're done
        if (sum[sum_i] == '0' && truncate == 1) { sum_i += 1; continue; } // 1 is true
        // if a num, Stop truncating 0s but DO continue adding numbers
        if (sum[sum_i] != '0') { truncate = 0; } // 0 is false
        returnsum[returnsm_i] = sum[sum_i];
        returnsm_i += 1;
        sum_i += 1;
    }
    return returnsum;
}

char leftPad(char *result, char *s, int maxlength){
    int slength = strlen(s);
    // buffer with zeros, not '\0's
    for (int i = (maxlength); i >= 0; i--){ result[i] = '0'; }
    // right fill result with s
    for (int j = 0; j <= slength; j++){
        int index = ((maxlength - slength) + j);
        result[index] = s[j];
    }
    result[maxlength + 1] = 0;

}

int findMaxLength(char *s1, char *s2){
//  int length1 = findEndLength(s1);
//  int length2 = findEndLength(s2);
    int length1 = strlen(s1);
    int length2 = strlen(s2);
    int maxlength;
    (length1 > length2) ? (maxlength = length1) : (maxlength = length2);
    return maxlength;
}

Solution

  • The issue was I was trying to access the sum string as if it was one position longer than the addends strings, but I had declared it as the same length (I.e. maxlength + 1). So I was attempting to access one position past the actual sum array.

    This was a somewhat hidden problem, because it was not until the length of sum needed to be greater than 15, that this access error stepped into disallowed memory space, resulting in a segfault.

    Details

    Because the sum of two addends could conceivably require at least one additional position in the array if the sums carried over a one (I.e. 999 + 999 = 1998), the sum string needs to be one array position longer than the addends.

    If the addends were 3 digits long (length of array = 4) then the sum needed to be 4 digits long (array length = 5).

    // Correct code if "maxlength" (number of actual digits in string) = 14
    char *addend1 = (char *) malloc(sizeof(char) * (maxlength + 1) ); // +1 To include termination byte
    char *addend2 = (char *) malloc(sizeof(char) * (maxlength + 1) ); // +1 To include termination byte
    char *sum     = (char *) malloc(sizeof(char) * (maxlength + 2) ); // +2 To include termination byte, AND an extra char at the front
    

    ...so that the final actual digit character of sum is accessed via maxlength + 1

    CORRECTED CODE

    NOTE: Because calculating against maxlength as the number of digits (versus the length of the entire array including terminator) was confusing - as well as considered bad form, I have since learned - the following final code has been simplified to use more intuitive variables.

    #include <stdio.h>
    #include <stdlib.h>
    
    char* addIntsAsStrings(char *s1, char *s2);
    
    char* addIntsAsStrings(char *s1, char *s2){
        // Find the length of the greater of the two
        int length1 = strlen(s1);
        int length2 = strlen(s2);
        int addendDigits;
        (length1 > length2) ? (addendDigits = length1) : (addendDigits = length2);
    
        // We need separate strings of so they can be buffered with zeros
        // Create the string for the addends and buffer with zeros.
        char addend1[addendDigits + 1];
        char addend2[addendDigits + 1];
        for (int i = 0; i < (addendDigits) ; i++){ // "<" not "<="
            addend1[i] = '0'; // buffer w/ 0s
            addend2[i] = '0'; // buffer w/ 0s
        } // buffer w/ 0s
        addend1[addendDigits] = '\0'; // terminate
    
        // put s1 and s2 into buffered addends strings
        int s1_index = (strlen(s1) - 1);
        int s2_index = (strlen(s2) - 1);
        for (int i = (addendDigits - 1); i >= 0; i--){ //Start at back of addend
            if ( s1_index >= 0) { addend1[i] = s1[s1_index]; }
            if ( s2_index >= 0) { addend2[i] = s2[s2_index]; }
            s1_index -= 1;
            s2_index -= 1;
        }
    
        // Allocate sum pointer. The sum pointer needs to be ONE char
        //  longer than the addends, in the event that the addends need
        //  to carry a final one to the front. I.e. 999 + 999 = 1998
        int sumDigits = addendDigits + 1;
        char *sum = (char *) malloc(sizeof(char) * (sumDigits + 1) ); // +1 To include termination byte, AND an extra char at the front
        for (int i = 0; i < (sumDigits) ; i++){ // "<" not "<="
            sum[i] = '0'; // buffer w/ 0s
        }
        sum[sumDigits] = '\0';
    
        // Manual addition vars
        int a1, a2, total;
        int carry = 0;
    
        // Run the manual addition
        // Start adding individual ints from end (right side)
        for (int j = addendDigits - 1; j >= 0; j--) {
            a1 = addend1[j] - '0'; // Convert to int
            a2 = addend2[j] - '0'; // Convert to int
            total = (a1 + a2 + carry);
            carry = 0;
            if ( total >= 10){
                carry += 1;
                total -= 10;
            }
            // convert to ascii value for numbers (adding 48)
            sum[j + 1] = '0'+total; // sum[j + 1] because `sum`is always one index larger than the addends
        }
        sum[0] = '0' + carry; // add last carry to start of num always, even if 0
    
        // Before returning, truncate leading zeros
        char *returnsum = (char *) malloc(sizeof(char) * (strlen(sum) + 1) );
        int sum_i = 0;
        int returnsm_i = 0;
        int truncate = 1; // true
        while (1){
            // if order is important here
            if (sum[sum_i] == '\0') { break; } // we're done
            if (sum[sum_i] == '0' && truncate == 1) { sum_i += 1; continue; } // 1 is true
            // if a num, Stop truncating 0s but DO continue adding numbers
            if (sum[sum_i] != '0') { truncate = 0; } // 0 is false
            returnsum[returnsm_i] = sum[sum_i];
            returnsm_i += 1;
            sum_i += 1;
        }
        return returnsum;
    }
    

    Array Lengths