Search code examples
c, find, strstr

Find string in another string


So I have a .txt file with the following format:

[email protected]:number
[email protected]:number
[email protected]:number
And another file with:
[email protected]
[email protected]

I want to use each email from the second file to find the matching [email protected]:number line in the first file, and print the email out to a third file. The only problem is that strstr doesn't work for me: it prints out all the lines, I guess because @example.com is in all the emails. I save all the emails from the second file in an array, then read the lines from the first file one by one and use strstr on each.

    #include <iostream>
#include<stdio.h>
#include<stdlib.h>
#include <string.h>
using namespace std;

/*
 * Loads whitespace-separated tokens from registered.txt into y, then reads
 * whitespace-separated tokens from input.txt one at a time and writes a token
 * to output.txt once for every entry of y that strstr() finds inside it.
 * The number of hits is printed at the end.
 */
int main()
{
FILE *f,*g,*h;
char x[80],y[1000][60];
char *a,*b;
int i,found,n;
i=0;
found=0;
f=fopen("input.txt","r");
g=fopen("registered.txt","r");
/* NOTE: h is never compared against NULL below, unlike f and g. */
h=fopen("output.txt","w");
 if(f==NULL)
        {
        perror("No input!\n");
        exit(1);
        }
 if(g==NULL)
        {
        perror("No registered!\n");
        exit(1);
        }
/* feof() is tested before the read and the fscanf() result is ignored, so a
   read that fails at end of file still bumps i. */
while(!feof(g))
{
    fscanf(g,"%s",&y[i]);
    i++;
}
/* n is the number of loop iterations above, not the number of successful reads. */
n=i;
/* Same pattern as above: a failed read leaves x holding the previous token,
   which is then searched again. */
while(!feof(f))
{
    fscanf(f,"%s",&x);
    a=x;
    /* i<=n also visits y[n], a row this code never assigned. */
    for(i=0;i<=n;i++)
    {
        b=y[i];
          /* strstr() finds an empty needle in any string, so an empty row of y
             counts as a match for every token. */
          if (strstr(a,b)!=NULL)
            {
                fprintf(h,"%s\n",x);
                found++;
            }
    }
}
fclose(f);
fclose(g);
fclose(h);
printf("Found Email:%d\n",found);

}

input.txt:

[email protected]:150995
[email protected]:30000
[email protected]:160988
[email protected]:469375
[email protected]:30013568

registered.txt:

[email protected]
[email protected]
[email protected]

I'm using CodeBlocks, if it matters.


Solution

  • Broken code from question instrumented

    With a bare minimum of fixes (so I can get it to compile as C code under my stringent compiler flags), and with diagnostic printing added, this variant of your code:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    /*
     * Instrumented copy of the question's program: the logic (including its
     * bugs) is kept as-is, and printf() calls show each value as it is read
     * and each comparison as it is made.
     */
    int main(void)
    {
        FILE *f, *g, *h;
        char x[80], y[1000][60];
        char *a, *b;
        int i, found, n;
        i = 0;
        found = 0;
        f = fopen("input.txt", "r");
        g = fopen("registered.txt", "r");
        /* NOTE: h is not checked for NULL, unlike f and g. */
        h = fopen("output.txt", "w");
        if (f == NULL)
        {
            perror("No input!\n");
            exit(1);
        }
        if (g == NULL)
        {
            perror("No registered!\n");
            exit(1);
        }
        /* feof() is tested before reading and the fscanf() result is ignored,
           so the read that fails at end of file still prints and bumps i. */
        while (!feof(g))
        {
            fscanf(g, "%s", y[i]);
            /* Brackets make empty strings and stray whitespace visible. */
            printf("Registered: [%s]\n", y[i]);
            i++;
        }
        /* n counts loop iterations, including any whose read failed. */
        n = i;
        /* Same pattern: after a failed read, x still holds the previous token
           and is processed again. */
        while (!feof(f))
        {
            fscanf(f, "%s", x);
            printf("Scanned: [%s]\n", x);
            a = x;
            /* i <= n also visits y[n], a row no fscanf() call above wrote to. */
            for (i = 0; i <= n; i++)
            {
                b = y[i];
                printf("Find: does [%s] contain [%s]?\n", a, b);
                /* strstr() finds an empty needle in any string. */
                if (strstr(a, b) != NULL)
                {
                    printf("Found: [%s] does contain [%s]!\n", a, b);
                    fprintf(h, "%s\n", x);
                    found++;
                }
            }
        }
        fclose(f);
        fclose(g);
        fclose(h);
        printf("Found Email:%d\n", found);
    }
    

    produces this output (given your data):

    Registered: [[email protected]]
    Registered: [[email protected]]
    Registered: [[email protected]]
    Registered: []
    Scanned: [[email protected]:150995]
    Find: does [[email protected]:150995] contain [[email protected]]?
    Found: [[email protected]:150995] does contain [[email protected]]!
    Find: does [[email protected]:150995] contain [[email protected]]?
    Find: does [[email protected]:150995] contain [[email protected]]?
    Find: does [[email protected]:150995] contain []?
    Found: [[email protected]:150995] does contain []!
    Find: does [[email protected]:150995] contain []?
    Found: [[email protected]:150995] does contain []!
    Scanned: [[email protected]:30000]
    Find: does [[email protected]:30000] contain [[email protected]]?
    Find: does [[email protected]:30000] contain [[email protected]]?
    Found: [[email protected]:30000] does contain [[email protected]]!
    Find: does [[email protected]:30000] contain [[email protected]]?
    Find: does [[email protected]:30000] contain []?
    Found: [[email protected]:30000] does contain []!
    Find: does [[email protected]:30000] contain []?
    Found: [[email protected]:30000] does contain []!
    Scanned: [[email protected]:160988]
    Find: does [[email protected]:160988] contain [[email protected]]?
    Find: does [[email protected]:160988] contain [[email protected]]?
    Find: does [[email protected]:160988] contain [[email protected]]?
    Found: [[email protected]:160988] does contain [[email protected]]!
    Find: does [[email protected]:160988] contain []?
    Found: [[email protected]:160988] does contain []!
    Find: does [[email protected]:160988] contain []?
    Found: [[email protected]:160988] does contain []!
    Scanned: [[email protected]:469375]
    Find: does [[email protected]:469375] contain [[email protected]]?
    Find: does [[email protected]:469375] contain [[email protected]]?
    Find: does [[email protected]:469375] contain [[email protected]]?
    Find: does [[email protected]:469375] contain []?
    Found: [[email protected]:469375] does contain []!
    Find: does [[email protected]:469375] contain []?
    Found: [[email protected]:469375] does contain []!
    Scanned: [[email protected]:30013568]
    Find: does [[email protected]:30013568] contain [[email protected]]?
    Find: does [[email protected]:30013568] contain [[email protected]]?
    Find: does [[email protected]:30013568] contain [[email protected]]?
    Find: does [[email protected]:30013568] contain []?
    Found: [[email protected]:30013568] does contain []!
    Find: does [[email protected]:30013568] contain []?
    Found: [[email protected]:30013568] does contain []!
    Scanned: [[email protected]:30013568]
    Find: does [[email protected]:30013568] contain [[email protected]]?
    Find: does [[email protected]:30013568] contain [[email protected]]?
    Find: does [[email protected]:30013568] contain [[email protected]]?
    Find: does [[email protected]:30013568] contain []?
    Found: [[email protected]:30013568] does contain []!
    Find: does [[email protected]:30013568] contain []?
    Found: [[email protected]:30013568] does contain []!
    Found Email:15
    

    Please note that while (!feof(file)) is always wrong. You checked that the input files were opened (good); you didn't check that the output file was opened (bad).

    For debugging this, the first thing to do is print the data as it is read, so that you know what the program is seeing. It is surprising how often the computer sees something other than what you thought it was going to see. But it is one of the most basic debugging techniques.

    Because you don't test the inputs correctly, and because you have for(i=0;i<=n;i++), and because your array is mostly zeroed, you end up trying to see if the empty string is found in your data, and it is, every time. You can spot the empty string easily enough if you show the data that is being compared in a printf() statement. The square brackets (or any bracketing characters) around the string outputs help you spot unexpected characters, such as trailing spaces, embedded carriage returns ('\r') or newlines ('\n'), in the strings.

    Fixed code

    Far from perfect, but demonstrably better:

    #include<stdio.h>
    #include<stdlib.h>
    #include <string.h>
    
    /*
     * Copy every record of input.txt that contains a registered e-mail
     * address to output.txt.
     *
     *   registered.txt  whitespace-separated addresses; at most MAX_REGISTERED
     *                   of them are loaded.
     *   input.txt       whitespace-separated records such as "address:number".
     *   output.txt      one matching record per line; a record is written at
     *                   most once even if several addresses occur in it.
     *
     * Every value read and every comparison made is echoed to stdout as a
     * debugging aid, followed by the number of records written.
     *
     * Returns 0 on success; exits with status 1 if a file cannot be opened or
     * the output file cannot be closed cleanly.
     */
    int main(void)
    {
        enum { MAX_REGISTERED = 1000 };
        FILE *f, *g, *h;
        char x[80], y[MAX_REGISTERED][60];
        int i, found, n;

        found = 0;
        n = 0;
        f = fopen("input.txt", "r");
        g = fopen("registered.txt", "r");
        h = fopen("output.txt", "w");
        /* perror() appends ": <reason>" and a newline itself, so the prefix
           must not contain a newline of its own. */
        if (f == NULL)
        {
            perror("input.txt");
            exit(1);
        }
        if (g == NULL)
        {
            perror("registered.txt");
            exit(1);
        }
        if (h == NULL)
        {
            perror("output.txt");
            exit(1);
        }
        /* The field width (59 + terminator = 60) keeps an over-long token from
           overrunning a row of y; the count check keeps extra tokens from
           overrunning y itself. Testing the fscanf() result ends the loop on
           the first failed read. */
        while (n < MAX_REGISTERED && fscanf(g, "%59s", y[n]) == 1)
        {
            printf("Registered: [%s]\n", y[n]);
            n++;
        }
        /* Likewise, 79 + terminator fits x[80]. */
        while (fscanf(f, "%79s", x) == 1)
        {
            printf("Scanned: [%s]\n", x);
            for (i = 0; i < n; i++)
            {
                printf("Find: does [%s] contain [%s]\n", x, y[i]);
                if (strstr(x, y[i]) != NULL)
                {
                    printf("Match: %s\n", x);
                    fprintf(h, "%s\n", x);
                    found++;
                    /* One hit is enough; don't write the record twice. */
                    break;
                }
            }
        }
        fclose(f);
        fclose(g);
        /* Buffered write errors may only surface when the stream is closed. */
        if (fclose(h) != 0)
        {
            perror("output.txt");
            exit(1);
        }
        printf("Found Email: %d\n", found);
        return 0;
    }
    

    Sample output:

    Registered: [[email protected]]
    Registered: [[email protected]]
    Registered: [[email protected]]
    Scanned: [[email protected]:150995]
    Find: does [[email protected]:150995] contain [[email protected]]
    Match: [email protected]:150995
    Scanned: [[email protected]:30000]
    Find: does [[email protected]:30000] contain [[email protected]]
    Find: does [[email protected]:30000] contain [[email protected]]
    Match: [email protected]:30000
    Scanned: [[email protected]:160988]
    Find: does [[email protected]:160988] contain [[email protected]]
    Find: does [[email protected]:160988] contain [[email protected]]
    Find: does [[email protected]:160988] contain [[email protected]]
    Match: [email protected]:160988
    Scanned: [[email protected]:469375]
    Find: does [[email protected]:469375] contain [[email protected]]
    Find: does [[email protected]:469375] contain [[email protected]]
    Find: does [[email protected]:469375] contain [[email protected]]
    Scanned: [[email protected]:30013568]
    Find: does [[email protected]:30013568] contain [[email protected]]
    Find: does [[email protected]:30013568] contain [[email protected]]
    Find: does [[email protected]:30013568] contain [[email protected]]
    Found Email: 3