Search code examples
Tags: c, extract, archive

Extract archived file in C Programming


I want to expand on this question here:

C theory on how to extract files from an archived file

That question discusses taking a file that has been archived, extracting it, and restoring it to its original form without using the command ar -x.

The algorithm in the answers states some steps where you:

  1. Get a file name to extract, also from the command line.
  2. Create memory for a structure to read meta data about each file.
  3. Read all the meta data from the archive file.

Can someone give me some tips on functions to use in these steps? I don't really know what structure it's talking about in step 2.


Solution

  • Below are the implementation details and code for both archiving and extraction. To archive files, use this very simple archive format:

    HEADER | CONTENTS

    The header contains metadata about the files: first the size of the header (not counting the digits of the header-size field itself), then each file's name and size, all separated by /. I used / as the separator because it cannot appear in a plain Linux filename, only in a pathname. You can think of another separator if you plan to include pathnames.

    Body contains the contents of each file appended one after another.

    Following is the format of my archived file.


    HEADER SIZE/FILE1 NAME/FILE1 SIZE/FILE2 NAME/FILE2 SIZE/BODY

    I used the following C structure to collect metadata about a single file, for both archiving and extraction.

    /* Metadata describing one file; nodes are chained into a singly linked list. */
    struct mdata
    {
        char name[255];          /* file name as stored in the archive header */
        FILE *fp;                /* stream associated with the file */
        int size, nsize, ssize;  /* file size, strlen(name), digit count of size */
        struct mdata *next;      /* following node, NULL at the end of the list */
    };
    

    I used a linked list to hold the metadata of all the files.

    Archiving code: ./archive file1 file2 archive1

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    #define BSIZE 2048
    
    
    /* Metadata for one input file; nodes are chained into a singly linked list. */
    struct mdata
    {
        char name[255];          /* file name written into the archive header */
        FILE *fp;                /* open stream of the input file */
        int size, nsize, ssize;  /* file size, strlen(name), digit count of size */
        struct mdata *next;      /* following node, NULL at the end of the list */
    };
    
    // Global metadata list: mhead is the first node, current the most recently appended one.
    struct mdata *mhead = NULL, *current = NULL;
    
    /*
     * Return the size in bytes of the file behind fp and rewind the stream to
     * its beginning.
     *
     * Returns -1 when fp is NULL or when any fseek()/ftell() call fails (for
     * example on a non-seekable stream). ftell() reports a long; the value is
     * narrowed to int to keep the existing return type.
     */
    int getsize(FILE *fp)
    {
        long end;

        if (fp == NULL)
            return -1;
        if (fseek(fp, 0L, SEEK_END) != 0)
            return -1;
        end = ftell(fp);
        if (end < 0)
            return -1;
        /* Callers read the file from the start afterwards, so always rewind. */
        if (fseek(fp, 0L, SEEK_SET) != 0)
            return -1;
        return (int)end;
    }
    
    /*
     * Open the input file `name`, collect its metadata in a freshly allocated
     * node and append that node to the global list (mhead/current).
     *
     * The node keeps the open stream in `fp` so the caller can copy the file
     * contents later; the stream is opened read-only in binary mode.
     *
     * Returns 0 on success. Returns -1, after printing a message to stderr and
     * without touching the list, when:
     *   - name is NULL, empty, or does not fit in mdata.name;
     *   - name contains '/', which is the header field separator and would
     *     make the archive header unparsable;
     *   - the file cannot be opened or its size cannot be determined;
     *   - memory allocation fails.
     */
    int add(char *name)
    {
        FILE *fp;
        char ntemp[32];
        struct mdata *newm;
        size_t len;

        if (name == NULL)
            return -1;
        len = strlen(name);
        if (len == 0 || len >= sizeof(newm->name))
        {
            fprintf(stderr, "Skipping '%s': file name is empty or too long.\n", name);
            return -1;
        }
        if (strchr(name, '/') != NULL)
        {
            fprintf(stderr, "Skipping '%s': file names must not contain '/'.\n", name);
            return -1;
        }

        fp = fopen(name, "rb");
        if (fp == NULL)
        {
            perror(name);
            return -1;
        }

        newm = malloc(sizeof *newm);
        if (newm == NULL)
        {
            perror("malloc");
            fclose(fp);
            return -1;
        }

        memcpy(newm->name, name, len + 1);
        newm->fp = fp;
        newm->size = getsize(fp);
        if (newm->size < 0)
        {
            fprintf(stderr, "Skipping '%s': cannot determine file size.\n", name);
            fclose(fp);
            free(newm);
            return -1;
        }
        newm->nsize = (int)len;
        /* snprintf returns the number of digits needed to print the size. */
        newm->ssize = snprintf(ntemp, sizeof(ntemp), "%d", newm->size);
        newm->next = NULL;
        printf("File %s is being processed...\n", name);

        if (mhead == NULL)
        {
            mhead = newm;
        }
        else
        {
            current->next = newm;
        }
        current = newm;
        return 0;
    }
    
    /*
     * Queue every input file named on the command line.
     *
     * argv[1] .. argv[argc-2] are treated as input files and passed to add();
     * argv[argc-1] is the archive name and is not touched here.
     *
     * Returns the number of arguments handed to add() (0 when there are none).
     * The original fell off the end of a non-void function.
     */
    int preproc(int argc, char** argv)
    {
        int i;
        int count = 0;

        for (i = 1; i < argc - 1; i++)
        {
            add(argv[i]);
            count++;
        }
        return count;
    }
    
    int main(int argc, char** argv) 
    {
        char block[BSIZE];
        char stsize[5];
        char shsize[100];
        int rsize = 0;
        int tnsize = 0, tssize = 0, hsize = 0, scount = 0;
        struct mdata *ptr;
        FILE *fpar, *fp;
    
        //CREATE HEADER
        printf("Pre-processing the files to collect meta data...\n");
        preproc(argc, argv);
        printf("Pre-processing completed.\n");
        printf("Compiling header information.\n");
        fpar = fopen(argv[argc-1], "w+");
        ptr = mhead;
        while(ptr != NULL)
        {
            tnsize += ptr->nsize;        
            tssize += ptr->ssize;
            ptr = ptr->next;
            scount +=2;
        }
        hsize = tnsize+tssize+scount+1; 
        printf("Total length of file names is %d\n", tnsize);
        printf("Total length of file sizes is %d\n", tssize);
        printf("Total size of header except file size is %d.\n", hsize);
        sprintf(shsize, "%d/", hsize); //10 bytes of header size
        fwrite(shsize, 1, strlen(shsize), fpar);
        ptr = mhead;
        while(ptr != NULL)
        {
            fwrite(ptr->name, 1, ptr->nsize, fpar);   
            fwrite("/", 1, 1, fpar);
            sprintf(stsize, "%d", ptr->size);
            fwrite(stsize, 1, ptr->ssize, fpar);  
            fwrite("/", 1, 1, fpar);
            ptr = ptr->next;
        }    
        printf("The header created and written to archieve file.\n");
        //CREATE BODY
        ptr = mhead;
        while(ptr != NULL)
        {
            fp = ptr->fp;
            while(rsize = fread(block, 1, sizeof(block), fp))
            {
                fwrite(block, 1, rsize, fpar);
            }
            ptr = ptr->next;
        }   
        printf("Contents of all files written to archieve file.\n");
        fclose(fpar);  
        return 0;
    }
    

    Extraction code: ./extract archive

    #include <errno.h>
    #include <limits.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    #define BSIZE 2048
    
    
    /* Metadata for one archived file; nodes are chained into a singly linked list. */
    struct mdata
    {
        char name[255];          /* file name read from the archive header */
        FILE *fp;                /* stream slot associated with the file */
        int size, nsize, ssize;  /* file size, strlen(name), digit count of size */
        struct mdata *next;      /* following node, NULL at the end of the list */
    };
    
    // Global metadata list: mhead is the first node, current the most recently appended one.
    struct mdata *mhead = NULL, *current = NULL;
    
    /* Return the smaller of x and y (y when they are equal). */
    int min(int x, int y)
    {
        return (x < y) ? x : y;
    }
    /*
     * Return the size in bytes of the file behind fp and rewind the stream to
     * its beginning.
     *
     * Returns -1 when fp is NULL or when any fseek()/ftell() call fails (for
     * example on a non-seekable stream). ftell() reports a long; the value is
     * narrowed to int to keep the existing return type.
     */
    int getsize(FILE *fp)
    {
        long end;

        if (fp == NULL)
            return -1;
        if (fseek(fp, 0L, SEEK_END) != 0)
            return -1;
        end = ftell(fp);
        if (end < 0)
            return -1;
        /* Leave the stream positioned at the start, as the original did. */
        if (fseek(fp, 0L, SEEK_SET) != 0)
            return -1;
        return (int)end;
    }
    
    /*
     * Append a node describing an archived file (its name and byte size) to the
     * global list (mhead/current).
     *
     * The node's `fp` is set to NULL here; the original left it uninitialized.
     *
     * Returns 0 on success. Returns -1, after printing a message to stderr and
     * without touching the list, when name is NULL, does not fit in mdata.name,
     * or memory allocation fails.
     */
    int add(char *name, int size)
    {
        char ntemp[32];
        struct mdata *newm;
        size_t len;

        if (name == NULL)
            return -1;
        len = strlen(name);
        if (len >= sizeof(newm->name))
        {
            fprintf(stderr, "File name '%s' is too long.\n", name);
            return -1;
        }

        newm = malloc(sizeof *newm);
        if (newm == NULL)
        {
            perror("malloc");
            return -1;
        }

        memcpy(newm->name, name, len + 1);
        newm->fp = NULL;
        newm->size = size;
        newm->nsize = (int)len;
        /* snprintf returns the number of characters needed to print the size. */
        newm->ssize = snprintf(ntemp, sizeof(ntemp), "%d", newm->size);
        newm->next = NULL;
        printf("File %s is being processed...\n", name);

        if (mhead == NULL)
        {
            mhead = newm;
        }
        else
        {
            current->next = newm;
        }
        current = newm;
        return 0;
    }
    
    /* Report an unusable archive header and terminate the program. */
    static void badheader(const char *why)
    {
        fprintf(stderr, "Malformed archive header: %s.\n", why);
        exit(EXIT_FAILURE);
    }

    /*
     * Read one '/'-terminated header field from fp into buf (capacity cap) and
     * NUL-terminate it. Returns the field length, or -1 if end of file or a read
     * error occurs before the '/' or the field does not fit in buf.
     */
    static int readfield(FILE *fp, char *buf, size_t cap)
    {
        size_t len = 0;
        int byte;

        while ((byte = fgetc(fp)) != '/')
        {
            if (byte == EOF || len + 1 >= cap)
                return -1;
            buf[len++] = (char)byte;
        }
        buf[len] = '\0';
        return (int)len;
    }

    /*
     * Parse text as a non-negative decimal int. Returns 0 and stores the value
     * in *out on success, -1 if text is not a number or is out of range.
     */
    static int parsesize(const char *text, int *out)
    {
        char *end;
        long value;

        errno = 0;
        value = strtol(text, &end, 10);
        if (end == text || *end != '\0' || errno == ERANGE ||
            value < 0 || value > INT_MAX)
            return -1;
        *out = (int)value;
        return 0;
    }

    /*
     * Read the archive header from fp1 and append one list node per archived
     * file by calling add(name, size).
     *
     * Header layout: "<hsize>/" followed by "<name>/<size>/" entries, where
     * <hsize> counts the entry bytes plus the '/' that ends the <hsize> field.
     * The header is consumed byte by byte, so on return the stream is
     * positioned at the first byte of the archive body.
     *
     * Returns the byte offset at which the body starts. A truncated, oversized
     * or non-numeric header cannot be recovered from, so it is reported on
     * stderr and the program exits with EXIT_FAILURE; the original looped
     * forever and overran its buffers in that case.
     */
    int readh(FILE *fp1)
    {
        int hsize = 0, size = 0;
        int len;
        long long j;
        char shsize[50];
        char name[255];
        char ssize[50];

        if (fp1 == NULL)
            badheader("no archive stream");

        len = readfield(fp1, shsize, sizeof(shsize));
        if (len <= 0 || parsesize(shsize, &hsize) != 0 || hsize > INT_MAX - len)
            badheader("invalid header size");
        hsize += len;
        printf("The total size of header is %d.\n", hsize);
        printf("Contents starts at %dth byte.\n", hsize);

        //COLLECT NAMES AND SIZES
        j = (long long)len + 1;   /* bytes consumed so far: size digits plus '/' */
        while (j <= (long long)hsize - 1)
        {
            len = readfield(fp1, name, sizeof(name));
            if (len <= 0)
                badheader("invalid file name");
            j += len + 1;

            len = readfield(fp1, ssize, sizeof(ssize));
            if (len <= 0 || parsesize(ssize, &size) != 0)
                badheader("invalid file size");
            j += len + 1;

            printf("File '%s' with size %d added to list.\n", name, size);
            add(name, size);
            printf("File '%s' processing completed.\n", name);
        }
        /* The entries must end exactly where the declared header size says. */
        if (j != hsize)
            badheader("entries do not match the declared header size");
        printf("File meta data collection successfully completed.\n");
        return hsize;
    }
    
    int main(int argc, char** argv) 
    {
        char block[BSIZE];
        char stsize[5];
        char shsize[100];
        int rsize = 0;
        int tnsize = 0, tssize = 0, hsize = 0, scount = 0;
        int totsize = 0;
        int unreadcount = 0;
        struct mdata *ptr;
        FILE *fpar, *fp;
    
        //COLLECTING HEADER
        printf("Opening file %s...\n", argv[1]);
        fpar = fopen(argv[1], "r+");
        if(fpar == NULL)
        {
            printf("Error opening file %s.\n", argv[1]);
        }
        readh(fpar);
        ptr = mhead;
        lseek(fpar, hsize+1, SEEK_SET);
        while(ptr != NULL)
        {
            totsize = 0;
            printf("Creating file %s...\n", ptr->name);
            fp = fopen(ptr->name, "w+"); 
            printf("Writing %d bytes of %s...\n", ptr->size, ptr->name);
            unreadcount = ptr->size;
            while(unreadcount > 0)
            {
                if(sizeof(block)>= unreadcount)
                {
                    rsize = fread(block, 1, unreadcount, fpar);
                }
                else
                {
                    rsize = fread(block, 1, sizeof(block), fpar);
                }
                unreadcount -= rsize;
                totsize += rsize;
                fwrite(block, 1, rsize, fp);
            }
            printf("Written %d bytes to file %s.\n", totsize, ptr->name);
            ptr = ptr->next;
        }
        printf("Extraction completed.\n");
        fclose(fpar);  
        return 0;
    }