Search code examples
c#windowsexearchive

Making self-extracting executable with C#


I'm creating a simple self-extracting archive, using a magic number to mark the beginning of the content. For now the content is a text file:

MAGICNUMBER .... content of the text file

Next, the text file is appended to the end of the executable:

copy programm.exe/b+textfile.txt/b sfx.exe

I'm trying to find the second occurrence of the magic number (the first one would be a hardcoded constant obviously) using the following code:

    // Question snippet, attempt 1: scan the running executable in 1024-char blocks for
    // the second block containing the marker, then dump everything from that position
    // to the end of the file into c:/output.txt.

    // Path of the currently running executable.
    string my_filename = System.Diagnostics.Process.GetCurrentProcess().MainModule.FileName;
    // NOTE(review): StreamReader decodes the file's bytes into characters, so every
    // position computed below is a character count, not a byte offset. The reader is
    // also never disposed.
    StreamReader file = new StreamReader(my_filename);
    const int block_size = 1024;
    const string magic = "MAGICNUMBER";
    char[] buffer = new Char[block_size];
    // Number of blocks counted so far (see the note on `continue` below).
    Int64 count = 0;
    // Start position of the payload; stays 0 when no second hit is found.
    Int64 glob_pos = 0;
    // Becomes true once the first block containing the marker has been seen.
    bool flag = false;
    while (file.ReadBlock(buffer, 0, block_size) > 0)
    {
        // NOTE(review): ToString() on a char[] returns the type name ("System.Char[]"),
        // not the buffer contents, so this IndexOf never searches the file data.
        // Each block is also searched on its own, so a marker that straddles two
        // blocks cannot be matched.
        var rel_pos = buffer.ToString().IndexOf(magic);
        // First hit: remember it and move on to the next block.
        // `&` is the non-short-circuiting logical AND; both operands are evaluated.
        if ((rel_pos > -1) & (!flag))
        {
            flag = true;
            // NOTE(review): `continue` skips the count++ at the bottom of the loop,
            // so the block counter is one behind for every block after the first hit.
            continue;
        }

        // Second hit: convert block index + in-block index into a global position.
        if ((rel_pos > -1) & (flag == true))
        {
            glob_pos = block_size * count + rel_pos;
            break;
        }
        count++;
    }



    // Re-open the executable as raw bytes and copy everything from glob_pos onwards.
    using (FileStream fs = new FileStream(my_filename, FileMode.Open, FileAccess.Read))
    {
        // With glob_pos == 0 this buffer covers the whole file.
        byte[] b = new byte[fs.Length - glob_pos];
        // NOTE(review): glob_pos was computed in characters but is used here as a byte offset.
        fs.Seek(glob_pos, SeekOrigin.Begin);
        // The number of bytes actually read (Read's return value) is not checked.
        fs.Read(b, 0, (int)(fs.Length - glob_pos));
        File.WriteAllBytes("c:/output.txt", b);
        // (The snippet ends here; the closing brace of the using block is not shown.)

but for some reason I'm copying almost the entire file, not just the last few kilobytes. Is it because of a compiler optimization, such as inlining the magic constant in the while loop, or something similar?

How should I do self-extraction archive properly?

I guessed that I should read the file backwards to avoid problems with the compiler inlining the magic constant multiple times. So I've modified my code in the following way:

    // Question snippet, attempt 2: scan the whole executable in 1024-char blocks,
    // remember the position of the last block containing the marker, then dump
    // everything from that position to the end of the file into c:/output.txt.

    // Path of the currently running executable.
    string my_filename = System.Diagnostics.Process.GetCurrentProcess().MainModule.FileName;
    // NOTE(review): StreamReader decodes the file's bytes into characters, so every
    // position computed below is a character count, not a byte offset. The reader is
    // also never disposed.
    StreamReader file = new StreamReader(my_filename);
    const int block_size = 1024;
    const string magic = "MAGIC";
    char[] buffer = new Char[block_size];
    // Number of blocks read so far.
    Int64 count = 0;
    // Start position of the payload; stays 0 when the marker is never found.
    Int64 glob_pos = 0;
    while (file.ReadBlock(buffer, 0, block_size) > 0)
    {
        // NOTE(review): ToString() on a char[] returns the type name ("System.Char[]"),
        // not the buffer contents, so this IndexOf never searches the file data.
        // Each block is also searched on its own, so a marker that straddles two
        // blocks cannot be matched.
        var rel_pos = buffer.ToString().IndexOf(magic);
        // Every hit overwrites glob_pos, so the last matching block wins.
        if (rel_pos > -1)
        {
            glob_pos = block_size * count + rel_pos;
        }
        count++;
    }



    // Re-open the executable as raw bytes and copy everything from glob_pos onwards.
    using (FileStream fs = new FileStream(my_filename, FileMode.Open, FileAccess.Read))
    {
        // With glob_pos == 0 this buffer covers the whole file.
        byte[] b = new byte[fs.Length - glob_pos];
        // NOTE(review): glob_pos was computed in characters but is used here as a byte
        // offset, and it points at the marker itself rather than just past it.
        fs.Seek(glob_pos, SeekOrigin.Begin);
        // The number of bytes actually read (Read's return value) is not checked.
        fs.Read(b, 0, (int)(fs.Length - glob_pos));
        File.WriteAllBytes("c:/output.txt", b);
    }

So I've scanned the whole file once, found what I thought would be the last occurrence of the magic number, and copied from there to the end of the file. While the file created by this procedure seems smaller than in the previous attempt, it is in no way the same file I attached to my "self-extracting" archive. Why?

My guess is that the calculated position of the beginning of the attached file is wrong because of the conversion from binary to string. If so, how should I modify my position calculation to make it correct?

Also, how should I choose the magic number when working with real files, PDFs for example? I won't be able to easily modify PDFs to include a predefined magic number.


Solution

  • The easiest solution is to replace

    // The declaration from the question: a compile-time constant, written in the
    // source exactly as the marker that is later searched for.
    const string magic = "MAGICNUMBER";
    

    with

    // The marker is assembled at run time, so the literal stored in the source
    // ("magicnumber") differs from the marker that is searched for ("MAGICNUMBER").
    // ToUpperInvariant() keeps the result independent of the current culture:
    // under a Turkish culture, ToUpper() maps 'i' to 'İ' and would change the marker.
    static readonly string magic = "magicnumber".ToUpperInvariant();
    

    But there are more problems with the whole magic-string approach. What if the file itself contains the magic string? I think the best solution is to put the file size after the file. Extraction is much easier that way: read the length from the last bytes, then read that many bytes from the end of the file.

    Update: This should work unless your files are very big. (In that case you'd need to use a revolving pair of buffers to read the file in small blocks.)

    // Extracts the payload appended to the running executable: loads the whole file,
    // finds the first occurrence of the marker bytes and writes everything after the
    // marker to "<executable path>.secret". Nothing is written if no marker is found.
    string inputFilename = System.Diagnostics.Process.GetCurrentProcess().MainModule.FileName;
    string outputFilename = inputFilename + ".secret";
    // Built at run time so the source literal differs from the marker searched for.
    // ToUpperInvariant() avoids culture-dependent casing (a Turkish culture would turn
    // 'i' into 'İ', which ASCII encoding then replaces with '?').
    string magic = "magic".ToUpperInvariant();

    byte[] data = File.ReadAllBytes(inputFilename);
    byte[] magicData = Encoding.ASCII.GetBytes(magic);

    // `start` is the index of the first byte of a candidate match.
    for (int start = 0; start + magicData.Length <= data.Length; start++) {
        bool found = true;
        for (int magicIdx = 0; magicIdx < magicData.Length; magicIdx++) {
            if (data[start + magicIdx] != magicData[magicIdx]) {
                found = false;
                break;
            }
        }
        if (found) {
            int payloadStart = start + magicData.Length;
            using (FileStream output = new FileStream(outputFilename, FileMode.Create)) {
                output.Write(data, payloadStart, data.Length - payloadStart);
            }
            // Stop at the first marker: any later occurrence is part of the payload,
            // and continuing would overwrite the output with a truncated tail.
            break;
        }
    }
    

    Update2: This should be much faster, use little memory, and work on files of any size, but your program must be a proper executable (with its size being a multiple of 512 bytes):

    // Extracts the payload appended to the running executable without loading the
    // file into memory: reads it in 512-byte blocks, looks for the marker at the
    // start of each block and, on the first match, streams everything after the
    // marker to "<executable path>.secret". Nothing is written if no marker is found.
    string inputFilename = System.Diagnostics.Process.GetCurrentProcess().MainModule.FileName;
    string outputFilename = inputFilename + ".secret";
    // Built at run time so the source literal differs from the marker searched for.
    // ToUpperInvariant() avoids culture-dependent casing (a Turkish culture would turn
    // 'i' into 'İ', which ASCII encoding then replaces with '?').
    string marker = "magic".ToUpperInvariant();

    byte[] markerData = Encoding.ASCII.GetBytes(marker);
    int markerLength = markerData.Length;

    // The marker is only looked for at block boundaries, so the executable part must
    // be a whole number of blocks long for the appended marker to be found.
    const int blockSize = 512; //important!

    using (FileStream input = File.OpenRead(inputFilename)) {
        // File offset of the block currently held in `buffer`.
        long blockStart = 0;
        byte[] buffer = new byte[blockSize];
        while (true) {
            // Stream.Read may return fewer bytes than requested; keep reading until the
            // block is full or the end of the file is reached so blocks stay aligned.
            int filled = 0;
            int read;
            while (filled < blockSize
                   && (read = input.Read(buffer, filled, blockSize - filled)) > 0) {
                filled += read;
            }
            if (filled < markerLength) {
                // End of file (or a tail too short to hold the marker).
                break;
            }

            bool found = true;
            for (int idx = 0; idx < markerLength; idx++) {
                if (buffer[idx] != markerData[idx]) {
                    found = false;
                    break;
                }
            }
            if (found) {
                // Rewind to just past the marker and copy the rest of the file.
                input.Position = blockStart + markerLength;
                // File.Create truncates an existing output file; File.OpenWrite would
                // leave stale bytes behind if the old file was longer.
                using (FileStream output = File.Create(outputFilename)) {
                    input.CopyTo(output);
                }
                // The payload has been extracted; later marker-like blocks belong to it.
                break;
            }
            blockStart += filled;
        }
    }
    

    Read about some approaches here: http://www.strchr.com/creating_self-extracting_executables