Search code examples
c#algorithmfindstreamarrays

Best way to find position in the Stream where given byte sequence starts


How do you think what is the best way to find position in the System.Stream where given byte sequence starts (first occurence):

public static long FindPosition(Stream stream, byte[] byteSequence)
{
    long position = -1;

    /// ???
    return position;
}

P.S. The simpliest yet fastest solution is preffered. :)


Solution

  • I've reached this solution.

    I did some benchmarks with an ASCII file that was 3.050 KB and 38803 lines. With a search byte array of 22 bytes in the last line of the file I've got the result in about 2.28 seconds (in a slow/old machine).

    public static long FindPosition(Stream stream, byte[] byteSequence)
    {
        if (byteSequence.Length > stream.Length)
            return -1;
    
        byte[] buffer = new byte[byteSequence.Length];
    
        using (BufferedStream bufStream = new BufferedStream(stream, byteSequence.Length))
        {
            int i;
            while ((i = bufStream.Read(buffer, 0, byteSequence.Length)) == byteSequence.Length)
            {
                if (byteSequence.SequenceEqual(buffer))
                    return bufStream.Position - byteSequence.Length;
                else
                    bufStream.Position -= byteSequence.Length - PadLeftSequence(buffer, byteSequence);
            }
        }
    
        return -1;
    }
    
    private static int PadLeftSequence(byte[] bytes, byte[] seqBytes)
    {
        int i = 1;
        while (i < bytes.Length)
        {
            int n = bytes.Length - i;
            byte[] aux1 = new byte[n];
            byte[] aux2 = new byte[n];
            Array.Copy(bytes, i, aux1, 0, n);
            Array.Copy(seqBytes, aux2, n);
            if (aux1.SequenceEqual(aux2))
                return i;
            i++;
        }
        return i;
    }