Search code examples
parsing, rust, nom

How to match exactly one byte using nom?


I want to match exactly one alphabetic character (a-zA-Z) with nom.

I know I can match greedily using take_while! with something like this:

// Consume the leading run of ASCII letters (A-Z or a-z) from `input`.
pub fn alpha_many(input: &[u8]) -> IResult<&[u8], &[u8]> {
    take_while!(input, |byte| {
        // 0x41..=0x5a is 'A'..='Z'; 0x61..=0x7a is 'a'..='z'.
        let is_upper = b'A' <= byte && byte <= b'Z';
        let is_lower = b'a' <= byte && byte <= b'z';
        is_upper || is_lower
    })
}

But I can't find a way to match only one byte. There is one_of!, but it does not accept a closure; I have to pass the whole set of accepted bytes as a slice:

// Match exactly one alphabetical character.
//
// This version shows the drawback being asked about: `one_of!` is given an
// explicit slice of accepted bytes instead of a predicate, so every accepted
// byte has to be written out by hand.
pub fn alpha_one(input: &[u8]) -> IResult<&[u8], u8> {
    one_of!(
        input,
        [
            0x41, 0x42, 0x43,
            // ...and so on up to 0x5a ('Z'), then 0x61 ('a') through 0x7a ('z');
            // the remaining bytes are elided here.
        ].as_ref()
    )
}

Solution

  • I've come up with this. I'll mark this as the accepted answer tomorrow if nobody comes up with a better solution:

    use nom::{self, ErrorKind, IResult, Needed};
    
    /// Alphabetical characters ([RFC5234 appendix B.1]).
    ///
    /// [RFC5234 appendix B.1]: https://tools.ietf.org/html/rfc5234#appendix-B.1
    ///
    /// ```text
    /// ALPHA          =  %x41-5A / %x61-7A   ; A-Z / a-z
    /// ```
    ///
    /// This is a unit struct: it holds no data and only serves as a namespace
    /// for the associated functions that classify and parse ALPHA bytes.
    pub struct Alpha;
    
    impl Alpha {
        /// Return `true` if `c` is an ASCII letter: `A`-`Z` (0x41-0x5a) or
        /// `a`-`z` (0x61-0x7a).
        pub fn is_alpha(c: u8) -> bool {
            c.is_ascii_alphabetic()
        }

        /// Parse the leading run of alphabetical characters.
        ///
        /// Delegates to nom's `take_while!` with [`Alpha::is_alpha`] as the
        /// predicate.
        pub fn parse_many(input: &[u8]) -> IResult<&[u8], &[u8]> {
            take_while!(input, Self::is_alpha)
        }

        /// Parse exactly one alphabetical character and return it as a byte.
        pub fn parse_one(input: &[u8]) -> IResult<&[u8], u8> {
            // On success `parse_n(input, 1)` yields a one-byte slice, so
            // indexing its first element cannot go out of bounds.
            Self::parse_n(input, 1).map(|bytes| bytes[0])
        }

        /// Parse exactly `n` alphabetical characters.
        pub fn parse_n(input: &[u8], n: usize) -> IResult<&[u8], &[u8]> {
            Self::parse_m_n(input, n, n)
        }

        /// Parse between `m` and `n` (inclusive) alphabetical characters.
        ///
        /// Returns:
        ///
        /// * `IResult::Incomplete(Needed::Size(m))` when `input` holds fewer
        ///   than `m` bytes;
        /// * `IResult::Error` with `ErrorKind::ManyMN` when fewer than `m`
        ///   alphabetical bytes lead the input (this includes the
        ///   unsatisfiable case `m > n`);
        /// * `IResult::Done(rest, matched)` otherwise, where `matched` is the
        ///   longest alphabetical prefix of at most `n` bytes.
        ///
        /// The function never reads past the end of `input`, even when
        /// `input` is shorter than `n`.
        pub fn parse_m_n(input: &[u8], m: usize, n: usize) -> IResult<&[u8], &[u8]> {
            if input.len() < m {
                // `input.len() - m` would underflow here. Report the total
                // number of bytes required instead.
                // NOTE(review): this assumes `Needed::Size` carries the total
                // size, as nom's `take!` does; confirm for the nom version in use.
                return IResult::Incomplete(Needed::Size(m));
            }

            // Bound the scan by the bytes actually available so that a short
            // input made only of letters does not index out of bounds.
            let limit = n.min(input.len());
            let matched = input[..limit]
                .iter()
                .position(|&byte| !Self::is_alpha(byte))
                .unwrap_or(limit);

            // We were supposed to have at least `m` alphabetical bytes.
            if matched < m {
                return IResult::Error(error_position!(ErrorKind::ManyMN, &input[..]));
            }

            IResult::Done(&input[matched..], &input[..matched])
        }
    }