I have the following input format; the parsed result should be of the `Item` data type:
test 1: "<A \"Test\">"
test 2: r#"<A "Test">"#
Result: Item { item_type: TEXT, ascii_data: Some("Test") }
test 3: <A>
Result: Item { item_type: TEXT, ascii_data: None }
For test 1 and test 2 the following code parses correctly, but for test 3 it fails.
The format can also contain nested items, for example:
<A "Test1">
<A "Test2">
<A "Test3">
Item {
item_type: LIST,
sub_items: [
Item {
item_type: ASCII,
ascii_data: "Test1",
Item {
item_type: LIST,
sub_items: [
Item {
item_type: ASCII,
ascii_data: None,
Item {
item_type: ASCII,
ascii_data: "Test2",
Item {
item_type: ASCII,
ascii_data: "Test3",
use nom::{
bytes::complete::{tag, take_until},
#[derive(Clone, Debug, PartialEq)]
enum ItemType {
#[derive(Clone, Debug, PartialEq)]
struct Item {
item_type: ItemType,
sub_items: Option<Vec<Item>>,
ascii_data: Option<String>,
impl Default for Item {
fn default() -> Self {
Item {
item_type: ItemType::NONE,
sub_items: None,
ascii_data: None,
// Parse string data, it may empty then return none,
fn parse_ascii_data(input: &str) -> IResult<&str, String> {
let (input, _) = tag("\"")(input)?;
let (input, ascii_data) = take_until("\"")(input)?;
let (input, _) = tag("\"")(input)?;
Ok((input, ascii_data.to_string()))
// Parse <A> or <A "string">, if no string then return empty string then return none
fn parse_ascii_item(input: &str) -> IResult<&str, Item> {
let (input, _) = tag("<A")(input)?;
let (input, _) = multispace0(input)?;
let (input, ascii_data) = alt((parse_ascii_data, map(tag("\"\""), |_| "".to_string())))(input)?;
let (input, _) = tag(">")(input)?;
Item {
item_type: ItemType::TEXT,
ascii_data: Some(ascii_data),
// Parse <L> or <L <A "string">>, if no string then return empty string then return none
fn parse_list_item(input: &str) -> IResult<&str, Item> {
let (input, _) = tag("<L")(input)?;
let (input, _) = multispace0(input)?;
let (input, sub_items) = alt((parse_ascii_item, map(tag("<>"), |_| Item::default())))(input)?;
let (input, _) = tag(">")(input)?;
Item {
item_type: ItemType::LIST,
sub_items: Some(vec![sub_items]),
mod tests {
use super::*;
fn test_parse_ascii_item() {
let input = "<A \"Test\">";
let expected_item = Item {
item_type: ItemType::TEXT,
ascii_data: Some("Test".to_string()),
assert_eq!(parse_ascii_item(input), Ok(("", expected_item)));
let input = r#"<A "Test">"#;
let expected_item = Item {
item_type: ItemType::TEXT,
ascii_data: Some("Test".to_string()),
assert_eq!(parse_ascii_item(input), Ok(("", expected_item)));
Item {
item_type: ItemType::TEXT,
ascii_data: None,
fn test_parse_list_item() {
let input = "<L <A \"Test\">>";
let expected_item = Item {
item_type: ItemType::LIST,
sub_items: Some(vec![Item {
item_type: ItemType::TEXT,
ascii_data: Some("Test".to_string()),
assert_eq!(parse_list_item(input), Ok(("", expected_item)));
Item {
item_type: ItemType::LIST,
sub_items: Some(vec![Item::default()]),
fn test_parse_nested_list_item() {
let input = "<L \n <A \"Test1\">\n <L\n <A \"Test2\">\n >\n>";
let expected_item = Item {
item_type: ItemType::LIST,
sub_items: Some(vec![Item {
item_type: ItemType::TEXT,
ascii_data: Some("Test".to_string()),
assert_eq!(parse_list_item(input), Ok(("", expected_item)));
Item {
item_type: ItemType::LIST,
sub_items: Some(vec![Item::default()]),
In `parse_ascii_item()` you don't want to use `alt()`; you want to use `opt()`.
In `parse_list_item()`, on the other hand, you actually do want to use `alt()`.
The difference is that `alt()` tries the given parsers one by one until one succeeds (if any),
and fails if none of them does, whereas in `parse_ascii_item()` you want to accept
an optional `ascii_data`, which is exactly what `opt()` expresses.
The fixed parse_ascii_item()
looks like this:
fn parse_ascii_item(input: &str) -> IResult<&str, Item> {
let (input, _) = multispace0(input)?;
let (input, _) = tag("<A")(input)?;
let (input, _) = multispace0(input)?;
let (input, ascii_data) = opt(parse_ascii_data)(input)?;
let (input, _) = multispace0(input)?;
let (input, _) = tag(">")(input)?;
Item {
item_type: ItemType::TEXT,
Now in `parse_list_item()` we do actually want to use `alt()`, since we want to accept
either `parse_ascii_item()` or `parse_list_item()`.
Additionally, since we want to accept zero-to-many of them, we also need to wrap it in `many0()`.
The fixed parse_list_item()
looks like this:
fn parse_list_item(input: &str) -> IResult<&str, Item> {
let (input, _) = multispace0(input)?;
let (input, _) = tag("<L")(input)?;
let (input, mut sub_items) = many0(|input| {
let (input, _) = multispace0(input)?;
alt((parse_ascii_item, parse_list_item))(input)
let (input, _) = multispace0(input)?;
let (input, _) = tag(">")(input)?;
if sub_items.is_empty() {
Item {
item_type: ItemType::LIST,
sub_items: Some(sub_items),
Additionally, your `test_parse_nested_list_item()` is wrong.
At least, your `input` doesn't reasonably match the `expected_item`.
So I assume `expected_item` actually needs to look like this:
let expected_item = Item {
item_type: ItemType::LIST,
sub_items: Some(vec![
Item {
item_type: ItemType::TEXT,
ascii_data: Some("Test1".to_string()),
Item {
item_type: ItemType::LIST,
sub_items: Some(vec![Item {
item_type: ItemType::TEXT,
ascii_data: Some("Test2".to_string()),
Here's a complete example on Rust Playground.
As an aside, in the future remember to sprinkle multispace0()
around, since you allow for optional whitespace in various places:
let (input, _) = multispace0(input)?;
Another aside: Rust's string literals allow for newlines. Additionally, instead of escaping `\"`
you can also use raw string literals:
// Before
let input = "<L \n <A \"Test1\">\n <L\n <A \"Test2\">\n >\n>";
// After:
let input = r#"
<A "Test1">
<A "Test2">