Search code examples
rusttokenizedslsymbolic-math

Is there any way of accessing the tokens from a token tree or expression in rust (without stringifing and having to parse)


So my question is two-fold, I'm trying to make a computer algebra system similar to SymPy or Math.NET symbolics

my idea is to use some sort of macro to allow this syntax:

let symbolic!(fn function(x, a) -> 2/4*x^2 + a*x + 4)
function.derive(x) // x + 1

What I'm looking for is a way to access the tokens from the token tree so that x, a becomes Symbolic::Symbol("x"), Symbolic::Symbol("a") and 2/4, 4 become Symbolic::Rational(2,4) Symbolic::Rational(4,1), i can then use operator overloading to construct the abstract syntax tree, : eg :

impl Add<Self> for Symbolic {
    type Output = Node;
    fn add(self, rhs: Self) -> Self::Output {
        use Node::*;
        BinaryExpr { op: '+', lhs: Box::new(Symb(self)), rhs: Box::new(Symb(rhs)) }
    }
}

where the enumsSymbolic and Node are:

pub enum Symbolic{
    Symbol(String),
    Rational(isize, usize),
    Operator(char)
}

pub enum Node{
    Symb(Symbolic),
    UnaryExpr{
        op: char,
        child: Box<Node>
    },
    BinaryExpr{
        op: char,
        lhs: Box<Node>,
        rhs: Box<Node>
    }

}

Solution

  • There is certainly a way to accomplish this using a declarative macro_rules! macro.

    Here is what I came up with:

    /// Macro for declaring a symbolic `Function`
    ///
    /// # Example
    ///
    /// ```
    /// let function = symbolic!(fn (a, b) -> a + b^2);
    /// let functione = Function {
    ///     parameters: vec![String::from("a"), String::from("b")],
    ///     expression: Node::Symb(Symbolic::Symbol(String::from("a"))) + (Node::Symb(Symbolic::Symbol(String::from("b"))) ^ Node::Symb(Symbolic::Rational(2, 1))),
    /// };
    /// assert_eq!(function, functione);
    /// ```
    macro_rules! symbolic {
        // Main entry point
        ( fn ($($params:ident),* $(,)*) -> $($expression:tt)* ) => {
            Function {
                // Extract parameters
                parameters: vec![
                    $( String::from(stringify!($params)), )*
                ],
                // Pass expression to tt muncher
                // Starting with an empty accumulator
                expression: symbolic!(@munch ( $($expression)* ) -> ()),
            }
        };
        // Handle exponentiation with ^ as highest priority
        //
        // Capture the left (base) and right (exponent) sides as raw token trees,
        // which helpfully captures parenthesized expressions
        ( @munch ($base:tt^$exponent:tt $($rest:tt)*) -> ($($accum:tt)*) ) => {
            // Pass the rest of the expression to continue parsing recursively
            symbolic!(@munch ( $($rest)* ) -> (
                // Append the exponentiation wrapped in parens to the accumulator
                $($accum)*
                // Parse the base and exponent as expressions
                (symbolic!(@munch ($base) -> ()) ^ symbolic!(@munch ($exponent) -> ())) 
            ))
        };
        // Handle parenthesized expressions directly
        // 
        // Unwrap the parenthesis
        ( @munch (($($expression:tt)*) $($rest:tt)*) -> ($($accum:tt)*) ) => {
            // Pass the rest of the expression to continue parsing recursively
            symbolic!(@munch ( $($rest)* ) -> (
                // Append the expression parse invocation to the accumulator
                $($accum)*
                // Parse the inner expression
                // This is wrapped in parens by the final munch case
                symbolic!(@munch ( $($expression)* ) -> ())
            ))
        };
        // Handle division of two literal integers as a single rational
        //
        // Capture the left (numerator) and right (denominator) sides,
        // and pass them through as a rational
        ( @munch ($numerator:literal/$denominator:literal $($rest:tt)*) -> ($($accum:tt)*) ) => {
            // Pass the rest of the expression to continue parsing recursively
            symbolic!(@munch ( $($rest)* ) -> (
                // Append the rational to the accumulator
                $($accum)*
                Node::Symb(Symbolic::Rational($numerator, $denominator))
            ))
        };
        // Handle a single literal number as a rational with denominator of 1
        ( @munch ($num:literal $($rest:tt)*) -> ($($accum:tt)*) ) => {
            // Pass the rest of the expression to continue parsing recursively
            symbolic!(@munch ( $($rest)* ) -> (
                // Append the rational to the accumulator
                $($accum)*
                Node::Symb(Symbolic::Rational($num, 1))
            ))
        };
        // Handle a parameter name
        ( @munch ($param:ident $($rest:tt)*) -> ($($accum:tt)*) ) => {
            // Pass the rest of the expression to continue parsing recursively
            symbolic!(@munch ( $($rest)* ) -> (
                // Append the parameter symbol to the accumulator
                $($accum)*
                Node::Symb(Symbolic::Symbol(String::from(stringify!($param))))
            ))
        };
        // Pass through operators directly
        //
        // For better ergonomics, you may want to handle each operator separately,
        // as this will allow literally any token through
        ( @munch ($op:tt $($rest:tt)*) -> ($($accum:tt)*) ) => {
            symbolic!(@munch ( $($rest)* ) -> ( $($accum)* $op ))
        };
        // Handle the final output when all tokens have been handled
        ( @munch () -> ($($accum:tt)*) ) => {
            // Just return the final accumulated Rust expression
            ( $($accum)* )
        };
    }
    

    This macro is a lot. The first thing you might pick out is a lot of patterns that look like this:

    ( @munch ($first:thing $($rest:tt)*) -> ($($accum:tt)*) )
    

    This is a combination of two patterns:

    I highly recommend reading through that macro book, especially those pages. Here is a quick explanation.

    Incremental TT munchers allow the declarative macro to handle a token stream bit by bit. You give a pattern for the bit you want matched, and then the $($rest:tt)* pattern matches any sequence of tokens after it. By passing through $($rest)* to a subsequent invocation, the whole token sequence can be handled.

    Push-down Accumulation is way of working around the fact that a macro must always produce a valid item. Essentially, macros can't return something like 1, 2 because that's not a valid expression. Instead, we have to pass through an accumulated value and append to it with each subsequent macro invocation.


    However, I don't think that macro with overloading operators is really what would serve you best. I think a better way would be to have a simpler macro that just converts tokens as a kind of lexer, and then parse the output of that yourself.

    That way, you could completely control operator precedence, etc.

    Here's a macro that will do the simpler lexing instead:

    fn parse_symbolic_expression(symbols: Vec<Symbolic>) -> Node {
        todo!()
    }
    
    macro_rules! symbolic2 {
        ( fn ($($params:ident),* $(,)?) -> $($expression:tt)* ) => {
            Function {
                // Extract parameters
                parameters: vec![
                    $( String::from(stringify!($params)), )*
                ],
                expression: parse_symbolic_expression(
                    symbolic2!(@munch ( $($expression)* ) -> [])
                ),
            }
        };
    
        ( @munch ($num:literal $($rest:tt)*) -> [$($accum:tt)*] ) => {
            symbolic2!(@munch ( $($rest)* ) -> [ $($accum)* Symbolic::Integer($num), ])
        };
        ( @munch ($param:ident $($rest:tt)*) -> [$($accum:tt)*] ) => {
            symbolic2!(@munch ( $($rest)* ) -> [ $($accum)* Symbolic::Symbol(String::from(stringify!($param))), ])
        };
        ( @munch (($($expression:tt)*) $($rest:tt)*) -> [$($accum:tt)*] ) => {
            symbolic2!(@munch ( $($rest)* ) -> [ $($accum)* Symbolic::Parenthesized(symbolic2!(@munch ( $($expression)* ) -> [])), ])
        };
        ( @munch (+ $($rest:tt)*) -> [$($accum:tt)*] ) => {
            symbolic2!(@munch ( $($rest)* ) -> [ $($accum)* Symbolic::BinaryOperator('+'), ])
        };
        ( @munch (- $($rest:tt)*) -> [$($accum:tt)*] ) => {
            symbolic2!(@munch ( $($rest)* ) -> [ $($accum)* Symbolic::BinaryOperator('-'), ])
        };
        ( @munch (/ $($rest:tt)*) -> [$($accum:tt)*] ) => {
            symbolic2!(@munch ( $($rest)* ) -> [ $($accum)* Symbolic::BinaryOperator('/'), ])
        };
        ( @munch (* $($rest:tt)*) -> [$($accum:tt)*] ) => {
            symbolic2!(@munch ( $($rest)* ) -> [ $($accum)* Symbolic::BinaryOperator('*'), ])
        };
        ( @munch (^ $($rest:tt)*) -> [$($accum:tt)*] ) => {
            symbolic2!(@munch ( $($rest)* ) -> [ $($accum)* Symbolic::BinaryOperator('^'), ])
        };
        ( @munch () -> [$($accum:tt)*] ) => {
            vec![ $($accum)* ]
        };
    }