I'm trying to get the token position in the string when using pyparsing. I want to report the location of an include guard issue in C files:
import pyparsing as pp
# Parse results; they stay None when the corresponding parse fails.
m = None
n = None
#a sample C header file
lines = "\
#ifndef HEADER_FILE_H\n\
#define HEADER_FILE_H 1\n\
\n\
\n\
/* code is here */\n\
\n\
#endif /* HEADER_FILE_H */\
"
LBRACE,RBRACE,LBRACK,RBRACK,LT,GT,LPAREN,RPAREN,DQ,SEMI = map(pp.Suppress,'{}[]<>()";')
# A C identifier starts with a letter or underscore; digits are only
# allowed from the second character on.
CIDENT = pp.Word(pp.alphas + "_", pp.alphanums + "_") #any C identifier
LCOMMENT = pp.Suppress("/*")
RCOMMENT = pp.Suppress("*/")
last_line = lines.split("\n")[-1] #get last line
# '#' optionally followed by spaces/tabs before the directive keyword.
pound = pp.Literal("#") + pp.Suppress(pp.Optional(pp.White(" \t")))
ifndef = pound + pp.Literal("ifndef")
ifnotdefined = pound + pp.Literal("if") + pp.Literal("!defined")
define = pound + pp.Literal("define")
endif = pound + pp.Literal("endif")
# Optional trailing "/* GUARD_NAME */" comment after #endif.
comment = pp.Optional(LCOMMENT + CIDENT("guardname_endif") + RCOMMENT)("guard_end_comment")
# "#ifndef NAME" or "#if !defined(NAME)" (parentheses optional).
includeguardifndef = pp.Or([ifndef, ifnotdefined]) + pp.Optional(LPAREN) + CIDENT("guardname_ifndef_val") + pp.Optional(RPAREN)
# "#define NAME" with an optional literal 1 as the value.
includeguard = define + CIDENT("guardname_define_val") + pp.Optional(pp.Literal("1")("guard_is_one"))
includeguard_top = includeguardifndef + includeguard
includeguardendif = endif("includeguardendif") + comment
# Report parse failures instead of swallowing them silently; m / n keep
# their None default in that case so the prints below still run.
try:
    m = includeguard_top.parseString(lines)
except pp.ParseException as err:
    print("include guard header not matched:", err)
try:
    n = includeguardendif.parseString(last_line)
except pp.ParseException as err:
    print("include guard #endif not matched:", err)
print(m)
print(n)
Now when I get my match "m", I can get m.guardname_define_val, and ultimately I want to get something like m.guardname_define_val.pos, which is the position of the match in "lines".
I arrived at this question which gets me nearly there, but I can't figure out how to still get named ranges with the tokens? I don't want to use magic numbers to get the position at the end of the match.
I'm no stranger to regex, but I'm new to pyparsing and pretty amazed with how powerful and clear it is. Really enjoying it. If there are suggestions on what I've done above I'll take that too.
m.guardname_define_value[0][0]
to get the position#ifdef ... #endif
that magically match? (maybe if opener is '#ifdef' and closer is '#endif'?)".*(FOO).*"
would consume and discard anything until it found and captured FOO, then consume and discard anything after it, I'm having a hard time replicating that. Thank you.
Here is your sample code, with some slight modifications. (I really dislike backslashes, and you might find working with triple quotes easier on the eyes when embedding text samples in your scripts.) Notably, I'm showing the use of locatedExpr. It may be what you really wanted to see. Also, see this SO question: "Pyparsing: get token location in results name".
import pyparsing as pp
#a sample C header file
# The blank lines are significant: locations reported by locatedExpr are
# character offsets into this exact string (leading newline included).
lines = """
#ifndef HEADER_FILE_H
#define HEADER_FILE_H 1


/* code is here */

#endif /* HEADER_FILE_H */
"""
LBRACE,RBRACE,LBRACK,RBRACK,LT,GT,LPAREN,RPAREN,DQ,SEMI = map(pp.Suppress,'{}[]<>()";')
CIDENT = pp.Word(pp.alphas + "_", pp.alphanums + "_") #any C identifier
LCOMMENT = pp.Suppress("/*")
RCOMMENT = pp.Suppress("*/")


def make_directive(s, pound=pp.Literal("#")):
    """Return an expression matching a preprocessor directive as one token.

    The '#' and the keyword *s* are combined into a single string such as
    '#ifndef'; adjacent=False allows whitespace between '#' and the keyword.
    """
    return pp.Combine(pound + s, adjacent=False)


ifndef = make_directive("ifndef")
ifnotdefined = make_directive("if") + pp.Literal("!defined")
define = make_directive("define")
endif = make_directive("endif")

# Optional trailing "/* GUARD_NAME */" comment after #endif.
comment = pp.Optional(LCOMMENT
                      + CIDENT("guardname_endif")
                      + RCOMMENT)("guard_end_comment")
# "#ifndef NAME" or "#if !defined(NAME)" (parentheses optional).
includeguardifndef = ((ifndef | ifnotdefined)
                      + pp.Optional(LPAREN)
                      + CIDENT("guardname_ifndef_val")
                      + pp.Optional(RPAREN))
# "#define NAME" with an optional literal 1 as the value.
includeguard = (define
                + CIDENT("guardname_define_val")
                + pp.Optional(pp.Literal("1")("guard_is_one")))
includeguard_top = includeguardifndef + includeguard
includeguardendif = endif("includeguardendif") + comment

# parse the header
# SkipTo discards everything between the opening guard and the #endif.
parser = includeguard_top + pp.SkipTo(includeguardendif).suppress() + includeguardendif
print(parser.parseString(lines).dump())

# parse the header, with locns
# locatedExpr groups each match with locn_start / locn_end / value names.
# NOTE(review): pyparsing 3.x appears to deprecate locatedExpr in favour of
# pp.Located, whose result nesting may differ -- confirm before switching.
loc = pp.locatedExpr
parser = loc(includeguard_top) + pp.SkipTo(includeguardendif).suppress() + loc(includeguardendif)
print(parser.parseString(lines).dump())
Prints:
['#ifndef', 'HEADER_FILE_H', '#define', 'HEADER_FILE_H', '1', '#endif', 'HEADER_FILE_H']
- guard_end_comment: ['HEADER_FILE_H']
- guard_is_one: '1'
- guardname_define_val: 'HEADER_FILE_H'
- guardname_endif: 'HEADER_FILE_H'
- guardname_ifndef_val: 'HEADER_FILE_H'
- includeguardendif: '#endif'
[[1, '#ifndef', 'HEADER_FILE_H', '#define', 'HEADER_FILE_H', '1', 46], [69, '#endif', 'HEADER_FILE_H', 95]]
[0]:
[1, '#ifndef', 'HEADER_FILE_H', '#define', 'HEADER_FILE_H', '1', 46]
- guard_is_one: '1'
- guardname_define_val: 'HEADER_FILE_H'
- guardname_ifndef_val: 'HEADER_FILE_H'
- locn_end: 46
- locn_start: 1
- value: ['#ifndef', 'HEADER_FILE_H', '#define', 'HEADER_FILE_H', '1']
[1]:
[69, '#endif', 'HEADER_FILE_H', 95]
- guard_end_comment: ['HEADER_FILE_H']
- guardname_endif: 'HEADER_FILE_H'
- includeguardendif: '#endif'
- locn_end: 95
- locn_start: 69
- value: ['#endif', 'HEADER_FILE_H']