2012-12-23 22:35:35 +08:00
|
|
|
#!/usr/bin/python3 -tt
|
|
|
|
|
|
|
|
# Copyright 2012 Jussi Pakkanen
|
|
|
|
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
|
|
|
|
|
|
|
import ply.lex as lex
|
|
|
|
import ply.yacc as yacc
|
2012-12-27 00:28:06 +08:00
|
|
|
import nodes
|
2012-12-23 22:35:35 +08:00
|
|
|
|
2013-01-26 02:40:00 +08:00
|
|
|
# Keywords of the language.  t_ATOM looks every identifier up in this table
# and, on a hit, re-tags the token with the keyword's own token type.
reserved = {
    'true': 'TRUE',
    'false': 'FALSE',
    'if': 'IF',
    'endif': 'ENDIF',
    'else': 'ELSE',
}

# Every token type the lexer can emit (PLY requires this list): the
# punctuation/literal tokens below plus one type per reserved word.
tokens = [
    'LPAREN',
    'RPAREN',
    'LBRACKET',
    'RBRACKET',
    'LBRACE',
    'RBRACE',
    'ATOM',
    'COMMENT',
    'ASSIGN',
    'EQUALS',
    'NEQUALS',
    'COMMA',
    'DOT',
    'STRING',
    'INT',
    'EOL_CONTINUE',
    'EOL',
] + list(reserved.values())

# Simple tokens defined by a regular expression string.  They are raw
# strings so the backslashes reach the regex engine untouched; the non-raw
# spellings ('\(' , '\!=' ...) are invalid Python escape sequences and emit
# a warning on current interpreters.  PLY tries string-defined patterns
# longest first, so '==' is matched before '='.
# NOTE(review): no grammar rule visible in this file consumes COMMENT,
# LBRACE or RBRACE - confirm whether they should be discarded or parsed.
t_ASSIGN = r'='
t_EQUALS = r'=='
t_NEQUALS = r'\!='
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
t_LBRACE = r'\{'
t_RBRACE = r'\}'
t_COMMENT = r'\#[^\n]*'
t_COMMA = r','
t_DOT = r'\.'

# Characters skipped between tokens.  This is a plain character set, not a
# regex, so it must stay a normal (non-raw) string: space and tab.
t_ignore = ' \t'
|
2012-12-23 22:54:52 +08:00
|
|
|
|
2013-01-26 02:40:00 +08:00
|
|
|
# Lexer rule for identifiers.  The string below is the token's regex (PLY
# reads it from the docstring slot).  Identifiers that appear in `reserved`
# are re-typed as the matching keyword token; everything else is an ATOM.
def t_ATOM(t):
    '[a-zA-Z][_0-9a-zA-Z]*'
    word = t.value
    t.type = reserved[word] if word in reserved else 'ATOM'
    return t
|
|
|
|
|
2012-12-26 01:23:33 +08:00
|
|
|
# Lexer rule for single-quoted string literals (no escape handling: the
# regex simply runs to the next quote).  The token value is the text
# between the quotes.
def t_STRING(t):
    "'[^']*'"
    quoted = t.value
    # Drop the opening and closing quote characters.
    t.value = quoted[1:len(quoted) - 1]
    return t
|
|
|
|
|
2013-01-28 04:18:40 +08:00
|
|
|
# Lexer rule for decimal integer literals.  The matched digits are
# converted so the token value is an int rather than a str.
def t_INT(t):
    '[0-9]+'
    digits = t.value
    t.value = int(digits)
    return t
|
|
|
|
|
2012-12-24 00:57:50 +08:00
|
|
|
# Lexer rule for a newline.  Newlines are significant (they terminate
# statements in the grammar), so the token is returned after the lexer's
# line counter has been advanced.
def t_EOL(t):
    r'\n'
    lexer = t.lexer
    lexer.lineno = lexer.lineno + 1
    return t
|
|
|
|
|
|
|
|
# Lexer rule for a line continuation: a backslash, optional blanks, then a
# newline.  Only the line counter is updated; nothing is returned, so the
# lexer discards the match and the statement carries on to the next line.
def t_EOL_CONTINUE(t):
    r'\\[ \t]*\n'
    lexer = t.lexer
    lexer.lineno = lexer.lineno + 1
|
|
|
|
|
2012-12-23 22:54:52 +08:00
|
|
|
# Lexer error hook: called with a token whose value is the remaining,
# unmatched input.  Reports the first offending character on stdout and
# skips exactly that one character so lexing can continue.
def t_error(t):
    bad_char = t.value[0]
    print("Illegal character '%s'" % bad_char)
    t.lexer.skip(1)
|
|
|
|
|
2012-12-26 00:51:34 +08:00
|
|
|
# Yacc part
|
|
|
|
|
|
|
|
# Grammar action: a statement followed by more code.  The rule is
# right-recursive, so the trailing block already exists when this runs and
# the new statement is put at its front to keep source order.
def p_codeblock(t):
    'codeblock : statement EOL codeblock'
    statement, block = t[1], t[3]
    block.prepend(statement)
    t[0] = block
|
2012-12-27 03:38:36 +08:00
|
|
|
|
|
|
|
# Grammar action: a blank line in front of a code block contributes
# nothing, so the block that follows is passed through unchanged.
def p_codeblock_emptyline(t):
    'codeblock : EOL codeblock'
    following_block = t[2]
    t[0] = following_block
|
2012-12-26 00:51:34 +08:00
|
|
|
|
|
|
|
# Grammar action: the final statement of a block.  This is where the
# recursion bottoms out, so a fresh nodes.CodeBlock is created here and
# seeded with that statement.
def p_codeblock_last(t):
    'codeblock : statement EOL'
    line = t.lineno(1)
    block = nodes.CodeBlock(line)
    block.prepend(t[1])
    t[0] = block
|
2012-12-26 01:04:22 +08:00
|
|
|
|
|
|
|
# Grammar action: wrap an identifier token in a nodes.AtomExpression,
# recording the line the token was found on.
def p_expression_atom(t):
    'expression : ATOM'
    name, line = t[1], t.lineno(1)
    t[0] = nodes.AtomExpression(name, line)
|
2012-12-26 01:04:22 +08:00
|
|
|
|
2013-01-28 04:18:40 +08:00
|
|
|
# Grammar action: wrap an integer literal (already converted to int by
# t_INT) in a nodes.IntExpression together with its line number.
def p_expression_int(t):
    'expression : INT'
    number, line = t[1], t.lineno(1)
    t[0] = nodes.IntExpression(number, line)
|
|
|
|
|
2013-01-26 02:40:00 +08:00
|
|
|
# Grammar action for the boolean keywords.  The token value is the keyword
# text itself, so comparing it with 'true' yields the Python bool to store
# in the nodes.BoolExpression.
def p_expression_bool(t):
    '''expression : TRUE
                  | FALSE'''
    is_true = t[1] == 'true'
    t[0] = nodes.BoolExpression(is_true, t.lineno(1))
|
|
|
|
|
2012-12-26 01:04:22 +08:00
|
|
|
# Grammar action: wrap a string literal (quotes already stripped by
# t_STRING) in a nodes.StringExpression together with its line number.
def p_expression_string(t):
    'expression : STRING'
    text, line = t[1], t.lineno(1)
    t[0] = nodes.StringExpression(text, line)
|
2012-12-26 00:51:34 +08:00
|
|
|
|
|
|
|
# Grammar action for assignment: the expression left of '=' is the target
# and the statement to its right is the value.
def p_statement_assign(t):
    'statement : expression ASSIGN statement'
    target, value = t[1], t[3]
    t[0] = nodes.Assignment(target, value, t.lineno(1))
|
2012-12-26 00:51:34 +08:00
|
|
|
|
2013-01-26 04:42:11 +08:00
|
|
|
# Grammar action for '==' and '!=' comparisons.  The operator token's text
# is handed to nodes.Comparison between the two operands.
# NOTE(review): no precedence table is visible in this file, so this
# self-recursive rule presumably relies on PLY's default conflict
# resolution - confirm that is intended.
def p_statement_comparison(t):
    '''statement : statement EQUALS statement
                 | statement NEQUALS statement'''
    left, operator, right = t[1], t[2], t[3]
    t[0] = nodes.Comparison(left, operator, right, t.lineno(1))
|
|
|
|
|
2013-01-26 04:42:11 +08:00
|
|
|
# Grammar action for an array literal: the bracketed argument list becomes
# the content of a nodes.ArrayStatement.
def p_statement_array(t):
    '''statement : LBRACKET args RBRACKET'''
    elements = t[2]
    t[0] = nodes.ArrayStatement(elements, t.lineno(1))
|
|
|
|
|
2012-12-26 00:51:34 +08:00
|
|
|
# Grammar action for a function call: the expression in front of the
# parentheses names the function, the parenthesised list is its arguments.
def p_statement_func_call(t):
    'statement : expression LPAREN args RPAREN'
    function, arguments = t[1], t[3]
    t[0] = nodes.FunctionCall(function, arguments, t.lineno(1))
|
2012-12-26 00:51:34 +08:00
|
|
|
|
|
|
|
# Grammar action for a method call of the form `object.method(args)`:
# the receiver, the method expression and the argument list are passed to
# nodes.MethodCall in that order.
def p_statement_method_call(t):
    'statement : expression DOT expression LPAREN args RPAREN'
    receiver, method, arguments = t[1], t[3], t[5]
    t[0] = nodes.MethodCall(receiver, method, arguments, t.lineno(1))
|
2012-12-26 00:51:34 +08:00
|
|
|
|
2013-01-26 03:06:08 +08:00
|
|
|
# Grammar action for an if/endif construct.  The else part comes from the
# `elseblock` rules and is None when no `else` was written.
def p_statement_if(t):
    'statement : IF LPAREN statement RPAREN EOL codeblock elseblock ENDIF'
    condition, then_block, else_block = t[3], t[6], t[7]
    t[0] = nodes.IfStatement(condition, then_block, else_block, t.lineno(1))
|
|
|
|
|
|
|
|
# Grammar action for a missing else branch (empty production).  PLY takes a
# rule's result from t[0] and ignores the function's return value, so the
# "no else block" result is stored explicitly, like every other rule does.
def p_empty_else(t):
    'elseblock : '
    t[0] = None
|
|
|
|
|
|
|
|
# Grammar action for an else branch: its value is simply the code block
# that follows the `else` line.
def p_else(t):
    'elseblock : ELSE EOL codeblock'
    else_body = t[3]
    t[0] = else_body
|
2013-01-26 03:06:08 +08:00
|
|
|
|
2012-12-26 00:51:34 +08:00
|
|
|
# Grammar action: a bare expression used where a statement is expected.
# The conversion itself is delegated to nodes.statement_from_expression.
def p_statement_expression(t):
    'statement : expression'
    expression = t[1]
    t[0] = nodes.statement_from_expression(expression)
|
2012-12-26 00:51:34 +08:00
|
|
|
|
|
|
|
# Grammar action: one argument followed by a comma and more arguments.
# The rule is right-recursive, so the remaining arguments are already
# collected; the new one goes in front of them to keep source order.
def p_args_multiple(t):
    'args : statement COMMA args'
    first, remaining = t[1], t[3]
    remaining.prepend(first)
    t[0] = remaining
|
2012-12-26 00:51:34 +08:00
|
|
|
|
|
|
|
# Grammar action: the last (or only) argument of a list.  A fresh
# nodes.Arguments container is created here and seeded with it.
def p_args_single(t):
    'args : statement'
    line = t.lineno(1)
    arguments = nodes.Arguments(line)
    arguments.prepend(t[1])
    t[0] = arguments
|
2012-12-26 00:51:34 +08:00
|
|
|
|
|
|
|
# Grammar action for an empty argument list, e.g. `f()` or `[]`: produces
# an empty nodes.Arguments.  The rule has no symbols of its own, so the
# line number is taken from the production itself (index 0).
def p_args_none(t):
    'args :'
    line = t.lineno(0)
    t[0] = nodes.Arguments(line)
|
2012-12-26 00:51:34 +08:00
|
|
|
|
|
|
|
# Parser error hook.  `t` is the token the parser could not handle, or
# None (handled below, e.g. when input ends unexpectedly).  The location is
# reported on stdout; nothing is raised or returned.
def p_error(t):
    if t is None:
        txt = 'NONE'
    else:
        # Token values are not always strings (t_INT stores an int), so
        # convert before concatenating to avoid a TypeError while reporting.
        txt = str(t.value)
    print('Parser errored out at: ' + txt)
|
2012-12-26 00:51:34 +08:00
|
|
|
|
2012-12-23 22:54:52 +08:00
|
|
|
def test_lexer():
    """Tokenize a small hard-coded sample and print every token.

    Manual smoke test: the sample exercises comments, arrays, braces, a
    method call and a backslash line continuation.
    """
    s = """hello = (something) # this = (that)
two = ['file1', 'file2']
function(h) { stuff }
obj.method(lll, \\
'string')
"""
    lexer = lex.lex()
    lexer.input(s)
    # lexer.token() yields None once the input is exhausted.
    for tok in iter(lexer.token, None):
        print(tok)
|
|
|
|
|
2012-12-26 00:51:34 +08:00
|
|
|
def test_parser():
    """Parse a small hard-coded sample and print what build_ast returns.

    Manual smoke test: the sample covers a function call, a method call,
    a blank line between statements and a call without arguments.
    """
    sample_lines = (
        "func_call('something', 'or else')",
        "objectname.methodname(abc)",
        "",
        "emptycall()",
    )
    code = '\n'.join(sample_lines)
    tree = build_ast(code)
    print(tree)
|
|
|
|
|
|
|
|
def build_ast(code):
    """Parse source text with the grammar defined in this module.

    Trailing whitespace is stripped and exactly one newline appended,
    because every codeblock rule requires statements to end with EOL.

    Returns whatever the parser produces for the input; the node types come
    from the `nodes` module.  Syntax errors are reported through p_error.
    NOTE(review): input that is empty after stripping appears to match no
    codeblock rule, so it is presumably reported as an error - confirm.
    """
    code = code.rstrip() + '\n'
    # A fresh lexer per call starts line counting from the beginning again.
    lexer = lex.lex()
    parser = yacc.yacc()
    # tracking=True makes PLY propagate line numbers to nonterminals and
    # empty productions.  Most grammar actions here call t.lineno(1) on a
    # nonterminal (or t.lineno(0) on an empty rule); without tracking those
    # calls do not return the real source line.
    return parser.parse(code, lexer=lexer, tracking=True)
|
2012-12-26 00:51:34 +08:00
|
|
|
|
2012-12-23 22:35:35 +08:00
|
|
|
# Script entry point: run the parser smoke test.  Call test_lexer() here
# instead to dump the raw token stream.
if __name__ == '__main__':
    test_parser()
|