|
|
from sympy.external import import_module import os
cin = import_module('clang.cindex', import_kwargs = {'fromlist': ['cindex']})
"""
This module contains all the necessary Classes and Function used to Parse C and C++ code into SymPy expression The module serves as a backend for SymPyExpression to parse C code It is also dependent on Clang's AST and SymPy's Codegen AST. The module only supports the features currently supported by the Clang and codegen AST which will be updated as the development of codegen AST and this module progresses. You might find unexpected bugs and exceptions while using the module, feel free to report them to the SymPy Issue Tracker
Features Supported ==================
- Variable Declarations (integers and reals) - Assignment (using integer & floating literal and function calls) - Function Definitions and Declaration - Function Calls - Compound statements, Return statements
Notes =====
The module is dependent on an external dependency which needs to be installed to use the features of this module.
Clang: The C and C++ compiler which is used to extract an AST from the provided C source code.
References ==========
.. [1] https://github.com/sympy/sympy/issues .. [2] https://clang.llvm.org/docs/ .. [3] https://clang.llvm.org/docs/IntroductionToTheClangAST.html
"""
if cin: from sympy.codegen.ast import (Variable, Integer, Float, FunctionPrototype, FunctionDefinition, FunctionCall, none, Return, Assignment, intc, int8, int16, int64, uint8, uint16, uint32, uint64, float32, float64, float80, aug_assign, bool_, While, CodeBlock) from sympy.codegen.cnodes import (PreDecrement, PostDecrement, PreIncrement, PostIncrement) from sympy.core import Add, Mod, Mul, Pow, Rel from sympy.logic.boolalg import And, as_Boolean, Not, Or from sympy.core.symbol import Symbol from sympy.core.sympify import sympify from sympy.logic.boolalg import (false, true) import sys import tempfile
class BaseParser:
    """Common base of the C parser; owns the Clang AST index."""

    def __init__(self):
        """Create the Clang index that translation units are parsed from."""
        self.index = cin.Index.create()

    def diagnostics(self, out):
        """Print the diagnostics of the current translation unit.

        Parameters
        ==========

        out : file-like object
            Stream that receives one formatted line per diagnostic found in
            ``self.tu.diagnostics``.

        """
        # Numeric severity reported by Clang -> label printed in the report.
        severity_labels = {
            0: 'IGNORED',
            1: 'NOTE',
            2: 'WARNING',
            3: 'ERROR',
            4: 'FATAL',
        }
        for diag in self.tu.diagnostics:
            label = severity_labels[diag.severity]
            where = diag.location
            fields = (label, where.file, where.line, where.column,
                      diag.spelling)
            print('%s %s (line %s, col %s) %s' % fields, file=out)
class CCodeConverter(BaseParser): """The Code Converter for Clang AST
The converter object takes the C source code or file as input and converts them to SymPy Expressions. """
def __init__(self):
    """Set up the converter state and the Clang-to-codegen type tables."""
    super().__init__()

    kinds = cin.TypeKind

    # Nodes produced so far by parse() / parse_str().
    self._py_nodes = []

    # Clang type kinds grouped by category, mapped to codegen AST types.
    self._data_types = {
        "void": {
            kinds.VOID: none,
        },
        "bool": {
            kinds.BOOL: bool_,
        },
        "int": {
            kinds.SCHAR: int8,
            kinds.SHORT: int16,
            kinds.INT: intc,
            kinds.LONG: int64,
            kinds.UCHAR: uint8,
            kinds.USHORT: uint16,
            kinds.UINT: uint32,
            kinds.ULONG: uint64,
        },
        "float": {
            kinds.FLOAT: float32,
            kinds.DOUBLE: float64,
            kinds.LONGDOUBLE: float80,
        },
    }
def parse(self, filenames, flags):
    """Parse a file containing C source code.

    The file is turned into a Clang translation unit; every top-level
    variable or function declaration in it is passed to ``transform`` and
    the results are appended to the converter's node list.

    Parameters
    ==========

    filenames : string
        Path to the C file to be parsed

    flags : list
        Arguments to be passed to Clang while parsing the C code

    Returns
    =======

    py_nodes : list
        The converter's accumulated list of SymPy AST nodes

    """
    path = os.path.abspath(filenames)
    self.tu = self.index.parse(
        path,
        args=flags,
        options=cin.TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD,
    )

    for child in self.tu.cursor.get_children():
        kind = child.kind
        # Only top-level variable and function declarations are converted;
        # every other cursor kind is skipped.
        if (kind == cin.CursorKind.VAR_DECL
                or kind == cin.CursorKind.FUNCTION_DECL):
            self._py_nodes.append(self.transform(child))

    return self._py_nodes
def parse_str(self, source, flags):
    """Parse a string containing C source code.

    The source is written to a temporary ``.cpp`` file, which is parsed into
    a Clang translation unit. Every top-level variable or function
    declaration is passed to ``transform`` and the results are appended to
    the converter's node list.

    Parameters
    ==========

    source : string
        The C source code to be parsed

    flags : list
        Arguments to be passed to Clang while parsing the C code

    Returns
    =======

    py_nodes : list
        The converter's accumulated list of SymPy AST nodes

    """
    # The context manager guarantees the temporary file is closed (and thus
    # removed) even when Clang raises while parsing.
    # NOTE(review): Clang reopens the file by name while it is still open
    # here; the tempfile documentation says this is not possible on Windows.
    with tempfile.NamedTemporaryFile(mode='w+', suffix='.cpp') as file:
        file.write(source)
        # Make sure the text is on disk before Clang reads it by name.
        file.flush()
        self.tu = self.index.parse(
            file.name,
            args=flags,
            options=cin.TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD,
        )

    for child in self.tu.cursor.get_children():
        # Only top-level variable and function declarations are converted.
        if child.kind in (cin.CursorKind.VAR_DECL,
                          cin.CursorKind.FUNCTION_DECL):
            self._py_nodes.append(self.transform(child))

    return self._py_nodes
def transform(self, node):
    """Dispatch a Clang AST node to its ``transform_<kind>`` method.

    The method name is derived from the lower-cased name of the node kind.

    Returns
    =======

    The value returned by the matching handler, or None when the converter
    has no handler for the node kind (a notice naming the ignored node is
    then printed to standard error).

    """
    try:
        handler = getattr(self, 'transform_%s' % node.kind.name.lower())
    except AttributeError:
        spelled = ' '.join(t.spelling for t in node.get_tokens())
        print(
            "Ignoring node of type %s (%s)" % (node.kind, spelled),
            file=sys.stderr
        )
        return None

    if not handler:
        return None
    return handler(node)
def transform_var_decl(self, node):
    """Transformation function for variable declarations.

    Builds the declaration node for a variable, including its initial value
    when the Clang node carries a supported initializer.

    Returns
    =======

    A Declaration of the variable. The initial value is included when an
    initializer is present; for a function-call initializer the declaration
    carries only the call as value.

    Raises
    ======

    NotImplementedError : if the variable type is not a supported bool,
    integer or floating point type, or if the initializer is of a kind that
    cannot be converted

    Notes
    =====

    Supported types are the ones listed in the converter's type tables:

    Boolean: bool, _Bool

    Integer: signed/unsigned char, short, int and long

    Floating point: float, double and long double

    """
    # Find the first category (in the order int, float, bool) that knows
    # the declared type.
    for category in ("int", "float", "bool"):
        if node.type.kind in self._data_types[category]:
            var_type = self._data_types[category][node.type.kind]
            break
    else:
        raise NotImplementedError("Only bool, int and float are supported")

    try:
        children = node.get_children()
        child = next(children)

        # Namespace and type references precede the initializer; skip them.
        while child.kind == cin.CursorKind.NAMESPACE_REF:
            child = next(children)
        while child.kind == cin.CursorKind.TYPE_REF:
            child = next(children)

        val = self.transform(child)

        convertible_kinds = (
            cin.CursorKind.INTEGER_LITERAL,
            cin.CursorKind.FLOATING_LITERAL,
            cin.CursorKind.UNEXPOSED_EXPR,
            cin.CursorKind.BINARY_OPERATOR,
            cin.CursorKind.PAREN_EXPR,
            cin.CursorKind.UNARY_OPERATOR,
            cin.CursorKind.CXX_BOOL_LITERAL_EXPR,
        )

        if child.kind in convertible_kinds:
            if isinstance(val, str):
                # A bare name refers to another variable.
                value = Symbol(val)
            elif isinstance(val, bool):
                # bool is tested before the numeric types because it is
                # itself an int subclass.
                if category == "int":
                    value = Integer(1) if val else Integer(0)
                elif category == "float":
                    value = Float(1.0) if val else Float(0.0)
                else:
                    value = sympify(val)
            elif isinstance(val, (Integer, int, Float, float)):
                # Coerce the number to the declared type's category.
                if category == "int":
                    value = Integer(val)
                elif category == "float":
                    value = Float(val)
                else:
                    value = sympify(bool(val))
            else:
                value = val

            return Variable(node.spelling).as_Declaration(
                type=var_type,
                value=value
            )

        if child.kind == cin.CursorKind.CALL_EXPR:
            return Variable(node.spelling).as_Declaration(value=val)

        source_text = " ".join(t.spelling for t in node.get_tokens())
        raise NotImplementedError(
            "Given variable declaration \"{}\" is not possible to parse yet!"
            .format(source_text))

    except StopIteration:
        # No initializer: plain typed declaration.
        return Variable(node.spelling).as_Declaration(type=var_type)
def transform_function_decl(self, node):
    """Transformation function for function declarations.

    Builds the node for a function declaration or definition found in the
    Clang AST.

    Returns
    =======

    function : Codegen AST node
        - FunctionPrototype node if no body statements were collected
        - FunctionDefinition node otherwise

    Raises
    ======

    NotImplementedError : if the return type is not a supported void, bool,
    integer or floating point type

    """
    for category in ("int", "float", "bool", "void"):
        table = self._data_types[category]
        if node.result_type.kind in table:
            ret_type = table[node.result_type.kind]
            break
    else:
        raise NotImplementedError(
            "Only void, bool, int and float are supported")

    body = []
    param = []

    # Running out of children anywhere below simply ends the collection.
    try:
        children = node.get_children()
        child = next(children)

        # The leading children describe the namespace and return type of
        # the declaration and carry nothing to convert.
        while child.kind == cin.CursorKind.NAMESPACE_REF:
            child = next(children)
        while child.kind == cin.CursorKind.TYPE_REF:
            child = next(children)

        # The remaining children are the parameters, then the body.
        while True:
            decl = self.transform(child)
            if child.kind == cin.CursorKind.PARM_DECL:
                param.append(decl)
            elif child.kind == cin.CursorKind.COMPOUND_STMT:
                body.extend(decl)
            else:
                body.append(decl)
            child = next(children)
    except StopIteration:
        pass

    if not body:
        return FunctionPrototype(
            return_type=ret_type,
            name=node.spelling,
            parameters=param
        )

    return FunctionDefinition(
        return_type=ret_type,
        name=node.spelling,
        parameters=param,
        body=body
    )
def transform_parm_decl(self, node):
    """Transformation function for parameter declarations.

    Builds the declaration node for one function parameter.

    Returns
    =======

    param : Codegen AST Node
        Declaration of the parameter with its type and, when present, its
        default value

    Raises
    ======

    NotImplementedError : if the parameter type is not a supported bool,
    integer or floating point type

    ValueError : if the parameter node has more than one value child

    """
    for category in ("int", "float", "bool"):
        if node.type.kind in self._data_types[category]:
            param_type = self._data_types[category][node.type.kind]
            break
    else:
        raise NotImplementedError("Only bool, int and float are supported")

    try:
        children = node.get_children()
        child = next(children)

        # Namespace, type and template references can be ignored.
        ignorable = [cin.CursorKind.NAMESPACE_REF,
                     cin.CursorKind.TYPE_REF,
                     cin.CursorKind.TEMPLATE_REF]
        while child.kind in ignorable:
            child = next(children)

        # A remaining child is the default value of the parameter.
        lit = self.transform(child)
        if category == "int":
            val = Integer(lit)
        elif category == "float":
            val = Float(lit)
        else:
            val = sympify(bool(lit))

        param = Variable(node.spelling).as_Declaration(
            type=param_type,
            value=val
        )
    except StopIteration:
        # No default value.
        param = Variable(node.spelling).as_Declaration(type=param_type)

    # Anything left after the default value is unexpected.
    try:
        self.transform(next(children))
        raise ValueError("Can't handle multiple children on parameter")
    except StopIteration:
        pass

    return param
def transform_integer_literal(self, node):
    """Transformation function for integer literals.

    Reads the spelling of the literal's first token (falling back to
    ``node.literal`` when the node has no tokens) and converts it to a
    Python int.

    Returns
    =======

    val : int
        The value of the literal

    Notes
    =====

    Decimal, hexadecimal (``0x``), binary (``0b``) and octal (leading
    ``0``) spellings are understood; integer suffixes (``u``, ``l`` and
    combinations) and digit separators are ignored.

    """
    try:
        value = next(node.get_tokens()).spelling
    except StopIteration:
        # No tokens
        value = node.literal

    if not isinstance(value, str):
        return int(value)

    # Drop C++14 digit separators and u/U/l/L suffixes. Hex digits never
    # include these letters, so stripping is safe for every base.
    text = value.replace("'", "").rstrip("uUlL")
    if text[:2].lower() in ("0x", "0b"):
        # Base 0 lets int() honour the prefix.
        return int(text, 0)
    if len(text) > 1 and text[0] == "0" and text.isdigit():
        # A leading zero marks an octal literal in C.
        return int(text, 8)
    return int(text)
def transform_floating_literal(self, node):
    """Transformation function for floating point literals.

    Reads the spelling of the literal's first token (falling back to
    ``node.literal`` when the node has no tokens) and converts it to a
    Python float.

    Returns
    =======

    val : float
        The value of the literal

    Notes
    =====

    Floating suffixes (``f``, ``F``, ``l``, ``L``) and digit separators are
    ignored; hexadecimal floating literals are converted with
    ``float.fromhex``.

    """
    try:
        value = next(node.get_tokens()).spelling
    except (StopIteration, ValueError):
        # No tokens
        value = node.literal

    if isinstance(value, str) and (value[:1].isdigit() or value[:1] == "."):
        # Only numeric spellings are cleaned up. A hex float always ends
        # with a decimal exponent, so a trailing f/l is a suffix, never a
        # hex digit.
        text = value.replace("'", "").rstrip("fFlL")
        if text[:2].lower() == "0x":
            return float.fromhex(text)
        return float(text)
    return float(value)
def transform_string_literal(self, node):
    """Placeholder for string literals; nothing is converted.

    Returns
    =======

    None : NoneType
        Always, regardless of the node

    """
    # TODO: No string type in AST
    return None
def transform_character_literal(self, node):
    """Transformation function for character literals.

    Reads the spelling of the literal's first token (falling back to
    ``node.literal`` when the node has no tokens) and returns the code of
    the character it denotes.

    Returns
    =======

    val : int
        The character code of the literal

    Notes
    =====

    Only useful where a character is assigned to an integer, since the
    SymPy AST has no character literal. Encoding prefixes (as in ``L'a'``)
    are ignored and simple, octal, hexadecimal and universal escape
    sequences are decoded.

    """
    try:
        value = next(node.get_tokens()).spelling
    except (StopIteration, ValueError):
        # No tokens
        value = node.literal

    text = str(value)
    opening = text.find("'")
    closing = text.rfind("'")
    body = text[opening + 1:closing] if 0 <= opening < closing else ""
    if not body:
        # Not a quoted spelling: keep the historical "second character" rule.
        return ord(str(value[1]))

    if body[0] != "\\" or len(body) == 1:
        return ord(body[0])

    # Escape sequence: decode what follows the backslash.
    escape = body[1:]
    simple_escapes = {
        "n": 10, "t": 9, "r": 13, "a": 7, "b": 8, "f": 12, "v": 11,
        "\\": 92, "'": 39, '"': 34, "?": 63,
    }
    if escape in simple_escapes:
        return simple_escapes[escape]
    if escape[0] in "xuU" and len(escape) > 1:
        return int(escape[1:], 16)
    if all(ch in "01234567" for ch in escape):
        return int(escape, 8)
    # Unknown escape: treat the escaped character as itself.
    return ord(escape[0])
def transform_cxx_bool_literal_expr(self, node):
    """Transformation function for boolean literals.

    Reads the spelling of the literal's first token, falling back to
    ``node.literal`` when the node has no tokens.

    Returns
    =======

    value : bool
        True when the spelling equals ``'true'``, False otherwise

    """
    try:
        spelled = next(node.get_tokens()).spelling
    except (StopIteration, ValueError):
        spelled = node.literal

    return bool(spelled == 'true')
def transform_unexposed_decl(self, node):
    """Transformation function for unexposed declarations.

    Unexposed declarations are not converted; the method always returns
    None.
    """
    return None
def transform_unexposed_expr(self, node):
    """Transformation function for unexposed expressions.

    An unexposed expression is treated as a transparent wrapper: the result
    of transforming its single child is passed through unchanged.

    Returns
    =======

    expr : Codegen AST Node
        the result from the wrapped expression

    None : NoneType
        if the node has no children

    Raises
    ======

    ValueError : if the expression has more than one child

    """
    try:
        children = node.get_children()
        expr = self.transform(next(children))
    except StopIteration:
        return None

    # The wrapper is expected to hold exactly one child.
    nothing_left = object()
    if next(children, nothing_left) is not nothing_left:
        raise ValueError("Unexposed expression has > 1 children.")

    return expr
def transform_decl_ref_expr(self, node):
    """Transformation function for declaration references.

    Returns
    =======

    name : the ``spelling`` of the node, i.e. the name being referenced

    """
    name = node.spelling
    return name
def transform_call_expr(self, node):
    """Transformation function for call expressions.

    Builds the function call node for a call present in the C code.

    Returns
    =======

    FunctionCall : Codegen AST Node
        The call, built from the transformed first child (the callee) and
        the list of transformed arguments

    """
    children = node.get_children()
    child = next(children)

    # Namespace and type references precede the callee; skip them.
    while child.kind == cin.CursorKind.NAMESPACE_REF:
        child = next(children)
    while child.kind == cin.CursorKind.TYPE_REF:
        child = next(children)

    callee = self.transform(child)

    try:
        arguments = []
        for arg_node in children:
            arg = self.transform(arg_node)
            # Plain numeric literals are wrapped in SymPy number types.
            if arg_node.kind == cin.CursorKind.INTEGER_LITERAL:
                arg = Integer(arg)
            elif arg_node.kind == cin.CursorKind.FLOATING_LITERAL:
                arg = Float(arg)
            arguments.append(arg)
        return FunctionCall(callee, arguments)
    except StopIteration:
        return FunctionCall(callee)
def transform_return_stmt(self, node):
    """Transformation function for return statements.

    Returns
    =======

    Return : Codegen AST Node
        Return node built from the ``spelling`` of the statement's first
        child

    """
    returned = next(node.get_children())
    return Return(returned.spelling)
def transform_compound_stmt(self, node):
    """Transformation function for compound statements.

    Returns
    =======

    expr : list
        The transformed children of the statement, in order (an empty list
        when the statement has no children)

    None : NoneType
        if a StopIteration escapes while the children are transformed

    """
    try:
        return [self.transform(child) for child in node.get_children()]
    except StopIteration:
        return None
def transform_decl_stmt(self, node):
    """Transformation function for declaration statements.

    A declaration statement wraps a declaration such as a variable or
    function declaration; the wrapped child is transformed and its result
    returned.

    Returns
    =======

    statement : Codegen AST Node
        the node produced for the wrapped declaration

    None : NoneType
        if the statement has no children

    Raises
    ======

    ValueError : if the statement has more than one child

    """
    # Default for a statement without children; previously the name was
    # left unbound in that case and the final return failed.
    statement = None
    children = node.get_children()

    try:
        statement = self.transform(next(children))
    except StopIteration:
        pass

    # A second child means several declarations in one statement, which is
    # not supported.
    try:
        self.transform(next(children))
        raise ValueError("Don't know how to handle multiple statements")
    except StopIteration:
        pass

    return statement
def transform_paren_expr(self, node):
    """Transformation function for parenthesized expressions.

    Returns the result of transforming the first child of the node.
    """
    inner = next(node.get_children())
    return self.transform(inner)
def transform_compound_assignment_operator(self, node):
    """Transformation function for shorthand (compound) assignments.

    The node is handed to ``transform_binary_operator``, which does all the
    work.

    Returns
    =======

    augmented_assignment_expression : Codegen AST node
        whatever ``transform_binary_operator`` returns for the node

    Raises
    ======

    NotImplementedError : raised by ``transform_binary_operator``, e.g. for
    the shorthand bitwise operators (^=, &=, |=, <<=, >>=)

    """
    delegate = self.transform_binary_operator
    return delegate(node)
def transform_unary_operator(self, node):
    """Transformation function for unary operators.

    Prefix ``+``, ``-``, ``++``, ``--`` and ``!`` as well as postfix ``++``
    and ``--`` are handled.

    Returns
    =======

    unary_expression : Codegen AST node
        the expression for the operator applied to its operand; None for a
        prefix ``++``/``--`` whose operand is not a plain name

    Raises
    ======

    NotImplementedError : for any other operator, such as the dereferencing
    operator (*), the address operator (&) or bitwise NOT (~)

    """
    tokens = list(node.get_tokens())
    prefix = tokens[0].spelling

    # Prefix form: the operator is the first token.
    if prefix in ('+', '-', '++', '--', '!'):
        operand = self.transform(next(node.get_children()))

        if isinstance(operand, str):
            # The operand is a plain name, e.g. ``++b`` or ``-b``.
            symbol = Symbol(operand)
            if prefix == '+':
                return symbol
            if prefix == '-':
                return Mul(symbol, -1)
            if prefix == '++':
                return PreIncrement(symbol)
            if prefix == '--':
                return PreDecrement(symbol)
            return Not(symbol)

        # The operand is a value or expression, e.g. ``-1`` or ``-(1 + 2)``.
        if prefix == '+':
            return operand
        if prefix == '-':
            return Mul(operand, -1)
        if prefix == '!':
            return Not(sympify(bool(operand)))
        return None

    # Postfix form: the first token is the operand, the second the operator.
    postfix = tokens[1].spelling
    if postfix in ('++', '--'):
        operand = self.transform(next(node.get_children()))
        if postfix == '++':
            return PostIncrement(Symbol(operand))
        return PostDecrement(Symbol(operand))

    raise NotImplementedError(
        "Dereferencing operator, "
        "Address operator and bitwise NOT operator "
        "have not been implemented yet!")
def transform_binary_operator(self, node):
    """Transformation function for binary operators.

    The tokens of the expression are evaluated with two stacks, one for
    operands and one for pending operators; operators are applied through
    ``perform_operation`` in the order given by ``priority_of``.

    Returns
    =======

    binary_expression : Codegen AST node
        the expression left on top of the operand stack once every token
        has been processed

    Raises
    ======

    NotImplementedError : if a bitwise operator, a shorthand bitwise
    operator, an unsupported token or a unary operator inside the expression
    is encountered

    """
    tokens = list(node.get_tokens())

    supported_operators = ['+', '-', '*', '/', '%', '=',
                           '>', '>=', '<', '<=', '==', '!=', '&&', '||',
                           '+=', '-=', '*=', '/=', '%=']

    # Each operand is a two-item list: [content, kind of content].
    operands = []
    # Operators (and opening parentheses) waiting to be applied.
    pending = []

    def apply_top_operator():
        """Apply the newest pending operator to the two newest operands."""
        if len(operands) < 2:
            raise NotImplementedError(
                "Unary operators as a part of "
                "binary operators is not "
                "supported yet!")
        right = operands.pop()
        left = operands.pop()
        operands.append(self.perform_operation(left, right, pending.pop()))

    for token in tokens:
        spelling = token.spelling

        if token.kind == cin.TokenKind.PUNCTUATION:
            if spelling == '(':
                pending.append('(')

            elif spelling == ')':
                # Resolve everything back to the matching '(' ...
                while pending and pending[-1] != '(':
                    apply_top_operator()
                # ... and discard the '(' itself.
                pending.pop()

            elif spelling in supported_operators:
                # Operators of equal or higher priority are applied first.
                while (pending and
                        self.priority_of(spelling)
                        <= self.priority_of(pending[-1])):
                    apply_top_operator()
                pending.append(spelling)

            elif spelling in ['&', '|', '^', '<<', '>>']:
                raise NotImplementedError(
                    "Bitwise operator has not been "
                    "implemented yet!")

            elif spelling in ['&=', '|=', '^=', '<<=', '>>=']:
                raise NotImplementedError(
                    "Shorthand bitwise operator has not been "
                    "implemented yet!")

            else:
                raise NotImplementedError(
                    "Given token {} is not implemented yet!"
                    .format(spelling))

        elif token.kind == cin.TokenKind.IDENTIFIER:
            operands.append([spelling, 'identifier'])

        elif token.kind == cin.TokenKind.LITERAL:
            operands.append([spelling, 'literal'])

        elif (token.kind == cin.TokenKind.KEYWORD
                and spelling in ['true', 'false']):
            operands.append([spelling, 'boolean'])

        else:
            raise NotImplementedError(
                "Given token {} is not implemented yet!"
                .format(spelling))

    # Apply whatever operators are still pending.
    while pending:
        apply_top_operator()

    return operands[-1][0]
def priority_of(self, op):
    """Return the binding priority of a binary operator.

    A larger number binds tighter. The levels follow C operator precedence:
    assignment < ``||`` < ``&&`` < equality < relational < additive <
    multiplicative.

    Parameters
    ==========

    op : string
        Spelling of the operator

    Returns
    =======

    priority : int
        The priority of the operator, or 0 for anything that is not a
        supported operator (such as an opening parenthesis)

    """
    # '&&' must bind tighter than '||', and the relational operators tighter
    # than '==' / '!='; giving them shared levels grouped mixed expressions
    # such as ``a || b && c`` from the left.
    levels = (
        ('=', '+=', '-=', '*=', '/=', '%='),
        ('||',),
        ('&&',),
        ('==', '!='),
        ('<', '<=', '>', '>='),
        ('+', '-'),
        ('*', '/', '%'),
    )
    for priority, operators in enumerate(levels, start=1):
        if op in operators:
            return priority
    return 0
def perform_operation(self, lhs, rhs, op):
    """Combine two operands with a binary operator.

    Both operands are first converted with ``get_expr_for_operand``.

    Parameters
    ==========

    lhs, rhs : list
        Operands in the two-item form [content, kind of content]

    op : string
        Spelling of the operator

    Returns
    =======

    combined_variable : list
        Two-item list holding the resulting expression and the tag
        ``'expr'``; None if the operator is not handled

    """
    left = self.get_expr_for_operand(lhs)
    right = self.get_expr_for_operand(rhs)

    if op in ['<', '<=', '>', '>=', '==', '!=']:
        result = Rel(left, right, op)
    elif op in ['+=', '-=', '*=', '/=', '%=']:
        # The first character of the spelling is the arithmetic operator.
        result = aug_assign(Variable(left), op[0], right)
    elif op == '=':
        result = Assignment(Variable(left), right)
    elif op == '+':
        result = Add(left, right)
    elif op == '-':
        result = Add(left, -right)
    elif op == '*':
        result = Mul(left, right)
    elif op == '/':
        result = Mul(left, Pow(right, Integer(-1)))
    elif op == '%':
        result = Mod(left, right)
    elif op == '&&':
        result = And(as_Boolean(left), as_Boolean(right))
    elif op == '||':
        result = Or(as_Boolean(left), as_Boolean(right))
    else:
        return None

    return [result, 'expr']
def get_expr_for_operand(self, combined_variable):
    """Convert an operand of a binary expression to a SymPy object.

    Parameters
    ==========

    combined_variable : list
        Two-item list holding the operand's content and the kind of the
        content: ``'identifier'``, ``'literal'``, ``'expr'`` or
        ``'boolean'``

    Returns
    =======

    A Symbol for an identifier, an Integer or Float for a literal, the
    stored expression for an ``'expr'`` and ``true``/``false`` for a
    boolean; None for any other kind.

    Notes
    =====

    Numeric literals may carry C suffixes (``u``, ``l``, ``f``) and may be
    written in decimal, octal (leading ``0``), hexadecimal or binary
    notation, with or without an exponent.

    """
    content, kind = combined_variable[0], combined_variable[1]

    if kind == 'identifier':
        return Symbol(content)

    if kind == 'literal':
        lowered = content.lower()
        if lowered.startswith('0x'):
            if 'p' in lowered:
                # Hex float: the exponent is decimal, so a trailing f/l can
                # only be a suffix.
                return Float(float.fromhex(content.rstrip('fFlL')))
            # Hex digits include 'f', so only u/l suffixes are stripped.
            return Integer(int(content.rstrip('uUlL'), 16))
        if lowered.startswith('0b'):
            return Integer(int(content.rstrip('uUlL'), 0))
        if '.' in content or 'e' in lowered:
            return Float(float(content.rstrip('fFlL')))
        digits = content.rstrip('uUlL')
        if len(digits) > 1 and digits[0] == '0' and digits.isdigit():
            # A leading zero marks an octal literal in C.
            return Integer(int(digits, 8))
        return Integer(int(digits))

    if kind == 'expr':
        return content

    if kind == 'boolean':
        return true if content == 'true' else false
def transform_null_stmt(self, node):
    """Transformation function for null statements.

    Returns
    =======

    The ``none`` object imported from ``sympy.codegen.ast``, regardless of
    the node.

    """
    empty_statement = none
    return empty_statement
def transform_while_stmt(self, node):
    """Transformation function for while statements.

    The first child of the node is taken as the loop condition and the
    second as the loop body.

    Returns
    =======

    while statement : Codegen AST Node
        While node holding the condition and the body as a CodeBlock

    """
    parts = node.get_children()
    condition = self.transform(next(parts))
    body = self.transform(next(parts))

    # A single statement is wrapped so it can be unpacked like a list.
    if not isinstance(body, list):
        body = [body]

    return While(condition, CodeBlock(*body))
else:
    # ``cin`` is falsy here, i.e. import_module did not provide
    # ``clang.cindex``. This stand-in keeps the name CCodeConverter defined.
    class CCodeConverter():  # type: ignore
        def __init__(self, *args, **kwargs):
            # Any attempt to create a converter fails, whatever the arguments.
            raise ImportError("Module not Installed")
def parse_c(source):
    """Convert C source code given as a file path or as a string.

    If ``source`` names an existing path it is parsed as a file; otherwise
    it is treated as the source code itself. No extra flags are passed to
    the converter.

    Parameters
    ==========

    source : string
        Path to a C file, or C source code

    Returns
    =======

    src : list
        The list returned by the converter's ``parse`` or ``parse_str``

    """
    converter = CCodeConverter()
    reader = converter.parse if os.path.exists(source) else converter.parse_str
    return reader(source, flags=[])
|