diff --git a/src/solvers/Makefile b/src/solvers/Makefile index f1484b6b695..e49c9d3c3f6 100644 --- a/src/solvers/Makefile +++ b/src/solvers/Makefile @@ -203,6 +203,7 @@ SRC = $(BOOLEFORCE_SRC) \ smt2/smt2_conv.cpp \ smt2/smt2_dec.cpp \ smt2/smt2_parser.cpp \ + smt2/smt2_tokenizer.cpp \ smt2/smt2irep.cpp \ # Empty last line diff --git a/src/solvers/smt2/smt2_tokenizer.cpp b/src/solvers/smt2/smt2_tokenizer.cpp new file mode 100644 index 00000000000..ec6c3181fc3 --- /dev/null +++ b/src/solvers/smt2/smt2_tokenizer.cpp @@ -0,0 +1,286 @@ +/*******************************************************************\ + +Module: + +Author: Daniel Kroening, kroening@kroening.com + +\*******************************************************************/ + +#include "smt2_tokenizer.h" + +#include + +bool smt2_tokenizert::is_simple_symbol_character(char ch) +{ + // any non-empty sequence of letters, digits and the characters + // ~ ! @ $ % ^ & * _ - + = < > . ? / + // that does not start with a digit and is not a reserved word. + + return isalnum(ch) || + ch=='~' || ch=='!' || ch=='@' || ch=='$' || ch=='%' || + ch=='^' || ch=='&' || ch=='*' || ch=='_' || ch=='-' || + ch=='+' || ch=='=' || ch=='<' || ch=='>' || ch=='.' || + ch=='?' || ch=='/'; +} + +smt2_tokenizert::tokent smt2_tokenizert::get_simple_symbol() +{ + // any non-empty sequence of letters, digits and the characters + // ~ ! @ $ % ^ & * _ - + = < > . ? / + // that does not start with a digit and is not a reserved word. + + buffer.clear(); + + char ch; + while(in->get(ch)) + { + if(is_simple_symbol_character(ch)) + { + buffer+=ch; + } + else + { + in->unget(); // put back + return SYMBOL; + } + } + + // eof -- this is ok here + if(buffer.empty()) + return END_OF_FILE; + else + return SYMBOL; +} + +smt2_tokenizert::tokent smt2_tokenizert::get_decimal_numeral() +{ + // we accept any sequence of digits and dots + + buffer.clear(); + + char ch; + while(in->get(ch)) + { + if(isdigit(ch) || ch=='.') + { + buffer+=ch; + } + else + { + in->unget(); // put back + return NUMERAL; + } + } + + // eof -- this is ok here + if(buffer.empty()) + return END_OF_FILE; + else + return NUMERAL; +} + +smt2_tokenizert::tokent smt2_tokenizert::get_bin_numeral() +{ + // we accept any sequence of '0' or '1' + + buffer.clear(); + buffer+='#'; + buffer+='b'; + + char ch; + while(in->get(ch)) + { + if(ch=='0' || ch=='1') + { + buffer+=ch; + } + else + { + in->unget(); // put back + return NUMERAL; + } + } + + // eof -- this is ok here + if(buffer.empty()) + return END_OF_FILE; + else + return NUMERAL; +} + +smt2_tokenizert::tokent smt2_tokenizert::get_hex_numeral() +{ + // we accept any sequence of '0'-'9', 'a'-'f', 'A'-'F' + + buffer.clear(); + buffer+='#'; + buffer+='x'; + + char ch; + while(in->get(ch)) + { + if(isxdigit(ch)) + { + buffer+=ch; + } + else + { + in->unget(); // put back + return NUMERAL; + } + } + + // eof -- this is ok here + if(buffer.empty()) + return END_OF_FILE; + else + return NUMERAL; +} + +smt2_tokenizert::tokent smt2_tokenizert::get_quoted_symbol() +{ + // any sequence of printable ASCII characters (including space, + // tab, and line-breaking characters) except for the backslash + // character \, that starts and ends with | and does not otherwise + // contain | + + buffer.clear(); + + char ch; + while(in->get(ch)) + { + if(ch=='|') + return SYMBOL; // done + + buffer+=ch; + + if(ch=='\n') + line_no++; + } + + // Hmpf. Eof before end of quoted symbol. This is an error. + error() << "EOF within quoted symbol" << eom; + return ERROR; +} + +smt2_tokenizert::tokent smt2_tokenizert::get_string_literal() +{ + buffer.clear(); + + char ch; + while(in->get(ch)) + { + if(ch=='"') + { + // quotes may be escaped by repeating + if(in->get(ch)) + { + if(ch=='"') + { + } + else + { + in->unget(); + return STRING_LITERAL; // done + } + } + else + return STRING_LITERAL; // done + } + buffer+=ch; + } + + // Hmpf. Eof before end of string literal. This is an error. + error() << "EOF within string literal" << eom; + return ERROR; +} + +smt2_tokenizert::tokent smt2_tokenizert::next_token() +{ + if(peeked) + return peeked=false, token; + + char ch; + + while(in->get(ch)) + { + switch(ch) + { + case '\n': + line_no++; + break; + + case ' ': + case '\r': + case '\t': + case static_cast(160): // non-breaking space + // skip any whitespace + break; + + case ';': // comment + // skip until newline + while(in->get(ch) && ch!='\n') + { + // ignore + } + break; + + case '(': + // produce sub-expression + return token=OPEN; + + case ')': + // done with sub-expression + return token=CLOSE; + + case '|': // quoted symbol + return token=get_quoted_symbol(); + + case '"': // string literal + return token=get_string_literal(); + + case ':': // keyword + return token=get_simple_symbol(); + + case '#': + if(in->get(ch)) + { + if(ch=='b') + return token=get_bin_numeral(); + else if(ch=='x') + return token=get_hex_numeral(); + else + { + error() << "unknown numeral token" << eom; + return token=ERROR; + } + } + else + { + error() << "unexpected EOF in numeral token" << eom; + return token=ERROR; + } + break; + + default: // likely a simple symbol or a numeral + if(isdigit(ch)) + { + in->unget(); + return token=get_decimal_numeral(); + } + else if(is_simple_symbol_character(ch)) + { + in->unget(); + return token=get_simple_symbol(); + } + else + { + // illegal character, error + error() << "unexpected character `" << ch << '\'' << eom; + return token=ERROR; + } + } + } + + return token=END_OF_FILE; +} diff --git a/src/solvers/smt2/smt2_tokenizer.h b/src/solvers/smt2/smt2_tokenizer.h new file mode 100644 index 00000000000..328c5d8fcb1 --- /dev/null +++ b/src/solvers/smt2/smt2_tokenizer.h @@ -0,0 +1,69 @@ +/*******************************************************************\ + +Module: + +Author: Daniel Kroening, kroening@kroening.com + +\*******************************************************************/ + +#ifndef CPROVER_SOLVERS_SMT2_SMT2_TOKENIZER_H +#define CPROVER_SOLVERS_SMT2_SMT2_TOKENIZER_H + +#include + +#include + +class smt2_tokenizert:public parsert +{ +public: + explicit smt2_tokenizert(std::istream &_in): + ok(true), peeked(false), token(NONE) + { + in=&_in; + line_no=1; + } + + operator bool() + { + return ok; + } + +protected: + std::string buffer; + bool ok, peeked; + using tokent=enum { NONE, END_OF_FILE, ERROR, STRING_LITERAL, + NUMERAL, SYMBOL, OPEN, CLOSE }; + tokent token; + + tokent next_token(); + + tokent peek() + { + if(peeked) + return token; + else + { + next_token(); + peeked=true; + return token; + } + } + + mstreamt &error() + { + ok=false; + messaget::error().source_location.set_line(line_no); + return messaget::error(); + } + +private: + tokent get_decimal_numeral(); + tokent get_hex_numeral(); + tokent get_bin_numeral(); + tokent get_simple_symbol(); + tokent get_quoted_symbol(); + tokent get_string_literal(); + static bool is_simple_symbol_character(char); +}; + +#endif // CPROVER_SOLVERS_SMT2_SMT2_PARSER_H diff --git a/src/solvers/smt2/smt2irep.cpp b/src/solvers/smt2/smt2irep.cpp index a549d558670..5964565c65f 100644 --- a/src/solvers/smt2/smt2irep.cpp +++ b/src/solvers/smt2/smt2irep.cpp @@ -11,76 +11,84 @@ Author: Daniel Kroening, kroening@kroening.com #include #include -#include "smt2_parser.h" +#include "smt2_tokenizer.h" -class smt2irept:public smt2_parsert +class smt2irept:public smt2_tokenizert { public: - explicit smt2irept(std::istream &_in):smt2_parsert(_in) + explicit smt2irept(std::istream &_in):smt2_tokenizert(_in) { } inline irept operator()() { - smt2_parsert::operator()(); + parse(); return result; } + bool parse() override; + protected: irept result; - std::stack stack; - - // overload from smt2_parsert - - virtual void symbol() - { - if(stack.empty()) - result=irept(buffer); - else - stack.top().get_sub().push_back(irept(buffer)); - } - - virtual void string_literal() - { - symbol(); // we don't distinguish - } - - virtual void numeral() - { - symbol(); // we don't distinguish - } - - // '(' - virtual void open_expression() - { - // produce sub-irep - stack.push(irept()); - } - - // ')' - virtual void close_expression() - { - // done with sub-irep - assert(!stack.empty()); // unexpected ) - - irept tmp=stack.top(); - stack.pop(); - - if(stack.empty()) - result=tmp; - else - stack.top().get_sub().push_back(tmp); - } +}; - virtual void keyword() - { - // ignore - } +bool smt2irept::parse() +{ + std::stack stack; + result.clear(); - virtual void error(const std::string &message) + while(true) { + switch(next_token()) + { + case END_OF_FILE: + error() << "unexpected end of file" << eom; + return true; + + case STRING_LITERAL: + case NUMERAL: + case SYMBOL: + if(stack.empty()) + { + result=irept(buffer); + return false; // all done! + } + else + stack.top().get_sub().push_back(irept(buffer)); + break; + + case OPEN: // '(' + // produce sub-irep + stack.push(irept()); + break; + + case CLOSE: // ')' + // done with sub-irep + if(stack.empty()) + { + error() << "unexpected ')'" << eom; + return true; + } + else + { + irept tmp=stack.top(); + stack.pop(); + + if(stack.empty()) + { + result=tmp; + return false; // all done! + } + + stack.top().get_sub().push_back(tmp); + break; + } + + default: + return true; + } } -}; +} irept smt2irep(std::istream &in) {