|
| 1 | +//===-- DILLexer.cpp ------------------------------------------------------===// |
| 2 | +// |
| 3 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | +// See https://llvm.org/LICENSE.txt for license information. |
| 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | +// |
// This implements the lexer (tokenizer) for the Data Inspection Language
// (DIL), and its helper functions, which will eventually underlie the
// 'frame variable' command. The language that this lexer recognizes is
// described in lldb/docs/dil-expr-lang.ebnf
| 11 | +// |
| 12 | +//===----------------------------------------------------------------------===// |
| 13 | + |
#include "lldb/ValueObject/DILLexer.h"
#include "llvm/ADT/StringMap.h"
#include <climits>
#include <cstdint>
#include <string>
#include <utility>
| 16 | + |
| 17 | +namespace lldb_private { |
| 18 | + |
| 19 | +namespace dil { |
| 20 | + |
// For fast keyword lookup. More keywords will be added later.
// Maps the spelling of each DIL keyword to its token kind; Lex() consults
// this table after recognizing a word and falls back to 'identifier' when
// the word is not found here.
const llvm::StringMap<dil::TokenKind> Keywords = {
    {"namespace", dil::TokenKind::kw_namespace},
};
| 25 | + |
| 26 | +const std::string DILToken::getTokenName(dil::TokenKind kind) { |
| 27 | + switch (kind) { |
| 28 | + case dil::TokenKind::coloncolon: |
| 29 | + return "coloncolon"; |
| 30 | + case dil::TokenKind::eof: |
| 31 | + return "eof"; |
| 32 | + case dil::TokenKind::identifier: |
| 33 | + return "identifier"; |
| 34 | + case dil::TokenKind::kw_namespace: |
| 35 | + return "namespace"; |
| 36 | + case dil::TokenKind::l_paren: |
| 37 | + return "l_paren"; |
| 38 | + case dil::TokenKind::r_paren: |
| 39 | + return "r_paren"; |
| 40 | + case dil::TokenKind::unknown: |
| 41 | + return "unknown"; |
| 42 | + default: |
| 43 | + return "token_name"; |
| 44 | + } |
| 45 | +} |
| 46 | + |
// Returns true iff 'c' is an ASCII letter ('a'..'z' or 'A'..'Z').
// Deliberately locale-independent (unlike std::isalpha), since DIL word
// characters are restricted to ASCII.
static bool Is_Letter(char c) {
  return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
}
| 52 | + |
| 53 | +static bool Is_Digit(char c) { return ('0' <= c && c <= '9'); } |
| 54 | + |
| 55 | +// A word starts with a letter, underscore, or dollar sign, followed by |
| 56 | +// letters ('a'..'z','A'..'Z'), digits ('0'..'9'), and/or underscores. |
| 57 | +bool DILLexer::Is_Word(std::string::iterator start, uint32_t &length) { |
| 58 | + bool done = false; |
| 59 | + bool dollar_start = false; |
| 60 | + |
| 61 | + // Must not start with a digit. |
| 62 | + if (m_cur_pos == m_expr.end() || Is_Digit(*m_cur_pos)) |
| 63 | + return false; |
| 64 | + |
| 65 | + // First character *may* be a '$', for a register name or convenience |
| 66 | + // variable. |
| 67 | + if (*m_cur_pos == '$') { |
| 68 | + dollar_start = true; |
| 69 | + ++m_cur_pos; |
| 70 | + length++; |
| 71 | + } |
| 72 | + |
| 73 | + // Contains only letters, digits or underscores |
| 74 | + for (; m_cur_pos != m_expr.end() && !done; ++m_cur_pos) { |
| 75 | + char c = *m_cur_pos; |
| 76 | + if (!Is_Letter(c) && !Is_Digit(c) && c != '_') { |
| 77 | + done = true; |
| 78 | + break; |
| 79 | + } else |
| 80 | + length++; |
| 81 | + } |
| 82 | + |
| 83 | + if (dollar_start && length > 1) // Must have something besides just '$' |
| 84 | + return true; |
| 85 | + |
| 86 | + if (!dollar_start && length > 0) |
| 87 | + return true; |
| 88 | + |
| 89 | + // Not a valid word, so re-set the lexing position. |
| 90 | + m_cur_pos = start; |
| 91 | + return false; |
| 92 | +} |
| 93 | + |
| 94 | +void DILLexer::UpdateLexedTokens(DILToken &result, dil::TokenKind tok_kind, |
| 95 | + std::string tok_str, uint32_t tok_pos) { |
| 96 | + DILToken new_token; |
| 97 | + result.setValues(tok_kind, tok_str, tok_pos); |
| 98 | + new_token = result; |
| 99 | + m_lexed_tokens.push_back(std::move(new_token)); |
| 100 | +} |
| 101 | + |
| 102 | +bool DILLexer::Lex(DILToken &result, bool look_ahead) { |
| 103 | + bool retval = true; |
| 104 | + |
| 105 | + if (!look_ahead) { |
| 106 | + // We're being asked for the 'next' token, and not a part of a LookAhead. |
| 107 | + // Check to see if we've already lexed it and pushed it onto our tokens |
| 108 | + // vector; if so, return the next token from the vector, rather than doing |
| 109 | + // more lexing. |
| 110 | + if ((m_tokens_idx != UINT_MAX) && |
| 111 | + (m_tokens_idx < m_lexed_tokens.size() - 1)) { |
| 112 | + result = m_lexed_tokens[m_tokens_idx + 1]; |
| 113 | + return retval; |
| 114 | + } |
| 115 | + } |
| 116 | + |
| 117 | + // Skip over whitespace (spaces). |
| 118 | + while (m_cur_pos != m_expr.end() && *m_cur_pos == ' ') |
| 119 | + m_cur_pos++; |
| 120 | + |
| 121 | + // Check to see if we've reached the end of our input string. |
| 122 | + if (m_cur_pos == m_expr.end()) { |
| 123 | + UpdateLexedTokens(result, dil::TokenKind::eof, "", m_expr.length()); |
| 124 | + return retval; |
| 125 | + } |
| 126 | + |
| 127 | + uint32_t position = m_cur_pos - m_expr.begin(); |
| 128 | + ; |
| 129 | + std::string::iterator start = m_cur_pos; |
| 130 | + uint32_t length = 0; |
| 131 | + if (Is_Word(start, length)) { |
| 132 | + dil::TokenKind kind; |
| 133 | + std::string word = m_expr.substr(position, length); |
| 134 | + auto iter = Keywords.find(word); |
| 135 | + if (iter != Keywords.end()) |
| 136 | + kind = iter->second; |
| 137 | + else |
| 138 | + kind = dil::TokenKind::identifier; |
| 139 | + |
| 140 | + UpdateLexedTokens(result, kind, word, position); |
| 141 | + return true; |
| 142 | + } |
| 143 | + |
| 144 | + switch (*m_cur_pos) { |
| 145 | + case '(': |
| 146 | + m_cur_pos++; |
| 147 | + UpdateLexedTokens(result, dil::TokenKind::l_paren, "(", position); |
| 148 | + return true; |
| 149 | + case ')': |
| 150 | + m_cur_pos++; |
| 151 | + UpdateLexedTokens(result, dil::TokenKind::r_paren, ")", position); |
| 152 | + return true; |
| 153 | + case ':': |
| 154 | + if (position + 1 < m_expr.size() && m_expr[position + 1] == ':') { |
| 155 | + m_cur_pos += 2; |
| 156 | + UpdateLexedTokens(result, dil::TokenKind::coloncolon, "::", position); |
| 157 | + return true; |
| 158 | + } |
| 159 | + break; |
| 160 | + default: |
| 161 | + break; |
| 162 | + } |
| 163 | + // Empty Token |
| 164 | + result.setValues(dil::TokenKind::none, "", m_expr.length()); |
| 165 | + return false; |
| 166 | +} |
| 167 | + |
/// Returns the token N positions beyond the current token, lexing additional
/// tokens on demand. Does not advance the current-token index. If the input
/// ends before N tokens ahead exist, returns the shared m_invalid_token.
const DILToken &DILLexer::LookAhead(uint32_t N) {
  // Number of already-lexed tokens sitting past the current one.
  // NOTE(review): this relies on unsigned wraparound when m_tokens_idx is
  // UINT_MAX (the "before the first token" state) -- confirm intended.
  uint32_t extra_lexed_tokens = m_lexed_tokens.size() - m_tokens_idx - 1;

  // Fast path: the requested token has already been lexed and cached.
  if (N + 1 < extra_lexed_tokens)
    return m_lexed_tokens[m_tokens_idx + N + 1];

  // How many more tokens must be lexed to reach position N.
  uint32_t remaining_tokens =
      (m_tokens_idx + N + 1) - m_lexed_tokens.size() + 1;

  // Lex forward (look_ahead mode, so Lex always appends fresh tokens)
  // until enough tokens exist or end-of-input is reached.
  bool done = false;
  bool look_ahead = true;
  while (!done && remaining_tokens > 0) {
    DILToken tok;
    Lex(tok, look_ahead);
    if (tok.getKind() == dil::TokenKind::eof)
      done = true;
    remaining_tokens--;
  };

  // Hit eof before lexing enough tokens: the look-ahead is unsatisfiable.
  if (remaining_tokens > 0) {
    m_invalid_token.setValues(dil::TokenKind::invalid, "", 0);
    return m_invalid_token;
  }

  return m_lexed_tokens[m_tokens_idx + N + 1];
}
| 194 | + |
| 195 | +const DILToken &DILLexer::AcceptLookAhead(uint32_t N) { |
| 196 | + if (m_tokens_idx + N + 1 > m_lexed_tokens.size()) |
| 197 | + return m_invalid_token; |
| 198 | + |
| 199 | + m_tokens_idx += N + 1; |
| 200 | + return m_lexed_tokens[m_tokens_idx]; |
| 201 | +} |
| 202 | + |
| 203 | +} // namespace dil |
| 204 | + |
| 205 | +} // namespace lldb_private |
0 commit comments