-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[LLDB] Add DIL code for handling plain variable names. #120971
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Add the Data Inspection Language (DIL) implementation pieces for handling plain local and global variable names. See https://discourse.llvm.org/t/rfc-data-inspection-language/69893 for information about DIL. This change includes the basic AST, Lexer, Parser and Evaluator pieces, as well as some tests.
@llvm/pr-subscribers-lldb Author: None (cmtice) ChangesAdd the Data Inspection Language (DIL) implementation pieces for handling plain local and global variable names. See https://discourse.llvm.org/t/rfc-data-inspection-language/69893 for information about DIL. This change includes the basic AST, Lexer, Parser and Evaluator pieces, as well as some tests. Patch is 63.08 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/120971.diff 20 Files Affected:
diff --git a/lldb/docs/dil-expr-lang.ebnf b/lldb/docs/dil-expr-lang.ebnf
new file mode 100644
index 00000000000000..64b3e7758229c2
--- /dev/null
+++ b/lldb/docs/dil-expr-lang.ebnf
@@ -0,0 +1,40 @@
+(* Data Inspection Language (DIL) definition - LLDB Debug Expressions *)
+
+(* This is currently a subset of the final DIL Language, matching the current
+ DIL implementation. *)
+
+expression = primary_expression ;
+
+primary_expression = id_expression
+ | "this"
+ | "(" expression ")";
+
+id_expression = unqualified_id
+ | qualified_id ;
+
+unqualified_id = identifier ;
+
+qualified_id = ["::"] [nested_name_specifier] unqualified_id
+ | ["::"] identifier ;
+
+identifier = ? dil::TokenKind::identifier ? ;
+
+nested_name_specifier = type_name "::"
+ | namespace_name '::'
+ | nested_name_specifier identifier "::" ;
+
+type_name = class_name
+ | enum_name
+ | typedef_name;
+
+class_name = identifier ;
+
+enum_name = identifier ;
+
+typedef_name = identifier ;
+
+namespace_name = identifier ;
+
+
+
+
diff --git a/lldb/include/lldb/ValueObject/DILAST.h b/lldb/include/lldb/ValueObject/DILAST.h
new file mode 100644
index 00000000000000..9f0a1a2221e388
--- /dev/null
+++ b/lldb/include/lldb/ValueObject/DILAST.h
@@ -0,0 +1,161 @@
+//===-- DILAST.h ------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_VALUEOBJECT_DILAST_H
+#define LLDB_VALUEOBJECT_DILAST_H
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "lldb/ValueObject/ValueObject.h"
+
+namespace lldb_private {
+
+namespace dil {
+
+/// The various types DIL AST nodes (used by the DIL parser).
+enum class NodeKind {
+ eErrorNode,
+ eIdentifierNode,
+};
+
+/// Class used to store & manipulate information about identifiers.
+class IdentifierInfo {
+public:
+ enum class Kind {
+ eValue,
+ eContextArg,
+ };
+
+ static std::unique_ptr<IdentifierInfo> FromValue(ValueObject &valobj) {
+ CompilerType type;
+ type = valobj.GetCompilerType();
+ return std::unique_ptr<IdentifierInfo>(
+ new IdentifierInfo(Kind::eValue, type, valobj.GetSP(), {}));
+ }
+
+ static std::unique_ptr<IdentifierInfo> FromContextArg(CompilerType type) {
+ lldb::ValueObjectSP empty_value;
+ return std::unique_ptr<IdentifierInfo>(
+ new IdentifierInfo(Kind::eContextArg, type, empty_value, {}));
+ }
+
+ Kind GetKind() const { return m_kind; }
+ lldb::ValueObjectSP GetValue() const { return m_value; }
+
+ CompilerType GetType() { return m_type; }
+ bool IsValid() const { return m_type.IsValid(); }
+
+ IdentifierInfo(Kind kind, CompilerType type, lldb::ValueObjectSP value,
+ std::vector<uint32_t> path)
+ : m_kind(kind), m_type(type), m_value(std::move(value)) {}
+
+private:
+ Kind m_kind;
+ CompilerType m_type;
+ lldb::ValueObjectSP m_value;
+};
+
+/// Given the name of an identifier (variable name, member name, type name,
+/// etc.), find the ValueObject for that name (if it exists) and create and
+/// return an IdentifierInfo object containing all the relevant information
+/// about that object (for DIL parsing and evaluating).
+std::unique_ptr<IdentifierInfo> LookupIdentifier(
+ const std::string &name, std::shared_ptr<ExecutionContextScope> ctx_scope,
+ lldb::DynamicValueType use_dynamic, CompilerType *scope_ptr = nullptr);
+
+/// Forward declaration, for use in DIL AST nodes. Definition is at the very
+/// end of this file.
+class Visitor;
+
+/// The rest of the classes in this file, except for the Visitor class at the
+/// very end, define all the types of AST nodes used by the DIL parser and
+/// expression evaluator. The DIL parser parses the input string and creates
+/// the AST parse tree from the AST nodes. The resulting AST node tree gets
+/// passed to the DIL expression evaluator, which evaluates the DIL AST nodes
+/// and creates/returns a ValueObjectSP containing the result.
+
+/// Base class for AST nodes used by the Data Inspection Language (DIL) parser.
+/// All of the specialized types of AST nodes inherit from this (virtual) base
+/// class.
+class DILASTNode {
+public:
+ DILASTNode(uint32_t location, NodeKind kind)
+ : m_location(location), m_kind(kind) {}
+ virtual ~DILASTNode() = default;
+
+ virtual void Accept(Visitor *v) const = 0;
+
+ uint32_t GetLocation() const { return m_location; }
+ NodeKind GetKind() const { return m_kind; }
+
+private:
+ uint32_t m_location;
+ const NodeKind m_kind;
+};
+
+using DILASTNodeUP = std::unique_ptr<DILASTNode>;
+
+class ErrorNode : public DILASTNode {
+public:
+ ErrorNode(CompilerType empty_type)
+ : DILASTNode(0, NodeKind::eErrorNode), m_empty_type(empty_type) {}
+ void Accept(Visitor *v) const override;
+
+ static bool classof(const DILASTNode *node) {
+ return node->GetKind() == NodeKind::eErrorNode;
+ }
+
+private:
+ CompilerType m_empty_type;
+};
+
+class IdentifierNode : public DILASTNode {
+public:
+ IdentifierNode(uint32_t location, std::string name,
+ lldb::DynamicValueType use_dynamic,
+ std::shared_ptr<ExecutionContextScope> exe_ctx_scope)
+ : DILASTNode(location, NodeKind::eIdentifierNode),
+ m_name(std::move(name)), m_use_dynamic(use_dynamic),
+ m_ctx_scope(exe_ctx_scope) {}
+
+ void Accept(Visitor *v) const override;
+
+ lldb::DynamicValueType use_dynamic() const { return m_use_dynamic; }
+ std::string name() const { return m_name; }
+ std::shared_ptr<ExecutionContextScope> get_exe_context() const {
+ return m_ctx_scope;
+ }
+
+ static bool classof(const DILASTNode *node) {
+ return node->GetKind() == NodeKind::eIdentifierNode;
+ }
+
+private:
+ std::string m_name;
+ lldb::DynamicValueType m_use_dynamic;
+ std::shared_ptr<ExecutionContextScope> m_ctx_scope;
+};
+
+/// This class contains one Visit method for each specialized type of
+/// DIL AST node. The Visit methods are used to dispatch a DIL AST node to
+/// the correct function in the DIL expression evaluator for evaluating that
+/// type of AST node.
+class Visitor {
+public:
+ virtual ~Visitor() = default;
+ virtual void Visit(const ErrorNode *node) = 0;
+ virtual void Visit(const IdentifierNode *node) = 0;
+};
+
+} // namespace dil
+
+} // namespace lldb_private
+
+#endif // LLDB_VALUEOBJECT_DILAST_H
diff --git a/lldb/include/lldb/ValueObject/DILEval.h b/lldb/include/lldb/ValueObject/DILEval.h
new file mode 100644
index 00000000000000..4006bb10630f24
--- /dev/null
+++ b/lldb/include/lldb/ValueObject/DILEval.h
@@ -0,0 +1,62 @@
+//===-- DILEval.h -----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_VALUEOBJECT_DILEVAL_H_
+#define LLDB_VALUEOBJECT_DILEVAL_H_
+
+#include <memory>
+#include <vector>
+
+#include "lldb/ValueObject/DILAST.h"
+#include "lldb/ValueObject/DILParser.h"
+
+namespace lldb_private {
+
+namespace dil {
+
+class DILInterpreter : Visitor {
+public:
+ DILInterpreter(lldb::TargetSP target, std::shared_ptr<std::string> sm);
+ DILInterpreter(lldb::TargetSP target, std::shared_ptr<std::string> sm,
+ lldb::ValueObjectSP scope);
+ DILInterpreter(lldb::TargetSP target, std::shared_ptr<std::string> sm,
+ lldb::DynamicValueType use_dynamic);
+
+ lldb::ValueObjectSP DILEval(const DILASTNode *tree, lldb::TargetSP target_sp,
+ Status &error);
+
+private:
+ lldb::ValueObjectSP DILEvalNode(const DILASTNode *node);
+
+ bool Success() { return m_error.Success(); }
+
+ void SetError(ErrorCode error_code, std::string error, uint32_t loc);
+
+ void Visit(const ErrorNode *node) override;
+ void Visit(const IdentifierNode *node) override;
+
+private:
+ // Used by the interpreter to create objects, perform casts, etc.
+ lldb::TargetSP m_target;
+
+ std::shared_ptr<std::string> m_sm;
+
+ lldb::ValueObjectSP m_result;
+
+ lldb::ValueObjectSP m_scope;
+
+ lldb::DynamicValueType m_default_dynamic;
+
+ Status m_error;
+};
+
+} // namespace dil
+
+} // namespace lldb_private
+
+#endif // LLDB_VALUEOBJECT_DILEVAL_H_
diff --git a/lldb/include/lldb/ValueObject/DILLexer.h b/lldb/include/lldb/ValueObject/DILLexer.h
new file mode 100644
index 00000000000000..c794fb2bfc0ed3
--- /dev/null
+++ b/lldb/include/lldb/ValueObject/DILLexer.h
@@ -0,0 +1,166 @@
+//===-- DILLexer.h ----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_VALUEOBJECT_DILLEXER_H_
+#define LLDB_VALUEOBJECT_DILLEXER_H_
+
+#include <limits.h>
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "llvm/ADT/StringRef.h"
+
+namespace lldb_private {
+
+namespace dil {
+
+enum class TokenKind {
+ coloncolon,
+ eof,
+ identifier,
+ invalid,
+ kw_namespace,
+ kw_this,
+ l_paren,
+ none,
+ r_paren,
+ unknown,
+};
+
+/// Class defining the tokens generated by the DIL lexer and used by the
+/// DIL parser.
+class DILToken {
+public:
+ DILToken(dil::TokenKind kind, std::string spelling, uint32_t start,
+ uint32_t len)
+ : m_kind(kind), m_spelling(spelling), m_start_pos(start), m_length(len) {}
+
+ DILToken()
+ : m_kind(dil::TokenKind::none), m_spelling(""), m_start_pos(0),
+ m_length(0) {}
+
+ void setKind(dil::TokenKind kind) { m_kind = kind; }
+ dil::TokenKind getKind() const { return m_kind; }
+
+ std::string getSpelling() const { return m_spelling; }
+
+ uint32_t getLength() const { return m_length; }
+
+ bool is(dil::TokenKind kind) const { return m_kind == kind; }
+
+ bool isNot(dil::TokenKind kind) const { return m_kind != kind; }
+
+ bool isOneOf(dil::TokenKind kind1, dil::TokenKind kind2) const {
+ return is(kind1) || is(kind2);
+ }
+
+ template <typename... Ts> bool isOneOf(dil::TokenKind kind, Ts... Ks) const {
+ return is(kind) || isOneOf(Ks...);
+ }
+
+ uint32_t getLocation() const { return m_start_pos; }
+
+ void setValues(dil::TokenKind kind, std::string spelling, uint32_t start,
+ uint32_t len) {
+ m_kind = kind;
+ m_spelling = spelling;
+ m_start_pos = start;
+ m_length = len;
+ }
+
+ static const std::string getTokenName(dil::TokenKind kind);
+
+private:
+ dil::TokenKind m_kind;
+ std::string m_spelling;
+ uint32_t m_start_pos; // within entire expression string
+ uint32_t m_length;
+};
+
+/// Class for doing the simple lexing required by DIL.
+class DILLexer {
+public:
+ DILLexer(std::shared_ptr<std::string> dil_sm) : m_expr(*dil_sm) {
+ m_cur_pos = m_expr.begin();
+ // Use UINT_MAX to indicate invalid/uninitialized value.
+ m_tokens_idx = UINT_MAX;
+ }
+
+ bool Lex(DILToken &result, bool look_ahead = false);
+
+ bool Is_Word(std::string::iterator start, uint32_t &length);
+
+ uint32_t GetLocation() { return m_cur_pos - m_expr.begin(); }
+
+ /// Update 'result' with the other paremeter values, create a
+ /// duplicate token, and push the duplicate token onto the vector of
+ /// lexed tokens.
+ void UpdateLexedTokens(DILToken &result, dil::TokenKind tok_kind,
+ std::string tok_str, uint32_t tok_pos,
+ uint32_t tok_len);
+
+ /// Return the lexed token N+1 positions ahead of the 'current' token
+ /// being handled by the DIL parser.
+ const DILToken &LookAhead(uint32_t N);
+
+ const DILToken &AcceptLookAhead(uint32_t N);
+
+ /// Return the index for the 'current' token being handled by the DIL parser.
+ uint32_t GetCurrentTokenIdx() { return m_tokens_idx; }
+
+ /// Return the current token to be handled by the DIL parser.
+ DILToken &GetCurrentToken() { return m_lexed_tokens[m_tokens_idx]; }
+
+ /// Update the index for the 'current' token, to point to the next lexed
+ /// token.
+ bool IncrementTokenIdx() {
+ if (m_tokens_idx >= m_lexed_tokens.size() - 1)
+ return false;
+
+ m_tokens_idx++;
+ return true;
+ }
+
+ /// Set the index for the 'current' token (to be handled by the parser)
+ /// to a particular position. Used for either committing 'look ahead' parsing
+ /// or rolling back tentative parsing.
+ bool ResetTokenIdx(uint32_t new_value) {
+ if (new_value > m_lexed_tokens.size() - 1)
+ return false;
+
+ m_tokens_idx = new_value;
+ return true;
+ }
+
+private:
+ // The input string we are lexing & parsing.
+ std::string m_expr;
+
+ // The current position of the lexer within m_expr (the character position,
+ // within the string, of the next item to be lexed).
+ std::string::iterator m_cur_pos;
+
+ // Holds all of the tokens lexed so far.
+ std::vector<DILToken> m_lexed_tokens;
+
+ // Index into m_lexed_tokens; indicates which token the DIL parser is
+ // currently trying to parse/handle.
+ uint32_t m_tokens_idx;
+
+ // "invalid" token; to be returned by lexer when 'look ahead' fails.
+ DILToken m_invalid_token;
+};
+
+} // namespace dil
+
+} // namespace lldb_private
+
+#endif // LLDB_VALUEOBJECT_DILLEXER_H_
diff --git a/lldb/include/lldb/ValueObject/DILParser.h b/lldb/include/lldb/ValueObject/DILParser.h
new file mode 100644
index 00000000000000..b718903b7bea49
--- /dev/null
+++ b/lldb/include/lldb/ValueObject/DILParser.h
@@ -0,0 +1,105 @@
+//===-- DILParser.h ---------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_VALUEOBJECT_DILPARSER_H_
+#define LLDB_VALUEOBJECT_DILPARSER_H_
+
+#include <memory>
+#include <optional>
+#include <string>
+#include <tuple>
+#include <vector>
+
+#include "lldb/Target/ExecutionContextScope.h"
+#include "lldb/Utility/Status.h"
+#include "lldb/ValueObject/DILAST.h"
+#include "lldb/ValueObject/DILLexer.h"
+
+namespace lldb_private {
+
+namespace dil {
+
+enum class ErrorCode : unsigned char {
+ kOk = 0,
+ kInvalidExpressionSyntax,
+ kUndeclaredIdentifier,
+ kUnknown,
+};
+
+std::string FormatDiagnostics(std::shared_ptr<std::string> input_expr,
+ const std::string &message, uint32_t loc);
+
+/// Pure recursive descent parser for C++ like expressions.
+/// EBNF grammar for the parser is described in lldb/docs/dil-expr-lang.ebnf
+class DILParser {
+public:
+ explicit DILParser(std::shared_ptr<std::string> dil_input_expr,
+ std::shared_ptr<ExecutionContextScope> exe_ctx_scope,
+ lldb::DynamicValueType use_dynamic, bool use_synthetic,
+ bool fragile_ivar, bool check_ptr_vs_member);
+
+ DILASTNodeUP Run(Status &error);
+
+ ~DILParser() { m_ctx_scope.reset(); }
+
+ bool UseSynthetic() { return m_use_synthetic; }
+
+ lldb::DynamicValueType UseDynamic() { return m_use_dynamic; }
+
+ using PtrOperator = std::tuple<dil::TokenKind, uint32_t>;
+
+private:
+ DILASTNodeUP ParseExpression();
+ DILASTNodeUP ParsePrimaryExpression();
+
+ std::string ParseNestedNameSpecifier();
+
+ std::string ParseIdExpression();
+ std::string ParseUnqualifiedId();
+
+ void ConsumeToken();
+
+ void BailOut(ErrorCode error_code, const std::string &error, uint32_t loc);
+
+ void BailOut(Status error);
+
+ void Expect(dil::TokenKind kind);
+
+ std::string TokenDescription(const DILToken &token);
+
+ template <typename... Ts> void ExpectOneOf(dil::TokenKind k, Ts... ks);
+
+ void TentativeParsingRollback(uint32_t saved_idx) {
+ m_error.Clear();
+ m_dil_lexer.ResetTokenIdx(saved_idx);
+ m_dil_token = m_dil_lexer.GetCurrentToken();
+ }
+
+ // Parser doesn't own the evaluation context. The produced AST may depend on
+ // it (for example, for source locations), so it's expected that expression
+ // context will outlive the parser.
+ std::shared_ptr<ExecutionContextScope> m_ctx_scope;
+
+ std::shared_ptr<std::string> m_input_expr;
+ // The token lexer is stopped at (aka "current token").
+ DILToken m_dil_token;
+ // Holds an error if it occures during parsing.
+ Status m_error;
+
+ lldb::DynamicValueType m_use_dynamic;
+ bool m_use_synthetic;
+ bool m_fragile_ivar;
+ bool m_check_ptr_vs_member;
+ DILLexer m_dil_lexer;
+}; // class DILParser
+
+} // namespace dil
+
+} // namespace lldb_private
+
+#endif // LLDB_VALUEOBJECT_DILPARSER_H_
diff --git a/lldb/source/Target/StackFrame.cpp b/lldb/source/Target/StackFrame.cpp
index 2633c976c13bf4..28450b1d1c1c1e 100644
--- a/lldb/source/Target/StackFrame.cpp
+++ b/lldb/source/Target/StackFrame.cpp
@@ -31,6 +31,8 @@
#include "lldb/Utility/LLDBLog.h"
#include "lldb/Utility/Log.h"
#include "lldb/Utility/RegisterValue.h"
+#include "lldb/ValueObject/DILEval.h"
+#include "lldb/ValueObject/DILParser.h"
#include "lldb/ValueObject/ValueObjectConstResult.h"
#include "lldb/ValueObject/ValueObjectMemory.h"
#include "lldb/ValueObject/ValueObjectVariable.h"
@@ -511,22 +513,58 @@ ValueObjectSP StackFrame::GetValueForVariableExpressionPath(
VariableSP &var_sp, Status &error) {
ExecutionContext exe_ctx;
CalculateExecutionContext(exe_ctx);
+
bool use_DIL = exe_ctx.GetTargetRef().GetUseDIL(&exe_ctx);
+
if (use_DIL)
return DILGetValueForVariableExpressionPath(var_expr, use_dynamic, options,
var_sp, error);
-
- return LegacyGetValueForVariableExpressionPath(var_expr, use_dynamic, options,
- var_sp, error);
+ else
+ return LegacyGetValueForVariableExpressionPath(var_expr, use_dynamic,
+ options, var_sp, error);
}
ValueObjectSP StackFrame::DILGetValueForVariableExpressionPath(
llvm::StringRef var_expr, lldb::DynamicValueType use_dynamic,
uint32_t options, lldb::VariableSP &var_sp, Status &error) {
- // This is a place-holder for the calls into the DIL parser and
- // evaluator. For now, just call the "real" frame variable implementation.
- return LegacyGetValueForVariableExpressionPath(var_expr, use_dynamic, options,
- var_sp, error);
+ ValueObjectSP ret_val;
+ std::shared_ptr<std::string> source =
+ std::make_shared<std::string>(var_expr.data());
+
+ const bool check_ptr_vs_member =
+ (options & eExpressionPathOptionCheckPtrVsMember) != 0;
+ const bool no_fragile_ivar =
+ (options & eExpressionPathOptionsNoFragileObjcIvar) != 0;
+ const bool no_synth_child =
+ (options & eExpressionPathOptionsNoSyntheticChildren) != 0;
+
+ // Parse the expression.
+ Status parse_error, eval_error;
+ dil::DILParser parser(source, shared_from_this(), use_dynamic,
+ !no_synth_child, !no_fragile_ivar, check_ptr_vs_member);
+ dil::DILASTNodeUP tree = parser.Run(parse_error);
+ if (parse_error.Fail()) {
+ error = std::move(parse_error);
+ return ValueObjectSP();
+ }
+
+ // Evaluate the parsed expression.
+ lldb::TargetSP target = this->CalculateTarget();
+ dil::DILInterpreter interpreter(target, source, use_dynamic);
+
+ ret_val = interpreter.DILEval(tree.get(), target, eval_error);
+ if (eval_error.Fail()) {
+ error = std::move(eval_error);
+ return ValueObjectSP();
+ }
+
+ if (ret_val) {
+ var_sp = ret_val->GetVariable();
+ if (!var_sp && ret_val->GetParent()) {
+ var_sp = ret_val->GetParent()->GetVariable();
+ }
+ }
+ return ret_val;
}
ValueObjectSP StackFrame::LegacyGetValueForVariableExpressionPath(
diff --git a/lldb/source/ValueObject/CMakeLists.txt b/lldb/source/...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
✅ With the latest revision this PR passed the Python code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a partial review. I ran out of steam when I reached the Lexer component. I really think that one should be a patch of its own because:
- all of the lookaheads and everything are sufficiently complicated to deserve the proper attention
- the implementation is really amenable to unit testing (all you need is a string as an input)
I also noticed that this implementation casts a much wider web when it comes to searching for global variables (it uses Target::FindGlobalVariables
, which searches the entire process, whereas the current implementation only uses variables from the current compile unit. I think it would be best to stick to the existing implementation for now (ideally by even reusing the GetInScopeVariableList
function), and saving the discussion about the pros and cons of changing that to a separate patch.
The reason for that is I can find at least two benefits to the current implementation (it's faster and it's guaranteed to return the same variable you would get by compiling the expression in the compiler). I think it'd be best if that was isolated from the more technical/mundane aspects of parsing an expression.
} | ||
|
||
private: | ||
CompilerType m_empty_type; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
??
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Removed.
lldb::DynamicValueType use_dynamic() const { return m_use_dynamic; } | ||
std::string name() const { return m_name; } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's use the lldb convention for this (GetUseDynamic, GetName)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
std::shared_ptr<ExecutionContextScope> get_exe_context() const { | ||
return m_ctx_scope; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think I understand the reason for this (you need to know which context to evaluate the name in), but I'm wondering if it should be a property of a specific node (instead of e.g. an argument to the Evaluate
function).
I think an interesting question here would be: do you expect a single AST to have nodes with different execution contexts. If so, how would that work?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think you're right; I have now made the ExecutionContextScope a member of the DILInterpreter.
/// Class used to store & manipulate information about identifiers. | ||
class IdentifierInfo { | ||
public: | ||
enum class Kind { | ||
eValue, | ||
eContextArg, | ||
}; | ||
|
||
static std::unique_ptr<IdentifierInfo> FromValue(ValueObject &valobj) { | ||
CompilerType type; | ||
type = valobj.GetCompilerType(); | ||
return std::unique_ptr<IdentifierInfo>( | ||
new IdentifierInfo(Kind::eValue, type, valobj.GetSP(), {})); | ||
} | ||
|
||
static std::unique_ptr<IdentifierInfo> FromContextArg(CompilerType type) { | ||
lldb::ValueObjectSP empty_value; | ||
return std::unique_ptr<IdentifierInfo>( | ||
new IdentifierInfo(Kind::eContextArg, type, empty_value, {})); | ||
} | ||
|
||
Kind GetKind() const { return m_kind; } | ||
lldb::ValueObjectSP GetValue() const { return m_value; } | ||
|
||
CompilerType GetType() { return m_type; } | ||
bool IsValid() const { return m_type.IsValid(); } | ||
|
||
IdentifierInfo(Kind kind, CompilerType type, lldb::ValueObjectSP value, | ||
std::vector<uint32_t> path) | ||
: m_kind(kind), m_type(type), m_value(std::move(value)) {} | ||
|
||
private: | ||
Kind m_kind; | ||
CompilerType m_type; | ||
lldb::ValueObjectSP m_value; | ||
}; | ||
|
||
/// Given the name of an identifier (variable name, member name, type name, | ||
/// etc.), find the ValueObject for that name (if it exists) and create and | ||
/// return an IdentifierInfo object containing all the relevant information | ||
/// about that object (for DIL parsing and evaluating). | ||
std::unique_ptr<IdentifierInfo> LookupIdentifier( | ||
const std::string &name, std::shared_ptr<ExecutionContextScope> ctx_scope, | ||
lldb::DynamicValueType use_dynamic, CompilerType *scope_ptr = nullptr); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this be in DILEval given that (I think) it's no longer used in ast construction?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
#include <memory> | ||
#include <string> | ||
#include <vector> | ||
|
||
#include "lldb/ValueObject/ValueObject.h" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The llvm style puts the more specific includes first, and doesn't use empty line separators. If you just remove the empty lines, clang-format will put this in the right order.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
lldb/source/ValueObject/DILLexer.cpp
Outdated
if (Is_Word(start, length)) { | ||
dil::TokenKind kind; | ||
std::string word = m_expr.substr(position, length); | ||
if (word == "this") |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can we avoid special handling for "this"?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We could...the main reason for handling it specially, is to make sure we treat it as an r-value, not an l-value. Would an acceptable alternative here be to ALSO look for "self" and treat it the same way?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think handling self
as well would make it better.
The question of modifying this
is interesting, but I don't think it should require special handling here. As far as modification is concerned, I think this
is not different from a variable of type (e.g.) const int
-- the language forbids you from modifying it, the debugger can do that anyway if it really wants to, but the result may not be what you expect:
Process 48439 stopped
* thread #1, name = 'a.out', stop reason = step over
frame #0: 0x000055555555520b a.out`X::foo(this=0x00007fffffffd7d7) at a.cc:7:5
4 public:
5 void foo() {
6 const int x = 47;
-> 7 printf("this = %p, x = %d\n", this, x);
8 }
9 };
10
(lldb) v this x
(X *) this = 0x00007fffffffd7d7
(const int) x = 47
(lldb) script lldb.frame.FindVariable("x").SetValueFromCString("74")
True
(lldb) script lldb.frame.FindVariable("this").SetValueFromCString("0")
True
(lldb) v this x
(X *) this = nullptr
(const int) x = 74
(lldb) c
Process 48439 resuming
this = 0x7fffffffd7d7, x = 47
Process 48439 exited with status = 0 (0x00000000)
Given that SBValue currently allows you to modify both variables, I think DIL (which is basically a domain-specific language for SBValue operations) should be doing the same. If that doesn't convince you, consider also this:
- gdb also allows you to modify
this
:
(gdb) set this = 0
(gdb) p this
$2 = (X *) 0x0
- if I compile the same binary with gcc, then the above lldb commands actually work (as in, the debugged binary "sees" the modified values of the two variables)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, I think I've removed all special handling for "this"/
lldb/source/ValueObject/DILLexer.cpp
Outdated
if (remaining_tokens > 0) { | ||
m_invalid_token.setValues(dil::TokenKind::invalid, "", 0, 0); | ||
return m_invalid_token; | ||
} else |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
} | ||
|
||
template <typename... Ts> | ||
void DILParser::ExpectOneOf(dil::TokenKind k, Ts... ks) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
unused?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah yes.. I missed that one (sorry); it is used in a later (more complex) version of the parser.
} | ||
|
||
std::string DILParser::TokenDescription(const DILToken &token) { | ||
const auto &spelling = ((DILToken)token).getSpelling(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
what's up with the cast?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Accidentally leftover from something else; will remove it.
return 0; // Set a breakpoint here | ||
} | ||
|
||
/* |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
??
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Removed.
Address many review comments. - Move class IdentifierInfo & funciton LookupIdentifier from DILAST to DILEval. - Fix include statement formatting. - Use .str() instead of .data() when converting StringRefs to strings. - Remove 'else' stmts immediatly following 'return' stmts. - Clean up some things accidentally left in the code. - Remove extra, unused DILInterpreter constructors. - Convert use of std::shared_ptr<std::string> to llvm::StringRef - Remove unnecessary length member from DILToken class - Remove currently ununsed function 'ExpectOneOf' from parser - Update grammar: Remove 'this', use 'C99 Identifier' to define identifier. - Rename use_dynamic() and name() to GetUseDynamic() and GetName() - Return strings directly rather than through a return variable - Remove special handling for "this" from Lexer & from Parser - Remove unneeded casts for DILToken (old leftover code) - Small cleanups in tests (removing unneeded comments)
I've addressed a lot of the review comments, but I'm still working on some. |
(Also, I will split the Lexer into a separate PR) |
Address the remaining review comments: - Remove code that updates var_sp. - Remove ExecutionContextScope member from IdentifierNode class; add to DILInte\ rpreter class. - Update DILInterpreter::EvalNode and DILParser::Run to return llvm::Expected v\ alues; update error handling accordingly. - Remove code to update var_sp from DILGetValueForVariableExpressionPath. - Remove special handling for "this" from DILEval; use GetInstanceVariableName \ instead.
Remove DILLexer from this PR; will create a separate PR with the DILLexer in it. Note I have *not* removed code that calls into or uses definitions from the DILLexer.
I think I have addressed all the review comments and this is ready for another review. Thanks! |
|
||
lldb::DynamicValueType m_default_dynamic; | ||
|
||
Status m_error; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there a good reason why you need a long-lived Status object here? We should try to replace every Status member with methods that return llvm::Error or llvm::Expected. Otherwise it's very easy for an error to get overwritten without being checked/consumed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It turns out that I can get by without using a Status member object in the DILIntepreter. I have updated the DILInterpreter code to use llvm::Expected instead.
The use of the Status member in the DILParser will be much harder to remove and replace with llvm::Expected. But the good news is that the m_error status member in DILParser is only ever updated by the BailOut methods, which also immediately terminate parsing by setting the current token to EOF, and the top level call (Run) checks the error before returning, so I am hoping that in this case I will be allowed to keep the Status member variable?
Among the reasons for not trying to convert all calls to BailOut to returning an llvm::Expected...: It's called from many functions with many different return types, including void. So at a minimum I would have to add return types to functions that currently don't return anything (as well as. changing the return types of nearly every function in the parser); I would have to do error type conversions in functions where they call something that fails with llvm::expected of type A but the calling function returns llvm::expected of type B. I would also have to add code to many of these sites to also update the current token to EOF, to terminate the parsing, not to mention the additional code at all the parse function call sites to see what value was returned & get the 'real' (non error) value out of the expected return. It will actually make the code much messier and more complicated than it is now.
I would be happy to add comments both at the declaration of the 'Status m_error;' variable in DILParser.h and in the BailOut functions, to the effect that this variable must ONLY be set in the BailOut functions (as it currently is).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree with Adrian here. We'd like to avoid Statuses in new code, and I don't think we need one here. The argument about changing the return type in uncommitted code doesn't carry much weight with me. I don't think you need to do it (more on that later), but if you were to do it, here's what I have to say about that:
I would have to do error type conversions in functions where they call something that fails with llvm::expected of type A but the calling function returns llvm::expected of type B
The way you normally do that is
if (!expected_a) return expected_a.takeError(); // automatically converted to Expected<B>
// Do stuff with `a`
add code to many of these sites to also update the current token to EOF
The existence of the current token is something I want to get rid of anyway, but one of the "nice" things about explicitly error passing is that you don't have to bother with maintaining state like this. If you return an Error, then the caller has to check it, and if all that the caller does is return immediately, then it doesn't really matter what's the state of the parser object -- the error will be bubbled up to the top of the stack, and nobody will touch the parser object.
It will actually make the code much messier and more complicated than it is now.
I'm not sure if this is the case here, but I can see how it might be. There are situations where it's simpler to "plough ahead" and check for errors later. However, I don't think this means you can't use Error/Expected. While it is not common to have an Error object as a member, it's not unheard of either. The main thing we want to avoid is long-lived Error objects with unclear destruction/check semantics.
Long-livedness isn't really a problem here since the Parser object only exists while the expression is being parsed. That may be longer than usual but it has clear start and end points (unlike some of our other Status member objects). And your destruction semantics ale already quite clear, albeit not completely explicit -- the error object is checked and returned in the Run
function, which is also the only public entry point of the parser class. The thing that's missing is that nothing actually forces the caller to call the Run
function (exactly once) -- and this is where I'd like to return to my earlier suggestion of using static methods for this, as I think this dovetails very nicely with this. If parsing consistent of a single Parser::Parse
call, then there would be no way to create long-lived Parser or Error objects, and the existence of the Parser object (and its Error member, if any) would just be an implementation detail of the Parse
method. And the Error
object probably doesn't even have to be a member, because the Parse
method can do something like this:
Expected<AST> Parser::Parse(...) {
Error error;
AST ast = Parser(..., /*takes the error by reference*/error).Run();
if (error) return error;
return ast;
}
// The token lexer is stopped at (aka "current token"). | ||
DILToken m_dil_token; | ||
// Holds an error if it occures during parsing. | ||
Status m_error; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would it make sense to use a https://lldb.llvm.org/cpp_reference/classlldb__private_1_1DiagnosticManager.html here?
You can wrap diagnostics in an Error/Status
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm already putting formatted diagnostics in my Status/Error. What would be the additional benefit of using the DIagnosticManager?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd guess it was something to do with the fancy new formatted diagnostics:
(lldb) expr (1.0+"")+(1.0+"")
˄ ˄
│ ╰─ error: invalid operands to binary expression ('double' and 'const char[1]')
╰─ error: invalid operands to binary expression ('double' and 'const char[1]')
For that to work, you need to error information in a structured form.
I don't know if using the DiagnosticManager makes sense here -- in theory, you should be able to construct the Error/Status objects directly -- but it would be nice to be able to produce this kind of structured information.
lldb/source/ValueObject/DILEval.cpp
Outdated
VariableList variable_list; | ||
ConstString name(name_ref); | ||
target_sp->GetImages().FindGlobalVariables(name, 1, variable_list); | ||
if (!variable_list.Empty()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LLVM coding style prefers early exists, so:
if (variable_list.Empty())
return nullptr;
etc...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, I have tried to do this now. :-)
This adds the basic lexer, with unittests, for the Data Inspection Language (DIL) -- see https://discourse.llvm.org/t/rfc-data-inspection-language/69893 This version of the lexer only handles local variables and namespaces, and is designed to work with llvm#120971.
Update grammar to allow register names.
Remove Status member variable ('m_error') from DILInterpreter class.
The other advantage of the diagnostic manager with the details et al, is that lldb can use the structure in the diagnostic messages to have the error message include a pointer to the place of the error in the original message, like:
It would be nice to get that working for DIL expressions as well. |
NewFormatDiagnostics looks okay. It's longer than the old version, but it's straight-forward linear code, so I don't have a problem with that. It's true that the system was designed to handle multiple errors, but it doesn't mean you have to use it that way. For me, the main value in it is the consistency of the error format wr.t. the expression parser (I don't particularly care about the new fancy format, but I think it's good to use the same one in both places). |
- Rename 'NewFormatDiagnostics' to 'FormatDiagnostics', keeping the code that uses DiagnosticDetail; remove 'OldFormatDiagnostics'. - Remove the code that automatically dereferences reference variables before returning them. - Remove IdentifierInfo class, replacing it directly with lldb::ValueObjectSP values.
Ok, I've updated the code to use the NewFormatDiagnostics (now just called FormatDiagnostics). |
Done. |
I've figured out how to make removing IdentifierInfo work. Done now. |
I think I have addressed all of the remaining review comments/questions. Please review this again when you have a few minutes. Thanks! |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Okay, at this I don't think I have any more comments about functionality. The remaining comments are either stylistic (which should be self-explanatory) or about error handling (which I'm going to write about here).
For the error handling, I missed the fact that your new implementation throws away the structured diagnostic info after using it to construct the error string. That kinda misses the point, which is to provide the rest of lldb with a structured representation of the error. It looks like the base DiagnosticError is abstract, so for that we need to create our own error subclass of that. I tried to sketch it out in the inline comments (by combining the existing ExpressionError
class and your code), but this is only a rough sketch -- you'll certainly need to adjust it to make it work. You can tell whether you've succeded by looking at how the new "frame var" error messages are formatted. They should come out in the same was as in the snippet that Jim posted.
The other set of suggested changes is about changing the error representation from Status to llvm::Error. We're basically already using the error object as if it was an llvm::Error, so I think we should just complete the transition and use it throughout the DIL.
|
||
lldb::DynamicValueType UseDynamic() { return m_use_dynamic; } | ||
|
||
using PtrOperator = std::tuple<Token::Kind, uint32_t>; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
unused?
DILLexer m_dil_lexer; | ||
|
||
// Holds an error if it occures during parsing. | ||
Status &m_error; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Status &m_error; | |
llvm::Error &m_error; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
such as this
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
lldb/source/ValueObject/DILEval.cpp
Outdated
lldb::ValueObjectSP value_sp; | ||
|
||
lldb::RegisterContextSP reg_ctx(stack_frame->GetRegisterContext()); | ||
if (!reg_ctx) | ||
return nullptr; | ||
|
||
if (const RegisterInfo *reg_info = reg_ctx->GetRegisterInfoByName(name_ref)) | ||
value_sp = | ||
ValueObjectRegister::Create(stack_frame.get(), reg_ctx, reg_info); | ||
|
||
if (value_sp) | ||
return value_sp; | ||
|
||
return nullptr; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lldb::ValueObjectSP value_sp; | |
lldb::RegisterContextSP reg_ctx(stack_frame->GetRegisterContext()); | |
if (!reg_ctx) | |
return nullptr; | |
if (const RegisterInfo *reg_info = reg_ctx->GetRegisterInfoByName(name_ref)) | |
value_sp = | |
ValueObjectRegister::Create(stack_frame.get(), reg_ctx, reg_info); | |
if (value_sp) | |
return value_sp; | |
return nullptr; | |
lldb::RegisterContextSP reg_ctx(stack_frame->GetRegisterContext()); | |
if (!reg_ctx) | |
return nullptr; | |
if (const RegisterInfo *reg_info = reg_ctx->GetRegisterInfoByName(name_ref)) | |
return | |
ValueObjectRegister::Create(stack_frame.get(), reg_ctx, reg_info); | |
return nullptr; |
|
||
llvm::Expected<lldb::ValueObjectSP> Accept(Visitor *v) const override; | ||
|
||
lldb::DynamicValueType GetUseDynamic() const { return m_use_dynamic; } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this should be a property of the Evaluator as a whole, and not of a specific identifier. I could imagine a setup where we resolve some parts of the AST using dynamic types, and some using the static ones, but:
- we don't have a way to express that right now
- I don't think that's what you're trying to do
- even if you we did want to do that, it should probably be a property of the base class (so that each node can decide whether it wants to use a dynamic type) and not just identifiers.
lldb/source/ValueObject/DILEval.cpp
Outdated
auto value_or_error = node->Accept(this); | ||
|
||
// Return the computed value or error. | ||
return value_or_error; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
auto value_or_error = node->Accept(this); | |
// Return the computed value or error. | |
return value_or_error; | |
return node->Accept(this); |
m_error = Status((uint32_t)code, lldb::eErrorTypeGeneric, | ||
FormatDiagnostics(m_input_expr, error, loc, | ||
CurToken().GetSpelling().length())); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
m_error = Status((uint32_t)code, lldb::eErrorTypeGeneric, | |
FormatDiagnostics(m_input_expr, error, loc, | |
CurToken().GetSpelling().length())); | |
m_error = CreateDiagnosticError(code, | |
m_input_expr, error, loc, | |
CurToken().GetSpelling().length())); |
or something like that.
self, "Set a breakpoint here", lldb.SBFileSpec("main.cpp") | ||
) | ||
|
||
self.expect("settings set target.experimental.use-DIL true", substrs=[""]) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
self.expect("settings set target.experimental.use-DIL true", substrs=[""]) | |
self.runCmd("settings set target.experimental.use-DIL true") |
self, "Set a breakpoint here", lldb.SBFileSpec("main.cpp") | ||
) | ||
|
||
self.expect("settings set target.experimental.use-DIL true", substrs=[""]) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
self.expect("settings set target.experimental.use-DIL true", substrs=[""]) | |
self.runCmd("settings set target.experimental.use-DIL true") |
|
||
self.expect("settings set target.experimental.use-DIL true", substrs=[""]) | ||
self.expect_var_path("this", type="TestMethods *") | ||
self.expect("frame variable 'c'", substrs=["(field_ = -1)"]) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
self.expect("frame variable 'c'", substrs=["(field_ = -1)"]) | |
self.expect_var_path("c", children=[ValueCheck(name="field_", value = "-1")]) |
self, "Set a breakpoint here", lldb.SBFileSpec("main.cpp") | ||
) | ||
|
||
self.expect("settings set target.experimental.use-DIL true", substrs=[""]) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
self.expect("settings set target.experimental.use-DIL true", substrs=[""]) | |
self.runCmd("settings set target.experimental.use-DIL true") |
I think you've misunderstood the code? I don't just throw away the DiagnosticDetail -- its formatted error message, which is what you've been asking for, gets passed back into the Status error, which LLDB then prints out. So the current implementation is giving the formatted error messages now (without any of your further suggested changes): (lldb) v externGlobalVar |
|
It's close, but it's not the same thing. Notice your arrow points into the error message:
While for
For that, you really need to return the error in a structured form. The string itself is needed as well, as it's used in contexts where it's impossible to show inline diagnostics, or because they have been disabled:
For this purpose, you could use your original code, or maybe what you have now is fine as well. |
Created a new DILDiagnosticError class, patterend after OptionParserError to use DiagnosticError and DiagnosticDetail for creating DIL errors. Replaced calls for FormatDiagnostics with calls to create DILDiagnosticError. However, at this time, must then convert these structured errors in LLDB::Status errors, because the code in CommandObjectFrame that calls into GetValueForVariableExpressionPath does NOT allow for passing structured diagnostics in/out, nor have any code for handling them.
I've fixed all the style/format requests. I ran into some difficulties with the structured error handling requests:
So I've done the best I can to honor your error handling request, but it doesn't feel very satisfactory. |
Friendly review ping? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
* Based on the way DiagnosticError is use, I've created a new error class, DILDiagnosticError, patterned after the OptionParserError clas, which DOES make use of DiagnosticError.
The class implementation looks okay for the most part.
* I have not been able to discover any way of converting DILDiagnosticError or DiagnosticDetail into an llvm::Error at all.
You construct these via llvm::make_error
, see inline suggestions.
* The only way I could find to convert a DILDiagnosticError into an LLDB::Status, was to extract the DiagnosticDetail message and create the LLDB::Status around that.
That shouldn't be necessary given that I'd like to work with llvm::Errors primarily (see other inline comment). At the API boundary (where this error goes outside of the DIL implementation, it can be converted to a Status using Status::FromError
.
* We currently still have to leave the errors as LLDB::Status errors, because the calls into & out of GetVariableValueForExpressionPath have no way (at the moment) of handling structured errors (i.e. DiagnosticDetails or DILDiagnosticErrors). We should probably change that in the future, but I would prefer to do that in a different PR.
Definitely.
std::vector<DiagnosticDetail> m_details; | ||
|
||
public: | ||
using llvm::ErrorInfo<DILDiagnosticError, DiagnosticError>::ErrorInfo; | ||
DILDiagnosticError(DiagnosticDetail detail) | ||
: ErrorInfo(std::error_code(EINVAL, std::generic_category())), | ||
m_details({detail}) {} | ||
|
||
DILDiagnosticError(ErrorCode ec, llvm::StringRef expr, | ||
const std::string &message, uint32_t loc, | ||
uint16_t err_len); | ||
|
||
std::unique_ptr<CloneableError> Clone() const override { | ||
return std::make_unique<DILDiagnosticError>(m_details[0]); | ||
} | ||
|
||
llvm::ArrayRef<DiagnosticDetail> GetDetails() const override { | ||
return m_details; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No need for a vector if you're only supporting a single diagnostic detail.
std::vector<DiagnosticDetail> m_details; | |
public: | |
using llvm::ErrorInfo<DILDiagnosticError, DiagnosticError>::ErrorInfo; | |
DILDiagnosticError(DiagnosticDetail detail) | |
: ErrorInfo(std::error_code(EINVAL, std::generic_category())), | |
m_details({detail}) {} | |
DILDiagnosticError(ErrorCode ec, llvm::StringRef expr, | |
const std::string &message, uint32_t loc, | |
uint16_t err_len); | |
std::unique_ptr<CloneableError> Clone() const override { | |
return std::make_unique<DILDiagnosticError>(m_details[0]); | |
} | |
llvm::ArrayRef<DiagnosticDetail> GetDetails() const override { | |
return m_details; | |
DiagnosticDetail m_detail; | |
public: | |
using llvm::ErrorInfo<DILDiagnosticError, DiagnosticError>::ErrorInfo; | |
DILDiagnosticError(DiagnosticDetail detail) | |
: ErrorInfo(make_error_code(std::errc::invalid_argument)), | |
m_detail(std::move(detail)) {} | |
DILDiagnosticError(ErrorCode ec, llvm::StringRef expr, | |
const std::string &message, uint32_t loc, | |
uint16_t err_len); | |
std::unique_ptr<CloneableError> Clone() const override { | |
return std::make_unique<DILDiagnosticError>(m_detail); | |
} | |
llvm::ArrayRef<DiagnosticDetail> GetDetails() const override { | |
return m_detail; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
lldb/source/ValueObject/DILEval.cpp
Outdated
Status error = GetStatusError( | ||
DILDiagnosticError(ErrorCode::kUndeclaredIdentifier, m_expr, errMsg, | ||
node->GetLocation(), node->GetName().size())); | ||
return error.ToError(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Status error = GetStatusError( | |
DILDiagnosticError(ErrorCode::kUndeclaredIdentifier, m_expr, errMsg, | |
node->GetLocation(), node->GetName().size())); | |
return error.ToError(); | |
return llvm::make_error<DILDiagnosticError>( | |
ErrorCode::kUndeclaredIdentifier, m_expr, errMsg, | |
node->GetLocation(), node->GetName().size()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
if (m_error.Fail()) { | ||
// If error is already set, then the parser is in the "bail-out" mode. Don't | ||
// do anything and keep the original error. | ||
return; | ||
} | ||
|
||
m_error = GetStatusError(DILDiagnosticError( | ||
code, m_input_expr, error, loc, CurToken().GetSpelling().length())); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if (m_error.Fail()) { | |
// If error is already set, then the parser is in the "bail-out" mode. Don't | |
// do anything and keep the original error. | |
return; | |
} | |
m_error = GetStatusError(DILDiagnosticError( | |
code, m_input_expr, error, loc, CurToken().GetSpelling().length())); | |
if (m_error) { | |
// If error is already set, then the parser is in the "bail-out" mode. Don't | |
// do anything and keep the original error. | |
return; | |
} | |
m_error = llvm::make_error<DILDiagnosticError>( | |
code, m_input_expr, error, loc, CurToken().GetSpelling().length())); |
(This assumes other changes I suggested in the previous round)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
DILLexer m_dil_lexer; | ||
|
||
// Holds an error if it occures during parsing. | ||
Status &m_error; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
such as this
Status error; | ||
DILParser parser(dil_input_expr, lexer, frame_sp, use_dynamic, use_synthetic, | ||
fragile_ivar, check_ptr_vs_member, error); | ||
return parser.Run(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
and this
: ErrorInfo(std::error_code(EINVAL, std::generic_category())), | ||
m_details({detail}) {} | ||
|
||
DILDiagnosticError(ErrorCode ec, llvm::StringRef expr, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you plan to use the error code for something? If yes, it should be stored somewhere. Otherwise -- delete it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Well.. kind of, but now the same question can be asked about the BailOut function. If you don't plan to use the error code code for anything, I think we should delete all references to it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, will do.
Use llvm::Error rather than lldb::Status. Use llvm::make_error to convert DILDiagnosticError to llvm::Error. Some minor code cleanups.
// This is needed, at the moment, because the code that calls the 'frame | ||
// var' implementation from CommandObjectFrame ONLY passes Status in/out | ||
// as a way to determine if an error occurred. We probably want to change that | ||
// in the future. | ||
Status GetStatusError(DILDiagnosticError dil_error); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This shouldn't be necessary the llvm::Error (which can wrap any ErrorInfo object) can be converted to a Status. It also appears to be unused.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Whoops this was leftover from a previous iteration working on the errors. You are right, it is no longer needed: I will remove it.
void DILParser::BailOut(Status error) { | ||
if (m_error) | ||
// If error is already set, then the parser is in the "bail-out" mode. Don't | ||
// do anything and keep the original error. | ||
return; | ||
|
||
m_error = error.ToError(); | ||
// Advance the lexer token index to the end of the lexed tokens vector. | ||
m_dil_lexer.ResetTokenIdx(m_dil_lexer.NumLexedTokens() - 1); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this used? If yes, what for?
(If it is used, don't try removing it yet. I just want know the purpose for now.)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This will prevent the parser from attempting to parse anything else after calling BailOut.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I mean the whole function -- I don't see the Status overload being called anywhere.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, I understand. After looking through the current full DIL implementation, I can't see this being called anywhere, so it must be leftover from an earlier version of the implementation. I will remove it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
return; | ||
|
||
m_error = llvm::make_error<DILDiagnosticError>( | ||
m_input_expr, error, loc, CurToken().GetSpelling().length()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This seems very fragile as you're taking the location from the function argument, but length from the CurToken
member function. I think both should come from the same place. If you think the error is always to be on the current token (seems to be the case now, but maybe it doesn't always have to be true), then you don't need the loc
argument. Otherwise, both of these things should be function arguments (either in the form of a Token
, or as two separate loc+length arguments -- in case you want to underline things that aren't tokens).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I do expect the error to always be on the current token, but we do need the loc
parameter: That is supposed to indicate the starting position of the error (the 'bad' token) within the overall input expression. The current token does not have this information. It might be able to tell which token it is (first, second, etc.), but it won't know the lengths of each of the preceding tokens (plus any spaces between them), so it won't know where it's starting position is in the overall input string.
If you would prefer, I can update this BailOut function to take the length-to-underline (currently CurToken().GetSpelling().Length()
) as an input parameter that it then passes to DILDiagnosticError. I think you indicated that you would find that acceptable?
Remove unused function. Removed unused error code parameter for parser's BailOut function. Add err_len parameter to BailOut function, to be passed on to DILDiagnosticError.
I think I've addressed all review comments. Please take another look at this. Thanks! |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Okay, I think we're ready now. Thank you for your patience. Just a couple of last minute cosmetic tweaks here. I've added a some include statements to make the headers more self contained, but I'm sure I've missed some.
#define LLDB_VALUEOBJECT_DILAST_H | ||
|
||
#include "lldb/ValueObject/ValueObject.h" | ||
#include <string> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
#include <string> | |
#include <string> | |
#include <cstdint> | |
#include "llvm/Support/Error.h" |
#include "lldb/ValueObject/DILAST.h" | ||
#include "lldb/ValueObject/DILParser.h" | ||
#include <memory> | ||
#include <vector> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
#include <vector> | |
#include <vector> | |
#include "llvm/Support/StringRef.h" | |
#include "llvm/Support/Error.h" |
lldb/source/ValueObject/DILEval.cpp
Outdated
m_exe_ctx_scope(frame_sp) {} | ||
|
||
llvm::Expected<lldb::ValueObjectSP> | ||
Interpreter::DILEvalNode(const ASTNode *node) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
already in the dil namespace..
Interpreter::DILEvalNode(const ASTNode *node) { | |
Interpreter::EvalNode(const ASTNode *node) { |
DiagnosticDetail detail; | ||
detail.source_location = sloc; | ||
detail.severity = lldb::eSeverityError; | ||
detail.message = message; | ||
detail.rendered = rendered_msg; | ||
m_detail = std::move(detail); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
DiagnosticDetail detail; | |
detail.source_location = sloc; | |
detail.severity = lldb::eSeverityError; | |
detail.message = message; | |
detail.rendered = rendered_msg; | |
m_detail = std::move(detail); | |
m_detail.source_location = sloc; | |
m_detail.severity = lldb::eSeverityError; | |
m_detail.message = message; | |
m_detail.rendered = std::move(rendered_msg); |
// The first token in nested_name_specifier is always an identifier, or | ||
// '(anonymous namespace)'. | ||
if (CurToken().IsNot(Token::identifier) && CurToken().IsNot(Token::l_paren)) | ||
return ""; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Consider pattern like:
switch(CurToken().GetKind())
case Token::identifier:
...
return something;
case Token::l_paren:
...
default:
return "";
}
(the motivation is to remove duplicate checks for token type)
} | ||
|
||
// Try parsing optional nested_name_specifier. | ||
auto nested_name_specifier = ParseNestedNameSpecifier(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
auto nested_name_specifier = ParseNestedNameSpecifier(); | |
std::string nested_name_specifier = ParseNestedNameSpecifier(); |
llvm is relatively conservative when it comes to auto
. In this case it's locally unclear whether the variable type is string, StringRef, const char *, etc..
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/18/builds/13991 Here is the relevant piece of the build log for the reference
|
Add the Data Inspection Language (DIL) implementation pieces for handling plain local and global variable names.
See https://discourse.llvm.org/t/rfc-data-inspection-language/69893 for information about DIL.
This change includes the basic AST, Lexer, Parser and Evaluator pieces, as well as some tests.