style(core): more cleanup all around (#33711)

mdrxy · web-flow · commit a2a9a02ecb55 · 2025-10-28T22:58:19.000-04:00
diff --git a/libs/core/langchain_core/messages/base.py b/libs/core/langchain_core/messages/base.py
@@ -93,6 +93,10 @@ class BaseMessage(Serializable):
     """Base abstract message class.
 
     Messages are the inputs and outputs of a chat model.
+
+    Examples include [`HumanMessage`][langchain.messages.HumanMessage],
+    [`AIMessage`][langchain.messages.AIMessage], and
+    [`SystemMessage`][langchain.messages.SystemMessage].
     """
 
     content: str | list[str | dict]
diff --git a/libs/core/langchain_core/output_parsers/__init__.py b/libs/core/langchain_core/output_parsers/__init__.py
@@ -1,4 +1,20 @@
-"""**OutputParser** classes parse the output of an LLM call."""
+"""`OutputParser` classes parse the output of an LLM call into structured data.
+
+!!! tip "Structured output"
+
+    Output parsers emerged as an early solution to the challenge of obtaining structured
+    output from LLMs.
+
+    Today, most LLMs support [structured output](https://docs.langchain.com/oss/python/langchain/models#structured-outputs)
+    natively. In such cases, using output parsers may be unnecessary, and you should
+    leverage the model's built-in capabilities for structured output. Refer to the
+    [documentation of your chosen model](https://docs.langchain.com/oss/python/integrations/providers/overview)
+    for guidance on how to achieve structured output directly.
+
+    Output parsers remain valuable when working with models that do not support
+    structured output natively, or when you require additional processing or validation
+    of the model's output beyond its inherent capabilities.
+"""
 
 from typing import TYPE_CHECKING
 
diff --git a/libs/core/langchain_core/output_parsers/base.py b/libs/core/langchain_core/output_parsers/base.py
@@ -135,6 +135,9 @@ class BaseOutputParser(
 
     Example:
         ```python
+        # Implement a simple boolean output parser
+
+
         class BooleanOutputParser(BaseOutputParser[bool]):
             true_val: str = "YES"
             false_val: str = "NO"
diff --git a/libs/core/langchain_core/output_parsers/json.py b/libs/core/langchain_core/output_parsers/json.py
@@ -31,11 +31,14 @@
 class JsonOutputParser(BaseCumulativeTransformOutputParser[Any]):
     """Parse the output of an LLM call to a JSON object.
 
+    Probably the most reliable output parser for getting structured data that does *not*
+    use function calling.
+
     When used in streaming mode, it will yield partial JSON objects containing
     all the keys that have been returned so far.
 
-    In streaming, if `diff` is set to `True`, yields JSONPatch operations
-    describing the difference between the previous and the current object.
+    In streaming, if `diff` is set to `True`, yields JSONPatch operations describing the
+    difference between the previous and the current object.
     """
 
     pydantic_object: Annotated[type[TBaseModel] | None, SkipValidation()] = None  # type: ignore[valid-type]
diff --git a/libs/core/langchain_core/output_parsers/list.py b/libs/core/langchain_core/output_parsers/list.py
@@ -41,7 +41,7 @@ def droplastn(
 
 
 class ListOutputParser(BaseTransformOutputParser[list[str]]):
-    """Parse the output of an LLM call to a list."""
+    """Parse the output of a model to a list."""
 
     @property
     def _type(self) -> str:
@@ -74,30 +74,30 @@ def _transform(self, input: Iterator[str | BaseMessage]) -> Iterator[list[str]]:
         buffer = ""
         for chunk in input:
             if isinstance(chunk, BaseMessage):
-                # extract text
+                # Extract text
                 chunk_content = chunk.content
                 if not isinstance(chunk_content, str):
                     continue
                 buffer += chunk_content
             else:
-                # add current chunk to buffer
+                # Add current chunk to buffer
                 buffer += chunk
-            # parse buffer into a list of parts
+            # Parse buffer into a list of parts
             try:
                 done_idx = 0
-                # yield only complete parts
+                # Yield only complete parts
                 for m in droplastn(self.parse_iter(buffer), 1):
                     done_idx = m.end()
                     yield [m.group(1)]
                 buffer = buffer[done_idx:]
             except NotImplementedError:
                 parts = self.parse(buffer)
-                # yield only complete parts
+                # Yield only complete parts
                 if len(parts) > 1:
                     for part in parts[:-1]:
                         yield [part]
                     buffer = parts[-1]
-        # yield the last part
+        # Yield the last part
         for part in self.parse(buffer):
             yield [part]
 
@@ -108,40 +108,40 @@ async def _atransform(
         buffer = ""
         async for chunk in input:
             if isinstance(chunk, BaseMessage):
-                # extract text
+                # Extract text
                 chunk_content = chunk.content
                 if not isinstance(chunk_content, str):
                     continue
                 buffer += chunk_content
             else:
-                # add current chunk to buffer
+                # Add current chunk to buffer
                 buffer += chunk
-            # parse buffer into a list of parts
+            # Parse buffer into a list of parts
             try:
                 done_idx = 0
-                # yield only complete parts
+                # Yield only complete parts
                 for m in droplastn(self.parse_iter(buffer), 1):
                     done_idx = m.end()
                     yield [m.group(1)]
                 buffer = buffer[done_idx:]
             except NotImplementedError:
                 parts = self.parse(buffer)
-                # yield only complete parts
+                # Yield only complete parts
                 if len(parts) > 1:
                     for part in parts[:-1]:
                         yield [part]
                     buffer = parts[-1]
-        # yield the last part
+        # Yield the last part
         for part in self.parse(buffer):
             yield [part]
 
 
 class CommaSeparatedListOutputParser(ListOutputParser):
-    """Parse the output of an LLM call to a comma-separated list."""
+    """Parse the output of a model to a comma-separated list."""
 
     @classmethod
     def is_lc_serializable(cls) -> bool:
-        """Return True as this class is serializable."""
+        """Return `True` as this class is serializable."""
         return True
 
     @classmethod
@@ -177,7 +177,7 @@ def parse(self, text: str) -> list[str]:
             )
             return [item for sublist in reader for item in sublist]
         except csv.Error:
-            # keep old logic for backup
+            # Keep old logic for backup
             return [part.strip() for part in text.split(",")]
 
     @property
diff --git a/libs/core/langchain_core/output_parsers/openai_tools.py b/libs/core/langchain_core/output_parsers/openai_tools.py
@@ -224,7 +224,7 @@ def parse_result(self, result: list[Generation], *, partial: bool = False) -> An
             result: The result of the LLM call.
             partial: Whether to parse partial JSON.
                 If `True`, the output will be a JSON object containing
-                all the keys that have been returned so far.
+                    all the keys that have been returned so far.
                 If `False`, the output will be the full JSON object.
 
         Raises:
@@ -307,7 +307,7 @@ def parse_result(self, result: list[Generation], *, partial: bool = False) -> An
             result: The result of the LLM call.
             partial: Whether to parse partial JSON.
                 If `True`, the output will be a JSON object containing
-                all the keys that have been returned so far.
+                    all the keys that have been returned so far.
                 If `False`, the output will be the full JSON object.
 
         Returns:
diff --git a/libs/core/langchain_core/output_parsers/string.py b/libs/core/langchain_core/output_parsers/string.py
@@ -6,14 +6,14 @@
 
 
 class StrOutputParser(BaseTransformOutputParser[str]):
-    """OutputParser that parses LLMResult into the top likely string."""
+    """OutputParser that parses `LLMResult` into the top likely string."""
 
     @classmethod
     def is_lc_serializable(cls) -> bool:
-        """StrOutputParser is serializable.
+        """`StrOutputParser` is serializable.
 
         Returns:
-            True
+            `True`
         """
         return True
 
diff --git a/libs/core/langchain_core/output_parsers/xml.py b/libs/core/langchain_core/output_parsers/xml.py
@@ -43,19 +43,19 @@ class _StreamingParser:
     """Streaming parser for XML.
 
     This implementation is pulled into a class to avoid implementation
-    drift between transform and atransform of the XMLOutputParser.
+    drift between transform and atransform of the `XMLOutputParser`.
     """
 
     def __init__(self, parser: Literal["defusedxml", "xml"]) -> None:
         """Initialize the streaming parser.
 
         Args:
-            parser: Parser to use for XML parsing. Can be either 'defusedxml' or 'xml'.
-              See documentation in XMLOutputParser for more information.
+            parser: Parser to use for XML parsing. Can be either `'defusedxml'` or
+                `'xml'`. See documentation in `XMLOutputParser` for more information.
 
         Raises:
-            ImportError: If defusedxml is not installed and the defusedxml
-                parser is requested.
+            ImportError: If `defusedxml` is not installed and the `defusedxml` parser is
+                requested.
         """
         if parser == "defusedxml":
             if not _HAS_DEFUSEDXML:
@@ -79,10 +79,10 @@ def parse(self, chunk: str | BaseMessage) -> Iterator[AddableDict]:
         """Parse a chunk of text.
 
         Args:
-            chunk: A chunk of text to parse. This can be a string or a BaseMessage.
+            chunk: A chunk of text to parse. This can be a `str` or a `BaseMessage`.
 
         Yields:
-            A dictionary representing the parsed XML element.
+            A `dict` representing the parsed XML element.
 
         Raises:
             xml.etree.ElementTree.ParseError: If the XML is not well-formed.
@@ -147,46 +147,49 @@ def close(self) -> None:
 
 
 class XMLOutputParser(BaseTransformOutputParser):
-    """Parse an output using xml format."""
+    """Parse an output using xml format.
+
+    Returns a dictionary of tags.
+    """
 
     tags: list[str] | None = None
     """Tags to tell the LLM to expect in the XML output.
 
     Note this may not be perfect depending on the LLM implementation.
 
-    For example, with tags=["foo", "bar", "baz"]:
+    For example, with `tags=["foo", "bar", "baz"]`:
 
     1. A well-formatted XML instance:
-       "<foo>\n   <bar>\n      <baz></baz>\n   </bar>\n</foo>"
+        `"<foo>\n   <bar>\n      <baz></baz>\n   </bar>\n</foo>"`
 
     2. A badly-formatted XML instance (missing closing tag for 'bar'):
-       "<foo>\n   <bar>\n   </foo>"
+        `"<foo>\n   <bar>\n   </foo>"`
 
     3. A badly-formatted XML instance (unexpected 'tag' element):
-       "<foo>\n   <tag>\n   </tag>\n</foo>"
+        `"<foo>\n   <tag>\n   </tag>\n</foo>"`
     """
     encoding_matcher: re.Pattern = re.compile(
         r"<([^>]*encoding[^>]*)>\n(.*)", re.MULTILINE | re.DOTALL
     )
     parser: Literal["defusedxml", "xml"] = "defusedxml"
-    """Parser to use for XML parsing. Can be either 'defusedxml' or 'xml'.
+    """Parser to use for XML parsing. Can be either `'defusedxml'` or `'xml'`.
 
-    * 'defusedxml' is the default parser and is used to prevent XML vulnerabilities
-       present in some distributions of Python's standard library xml.
-       `defusedxml` is a wrapper around the standard library parser that
-       sets up the parser with secure defaults.
-    * 'xml' is the standard library parser.
+    * `'defusedxml'` is the default parser and is used to prevent XML vulnerabilities
+        present in some distributions of Python's standard library xml.
+        `defusedxml` is a wrapper around the standard library parser that
+        sets up the parser with secure defaults.
+    * `'xml'` is the standard library parser.
 
-    Use `xml` only if you are sure that your distribution of the standard library
-    is not vulnerable to XML vulnerabilities.
+    Use `xml` only if you are sure that your distribution of the standard library is not
+    affected by known XML vulnerabilities.
 
     Please review the following resources for more information:
 
     * https://docs.python.org/3/library/xml.html#xml-vulnerabilities
     * https://github.com/tiran/defusedxml
 
-    The standard library relies on libexpat for parsing XML:
-    https://github.com/libexpat/libexpat
+    The standard library relies on [`libexpat`](https://github.com/libexpat/libexpat)
+    for parsing XML.
     """
 
     def get_format_instructions(self) -> str:
@@ -200,12 +203,12 @@ def parse(self, text: str) -> dict[str, str | list[Any]]:
             text: The output of an LLM call.
 
         Returns:
-            A dictionary representing the parsed XML.
+            A `dict` representing the parsed XML.
 
         Raises:
             OutputParserException: If the XML is not well-formed.
-            ImportError: If defusedxml is not installed and the defusedxml
-                parser is requested.
+            ImportError: If `defusedxml` is not installed and the `defusedxml` parser is
+                requested.
         """
         # Try to find XML string within triple backticks
         # Imports are temporarily placed here to avoid issue with caching on CI
diff --git a/libs/core/langchain_core/prompts/chat.py b/libs/core/langchain_core/prompts/chat.py
diff --git a/libs/core/langchain_core/tracers/log_stream.py b/libs/core/langchain_core/tracers/log_stream.py
diff --git a/libs/langchain/langchain_classic/agents/agent_toolkits/__init__.py b/libs/langchain/langchain_classic/agents/agent_toolkits/__init__.py
diff --git a/libs/partners/openai/langchain_openai/embeddings/base.py b/libs/partners/openai/langchain_openai/embeddings/base.py