Skip to content

Commit a2a9a02

Browse files
authored
style(core): more cleanup all around (#33711)
1 parent e5e1d6c commit a2a9a02

12 files changed

Lines changed: 124 additions & 98 deletions

File tree

libs/core/langchain_core/messages/base.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,10 @@ class BaseMessage(Serializable):
9393
"""Base abstract message class.
9494
9595
Messages are the inputs and outputs of a chat model.
96+
97+
Examples include [`HumanMessage`][langchain.messages.HumanMessage],
98+
[`AIMessage`][langchain.messages.AIMessage], and
99+
[`SystemMessage`][langchain.messages.SystemMessage].
96100
"""
97101

98102
content: str | list[str | dict]

libs/core/langchain_core/output_parsers/__init__.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,20 @@
1-
"""**OutputParser** classes parse the output of an LLM call."""
1+
"""`OutputParser` classes parse the output of an LLM call into structured data.
2+
3+
!!! tip "Structured output"
4+
5+
Output parsers emerged as an early solution to the challenge of obtaining structured
6+
output from LLMs.
7+
8+
Today, most LLMs support [structured output](https://docs.langchain.com/oss/python/langchain/models#structured-outputs)
9+
natively. In such cases, using output parsers may be unnecessary, and you should
10+
leverage the model's built-in capabilities for structured output. Refer to the
11+
[documentation of your chosen model](https://docs.langchain.com/oss/python/integrations/providers/overview)
12+
for guidance on how to achieve structured output directly.
13+
14+
Output parsers remain valuable when working with models that do not support
15+
structured output natively, or when you require additional processing or validation
16+
of the model's output beyond its inherent capabilities.
17+
"""
218

319
from typing import TYPE_CHECKING
420

libs/core/langchain_core/output_parsers/base.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,9 @@ class BaseOutputParser(
135135
136136
Example:
137137
```python
138+
# Implement a simple boolean output parser
139+
140+
138141
class BooleanOutputParser(BaseOutputParser[bool]):
139142
true_val: str = "YES"
140143
false_val: str = "NO"

libs/core/langchain_core/output_parsers/json.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,14 @@
3131
class JsonOutputParser(BaseCumulativeTransformOutputParser[Any]):
3232
"""Parse the output of an LLM call to a JSON object.
3333
34+
Probably the most reliable output parser for getting structured data that does *not*
35+
use function calling.
36+
3437
When used in streaming mode, it will yield partial JSON objects containing
3538
all the keys that have been returned so far.
3639
37-
In streaming, if `diff` is set to `True`, yields JSONPatch operations
38-
describing the difference between the previous and the current object.
40+
In streaming, if `diff` is set to `True`, yields JSONPatch operations describing the
41+
difference between the previous and the current object.
3942
"""
4043

4144
pydantic_object: Annotated[type[TBaseModel] | None, SkipValidation()] = None # type: ignore[valid-type]

libs/core/langchain_core/output_parsers/list.py

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def droplastn(
4141

4242

4343
class ListOutputParser(BaseTransformOutputParser[list[str]]):
44-
"""Parse the output of an LLM call to a list."""
44+
"""Parse the output of a model to a list."""
4545

4646
@property
4747
def _type(self) -> str:
@@ -74,30 +74,30 @@ def _transform(self, input: Iterator[str | BaseMessage]) -> Iterator[list[str]]:
7474
buffer = ""
7575
for chunk in input:
7676
if isinstance(chunk, BaseMessage):
77-
# extract text
77+
# Extract text
7878
chunk_content = chunk.content
7979
if not isinstance(chunk_content, str):
8080
continue
8181
buffer += chunk_content
8282
else:
83-
# add current chunk to buffer
83+
# Add current chunk to buffer
8484
buffer += chunk
85-
# parse buffer into a list of parts
85+
# Parse buffer into a list of parts
8686
try:
8787
done_idx = 0
88-
# yield only complete parts
88+
# Yield only complete parts
8989
for m in droplastn(self.parse_iter(buffer), 1):
9090
done_idx = m.end()
9191
yield [m.group(1)]
9292
buffer = buffer[done_idx:]
9393
except NotImplementedError:
9494
parts = self.parse(buffer)
95-
# yield only complete parts
95+
# Yield only complete parts
9696
if len(parts) > 1:
9797
for part in parts[:-1]:
9898
yield [part]
9999
buffer = parts[-1]
100-
# yield the last part
100+
# Yield the last part
101101
for part in self.parse(buffer):
102102
yield [part]
103103

@@ -108,40 +108,40 @@ async def _atransform(
108108
buffer = ""
109109
async for chunk in input:
110110
if isinstance(chunk, BaseMessage):
111-
# extract text
111+
# Extract text
112112
chunk_content = chunk.content
113113
if not isinstance(chunk_content, str):
114114
continue
115115
buffer += chunk_content
116116
else:
117-
# add current chunk to buffer
117+
# Add current chunk to buffer
118118
buffer += chunk
119-
# parse buffer into a list of parts
119+
# Parse buffer into a list of parts
120120
try:
121121
done_idx = 0
122-
# yield only complete parts
122+
# Yield only complete parts
123123
for m in droplastn(self.parse_iter(buffer), 1):
124124
done_idx = m.end()
125125
yield [m.group(1)]
126126
buffer = buffer[done_idx:]
127127
except NotImplementedError:
128128
parts = self.parse(buffer)
129-
# yield only complete parts
129+
# Yield only complete parts
130130
if len(parts) > 1:
131131
for part in parts[:-1]:
132132
yield [part]
133133
buffer = parts[-1]
134-
# yield the last part
134+
# Yield the last part
135135
for part in self.parse(buffer):
136136
yield [part]
137137

138138

139139
class CommaSeparatedListOutputParser(ListOutputParser):
140-
"""Parse the output of an LLM call to a comma-separated list."""
140+
"""Parse the output of a model to a comma-separated list."""
141141

142142
@classmethod
143143
def is_lc_serializable(cls) -> bool:
144-
"""Return True as this class is serializable."""
144+
"""Return `True` as this class is serializable."""
145145
return True
146146

147147
@classmethod
@@ -177,7 +177,7 @@ def parse(self, text: str) -> list[str]:
177177
)
178178
return [item for sublist in reader for item in sublist]
179179
except csv.Error:
180-
# keep old logic for backup
180+
# Keep old logic for backup
181181
return [part.strip() for part in text.split(",")]
182182

183183
@property

libs/core/langchain_core/output_parsers/openai_tools.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,7 @@ def parse_result(self, result: list[Generation], *, partial: bool = False) -> An
224224
result: The result of the LLM call.
225225
partial: Whether to parse partial JSON.
226226
If `True`, the output will be a JSON object containing
227-
all the keys that have been returned so far.
227+
all the keys that have been returned so far.
228228
If `False`, the output will be the full JSON object.
229229
230230
Raises:
@@ -307,7 +307,7 @@ def parse_result(self, result: list[Generation], *, partial: bool = False) -> An
307307
result: The result of the LLM call.
308308
partial: Whether to parse partial JSON.
309309
If `True`, the output will be a JSON object containing
310-
all the keys that have been returned so far.
310+
all the keys that have been returned so far.
311311
If `False`, the output will be the full JSON object.
312312
313313
Returns:

libs/core/langchain_core/output_parsers/string.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,14 @@
66

77

88
class StrOutputParser(BaseTransformOutputParser[str]):
9-
"""OutputParser that parses LLMResult into the top likely string."""
9+
"""OutputParser that parses `LLMResult` into the top likely string."""
1010

1111
@classmethod
1212
def is_lc_serializable(cls) -> bool:
13-
"""StrOutputParser is serializable.
13+
"""`StrOutputParser` is serializable.
1414
1515
Returns:
16-
True
16+
`True`
1717
"""
1818
return True
1919

libs/core/langchain_core/output_parsers/xml.py

Lines changed: 28 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -43,19 +43,19 @@ class _StreamingParser:
4343
"""Streaming parser for XML.
4444
4545
This implementation is pulled into a class to avoid implementation
46-
drift between transform and atransform of the XMLOutputParser.
46+
drift between transform and atransform of the `XMLOutputParser`.
4747
"""
4848

4949
def __init__(self, parser: Literal["defusedxml", "xml"]) -> None:
5050
"""Initialize the streaming parser.
5151
5252
Args:
53-
parser: Parser to use for XML parsing. Can be either 'defusedxml' or 'xml'.
54-
See documentation in XMLOutputParser for more information.
53+
parser: Parser to use for XML parsing. Can be either `'defusedxml'` or
54+
`'xml'`. See documentation in `XMLOutputParser` for more information.
5555
5656
Raises:
57-
ImportError: If defusedxml is not installed and the defusedxml
58-
parser is requested.
57+
ImportError: If `defusedxml` is not installed and the `defusedxml` parser is
58+
requested.
5959
"""
6060
if parser == "defusedxml":
6161
if not _HAS_DEFUSEDXML:
@@ -79,10 +79,10 @@ def parse(self, chunk: str | BaseMessage) -> Iterator[AddableDict]:
7979
"""Parse a chunk of text.
8080
8181
Args:
82-
chunk: A chunk of text to parse. This can be a string or a BaseMessage.
82+
chunk: A chunk of text to parse. This can be a `str` or a `BaseMessage`.
8383
8484
Yields:
85-
A dictionary representing the parsed XML element.
85+
A `dict` representing the parsed XML element.
8686
8787
Raises:
8888
xml.etree.ElementTree.ParseError: If the XML is not well-formed.
@@ -147,46 +147,49 @@ def close(self) -> None:
147147

148148

149149
class XMLOutputParser(BaseTransformOutputParser):
150-
"""Parse an output using xml format."""
150+
"""Parse an output using xml format.
151+
152+
Returns a dictionary of tags.
153+
"""
151154

152155
tags: list[str] | None = None
153156
"""Tags to tell the LLM to expect in the XML output.
154157
155158
Note this may not be perfect depending on the LLM implementation.
156159
157-
For example, with tags=["foo", "bar", "baz"]:
160+
For example, with `tags=["foo", "bar", "baz"]`:
158161
159162
1. A well-formatted XML instance:
160-
"<foo>\n <bar>\n <baz></baz>\n </bar>\n</foo>"
163+
`"<foo>\n <bar>\n <baz></baz>\n </bar>\n</foo>"`
161164
162165
2. A badly-formatted XML instance (missing closing tag for 'bar'):
163-
"<foo>\n <bar>\n </foo>"
166+
`"<foo>\n <bar>\n </foo>"`
164167
165168
3. A badly-formatted XML instance (unexpected 'tag' element):
166-
"<foo>\n <tag>\n </tag>\n</foo>"
169+
`"<foo>\n <tag>\n </tag>\n</foo>"`
167170
"""
168171
encoding_matcher: re.Pattern = re.compile(
169172
r"<([^>]*encoding[^>]*)>\n(.*)", re.MULTILINE | re.DOTALL
170173
)
171174
parser: Literal["defusedxml", "xml"] = "defusedxml"
172-
"""Parser to use for XML parsing. Can be either 'defusedxml' or 'xml'.
175+
"""Parser to use for XML parsing. Can be either `'defusedxml'` or `'xml'`.
173176
174-
* 'defusedxml' is the default parser and is used to prevent XML vulnerabilities
175-
present in some distributions of Python's standard library xml.
176-
`defusedxml` is a wrapper around the standard library parser that
177-
sets up the parser with secure defaults.
178-
* 'xml' is the standard library parser.
177+
* `'defusedxml'` is the default parser and is used to prevent XML vulnerabilities
178+
present in some distributions of Python's standard library xml.
179+
`defusedxml` is a wrapper around the standard library parser that
180+
sets up the parser with secure defaults.
181+
* `'xml'` is the standard library parser.
179182
180-
Use `xml` only if you are sure that your distribution of the standard library
181-
is not vulnerable to XML vulnerabilities.
183+
Use `xml` only if you are sure that your distribution of the standard library is not
184+
affected by XML vulnerabilities.
182185
183186
Please review the following resources for more information:
184187
185188
* https://docs.python.org/3/library/xml.html#xml-vulnerabilities
186189
* https://github.com/tiran/defusedxml
187190
188-
The standard library relies on libexpat for parsing XML:
189-
https://github.com/libexpat/libexpat
191+
The standard library relies on [`libexpat`](https://github.com/libexpat/libexpat)
192+
for parsing XML.
190193
"""
191194

192195
def get_format_instructions(self) -> str:
@@ -200,12 +203,12 @@ def parse(self, text: str) -> dict[str, str | list[Any]]:
200203
text: The output of an LLM call.
201204
202205
Returns:
203-
A dictionary representing the parsed XML.
206+
A `dict` representing the parsed XML.
204207
205208
Raises:
206209
OutputParserException: If the XML is not well-formed.
207-
ImportError: If defusedxml is not installed and the defusedxml
208-
parser is requested.
210+
ImportError: If `defusedxml` is not installed and the `defusedxml` parser is
211+
requested.
209212
"""
210213
# Try to find XML string within triple backticks
211214
# Imports are temporarily placed here to avoid issue with caching on CI

0 commit comments

Comments
 (0)