
Commit ea0ef1a

VMRuiz authored and committed

Adding use of ItemAdapter to prevent assumptions of item nature (#353)

1 parent b7b4e0e · commit ea0ef1a

2 files changed: +11 −21 lines
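The motivation behind the change: itemadapter's ItemAdapter wraps whatever object a spider yields (a plain dict, a scrapy.Item, a dataclass or an attrs item) behind a single dict-like interface, so Spidermon no longer has to guess the item's concrete type. A minimal sketch of that interface, with a hypothetical Product item class used only for illustration:

from itemadapter import ItemAdapter
from scrapy import Field, Item


class Product(Item):  # hypothetical item class, for illustration only
    name = Field()
    price = Field()


# A plain dict and a scrapy.Item go through the exact same interface.
for item in ({"name": "book", "price": 10}, Product(name="pen", price=2)):
    adapter = ItemAdapter(item)
    print(adapter.asdict())    # e.g. {'name': 'book', 'price': 10}
    print("price" in adapter)  # True in both cases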

spidermon/contrib/scrapy/extensions.py

Lines changed: 3 additions & 1 deletion

@@ -2,6 +2,7 @@
 from scrapy.exceptions import NotConfigured
 from scrapy.utils.misc import load_object
 from twisted.internet.task import LoopingCall
+from itemadapter import ItemAdapter

 from spidermon import MonitorSuite
 from spidermon.contrib.scrapy.runners import SpiderMonitorRunner
@@ -137,7 +138,8 @@ def _count_item(self, item, skip_none_values, item_count_stat=None):
             item_count_stat = f"spidermon_item_scraped_count/{item_type}"
         self.crawler.stats.inc_value(item_count_stat)

-        for field_name, value in item.items():
+        data = ItemAdapter(item)
+        for field_name, value in data.asdict().items():
             if skip_none_values and value is None:
                 continue

spidermon/contrib/scrapy/pipelines.py

Lines changed: 8 additions & 20 deletions

@@ -1,12 +1,9 @@
-import json
-from io import BytesIO
 from collections import defaultdict
+from itemadapter import ItemAdapter

 from scrapy.exceptions import DropItem, NotConfigured
 from scrapy.utils.misc import load_object
-from scrapy.exporters import JsonLinesItemExporter
 from scrapy import Field, Item
-from scrapy.utils.python import to_unicode

 from spidermon.contrib.validation import SchematicsValidator, JSONSchemaValidator
 from spidermon.contrib.validation.jsonschema.tools import get_schema_from
@@ -136,25 +133,16 @@ def find_validators(self, item):
         return find(item.__class__) or find(Item)

     def _convert_item_to_dict(self, item):
-        serialized_json = BytesIO()
-        exporter = JsonLinesItemExporter(serialized_json)
-        exporter.export_item(item)
-        data = json.loads(to_unicode(serialized_json.getvalue(), exporter.encoding))
-        serialized_json.close()
-        return data
+        data = ItemAdapter(item)
+        return data.asdict()

     def _add_errors_to_item(self, item, errors):
-        try:
-            if self.errors_field not in item.__class__.fields:
-                item.__class__.fields[self.errors_field] = Field()
-            if self.errors_field not in item._values:
-                item[self.errors_field] = defaultdict(list)
-        except AttributeError:
-            # The item is just a dict object instead of a Scrapy.Item object
-            if self.errors_field not in item:
-                item[self.errors_field] = defaultdict(list)
+        data = ItemAdapter(item)
+        if self.errors_field not in data.keys():
+            item[self.errors_field] = defaultdict(list)
+
         for field_name, messages in errors.items():
-            item[self.errors_field][field_name] += messages
+            data[self.errors_field][field_name] += messages

     def _drop_item(self, item, errors):
         """
