Skip to content

Couldn't create custom Provider #86

@suspectinside

Description

@suspectinside

Hi, here is a minimal sample setup:

# ================= Providers pom/page_input_providers/providers.py
import logging
from collections.abc import Callable, Sequence
from typing import Set

from scrapy.settings import Settings
from scrapy_poet.page_input_providers import PageObjectInputProvider

logger = logging.getLogger()
logger.setLevel(logging.INFO)

class Arq:
	"""Stub task queue: ``enqueue_task`` only logs the task it receives."""

	async def enqueue_task(self, task: dict) -> None:
		# BUG FIX: the original signature omitted ``self``, so calling
		# ``arq_instance.enqueue_task({...})`` would raise
		# "TypeError: enqueue_task() takes 1 positional argument but 2 were given".
		logger.info('Arq.enqueue_task() enqueueing new task: %r', task)

class ArqProvider(PageObjectInputProvider):
	"""scrapy-poet input provider that injects an :class:`Arq` instance."""

	provided_classes = {Arq}
	name = 'ARQ_PROVIDER'

	async def __call__(self, to_provide: Set[Callable]) -> Sequence[Callable]:
		# BUG FIX (issue root cause): annotating ``to_provide`` with the builtin
		# generic ``set[Callable]`` produces a types.GenericAlias; andi.plan()
		# then calls issubclass() on it and startup fails with
		# "TypeError: issubclass() arg 1 must be a class".
		# ``typing.Set[Callable]`` is the form andi/scrapy-poet handle.
		return [Arq()]
# ================= Page Object Models
import attr
from web_poet.pages import Injectable, WebPage, ItemWebPage
from pom.page_input_providers.providers import Arq

@attr.define
class IndexPage(WebPage):
	"""Page object for the index page; needs an injected ``Arq`` instance."""

	arq: Arq

	@property
	async def page_titles(self):
		"""Enqueue a task, then return (href, text) pairs for the selected
		external reference links on the page."""
		await self.arq.enqueue_task({'bla': 'bla!'})

		titles = []
		for anchor in self.css('.selected a.reference.external'):
			titles.append((anchor.attrib['href'], anchor.css('::text').get()))
		return titles

The injectable dependency is `arq: Arq`; I'd like to work with the injected `Arq` instance here.

# ================= the Spider
import uvloop, asyncio, pprint, logging
import scrapy
from scrapy.utils.reactor import install_reactor
from scrapy.http import HtmlResponse
from pom.util import stop_logging, wait
from pom.poms.pages import IndexPage
from pom.page_input_providers.providers import ArqProvider

import web_poet as wp

from scrapy_poet.page_input_providers import HttpClientProvider, PageParamsProvider

# Silence noisy log output before Scrapy/Twisted start emitting records.
stop_logging()
# Install uvloop as the asyncio event loop, then tell Twisted to use the
# asyncio reactor on top of it. NOTE(review): install_reactor must run before
# the Twisted reactor is imported anywhere else — order matters here.
uvloop.install()
install_reactor('twisted.internet.asyncioreactor.AsyncioSelectorReactor', 'uvloop.Loop')

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# ================= Actual Spider Code:

 TitlesLocalSpider(scrapy.Spider):
    name = 'titles.local'
    start_urls = ['http://localhost:8080/orm/join_conditions.html']
    
    custom_settings = {
        'SCRAPY_POET_PROVIDERS': {
            ArqProvider: 500,    # MY PROVIDER FOR INJECTABLE arq: Arq
        },
    }

    @classmethod
    def from_crawler(cls, crawler, *args, **kwargs):
        stop_logging()
        logger.info('=' * 30)
        return super().from_crawler(crawler, *args, **kwargs)

    async def parse(self, response, index_page: IndexPage, **kwargs):
        self.logger.info(await index_page.page_titles)

And I get an error like this:

Unhandled error in Deferred:

Traceback (most recent call last):
  File "~/.venv/lib/python3.10/site-packages/scrapy/crawler.py", line 205, in crawl
    return self._crawl(crawler, *args, **kwargs)
  File "~/.venv/lib/python3.10/site-packages/scrapy/crawler.py", line 209, in _crawl
    d = crawler.crawl(*args, **kwargs)
  File "~/.venv/lib/python3.10/site-packages/twisted/internet/defer.py", line 1946, in unwindGenerator
    return _cancellableInlineCallbacks(gen)
  File "~/.venv/lib/python3.10/site-packages/twisted/internet/defer.py", line 1856, in _cancellableInlineCallbacks
    _inlineCallbacks(None, gen, status, _copy_context())
--- <exception caught here> ---
  File "~/.venv/lib/python3.10/site-packages/twisted/internet/defer.py", line 1696, in _inlineCallbacks
    result = context.run(gen.send, result)
  File "~/.venv/lib/python3.10/site-packages/scrapy/crawler.py", line 101, in crawl
    self.engine = self._create_engine()
  File "~/.venv/lib/python3.10/site-packages/scrapy/crawler.py", line 115, in _create_engine
    return ExecutionEngine(self, lambda _: self.stop())
  File "~/.venv/lib/python3.10/site-packages/scrapy/core/engine.py", line 83, in __init__
    self.downloader = downloader_cls(crawler)
  File "~/.venv/lib/python3.10/site-packages/scrapy/core/downloader/__init__.py", line 83, in __init__
    self.middleware = DownloaderMiddlewareManager.from_crawler(crawler)
  File "~/.venv/lib/python3.10/site-packages/scrapy/middleware.py", line 59, in from_crawler
    return cls.from_settings(crawler.settings, crawler)
  File "~/.venv/lib/python3.10/site-packages/scrapy/middleware.py", line 41, in from_settings
    mw = create_instance(mwcls, settings, crawler)
  File "~/.venv/lib/python3.10/site-packages/scrapy/utils/misc.py", line 166, in create_instance
    instance = objcls.from_crawler(crawler, *args, **kwargs)
  File "~/.venv/lib/python3.10/site-packages/scrapy_poet/downloadermiddlewares.py", line 62, in from_crawler
    o = cls(crawler)
  File "~/.venv/lib/python3.10/site-packages/scrapy_poet/downloadermiddlewares.py", line 52, in __init__
    self.injector = Injector(
  File "~/.venv/lib/python3.10/site-packages/scrapy_poet/injection.py", line 50, in __init__
    self.load_providers(default_providers)
  File "~/.venv/lib/python3.10/site-packages/scrapy_poet/injection.py", line 63, in load_providers
    self.is_provider_requiring_scrapy_response = {
  File "~/.venv/lib/python3.10/site-packages/scrapy_poet/injection.py", line 64, in <dictcomp>
    provider: is_provider_requiring_scrapy_response(provider)
  File "~/.venv/lib/python3.10/site-packages/scrapy_poet/injection.py", line 348, in is_provider_requiring_scrapy_response
    plan = andi.plan(
  File "~/.venv/lib/python3.10/site-packages/andi/andi.py", line 303, in plan
    plan, _ = _plan(class_or_func,
  File "~/.venv/lib/python3.10/site-packages/andi/andi.py", line 341, in _plan
    sel_cls, arg_overrides = _select_type(
  File "~/.venv/lib/python3.10/site-packages/andi/andi.py", line 395, in _select_type
    if is_injectable(candidate) or externally_provided(candidate):
  File "~/.venv/lib/python3.10/site-packages/web_poet/pages.py", line 34, in is_injectable
    return isinstance(cls, type) and issubclass(cls, Injectable)
  File "/usr/lib/python3.10/abc.py", line 123, in __subclasscheck__
    return _abc_subclasscheck(cls, subclass)
builtins.TypeError: issubclass() arg 1 must be a class

So, could you please explain why this error happens and how to fix it?

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions