import abc
import inspect
from contextlib import suppress
from functools import wraps
from typing import Any, Generic, Optional, TypeVar, overload
import attr
import parsel
from web_poet.fields import FieldsMixin, item_from_fields
from web_poet.mixins import ResponseShortcutsMixin, SelectorShortcutsMixin
from web_poet.page_inputs import HttpResponse
from web_poet.utils import (
CallableT,
_create_deprecated_class,
cached_method,
get_generic_param,
)
[docs]class Injectable(abc.ABC, FieldsMixin):
"""Base Page Object class, which all Page Objects should inherit from
(probably through Injectable subclasses).
Frameworks which are using ``web-poet`` Page Objects should use
:func:`is_injectable` function to detect if an object is an Injectable,
and if an object is injectable, allow building it automatically
through dependency injection, using https://github.com/scrapinghub/andi
library.
Instead of inheriting you can also use ``Injectable.register(MyWebPage)``.
``Injectable.register`` can also be used as a decorator.
"""
pass
# NoneType is considered as injectable. Required for Optionals to work.
Injectable.register(type(None))
[docs]def is_injectable(cls: Any) -> bool:
"""Return True if ``cls`` is a class which inherits
from :class:`~.Injectable`."""
return isinstance(cls, type) and issubclass(cls, Injectable)
ItemT = TypeVar("ItemT")
[docs]class Returns(Generic[ItemT]):
"""Inherit from this generic mixin to change the item class used by
:class:`~.ItemPage`"""
@property
def item_cls(self) -> type:
"""Item class"""
return get_item_cls(self.__class__, default=dict)
@overload
def get_item_cls(cls: type, default: type) -> type: ...
@overload
def get_item_cls(cls: type, default: None) -> Optional[type]: ...
def get_item_cls(cls: type, default: Optional[type] = None) -> Optional[type]:
param = get_generic_param(cls, Returns)
return param or default
_NOT_SET = object()
def validates_input(to_item: CallableT) -> CallableT:
"""Decorator to apply input validation to custom to_item method
implementations in :class:`~web_poet.pages.ItemPage` subclasses."""
if inspect.iscoroutinefunction(to_item):
@wraps(to_item)
async def _to_item(self, *args, **kwargs):
validation_item = self._validate_input()
if validation_item is not None:
return validation_item
return await to_item(self, *args, **kwargs)
else:
@wraps(to_item)
def _to_item(self, *args, **kwargs):
validation_item = self._validate_input()
if validation_item is not None:
return validation_item
return to_item(self, *args, **kwargs)
return _to_item # type: ignore[return-value]
[docs]class ItemPage(Extractor[ItemT], Injectable):
"""Base class for page objects."""
@cached_method
def _validate_input(self) -> None:
"""Run self.validate_input if defined."""
if not hasattr(self, "validate_input"):
return
with suppress(AttributeError):
if self.__validating_input:
# We are in a recursive call, i.e. _validate_input is being
# called from _validate_input itself (likely through a @field
# method).
return
self.__validating_input: bool = True
validation_item = self.validate_input() # type: ignore[attr-defined]
self.__validating_input = False
return validation_item
[docs] @validates_input
async def to_item(self) -> ItemT:
"""Extract an item from a web page"""
return await super().to_item()
[docs]@attr.s(auto_attribs=True)
class WebPage(ItemPage[ItemT], ResponseShortcutsMixin):
"""Base Page Object which requires :class:`~.HttpResponse`
and provides XPath / CSS shortcuts.
"""
response: HttpResponse
ItemWebPage = _create_deprecated_class("ItemWebPage", WebPage, warn_once=False)