Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Base Solver Classes #80

Merged
merged 6 commits into from
Mar 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 3 additions & 28 deletions playwright_recaptcha/recaptchav2/async_solver.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import asyncio
import base64
import functools
import os
import random
import re
from concurrent.futures import ThreadPoolExecutor
Expand All @@ -12,7 +11,7 @@
from typing import Any, BinaryIO, Dict, Iterable, List, Optional, Union

import speech_recognition
from playwright.async_api import APIResponse, Locator, Page, Response
from playwright.async_api import Locator, Page, Response
from pydub import AudioSegment
from tenacity import (
AsyncRetrying,
Expand All @@ -27,6 +26,7 @@
RecaptchaRateLimitError,
RecaptchaSolveError,
)
from .base_solver import BaseSolver
from .recaptcha_box import AsyncRecaptchaBox
from .translations import TRANSLATIONS

Expand Down Expand Up @@ -61,7 +61,7 @@ async def __aexit__(self, *args: Any) -> None:
await self._loop.run_in_executor(self._executor, self.__exit__, *args)


class AsyncSolver:
class AsyncSolver(BaseSolver[Page]):
"""
A class for solving reCAPTCHA v2 asynchronously with Playwright.

Expand All @@ -76,24 +76,6 @@ class AsyncSolver:
If None, the `CAPSOLVER_API_KEY` environment variable will be used.
"""

def __init__(
self, page: Page, *, attempts: int = 5, capsolver_api_key: Optional[str] = None
) -> None:
self._page = page
self._attempts = attempts
self._capsolver_api_key = capsolver_api_key or os.getenv("CAPSOLVER_API_KEY")

self._token: Optional[str] = None
self._payload_response: Union[APIResponse, Response, None] = None
self._page.on("response", self._response_callback)

def __repr__(self) -> str:
return (
f"AsyncSolver(page={self._page!r}, "
f"attempts={self._attempts!r}, "
f"capsolver_api_key={self._capsolver_api_key!r})"
)

async def __aenter__(self) -> AsyncSolver:
return self

Expand Down Expand Up @@ -547,13 +529,6 @@ async def _solve_audio_challenge(self, recaptcha_box: AsyncRecaptchaBox) -> None

await self._submit_audio_text(recaptcha_box, text)

def close(self) -> None:
"""Remove the response listener."""
try:
self._page.remove_listener("response", self._response_callback)
except KeyError:
pass

async def recaptcha_is_visible(self) -> bool:
"""
Check if a reCAPTCHA challenge or unchecked reCAPTCHA box is visible.
Expand Down
313 changes: 313 additions & 0 deletions playwright_recaptcha/recaptchav2/base_solver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,313 @@
import os
from abc import ABC, abstractmethod
from typing import Any, Dict, Generic, Iterable, Optional, TypeVar, Union

from playwright.async_api import APIResponse as AsyncAPIResponse
from playwright.async_api import Page as AsyncPage
from playwright.async_api import Response as AsyncResponse
from playwright.sync_api import APIResponse as SyncAPIResponse
from playwright.sync_api import Page as SyncPage
from playwright.sync_api import Response as SyncResponse

from .recaptcha_box import RecaptchaBox

# Type variable constrained to the async and sync Playwright ``Page`` classes,
# used to parameterise ``BaseSolver`` with the page flavour it operates on.
PageT = TypeVar("PageT", AsyncPage, SyncPage)
# Union aliases covering both the async and sync variant of each response type.
APIResponse = Union[AsyncAPIResponse, SyncAPIResponse]
Response = Union[AsyncResponse, SyncResponse]


class BaseSolver(ABC, Generic[PageT]):
    """
    Abstract foundation shared by the reCAPTCHA v2 solvers.

    The constructor stores the solver configuration and registers
    `_response_callback` as a "response" listener on the page, and `close`
    removes that listener again. Every other method is an abstract hook
    that a concrete solver has to implement.

    Parameters
    ----------
    page : PageT
        The Playwright page on which the reCAPTCHA is solved.
    attempts : int, optional
        How many solve attempts to make, by default 5.
    capsolver_api_key : Optional[str], optional
        The CapSolver API key, by default None.
        When falsy, the `CAPSOLVER_API_KEY` environment variable is read instead.
    """

    def __init__(
        self, page: PageT, *, attempts: int = 5, capsolver_api_key: Optional[str] = None
    ) -> None:
        # An explicitly passed key wins; any falsy value defers to the environment.
        api_key = capsolver_api_key or os.getenv("CAPSOLVER_API_KEY")

        self._page = page
        self._attempts = attempts
        self._capsolver_api_key = api_key

        # Solver state; nothing has been captured yet at construction time.
        self._token: Optional[str] = None
        self._payload_response: Union[APIResponse, Response, None] = None

        # Registered last so the page attribute is already in place.
        self._page.on("response", self._response_callback)

    def __repr__(self) -> str:
        class_name = self.__class__.__name__
        fields = ", ".join(
            (
                f"page={self._page!r}",
                f"attempts={self._attempts!r}",
                f"capsolver_api_key={self._capsolver_api_key!r}",
            )
        )
        return f"{class_name}({fields})"

    def close(self) -> None:
        """Detach the "response" listener that the constructor registered."""
        try:
            self._page.remove_listener("response", self._response_callback)
        except KeyError:
            # Best-effort cleanup: presumably raised when the listener is no
            # longer registered, in which case there is nothing left to do.
            return

    @staticmethod
    @abstractmethod
    def _get_task_object(recaptcha_box: RecaptchaBox) -> Optional[str]:
        """
        Look up the object ID for the reCAPTCHA image challenge task.

        Parameters
        ----------
        recaptcha_box : RecaptchaBox
            The reCAPTCHA box to inspect.

        Returns
        -------
        Optional[str]
            The ID of the task object, or None when the task object
            is not recognized.
        """

    @abstractmethod
    def _response_callback(self, response: Response) -> None:
        """
        Handle a page response to intercept payload and userverify responses.

        Parameters
        ----------
        response : Response
            The response received by the page.
        """

    @abstractmethod
    def _random_delay(self, short: bool = True) -> None:
        """
        Pause the browser for a random amount of time.

        Parameters
        ----------
        short : bool, optional
            Whether the pause should be a short one, by default True.
        """

    @abstractmethod
    def _get_capsolver_response(
        self, recaptcha_box: RecaptchaBox, image_data: bytes
    ) -> Optional[Dict[str, Any]]:
        """
        Fetch the CapSolver JSON response for an image.

        Parameters
        ----------
        recaptcha_box : RecaptchaBox
            The reCAPTCHA box the image belongs to.
        image_data : bytes
            The raw image data.

        Returns
        -------
        Optional[Dict[str, Any]]
            The JSON response from CapSolver, or None when the task object
            is not recognized.

        Raises
        ------
        CapSolverError
            If the CapSolver API returned an error.
        """

    @abstractmethod
    def _solve_tiles(self, recaptcha_box: RecaptchaBox, indexes: Iterable[int]) -> None:
        """
        Solve the tiles of the reCAPTCHA image challenge.

        Parameters
        ----------
        recaptcha_box : RecaptchaBox
            The reCAPTCHA box holding the tiles.
        indexes : Iterable[int]
            Indexes of the tiles that contain the task object.

        Raises
        ------
        CapSolverError
            If the CapSolver API returned an error.
        """

    @abstractmethod
    def _convert_audio_to_text(self, audio_url: str) -> Optional[str]:
        """
        Turn the reCAPTCHA audio into text.

        Parameters
        ----------
        audio_url : str
            The URL of the reCAPTCHA audio.

        Returns
        -------
        Optional[str]
            The text of the audio, or None when the audio could not
            be converted.
        """

    @abstractmethod
    def _click_checkbox(self, recaptcha_box: RecaptchaBox) -> None:
        """
        Click the checkbox of the reCAPTCHA.

        Parameters
        ----------
        recaptcha_box : RecaptchaBox
            The reCAPTCHA box whose checkbox is clicked.

        Raises
        ------
        RecaptchaRateLimitError
            If the reCAPTCHA rate limit has been exceeded.
        """

    @abstractmethod
    def _get_audio_url(self, recaptcha_box: RecaptchaBox) -> str:
        """
        Retrieve the URL of the reCAPTCHA audio.

        Parameters
        ----------
        recaptcha_box : RecaptchaBox
            The reCAPTCHA box to read the audio URL from.

        Returns
        -------
        str
            The URL of the reCAPTCHA audio.

        Raises
        ------
        RecaptchaRateLimitError
            If the reCAPTCHA rate limit has been exceeded.
        """

    @abstractmethod
    def _submit_audio_text(self, recaptcha_box: RecaptchaBox, text: str) -> None:
        """
        Submit the text of the reCAPTCHA audio.

        Parameters
        ----------
        recaptcha_box : RecaptchaBox
            The reCAPTCHA box to submit the text to.
        text : str
            The text of the reCAPTCHA audio.

        Raises
        ------
        RecaptchaRateLimitError
            If the reCAPTCHA rate limit has been exceeded.
        """

    @abstractmethod
    def _submit_tile_answers(self, recaptcha_box: RecaptchaBox) -> None:
        """
        Submit the tile answers of the reCAPTCHA image challenge.

        Parameters
        ----------
        recaptcha_box : RecaptchaBox
            The reCAPTCHA box whose tile answers are submitted.

        Raises
        ------
        RecaptchaRateLimitError
            If the reCAPTCHA rate limit has been exceeded.
        """

    @abstractmethod
    def _solve_image_challenge(self, recaptcha_box: RecaptchaBox) -> None:
        """
        Work through the reCAPTCHA image challenge.

        Parameters
        ----------
        recaptcha_box : RecaptchaBox
            The reCAPTCHA box presenting the image challenge.

        Raises
        ------
        CapSolverError
            If the CapSolver API returned an error.
        RecaptchaRateLimitError
            If the reCAPTCHA rate limit has been exceeded.
        """

    @abstractmethod
    def _solve_audio_challenge(self, recaptcha_box: RecaptchaBox) -> None:
        """
        Work through the reCAPTCHA audio challenge.

        Parameters
        ----------
        recaptcha_box : RecaptchaBox
            The reCAPTCHA box presenting the audio challenge.

        Raises
        ------
        RecaptchaRateLimitError
            If the reCAPTCHA rate limit has been exceeded.
        """

    @abstractmethod
    def recaptcha_is_visible(self) -> bool:
        """
        Tell whether a reCAPTCHA challenge or unchecked reCAPTCHA box is visible.

        Returns
        -------
        bool
            True when a reCAPTCHA challenge or an unchecked reCAPTCHA box
            is visible.
        """

    @abstractmethod
    def solve_recaptcha(
        self,
        *,
        attempts: Optional[int] = None,
        wait: bool = False,
        wait_timeout: float = 30,
        image_challenge: bool = False,
    ) -> str:
        """
        Solve the reCAPTCHA and hand back the `g-recaptcha-response` token.

        Parameters
        ----------
        attempts : Optional[int], optional
            The number of solve attempts, by default None.
            NOTE(review): presumably None falls back to the `attempts` value
            given to the constructor (default 5) - confirm in the subclasses.
        wait : bool, optional
            Whether to wait for the reCAPTCHA to appear, by default False.
        wait_timeout : float, optional
            How many seconds to wait for the reCAPTCHA to appear,
            by default 30. Only used if `wait` is True.
        image_challenge : bool, optional
            Whether to solve the image challenge, by default False.

        Returns
        -------
        str
            The `g-recaptcha-response` token.

        Raises
        ------
        CapSolverError
            If the CapSolver API returned an error.
        RecaptchaNotFoundError
            If the reCAPTCHA was not found.
        RecaptchaRateLimitError
            If the reCAPTCHA rate limit has been exceeded.
        RecaptchaSolveError
            If the reCAPTCHA could not be solved.
        """
Loading
Loading