From ccb9e3ee2d4ffde1bb33c6c0df0db87aff3341bf Mon Sep 17 00:00:00 2001 From: Predrag Gruevski <2348618+obi1kenobi@users.noreply.github.com> Date: Wed, 6 Sep 2023 11:15:28 -0400 Subject: [PATCH] Install dev, lint, test, typing extra deps for linting steps. (#10249) `mypy` cannot type-check code that relies on dependencies that aren't installed. Eventually we'll probably want to install as many optional dependencies as possible. However, the full "extended deps" setup for langchain creates a 3GB cache file and takes a while to unpack and install. We'll probably want something a bit more targeted. This is a first step toward something better. --- .github/workflows/_lint.yml | 12 ++++++++++-- .../langchain/document_loaders/url_playwright.py | 10 +++++++++- .../document_loaders/test_url_playwright.py | 4 +++- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/.github/workflows/_lint.yml b/.github/workflows/_lint.yml index 1a01b225a00aa..64169ce0be023 100644 --- a/.github/workflows/_lint.yml +++ b/.github/workflows/_lint.yml @@ -87,7 +87,7 @@ jobs: python-version: ${{ matrix.python-version }} poetry-version: ${{ env.POETRY_VERSION }} working-directory: ${{ inputs.working-directory }} - cache-key: lint + cache-key: lint-with-extras - name: Check Poetry File shell: bash @@ -102,9 +102,17 @@ jobs: poetry lock --check - name: Install dependencies + # Also installs dev/lint/test/typing dependencies, to ensure we have + # type hints for as many of our libraries as possible. + # This helps catch errors that require dependencies to be spotted, for example: + # https://github.com/langchain-ai/langchain/pull/10249/files#diff-935185cd488d015f026dcd9e19616ff62863e8cde8c0bee70318d3ccbca98341 + # + # If you change this configuration, make sure to change the `cache-key` + # in the `poetry_setup` action above to stop using the old cache. + # It doesn't matter how you change it, any change will cause a cache-bust. working-directory: ${{ inputs.working-directory }} run: | - poetry install + poetry install --with dev,lint,test,typing - name: Install langchain editable working-directory: ${{ inputs.working-directory }} diff --git a/libs/langchain/langchain/document_loaders/url_playwright.py b/libs/langchain/langchain/document_loaders/url_playwright.py index 16f5b00fd3c50..7aa60cf18839e 100644 --- a/libs/langchain/langchain/document_loaders/url_playwright.py +++ b/libs/langchain/langchain/document_loaders/url_playwright.py @@ -8,7 +8,9 @@ from langchain.document_loaders.base import BaseLoader if TYPE_CHECKING: - from playwright.async_api import AsyncBrowser, AsyncPage, AsyncResponse + from playwright.async_api import Browser as AsyncBrowser + from playwright.async_api import Page as AsyncPage + from playwright.async_api import Response as AsyncResponse from playwright.sync_api import Browser, Page, Response @@ -155,6 +157,9 @@ def load(self) -> List[Document]: try: page = browser.new_page() response = page.goto(url) + if response is None: + raise ValueError(f"page.goto() returned None for url {url}") + text = self.evaluator.evaluate(page, browser, response) metadata = {"source": url} docs.append(Document(page_content=text, metadata=metadata)) @@ -185,6 +190,9 @@ async def aload(self) -> List[Document]: try: page = await browser.new_page() response = await page.goto(url) + if response is None: + raise ValueError(f"page.goto() returned None for url {url}") + text = await self.evaluator.evaluate_async(page, browser, response) metadata = {"source": url} docs.append(Document(page_content=text, metadata=metadata)) diff --git a/libs/langchain/tests/integration_tests/document_loaders/test_url_playwright.py b/libs/langchain/tests/integration_tests/document_loaders/test_url_playwright.py index 7bea1c6dee761..eb53682d75bad 100644 --- a/libs/langchain/tests/integration_tests/document_loaders/test_url_playwright.py +++ b/libs/langchain/tests/integration_tests/document_loaders/test_url_playwright.py @@ -7,7 +7,9 @@ from langchain.document_loaders.url_playwright import PlaywrightEvaluator if TYPE_CHECKING: - from playwright.async_api import AsyncBrowser, AsyncPage, AsyncResponse + from playwright.async_api import Browser as AsyncBrowser + from playwright.async_api import Page as AsyncPage + from playwright.async_api import Response as AsyncResponse from playwright.sync_api import Browser, Page, Response