diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 791d698ad..b061f8e77 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -39,6 +39,7 @@ jobs: # --ignore-pull-failures for PRs with new images that haven't been pushed yet: docker-compose -f docker-compose.yml pull --ignore-pull-failures || true docker-compose -f docker-compose.yml up -d # use -f to suppress docker-compose.override.yml + docker-compose up -d scoop-db scoop-rest-api # start up the Scoop API separately bash make_cert.sh # install SSL certs and restart the wacz-exhibitor and minio containers docker ps -a # show running containers docker-compose logs # show logs diff --git a/docker-compose.override.yml b/docker-compose.override.yml index 3081c6c7c..6753b5909 100644 --- a/docker-compose.override.yml +++ b/docker-compose.override.yml @@ -49,7 +49,7 @@ services: - scoop_rest_api_internal scoop-rest-api: - image: registry.lil.tools/harvardlil/scoop-rest-api:15-36faf86818fcb2094e47cfe37c443515 + image: registry.lil.tools/harvardlil/scoop-rest-api:33-966e4755f4a6abe5d88b8fd9f0405152 init: true tty: true depends_on: diff --git a/perma_web/api/tests/test_link_resource.py b/perma_web/api/tests/test_link_resource.py index 67a95e69a..222beb254 100644 --- a/perma_web/api/tests/test_link_resource.py +++ b/perma_web/api/tests/test_link_resource.py @@ -826,7 +826,7 @@ def test_scoop_capture_hung(self, mockrequest): "scoop_capture_summary": None}, 200 )) - with self.assertLogs('celery.django', level='ERROR') as logs: + with self.assertLogs('celery.django', level='WARNING') as logs: obj = self.successful_post(self.list_url, data={ 'url': self.server_url + "/test.html" @@ -836,5 +836,6 @@ def test_scoop_capture_hung(self, mockrequest): self.assertEqual(link.primary_capture.status, 'failed') self.assertEqual(link.capture_job.status, 'failed') + self.assertTrue('scoop-silent-failure' in [tag.name for tag in link.tags.all()]) log_string = " ".join(logs.output) - 
self.assertTrue(log_string.endswith("'scoop_capture_summary': None}")) + self.assertTrue("Scoop failed without logs" in log_string) diff --git a/perma_web/perma/settings/deployments/settings_common.py b/perma_web/perma/settings/deployments/settings_common.py index 5035afd02..ee8d193e8 100644 --- a/perma_web/perma/settings/deployments/settings_common.py +++ b/perma_web/perma/settings/deployments/settings_common.py @@ -513,7 +513,7 @@ # # Capture # -CAPTURE_ENGINE = 'perma' # perma|scoop-api +CAPTURE_ENGINE = 'scoop-api' # perma|scoop-api PRIVATE_BY_POLICY_DOMAINS = [] # diff --git a/services/docker/scoop-rest-api/config.py b/services/docker/scoop-rest-api/config.py index 19fe7ea05..6e7714792 100644 --- a/services/docker/scoop-rest-api/config.py +++ b/services/docker/scoop-rest-api/config.py @@ -1,5 +1,5 @@ -# This is the default config.py from the Scoop REST API as of 9/6/2023 -# https://github.com/harvard-lil/scoop-rest-api/blob/31c541432fd8e31a04c6e0c5667beb28decfc3ec/scoop_rest_api/config.py +# This is the default config.py from the Scoop REST API as of 11/1/2023 +# https://github.com/harvard-lil/scoop-rest-api/blob/26dfc224aafabb53b4af5a44ef9b29cd79d1de82/scoop_rest_api/config.py # We only use it to override the blocklist: we disable it to allow the capturing of # localhost in our test suite. @@ -12,15 +12,6 @@ load_dotenv() -# -# Temporary -# -SCREEN_API_TOKEN = "" -""" (Temporary) screen-api.lil.tools token. 
""" - -if "SCREEN_API_TOKEN" in os.environ: - SCREEN_API_TOKEN = os.environ["SCREEN_API_TOKEN"] - # # Security settings # @@ -134,9 +125,9 @@ "--capture-certificates-as-attachment": "false", "--provenance-summary": "true", "--attachments-bypass-limits": "true", - "--capture-timeout": 40 * 1000, - "--load-timeout": 15 * 1000, - "--network-idle-timeout": 15 * 1000, + "--capture-timeout": 45 * 1000, + "--load-timeout": 20 * 1000, + "--network-idle-timeout": 20 * 1000, "--behaviors-timeout": 15 * 1000, "--capture-video-as-attachment-timeout": 20 * 1000, "--capture-certificates-as-attachment-timeout": 10 * 1000, @@ -151,7 +142,7 @@ # "--user-agent-suffix": "", # "--blocklist": "/https?:\/\/localhost/,0.0.0.0/8,10.0.0.0/8,100.64.0.0/10,127.0.0.0/8,169.254.0.0/16,172.16.0.0/12,192.0.0.0/29,192.0.2.0/24,192.88.99.0/24,192.168.0.0/16,198.18.0.0/15,198.51.100.0/24,203.0.113.0/24,224.0.0.0/4,240.0.0.0/4,255.255.255.255/32,::/128,::1/128,::ffff:0:0/96,100::/64,64:ff9b::/96,2001::/32,2001:10::/28,2001:db8::/32,2002::/16,fc00::/7,fe80::/10,ff00::/8", # noqa "--blocklist": "", - "--public-ip-resolver-endpoint": "https://myip.lil.tools", + "--public-ip-resolver-endpoint": "https://icanhazip.com", } """ Options passed to the Scoop CLI during capture. @@ -160,3 +151,6 @@ Options which cannot be set at config level are listed here: - utils.config_check.EXCLUDED_SCOOP_CLI_OPTIONS """ + +SCOOP_TIMEOUT_FUSE = 30 +""" Number of seconds to wait before "killing" a Scoop process after capture timeout. """