Just got a ton of these errors on an Amazon bestseller crawl. I believe what is happening is that we unexpectedly got a bunch of empty 200 responses, and the validator in the log below crashes trying to evaluate text[-0] when text = '' (a minimal reproduction and a possible guard are sketched after the log).
2023-09-19 12:01:13 [scrapy.utils.signal] ERROR: Error caught on signal handler: <bound method ScrapeOpsMonitor.log_response_middleware of <scrapeops_scrapy.extension.ScrapeOpsMonitor object at 0x7eff870fcb50>>
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 1660, in _inlineCallbacks
result = current_context.run(gen.send, result)
StopIteration: <200 https://www.amazon.com/acp/p13n-zg-list-grid-desktop/p13n-zg-list-grid-desktop-04a0353d-de90-433c-baae-b0489d0167eb-1693478974679/nextPage>
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.10/dist-packages/scrapy/utils/signal.py", line 30, in send_catch_log
response = robustApply(receiver, signal=signal, sender=sender, *arguments, **named)
File "/usr/local/lib/python3.10/dist-packages/pydispatch/robustapply.py", line 55, in robustApply
return receiver(*arguments, **named)
File "/usr/local/lib/python3.10/dist-packages/scrapeops_scrapy/extension.py", line 70, in log_response_middleware
self.response_stats(request=request, response=response)
File "/usr/local/lib/python3.10/dist-packages/scrapeops_scrapy/core/core.py", line 55, in response_stats
self.request_response_middleware.process(request_response_object, response)
File "/usr/local/lib/python3.10/dist-packages/scrapeops_scrapy/normalizer/middleware.py", line 33, in process
self.validate_response_data(request_response_object, response)
File "/usr/local/lib/python3.10/dist-packages/scrapeops_scrapy/normalizer/middleware.py", line 163, in validate_response_data
ResponseValidator.validate(request_response_object, response, domain_tests=domain_tests, generic_tests=self._generic_validators)
File "/usr/local/lib/python3.10/dist-packages/scrapeops_scrapy/validators/response_validator.py", line 15, in validate
if ResponseValidator.run_validation_test(request_response_object, response, test.get('validation_tests', [])) is False:
File "/usr/local/lib/python3.10/dist-packages/scrapeops_scrapy/validators/response_validator.py", line 45, in run_validation_test
if ResponseValidator.string_check(ResponseValidator.get_response_text(request_response_object, response), test.get('text_check', ''), test.get('comparison_type'), text_slice=test.get('text_slice')):
File "/usr/local/lib/python3.10/dist-packages/scrapeops_scrapy/validators/response_validator.py", line 102, in string_check
text = ResponseValidator.string_slice(text, text_slice)
File "/usr/local/lib/python3.10/dist-packages/scrapeops_scrapy/validators/response_validator.py", line 116, in string_slice
return text[-text_slice.get('slice_lower_threshold', 0)]
IndexError: string index out of range
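For what it's worth, here is a minimal reproduction of the failing index plus one possible guard for string_slice. The text_slice dict is a made-up stand-in for the validator's test config, and the guard is only a suggestion (assuming an empty or too-short body should fall back to the raw text rather than raise), not the library's actual fix.

```python
# Minimal reproduction: with an empty 200 response body, -0 is just 0,
# and indexing position 0 of an empty string raises IndexError.
text = ''                                    # empty response body
text_slice = {'slice_lower_threshold': 0}    # made-up validator test config
try:
    text[-text_slice.get('slice_lower_threshold', 0)]
except IndexError as exc:
    print(exc)                               # -> string index out of range

# One possible guard (a sketch, not the library's actual fix): fall back to
# the raw text when it is empty or shorter than the configured threshold.
def string_slice(text, text_slice):
    threshold = text_slice.get('slice_lower_threshold', 0)
    if not text or threshold > len(text):
        return text
    return text[-threshold]

print(repr(string_slice('', {'slice_lower_threshold': 0})))   # -> '' instead of crashing
```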