diff --git a/icrawler/builtin/google.py b/icrawler/builtin/google.py index dbb91a4..d6cd2b8 100644 --- a/icrawler/builtin/google.py +++ b/icrawler/builtin/google.py @@ -161,13 +161,12 @@ def parse(self, response): # uris = [img[1][3][0] for img in data if img[0] == 1] uris = re.findall(r"http[^\[]*?.(?:jpg|png|bmp)", txt) + if not uris: + uris = re.findall(r"http[^\[]*?\.(?:jpg|png|bmp)", txt) uris = [bytes(uri, "utf-8").decode("unicode-escape") for uri in uris] if uris: return [{"file_url": uri} for uri in uris] - uris = re.findall(r"http[^\[]*?\.(?:jpg|png|bmp)", txt) - return [{"file_url": uri} for uri in uris] - class GoogleImageCrawler(Crawler): def __init__(