Skip to content

Commit

Permalink
处理最新豆瓣封面下载失败的问题
Browse files Browse the repository at this point in the history
  • Loading branch information
fugary committed Jul 15, 2023
1 parent 7d62e86 commit 7b8a16e
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 10 deletions.
29 changes: 29 additions & 0 deletions src/NewDouban.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

from cps.search_metadata import meta
from flask import request, Response
from cps import helper

# Whether cover image URLs are automatically routed through the cover proxy.
DOUBAN_PROXY_COVER = True
Expand All @@ -22,6 +23,7 @@
# Path of the cover proxy route; the original cover URL travels in the
# `cover` query parameter.
DOUBAN_PROXY_COVER_PATH = 'metadata/douban_cover?cover='
# Douban site search endpoint.
DOUBAN_SEARCH_URL = "https://www.douban.com/search"
# Base URL of the Douban books site.
DOUBAN_BASE = "https://book.douban.com/"
# Substring used to recognise Douban cover image URLs.
DOUBAN_COVER_DOMAIN = 'doubanio.com'
# NOTE(review): presumably the search category id for books - confirm against
# the code that builds the search request.
DOUBAN_BOOK_CAT = "1001"
DOUBAN_BOOK_CACHE_SIZE = 500 # maximum number of cached entries
DOUBAN_CONCURRENCY_SIZE = 5 # number of concurrent queries
Expand All @@ -41,12 +43,35 @@ class NewDouban(Metadata):

def __init__(self):
    """
    Set up the provider.

    Creates the book searcher, installs the Douban cover-download patch on
    ``helper`` and then runs the base-class initialiser.
    """
    # The searcher performs the actual lookups for search().
    self.searcher = DoubanBookSearcher()
    # Replace helper.save_cover_from_url so Douban covers can be saved.
    self.hack_helper_cover()
    super().__init__()

def search(self, query: str, generic_cover: str = "", locale: str = "en"):
    """
    Look up books matching ``query`` through the Douban searcher.

    :param query: search text passed to the searcher
    :param generic_cover: accepted for interface compatibility; not used here
    :param locale: accepted for interface compatibility; not used here
    :return: the result of ``searcher.search_books(query)`` when the
        provider is active, otherwise ``None``
    """
    if not self.active:
        return None
    return self.searcher.search_books(query)

@staticmethod
def hack_helper_cover():
    """
    Monkey-patch ``helper.save_cover_from_url`` so Douban covers can be saved.

    After patching, URLs containing ``DOUBAN_COVER_DOMAIN`` are downloaded
    here with ``DEFAULT_HEADERS`` and handed to ``helper.save_cover``; every
    other URL is delegated to the function that was installed before.

    The patch is applied at most once: ``__init__`` calls this for every
    instance, and wrapping an already patched function again would only
    build a chain of nested wrappers.

    :return: None
    """
    save_cover = helper.save_cover_from_url
    if getattr(save_cover, '_douban_cover_patched', False):
        return

    def new_save_cover(url, book_path):
        # Empty or non-Douban URLs keep the original behaviour.
        if not url or DOUBAN_COVER_DOMAIN not in url:
            return save_cover(url, book_path)

        cover_url = url
        if DOUBAN_PROXY_COVER:
            # A proxied URL carries the real cover address in `cover`.
            query = urllib.parse.parse_qs(urllib.parse.urlparse(url).query)
            proxied = query.get('cover')
            # A direct doubanio.com link has no `cover` parameter; use the
            # URL as it is instead of failing on a missing key.
            if proxied:
                cover_url = urllib.parse.unquote(proxied[0])
        # (connect, read) timeout so a stalled download cannot hang forever.
        res = requests.get(cover_url, headers=DEFAULT_HEADERS, timeout=(10, 200))
        return helper.save_cover(res, book_path)

    # Marker checked above to keep the patch idempotent.
    new_save_cover._douban_cover_patched = True
    helper.save_cover_from_url = new_save_cover


@dataclasses.dataclass
class DoubanMetaRecord(MetaRecord):
Expand Down Expand Up @@ -232,6 +257,10 @@ def get_tail(self, element, default_str=''):

@meta.route("/metadata/douban_cover", methods=["GET"])
def proxy_douban_cover():
    """
    Proxy a Douban cover image.

    Reads the target address from the ``cover`` query parameter, fetches it
    with ``DEFAULT_HEADERS`` and relays the image bytes to the client.

    :return: the fetched content with the upstream content type, or a 400
        response when ``cover`` is missing or does not point to an http(s)
        URL on ``DOUBAN_COVER_DOMAIN``
    """
    raw_cover = request.args.get('cover')
    if not raw_cover:
        return Response("Missing 'cover' parameter", status=400)

    cover_url = urllib.parse.unquote(raw_cover)
    target = urllib.parse.urlparse(cover_url)
    host = target.hostname or ''
    on_cover_domain = (host == DOUBAN_COVER_DOMAIN
                       or host.endswith('.' + DOUBAN_COVER_DOMAIN))
    # The parameter is caller-controlled: only fetch Douban cover hosts so
    # the route cannot be used to request arbitrary addresses.
    if target.scheme not in ('http', 'https') or not on_cover_domain:
        return Response("Unsupported cover URL", status=400)

    # (connect, read) timeout so a stalled upstream cannot block the worker.
    res = requests.get(cover_url, headers=DEFAULT_HEADERS, timeout=(10, 200))
    # Upstream may omit Content-Type; fall back to a generic binary type.
    content_type = res.headers.get('Content-Type', 'application/octet-stream')
    return Response(res.content, mimetype=content_type)
14 changes: 4 additions & 10 deletions tests/NewDoubanTest.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,7 @@
import requests

from NewDouban import NewDouban

if __name__ == "__main__":
# douban = NewDouban()
# result = douban.search("知识考古学")
# for book in result:
# print(book)

res = requests.get('http://127.0.0.1:8083/metadata/douban_cover?cover=https%3A//img1.doubanio.com/view/subject/l/public/s29195878.jpg',
timeout=(10, 200), allow_redirects=False)
print(res)
douban = NewDouban()
result = douban.search("知识考古学")
for book in result:
print(book)
6 changes: 6 additions & 0 deletions tests/cps/helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
def save_cover_from_url(url, book_path):
    """Test stub: accept a cover URL and book path, do nothing, return None."""
    return None


def save_cover(img, book_path):
    """Test stub: accept an image object and book path, do nothing, return None."""
    return None

0 comments on commit 7b8a16e

Please sign in to comment.