Skip to content

Commit

Permalink
更新项目代码
Browse files Browse the repository at this point in the history
  • Loading branch information
JoeanAmier committed Dec 9, 2023
1 parent 413168f commit a1d2961
Show file tree
Hide file tree
Showing 13 changed files with 185 additions and 82 deletions.
38 changes: 24 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
</ul>
<h1>📸 程序截图</h1>
<br>
<img src="static/程序运行截图.png" alt="">
<img src="static/程序运行截图1.png" alt="">
<hr>
<img src="static/程序运行截图2.png" alt="">
<h1>🔗 支持链接</h1>
<ul>
<li><code>https://www.xiaohongshu.com/explore/作品ID</code></li>
Expand Down Expand Up @@ -50,26 +52,28 @@ image_demo = "https://www.xiaohongshu.com/explore/63b275a30000000019020185"
video_demo = "https://www.xiaohongshu.com/explore/64edb460000000001f03cadc"
multiple_demo = f"{image_demo} {video_demo}"
# 实例对象
path = "" # 作品下载储存根路径,默认值:当前路径
folder = "Download" # 作品下载文件夹名称(自动创建),默认值:Download
path = "" # 作品数据/文件保存根路径,默认值:项目根路径
folder_name = "Download" # 作品文件储存文件夹名称(自动创建),默认值:Download
user_agent = "" # 请求头 User-Agent
proxy = None # 网络代理
timeout = 5 # 网络请求超时限制,默认值:10
chunk = 1024 * 1024 # 下载文件时,每次从服务器获取的数据块大小,单位字节
proxy = "" # 网络代理
timeout = 5 # 网络请求超时限制,单位:秒,默认值:10
chunk = 1024 * 1024 # 下载文件时,每次从服务器获取的数据块大小,单位:字节
max_retry = 2 # 请求数据失败时,重试的最大次数,单位:次,默认值:5
# async with XHS() as xhs:
# pass # 使用默认参数
async with XHS(path=path,
folder=folder,
folder_name=folder_name,
user_agent=user_agent,
proxy=proxy,
timeout=timeout,
chunk=chunk) as xhs: # 使用自定义参数
chunk=chunk,
max_retry=max_retry, ) as xhs: # 使用自定义参数
download = True # 是否下载作品文件,默认值:False
# 返回作品详细信息,包括下载地址
print(await xhs.extract(error_demo, download=download)) # 获取数据失败时返回空字典
print(await xhs.extract(image_demo, download=download))
print(await xhs.extract(video_demo, download=download))
print(await xhs.extract(multiple_demo, download=download)) # 支持传入多个作品链接
print(await xhs.extract(error_demo, download)) # 获取数据失败时返回空字典
print(await xhs.extract(image_demo, download))
print(await xhs.extract(video_demo, download))
print(await xhs.extract(multiple_demo, download)) # 支持传入多个作品链接
</pre>
<h1>⚙️ 配置文件</h1>
<p>项目根目录下的 <code>settings.json</code> 文件,首次运行自动生成,可以自定义部分运行参数。</p>
Expand All @@ -90,7 +94,7 @@ async with XHS(path=path,
<td align="center">项目根路径</td>
</tr>
<tr>
<td align="center">folder</td>
<td align="center">folder_name</td>
<td align="center">str</td>
<td align="center">作品文件储存文件夹名称</td>
<td align="center">Download</td>
Expand All @@ -99,7 +103,7 @@ async with XHS(path=path,
<td align="center">user_agent</td>
<td align="center">str</td>
<td align="center">请求头 User-Agent</td>
<td align="center">内置 UA</td>
<td align="center">默认 UA</td>
</tr>
<tr>
<td align="center">proxy</td>
Expand All @@ -119,6 +123,12 @@ async with XHS(path=path,
<td align="center">下载文件时,每次从服务器获取的数据块大小,单位:字节</td>
<td align="center">1048576(1 MB)</td>
</tr>
<tr>
<td align="center">max_retry</td>
<td align="center">int</td>
<td align="center">请求数据失败时,重试的最大次数,单位:次</td>
<td align="center">5</td>
</tr>
</tbody>
</table>
<h1>♥️ 支持项目</h1>
Expand Down
35 changes: 21 additions & 14 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from asyncio import run

from source import XHS
from source import XHSDownloader


async def example():
Expand All @@ -11,29 +12,35 @@ async def example():
video_demo = "https://www.xiaohongshu.com/explore/64edb460000000001f03cadc"
multiple_demo = f"{image_demo} {video_demo}"
# 实例对象
path = "" # 作品下载储存根路径,默认值:当前路径
folder = "Download" # 作品下载文件夹名称(自动创建),默认值:Download
path = "" # 作品数据/文件保存根路径,默认值:项目根路径
folder_name = "Download" # 作品文件储存文件夹名称(自动创建),默认值:Download
user_agent = "" # 请求头 User-Agent
proxy = None # 网络代理
timeout = 5 # 网络请求超时限制,默认值:10
chunk = 1024 * 1024 # 下载文件时,每次从服务器获取的数据块大小,单位字节
proxy = "" # 网络代理
timeout = 5 # 网络请求超时限制,单位:秒,默认值:10
chunk = 1024 * 1024 # 下载文件时,每次从服务器获取的数据块大小,单位:字节
max_retry = 2 # 请求数据失败时,重试的最大次数,单位:秒,默认值:5
# async with XHS() as xhs:
# pass # 使用默认参数
async with XHS(path=path,
folder=folder,
folder_name=folder_name,
user_agent=user_agent,
proxy=proxy,
timeout=timeout,
chunk=chunk) as xhs: # 使用自定义参数
chunk=chunk,
max_retry=max_retry, ) as xhs: # 使用自定义参数
download = True # 是否下载作品文件,默认值:False
# 返回作品详细信息,包括下载地址
print(await xhs.extract(error_demo, download=download)) # 获取数据失败时返回空字典
print(await xhs.extract(image_demo, download=download))
print(await xhs.extract(video_demo, download=download))
print(await xhs.extract(multiple_demo, download=download)) # 支持传入多个作品链接
print(await xhs.extract(error_demo, download)) # 获取数据失败时返回空字典
print(await xhs.extract(image_demo, download))
print(await xhs.extract(video_demo, download))
print(await xhs.extract(multiple_demo, download)) # 支持传入多个作品链接


async def main():
    """Open an ``XHSDownloader`` context and await its ``run_async`` coroutine."""
    async with XHSDownloader() as app:
        await app.run_async()


if __name__ == '__main__':
run(example())
# with XHSDownloader() as xhs:
# xhs.run()
# run(example())
run(main())
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
aiohttp>=3.9.0
textual>=0.40.0
pyperclip>=1.8.2
30 changes: 26 additions & 4 deletions source/Downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@

from aiohttp import ClientSession
from aiohttp import ClientTimeout
from aiohttp import ServerDisconnectedError
from aiohttp import ServerTimeoutError
from rich.text import Text

from .Html import retry

__all__ = ['Download']

Expand All @@ -15,9 +19,10 @@ def __init__(
root: Path,
path: str,
folder: str,
proxy: str = None,
proxy: str = "",
chunk=1024 * 1024,
timeout=10, ):
timeout=10,
retry_=5, ):
self.manager = manager
self.temp = manager.temp
self.root = self.__init_root(root, path, folder)
Expand All @@ -26,6 +31,7 @@ def __init__(
self.session = ClientSession(
headers=manager.headers,
timeout=ClientTimeout(connect=timeout))
self.retry = retry_

def __init_root(self, root: Path, path: str, folder: str) -> Path:
if path and (r := Path(path)).is_dir():
Expand All @@ -45,11 +51,13 @@ async def run(self, urls: list, name: str, type_: int, log, bar):
else:
raise ValueError

@retry
async def __download(self, url: str, name: str, log, bar):
temp = self.temp.joinpath(name)
file = self.root.joinpath(name)
if self.manager.is_exists(file):
return
self.rich_log(log, f"{name} 已存在,跳过下载")
return True
try:
async with self.session.get(url, proxy=self.proxy) as response:
self.__create_progress(
Expand All @@ -62,9 +70,16 @@ async def __download(self, url: str, name: str, log, bar):
self.__update_progress(bar, len(chunk))
self.manager.move(temp, file)
self.__create_progress(bar, None)
except ServerTimeoutError:
self.rich_log(log, f"{name} 下载成功")
return True
except (
ServerTimeoutError,
ServerDisconnectedError,
):
self.manager.delete(temp)
self.__create_progress(bar, None)
self.rich_log(log, f"{name} 下载失败", "bright_red")
return False

@staticmethod
def __create_progress(bar, total: int | None):
Expand All @@ -75,3 +90,10 @@ def __create_progress(bar, total: int | None):
def __update_progress(bar, advance: int):
if bar:
bar.advance(advance)

@staticmethod
def rich_log(log, text, style="bright_green"):
if log:
log.write(Text(text, style=f"b {style}"))
else:
print(text)
27 changes: 23 additions & 4 deletions source/Html.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,40 @@
from aiohttp import ClientSession
from aiohttp import ClientTimeout
from aiohttp import ServerDisconnectedError
from aiohttp import ServerTimeoutError

__all__ = ['Html']


def retry(function):
    """Decorate an async method so that a falsy result triggers extra attempts.

    The wrapped coroutine method is awaited once. If its result is falsy, it
    is awaited again up to ``self.retry`` more times. The first truthy result
    is returned immediately; if every attempt is falsy, the last result is
    returned.

    Args:
        function: Async method whose first positional argument is an instance
            exposing an integer ``retry`` attribute.

    Returns:
        An async wrapper that accepts the same arguments as ``function``.
    """
    # Imported locally so this block does not rely on a module-level import.
    from functools import wraps

    @wraps(function)  # preserve the wrapped method's __name__ / __doc__
    async def inner(self, *args, **kwargs):
        # Initial attempt; ``self.retry`` is only consulted if it fails.
        result = await function(self, *args, **kwargs)
        if result:
            return result
        for _ in range(self.retry):
            result = await function(self, *args, **kwargs)
            if result:
                break
        return result

    return inner


class Html:

def __init__(
self,
headers: dict,
proxy: str = None,
timeout=10, ):
proxy: str = "",
timeout=10,
retry_=5, ):
self.proxy = proxy
self.session = ClientSession(
headers=headers | {
"Referer": "https://www.xiaohongshu.com/", },
timeout=ClientTimeout(connect=timeout),
)
self.retry = retry_

@retry
async def request_url(
self,
url: str,
Expand All @@ -28,8 +44,11 @@ async def request_url(
url,
proxy=self.proxy,
) as response:
return await response.text() if text else response.url
except ServerTimeoutError:
return await response.text() if text else str(response.url)
except (
ServerTimeoutError,
ServerDisconnectedError,
):
return ""

@staticmethod
Expand Down
5 changes: 3 additions & 2 deletions source/Image.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,14 @@ def __generate_image_link(token: str) -> str:
return f"https://ci.xiaohongshu.com/{token}?imageView2/2/w/format/png"

def __extract_image_token(self, url: str) -> str:
return self.__generate_image_link(token.group(1)) if (token := self.IMAGE_TOKEN.search(url)) else ""
return self.__generate_image_link(token.group(1)) if (
token := self.IMAGE_TOKEN.search(url)) else ""

def __extract_image_urls(self, data: list[dict]) -> list[str]:
urls = []
for i in data:
for j in i.get("infoList", []):
if j.get("imageScene", "").startswith("CRD_WM_"):
if j.get("imageScene", "") == "WB_DFT":
urls.append(self.__extract_image_token(j.get("url", "")))
break
return [i for i in urls if i]
3 changes: 2 additions & 1 deletion source/Manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@


class Manager:
def __init__(self, root: Path, ua: str):
def __init__(self, root: Path, ua: str, retry: int):
self.temp = root.joinpath("./temp")
self.headers = {
"User-Agent": ua or "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0", }
self.retry = retry

@staticmethod
def is_exists(path: Path) -> bool:
Expand Down
3 changes: 2 additions & 1 deletion source/Settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,12 @@
class Settings:
default = {
"path": "",
"folder": "Download",
"folder_name": "Download",
"user_agent": "",
"proxy": "",
"timeout": 10,
"chunk": 1024 * 1024,
"max_retry": 5,
}

def __init__(self, root: Path):
Expand Down
Loading

0 comments on commit a1d2961

Please sign in to comment.