101 lines
2.6 KiB
Python
101 lines
2.6 KiB
Python
"""
|
|
工具函数模块
|
|
提供通用的辅助功能
|
|
"""
|
|
|
|
import os
|
|
import hashlib
|
|
import requests
|
|
from urllib.parse import urljoin
|
|
from .config import HEADERS
|
|
|
|
|
|
def ensure_dir(path: str) -> None:
    """Make sure a directory exists, creating it (and any parents) if needed.

    Args:
        path: Directory path. An empty string denotes the current working
            directory and is accepted as a no-op.

    Raises:
        OSError: If the directory cannot be created, e.g. ``path`` already
            exists as a regular file.
    """
    # os.makedirs("") raises FileNotFoundError, yet "" is what
    # os.path.dirname() yields for a bare file name and it already exists.
    if not path:
        return
    os.makedirs(path, exist_ok=True)
|
|
|
|
|
|
def get_file_hash(url: str) -> str:
    """Derive a short, deterministic file-name stem from a URL.

    Args:
        url: The URL to fingerprint.

    Returns:
        The first 12 hexadecimal characters of the MD5 digest of the
        UTF-8 encoded URL.
    """
    # MD5 is used purely as a cache key, not for security; saying so keeps
    # the call working on FIPS-restricted Python builds.
    digest = hashlib.md5(url.encode("utf-8"), usedforsecurity=False)
    return digest.hexdigest()[:12]
|
|
|
|
|
|
def get_file_extension(url: str) -> str:
    """Return the lower-cased image extension found in a URL.

    Args:
        url: Image URL; may carry a query string and/or a fragment.

    Returns:
        One of the recognised image extensions (including the leading dot),
        or ``'.jpg'`` when the URL has no extension or an unrecognised one.
    """
    known_extensions = ('.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.svg')

    # Drop the fragment first (a fragment may itself contain '?'), then the
    # query string, so that "a.png#top" and "a.png?x=1" both resolve to ".png".
    clean_url = url.split('#', 1)[0].split('?', 1)[0]
    ext = os.path.splitext(clean_url)[1].lower()

    # Fall back to the default extension for anything unrecognised.
    return ext if ext in known_extensions else '.jpg'
|
|
|
|
|
|
def download_image(img_url: str, save_dir: str, timeout: int = 15) -> str | None:
    """
    Download an image into a local directory.

    The local file name is built from get_file_hash(img_url) plus
    get_file_extension(img_url), so a URL that was already fetched is
    served from disk without a new request.

    Args:
        img_url: URL of the image.
        save_dir: Directory to save into (created if missing).
        timeout: Timeout passed to requests.get.

    Returns:
        Path of the local file, or None when the download fails.
    """
    # Best-effort helper: every failure is reported on stdout and turned
    # into a None return, so the broad handler below is deliberate.
    try:
        ensure_dir(save_dir)

        local_filename = f"{get_file_hash(img_url)}{get_file_extension(img_url)}"
        local_path = os.path.join(save_dir, local_filename)

        # Already downloaded earlier: reuse the file on disk.
        if os.path.exists(local_path):
            return local_path

        response = requests.get(img_url, headers=HEADERS, timeout=timeout)
        if response.status_code != 200:
            print(f" 图片下载失败 ({response.status_code}): {img_url}")
            return None

        # Write to a temporary sibling and rename it into place. Writing
        # straight to local_path could leave a truncated file behind, which
        # the exists() check above would later treat as a finished download.
        partial_path = f"{local_path}.part"
        try:
            with open(partial_path, 'wb') as f:
                f.write(response.content)
            os.replace(partial_path, local_path)
        finally:
            # After a successful rename the temporary file is gone; this
            # only removes leftovers from a failed write.
            if os.path.exists(partial_path):
                os.remove(partial_path)

        return local_path
    except Exception as e:
        print(f" 图片下载出错: {img_url} - {e}")
        return None
|
|
|
|
|
|
def safe_filename(name: str, max_length: int = 50) -> str:
    """
    Build a file-system-safe file name from arbitrary text.

    Path separators, characters reserved on common file systems and
    line-break/tab characters are each replaced with an underscore. The
    result is trimmed of surrounding whitespace and cut to ``max_length``
    characters.

    Args:
        name: Original name.
        max_length: Maximum length of the result.

    Returns:
        The sanitised name. It may be empty when ``name`` contains nothing
        but whitespace.
    """
    # One translate() pass instead of a chain of replace() calls.
    unsafe_to_underscore = str.maketrans(dict.fromkeys('/\\:*?"<>|\n\r\t', '_'))
    name = name.translate(unsafe_to_underscore).strip()

    if len(name) > max_length:
        # Re-trim after cutting: the cut can land right after a space, and a
        # trailing space in a file name is unsafe on some platforms.
        name = name[:max_length].rstrip()

    return name
|
|
|
|
|
|
def make_absolute_url(base_url: str, relative_url: str) -> str:
    """Resolve ``relative_url`` against ``base_url`` and return the result.

    Resolution is delegated to ``urllib.parse.urljoin``.
    """
    absolute_url = urljoin(base_url, relative_url)
    return absolute_url
|
|
|