""" 工具函数模块 提供通用的辅助功能 """ import os import hashlib import requests from urllib.parse import urljoin from .config import HEADERS def ensure_dir(path: str) -> None: """确保目录存在,不存在则创建""" os.makedirs(path, exist_ok=True) def get_file_hash(url: str) -> str: """根据URL生成唯一文件名哈希""" return hashlib.md5(url.encode()).hexdigest()[:12] def get_file_extension(url: str) -> str: """从URL获取文件扩展名""" # 移除查询参数 clean_url = url.split('?')[0] ext = os.path.splitext(clean_url)[1].lower() if ext not in ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.svg']: ext = '.jpg' # 默认扩展名 return ext def download_image(img_url: str, save_dir: str, timeout: int = 15) -> str | None: """ 下载图片到本地 Args: img_url: 图片URL save_dir: 保存目录 timeout: 超时时间 Returns: 本地文件路径,失败返回 None """ try: ensure_dir(save_dir) url_hash = get_file_hash(img_url) ext = get_file_extension(img_url) local_filename = f"{url_hash}{ext}" local_path = os.path.join(save_dir, local_filename) # 如果已下载过,直接返回路径 if os.path.exists(local_path): return local_path # 下载图片 response = requests.get(img_url, headers=HEADERS, timeout=timeout) if response.status_code == 200: with open(local_path, 'wb') as f: f.write(response.content) return local_path else: print(f" 图片下载失败 ({response.status_code}): {img_url}") return None except Exception as e: print(f" 图片下载出错: {img_url} - {e}") return None def safe_filename(name: str, max_length: int = 50) -> str: """ 生成安全的文件名 Args: name: 原始名称 max_length: 最大长度 Returns: 安全的文件名 """ # 移除或替换不安全字符 unsafe_chars = ['/', '\\', ':', '*', '?', '"', '<', '>', '|', '\n', '\r', '\t'] for char in unsafe_chars: name = name.replace(char, '_') # 去除首尾空格 name = name.strip() # 截断长度 if len(name) > max_length: name = name[:max_length] return name def make_absolute_url(base_url: str, relative_url: str) -> str: """将相对URL转为绝对URL""" return urljoin(base_url, relative_url)