2025-03-08 01:34:36 +08:00

71 lines
2.3 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
系统全局配置文件
"""
import os
import platform
from pathlib import Path
# Project root directory: two levels up from this file, i.e. the parent of
# the directory that contains it. Deriving the root from __file__ instead of
# hard-coding a path is mainly meant to make the project easy to move to and
# run on different platforms.
ROOT_DIR = Path(os.path.abspath(__file__)).parent.parent
# Data-related paths, all rooted at ROOT_DIR / "data".
DATA_DIR = ROOT_DIR / "data"
RAW_DATA_DIR = DATA_DIR / "raw" / "THUCNews"
PROCESSED_DATA_DIR = DATA_DIR / "processed"
RESOURCES_DIR = DATA_DIR / "resources"
STOPWORDS_DIR = RESOURCES_DIR / "stopwords"
EMBEDDINGS_DIR = RESOURCES_DIR / "embeddings"

# Make sure the required directories exist. This runs at import time.
# RAW_DATA_DIR is not in the list, so it is never created here.
for directory in [PROCESSED_DATA_DIR, RESOURCES_DIR, STOPWORDS_DIR, EMBEDDINGS_DIR]:
    # parents=True creates missing ancestors; exist_ok=True makes re-imports safe.
    directory.mkdir(parents=True, exist_ok=True)
# Paths under which trained artifacts are saved.
SAVED_MODELS_DIR = ROOT_DIR / "saved_models"
TOKENIZERS_DIR = SAVED_MODELS_DIR / "tokenizers"
CLASSIFIERS_DIR = SAVED_MODELS_DIR / "classifiers"

# Make sure the model output directories exist. This runs at import time.
for directory in [SAVED_MODELS_DIR, TOKENIZERS_DIR, CLASSIFIERS_DIR]:
    # parents=True creates missing ancestors; exist_ok=True makes re-imports safe.
    directory.mkdir(parents=True, exist_ok=True)
# System resource configuration.
# os.cpu_count() returns None when the count cannot be determined; fall back
# to 1 so the min() calls below never compare an int against None.
CPU_COUNT = os.cpu_count() or 1
USE_GPU = True
MULTI_GPU = False  # Only a single GPU is used for now.

# Parallel-processing parameters, chosen with a 13900K in mind and capped at
# the number of CPUs actually reported on the current machine.
DATA_LOADING_WORKERS = min(16, CPU_COUNT)  # Number of data-loading workers.
PREPROCESSING_WORKERS = min(24, CPU_COUNT)  # Number of preprocessing workers (the 13900K has strong multithreading).

# Memory-related parameters, chosen with 64 GB of RAM in mind.
MAX_MEMORY_GB = 48  # Leave part of the memory for the OS and other applications.
MAX_TEXT_PER_BATCH = 10000  # Maximum number of texts processed per batch.
# Logging configuration.
LOG_DIR = ROOT_DIR / "logs"
# Created at import time. parents=True matches the other directory-creation
# calls in this file and is a no-op when ROOT_DIR already exists.
LOG_DIR.mkdir(parents=True, exist_ok=True)
LOG_LEVEL = "INFO"
LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
# Category label mapping, consistent with the THUCNews dataset.
# A label's position in this list is its integer id.
CATEGORIES = [
    "体育",
    "娱乐",
    "家居",
    "彩票",
    "房产",
    "教育",
    "时尚",
    "时政",
    "星座",
    "游戏",
    "社会",
    "科技",
    "股票",
    "财经",
]
# id -> label, then the inverse label -> id built from it.
ID_TO_CATEGORY = dict(enumerate(CATEGORIES))
CATEGORY_TO_ID = {label: index for index, label in ID_TO_CATEGORY.items()}
# File encoding.
ENCODING = "utf-8"

# System information, captured once at import time.
# Each key is also the name of the zero-argument `platform` function that
# supplies its value, so the dict is built by calling them in order.
SYSTEM_INFO = {
    field: getattr(platform, field)()
    for field in ("platform", "python_version", "processor")
}