CHM_attendance/all_file_output.py
superlishunqin e7fa4bc030 first commit
2025-06-11 19:56:34 +08:00

64 lines
2.4 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import sys
def _read_code_text(file_path):
    """Return the text of *file_path*, or a one-line error note if it cannot be read.

    The file is decoded as UTF-8 first; if that raises ``UnicodeDecodeError`` it
    is re-read as latin-1. Any other failure is turned into a message string so
    that one unreadable file does not abort the whole collection.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as code_file:
            return code_file.read()
    except UnicodeDecodeError:
        # Not valid UTF-8: retry with a different encoding.
        try:
            with open(file_path, 'r', encoding='latin-1') as code_file:
                return code_file.read()
        except Exception as e:  # best-effort: report the failure and keep going
            return f"无法读取文件内容: {str(e)}\n"
    except Exception as e:  # best-effort: report the failure and keep going
        return f"读取文件时出错: {str(e)}\n"


def collect_code_files(output_file: str = "code_collection.txt") -> None:
    """Concatenate all code files under the current directory into one text file.

    Walks ``'.'`` recursively, skipping a fixed set of environment/build/VCS
    directories, and for every file whose extension is in the code-extension
    set writes a separator header (``File: <path>``) followed by the file's
    text to *output_file*. Directories and files are visited in sorted order so
    the result is deterministic. The output file itself is never collected,
    even when it lives inside the walked tree and has a code extension.

    Args:
        output_file: Path of the text file to create (overwritten if present).

    Returns:
        None. A summary line with the number of collected files is printed.
    """
    # Extensions (with leading dot, compared case-insensitively) treated as code.
    code_extensions = frozenset({
        '.py', '.java', '.cpp', '.c', '.h', '.hpp', '.cs',
        '.js', '.html', '.css', '.php', '.go', '.rb',
        '.swift', '.kt', '.ts', '.sh', '.pl', '.r',
    })
    # Directory names that are never descended into.
    excluded_dirs = frozenset({
        'venv', 'env', '.venv', '.env', 'virtualenv',
        '__pycache__', 'node_modules', '.git', '.idea',
        'dist', 'build', 'target', 'bin',
    })

    # Resolve the output path once so the walk can recognise and skip it;
    # otherwise an output name such as "out.py" would be read back into itself
    # while it is still being written.
    output_path = os.path.normcase(os.path.abspath(output_file))
    separator = '=' * 80

    # Number of code files written to the output.
    file_count = 0

    with open(output_file, 'w', encoding='utf-8') as out_file:
        # Walk the current directory and all of its subdirectories.
        for root, dirs, files in os.walk('.'):
            # Pruning ``dirs`` in place stops os.walk from entering the
            # excluded directories; sorting fixes the traversal order.
            dirs[:] = sorted(d for d in dirs if d not in excluded_dirs)

            for file in sorted(files):
                _, ext = os.path.splitext(file)
                if ext.lower() not in code_extensions:
                    continue

                file_path = os.path.join(root, file)
                if os.path.normcase(os.path.abspath(file_path)) == output_path:
                    continue

                file_count += 1

                # The file path, framed by rules, separates one file from the next.
                out_file.write(f"\n{separator}\n")
                out_file.write(f"File: {file_path}\n")
                out_file.write(f"{separator}\n\n")

                out_file.write(_read_code_text(file_path))

    print(f"已成功收集 {file_count} 个代码文件到 {output_file}")
if __name__ == "__main__":
    # An optional first command-line argument overrides the default output
    # filename.
    try:
        output_file = sys.argv[1]
    except IndexError:
        output_file = "code_collection.txt"
    collect_code_files(output_file)