# CHM_attendance/all_file_output.py

import os
import sys


def collect_code_files(output_file="code_collection.txt", root_dir="."):
    """Concatenate every source-code file under ``root_dir`` into one text file.

    The tree is walked top-down. Each file whose extension (compared
    case-insensitively) is in the built-in extension set is appended to
    ``output_file``, preceded by a banner that shows its path. Directories
    named in the exclusion set are never entered. Directory and file names
    are visited in sorted order so repeated runs over the same tree produce
    the same output.

    Args:
        output_file: Path of the text file to create; an existing file is
            overwritten. If this file lies inside the walked tree and has a
            code extension, it is skipped instead of being collected into
            itself.
        root_dir: Directory to scan. Defaults to the current directory.

    Returns:
        The number of files for which a banner was written. Files whose
        content could not be read are still counted; an error note is
        written in place of their content.
    """
    # Extensions (lower-case) that mark a file as source code.
    code_extensions = {
        '.py', '.java', '.cpp', '.c', '.h', '.hpp', '.cs',
        '.js', '.html', '.css', '.php', '.go', '.rb',
        '.swift', '.kt', '.ts', '.sh', '.pl', '.r'
    }

    # Directory names that are skipped entirely (virtualenvs, caches, VCS
    # metadata, build output).
    excluded_dirs = {
        'venv', 'env', '.venv', '.env', 'virtualenv',
        '__pycache__', 'node_modules', '.git', '.idea',
        'dist', 'build', 'target', 'bin'
    }

    # Resolved once so the output file can be recognised during the walk;
    # without this, an output name such as "dump.py" would be read back
    # into itself while it is still being written.
    output_real = os.path.normcase(os.path.realpath(output_file))

    file_count = 0
    banner = '=' * 80

    with open(output_file, 'w', encoding='utf-8') as out_file:
        for root, dirs, files in os.walk(root_dir):
            # Assigning to dirs[:] prunes the walk in place, so os.walk never
            # descends into excluded directories; sorting fixes the order.
            dirs[:] = sorted(d for d in dirs if d not in excluded_dirs)

            for file in sorted(files):
                _, ext = os.path.splitext(file)
                if ext.lower() not in code_extensions:
                    continue

                file_path = os.path.join(root, file)
                if os.path.normcase(os.path.realpath(file_path)) == output_real:
                    continue

                file_count += 1

                # Banner with the file path acts as the separator.
                out_file.write(f"\n{banner}\n")
                out_file.write(f"File: {file_path}\n")
                out_file.write(f"{banner}\n\n")

                # Only the read is guarded: a failure while writing the
                # output file should surface instead of being reported as a
                # problem with the source file. Catching Exception is a
                # deliberate best-effort so one bad file does not abort the
                # whole collection.
                try:
                    with open(file_path, 'r', encoding='utf-8') as code_file:
                        content = code_file.read()
                except UnicodeDecodeError:
                    # Not valid UTF-8. Latin-1 maps every byte to a
                    # character, so this retry cannot fail on decoding.
                    try:
                        with open(file_path, 'r', encoding='latin-1') as code_file:
                            content = code_file.read()
                    except Exception as e:
                        content = f"无法读取文件内容: {str(e)}\n"
                except Exception as e:
                    content = f"读取文件时出错: {str(e)}\n"

                out_file.write(content)

    print(f"已成功收集 {file_count} 个代码文件到 {output_file}")
    return file_count


if __name__ == "__main__":
    # The first command-line argument, when given, names the output file;
    # otherwise fall back to the default name.
    cli_args = sys.argv[1:]
    if cli_args:
        output_file = cli_args[0]
    else:
        output_file = "code_collection.txt"
    collect_code_files(output_file)