"""Collect every source-code file under a directory tree into one text file."""

import os
import sys

# File extensions (lower-case, including the leading dot) treated as code.
CODE_EXTENSIONS = frozenset({
    '.py', '.java', '.cpp', '.c', '.h', '.hpp', '.cs', '.js', '.html',
    '.css', '.php', '.go', '.rb', '.swift', '.kt', '.ts', '.sh', '.pl', '.r',
})

# Directory names that are never descended into (virtualenvs, caches,
# VCS metadata, build output).
EXCLUDED_DIRS = frozenset({
    'venv', 'env', '.venv', '.env', 'virtualenv', '__pycache__',
    'node_modules', '.git', '.idea', 'dist', 'build', 'target', 'bin',
})


def _read_source(file_path):
    """Return the text of *file_path*, or an error message if it is unreadable.

    The file is decoded as UTF-8 first; if that fails it is re-read as
    latin-1, which maps every byte to a code point and so cannot fail to
    decode. I/O errors are reported in the returned text instead of being
    raised, so one unreadable file does not abort the whole collection.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as code_file:
            return code_file.read()
    except UnicodeDecodeError:
        try:
            with open(file_path, 'r', encoding='latin-1') as code_file:
                return code_file.read()
        except OSError as e:
            return f"无法读取文件内容: {str(e)}\n"
    except OSError as e:
        return f"读取文件时出错: {str(e)}\n"


def collect_code_files(output_file="code_collection.txt", root_dir="."):
    """Concatenate all code files under *root_dir* into *output_file*.

    Each file is preceded by a banner of ``=`` characters containing its
    path. Directories listed in ``EXCLUDED_DIRS`` are skipped entirely, and
    only files whose extension (case-insensitive) is in ``CODE_EXTENSIONS``
    are included. Directories and files are visited in sorted order so the
    result is reproducible across runs and platforms.

    The output file itself is never collected, even when it lives inside
    *root_dir* and has a code extension; otherwise it would be read while
    it is still being written.

    Args:
        output_file: Path of the text file to create (overwritten if it
            already exists).
        root_dir: Directory to scan recursively. Defaults to the current
            working directory.

    Returns:
        The number of files for which a section was written, including
        files whose content could not be read (their section holds the
        error message instead).

    Raises:
        OSError: If *output_file* cannot be opened or written.
    """
    # Resolve once up front so the comparison below is not affected by
    # relative paths or symlinks.
    output_real = os.path.realpath(output_file)
    file_count = 0

    with open(output_file, 'w', encoding='utf-8') as out_file:
        for root, dirs, files in os.walk(root_dir):
            # Assigning to dirs[:] prunes the walk in place, which stops
            # os.walk from descending into the excluded directories.
            dirs[:] = sorted(d for d in dirs if d not in EXCLUDED_DIRS)

            for file in sorted(files):
                _, ext = os.path.splitext(file)
                if ext.lower() not in CODE_EXTENSIONS:
                    continue

                file_path = os.path.join(root, file)
                if os.path.realpath(file_path) == output_real:
                    # Never embed the half-written output file in itself.
                    continue

                file_count += 1

                # The file path acts as the separator between sections.
                out_file.write(f"\n{'=' * 80}\n")
                out_file.write(f"File: {file_path}\n")
                out_file.write(f"{'=' * 80}\n\n")
                out_file.write(_read_source(file_path))

    print(f"已成功收集 {file_count} 个代码文件到 {output_file}")
    return file_count


if __name__ == "__main__":
    # Use the first command-line argument, if given, as the output file name.
    output_file = sys.argv[1] if len(sys.argv) > 1 else "code_collection.txt"
    collect_code_files(output_file)