import logging
import re
from datetime import datetime, timedelta, time
from typing import Dict, List, Tuple, Optional

import pandas as pd

from app.utils.database import get_db_connection

logger = logging.getLogger(__name__)

# Column headers that carry a calendar date (any year), e.g. "2025-08-04 ...".
_DATE_COLUMN_RE = re.compile(r'\d{4}-\d{1,2}-\d{1,2}')
# Punch time in parentheses, e.g. "(09:41)".
_CARD_TIME_RE = re.compile(r'\((\d{2}:\d{2})\)')
_LATE_MINUTES_RE = re.compile(r'迟到(\d+)分钟')
_EARLY_MINUTES_RE = re.compile(r'早退(\d+)分钟')
# Rest-day punches: start time, optionally followed by an end time.
_WEEKEND_PUNCH_RE = re.compile(r'休息打卡\((\d{2}:\d{2}),?(\d{2}:\d{2})?\)')

# Positional meaning of the comma-separated parts of a workday cell.
_PERIODS = (
    'morning_in', 'morning_out',
    'afternoon_in', 'afternoon_out',
    'evening_in', 'evening_out',
)
_SESSIONS = ('morning', 'afternoon', 'evening')
# Record statuses whose punch time counts for a check-in / check-out slot.
_USABLE_STATUSES = {
    'in': ('normal', 'late'),
    'out': ('normal', 'early_leave'),
}
_HOUR_FIELDS = ('actual_work_hours', 'class_work_hours', 'overtime_hours')


class AttendanceDataImporter:
    """Parse an attendance spreadsheet and load weekly summaries into the DB.

    The pipeline is: ``parse_xlsx_file`` -> ``{name: {date: day_data}}`` ->
    ``import_to_database``, which computes per-student weekly statistics and
    writes one weekly row plus one detail row per day.
    """

    def __init__(self):
        # Session time windows. Not read by any method in this class.
        self.work_time_rules = {
            'morning': {
                'work_start': time(9, 45),
                'work_end': time(11, 30),
                'card_start': time(6, 0),
                'card_end': time(12, 0)
            },
            'afternoon': {
                'work_start': time(13, 30),
                'work_end': time(18, 30),
                'card_start': time(13, 30),
                'card_end': time(18, 30)
            },
            'evening': {
                'work_start': time(19, 0),
                'work_end': time(23, 30),
                'card_start': time(19, 0),
                'card_end': time(23, 30)
            }
        }

    def parse_xlsx_file(self, file_path: str) -> Dict:
        """Read an xlsx file and return its parsed attendance data.

        Args:
            file_path: Path of the spreadsheet to read.

        Returns:
            Mapping ``{name: {date_str: day_data}}`` as produced by
            ``_process_dataframe``.

        Raises:
            Exception: Any read or parse error is logged and re-raised.
        """
        try:
            df = pd.read_excel(file_path)
            logger.info(f"成功读取文件: {file_path}")
            return self._process_dataframe(df)
        except Exception as e:
            logger.error(f"读取文件失败: {e}")
            raise

    def _process_dataframe(self, df: pd.DataFrame) -> Dict:
        """Convert the sheet into ``{name: {date_str: day_data}}``.

        Date columns are recognised by a ``YYYY-M-D`` pattern in the header
        (any year), so leading summary columns are skipped. Rows without a
        value in the ``姓名`` column are ignored.
        """
        date_columns = [
            col for col in df.columns if _DATE_COLUMN_RE.search(str(col))
        ]

        results = {}
        for _, row in df.iterrows():
            name = row['姓名']
            if pd.isna(name):
                continue

            daily_data = {}
            for date_col in date_columns:
                # Keep only the date part of headers such as "2025-08-04 xxx".
                date_str = str(date_col).split()[0]
                daily_data[date_str] = self._parse_daily_attendance(
                    str(row[date_col])
                )
            results[name] = daily_data

        return results

    @staticmethod
    def _extract_card_time(part: str) -> Optional[str]:
        """Return the ``HH:MM`` punch time found in *part*, or ``None``."""
        match = _CARD_TIME_RE.search(part)
        return match.group(1) if match else None

    @staticmethod
    def _extract_minutes(pattern, part: str) -> int:
        """Return the minute count captured by *pattern* in *part*, else 0."""
        match = pattern.search(part)
        return int(match.group(1)) if match else 0

    def _parse_daily_attendance(self, attendance_str: str) -> Dict:
        """Parse one day's attendance cell.

        Returns:
            ``{'status': ..., 'records': [...]}`` where status is ``absent``
            (empty cell), a weekend status (cell mentions ``休息``), or
            ``workday`` with one record per recognised punch slot.
        """
        if pd.isna(attendance_str) or attendance_str == 'nan':
            return {'status': 'absent', 'records': []}

        if '休息' in attendance_str:
            return self._parse_weekend_attendance(attendance_str)

        records = []
        # Parts are positional; anything beyond the six known slots is dropped.
        for period, raw_part in zip(_PERIODS, attendance_str.split(',')):
            part = raw_part.strip()

            if '缺卡' in part:
                records.append(
                    {'period': period, 'status': 'missing', 'time': None}
                )
            elif '正常' in part:
                records.append({
                    'period': period,
                    'status': 'normal',
                    'time': self._extract_card_time(part)
                })
            elif '迟到' in part:
                records.append({
                    'period': period,
                    'status': 'late',
                    'time': self._extract_card_time(part),
                    'late_minutes': self._extract_minutes(
                        _LATE_MINUTES_RE, part
                    )
                })
            elif '早退' in part:
                records.append({
                    'period': period,
                    'status': 'early_leave',
                    'time': self._extract_card_time(part),
                    'early_minutes': self._extract_minutes(
                        _EARLY_MINUTES_RE, part
                    )
                })
            # Parts with none of the known keywords produce no record.

        return {'status': 'workday', 'records': records}

    def _parse_weekend_attendance(self, attendance_str: str) -> Dict:
        """Parse a rest-day cell into ``weekend_rest`` or ``weekend_work``."""
        if '休息(-,-)' in attendance_str:
            return {'status': 'weekend_rest', 'records': []}

        match = _WEEKEND_PUNCH_RE.search(attendance_str)
        if match is None:
            return {'status': 'weekend_rest', 'records': []}

        # The end time is optional; a single punch yields end=None.
        return {
            'status': 'weekend_work',
            'records': [{'start': match.group(1), 'end': match.group(2) or None}]
        }

    def calculate_weekly_statistics(self, daily_data: Dict, week_start: str,
                                    week_end: str) -> Dict:
        """Aggregate one student's daily data over ``[week_start, week_end]``.

        Args:
            daily_data: ``{date_str: day_data}`` for one student.
            week_start: First day, ``YYYY-MM-DD``.
            week_end: Last day (inclusive), ``YYYY-MM-DD``.

        Returns:
            Dict with ``actual_work_hours``, ``class_work_hours``,
            ``absent_days`` and ``overtime_hours``. Hour totals are rounded
            to one decimal.

        Raises:
            ValueError: If a date string is not in ``YYYY-MM-DD`` format.
        """
        stats = {
            'actual_work_hours': 0.0,
            'class_work_hours': 0.0,
            'absent_days': 0,
            'overtime_hours': 0.0
        }

        current_date = datetime.strptime(week_start, '%Y-%m-%d')
        end_date = datetime.strptime(week_end, '%Y-%m-%d')

        while current_date <= end_date:
            date_str = current_date.strftime('%Y-%m-%d')
            is_weekday = current_date.weekday() < 5  # Mon-Fri

            if date_str in daily_data:
                day_data = daily_data[date_str]
                status = day_data['status']

                if status == 'workday':
                    hours = self._calculate_daily_hours(
                        day_data['records'], is_weekday
                    )['actual_hours']
                    stats['actual_work_hours'] += hours
                    # Workday-style punches on a weekend count as overtime.
                    bucket = 'class_work_hours' if is_weekday else 'overtime_hours'
                    stats[bucket] += hours
                elif status == 'weekend_work':
                    overtime = self._calculate_weekend_overtime(
                        day_data['records']
                    )
                    stats['actual_work_hours'] += overtime
                    stats['overtime_hours'] += overtime
                elif status == 'absent' and is_weekday:
                    stats['absent_days'] += 1
            elif is_weekday:
                # A weekday with no column in the sheet counts as absent.
                stats['absent_days'] += 1

            current_date += timedelta(days=1)

        # Summing one-decimal floats drifts (e.g. 7.300000000000001).
        for field in _HOUR_FIELDS:
            stats[field] = round(stats[field], 1)

        return stats

    def _calculate_daily_hours(self, records: List[Dict], is_weekday: bool) -> Dict:
        """Sum worked hours over the morning/afternoon/evening sessions.

        A session contributes only when both its check-in (status ``normal``
        or ``late``) and check-out (``normal`` or ``early_leave``) carry a
        time. ``is_weekday`` is accepted for interface compatibility and is
        not used.

        Returns:
            ``{'actual_hours': float}``.
        """
        punches = {}
        for record in records:
            period = record['period']
            session, _, direction = period.rpartition('_')
            if session not in _SESSIONS or direction not in _USABLE_STATUSES:
                continue
            if record['status'] in _USABLE_STATUSES[direction] and record['time']:
                punches[period] = datetime.strptime(
                    record['time'], '%H:%M'
                ).time()

        total_hours = 0.0
        for session in _SESSIONS:
            clock_in = punches.get(f'{session}_in')
            clock_out = punches.get(f'{session}_out')
            if clock_in is not None and clock_out is not None:
                total_hours += self._calculate_time_diff(clock_in, clock_out)

        return {'actual_hours': total_hours}

    def _calculate_weekend_overtime(self, records: List[Dict]) -> float:
        """Return overtime hours from the first weekend record.

        Returns 0.0 when there is no record or either punch is missing.
        """
        if not records:
            return 0.0

        first = records[0]
        start_text = first.get('start')
        end_text = first.get('end')
        if not start_text or not end_text:
            return 0.0

        start_time = datetime.strptime(start_text, '%H:%M').time()
        end_time = datetime.strptime(end_text, '%H:%M').time()
        return self._calculate_time_diff(start_time, end_time)

    def _calculate_time_diff(self, start_time: time, end_time: time) -> float:
        """Return hours from *start_time* to *end_time*, rounded to 0.1.

        An end earlier than the start is treated as crossing midnight.
        """
        start_minutes = start_time.hour * 60 + start_time.minute
        end_minutes = end_time.hour * 60 + end_time.minute

        if end_minutes < start_minutes:
            end_minutes += 24 * 60

        return round((end_minutes - start_minutes) / 60.0, 1)

    def import_to_database(self, data: Dict, week_start: str, week_end: str):
        """Upsert weekly summaries and daily details for every student.

        Students are looked up by name; unknown names are logged and skipped.
        All writes happen in one transaction: committed on success, rolled
        back (and the exception re-raised) on any failure.

        Args:
            data: ``{name: {date_str: day_data}}`` from ``parse_xlsx_file``.
            week_start: First day of the week, ``YYYY-MM-DD``.
            week_end: Last day of the week (inclusive), ``YYYY-MM-DD``.
        """
        insert_weekly_sql = """
            INSERT INTO weekly_attendance
            (student_number, name, week_start_date, week_end_date,
             actual_work_hours, class_work_hours, absent_days, overtime_hours)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
            ON DUPLICATE KEY UPDATE
            actual_work_hours = VALUES(actual_work_hours),
            class_work_hours = VALUES(class_work_hours),
            absent_days = VALUES(absent_days),
            overtime_hours = VALUES(overtime_hours),
            updated_at = CURRENT_TIMESTAMP
        """

        conn = get_db_connection()
        cursor = None
        try:
            # Created inside the try so a failure here still closes conn.
            cursor = conn.cursor()

            for name, daily_data in data.items():
                cursor.execute(
                    "SELECT student_number FROM students WHERE name = %s",
                    (name,)
                )
                student_result = cursor.fetchone()
                if not student_result:
                    logger.warning(f"未找到学生: {name}")
                    continue
                student_number = student_result[0]

                weekly_stats = self.calculate_weekly_statistics(
                    daily_data, week_start, week_end
                )

                cursor.execute(insert_weekly_sql, (
                    student_number, name, week_start, week_end,
                    weekly_stats['actual_work_hours'],
                    weekly_stats['class_work_hours'],
                    weekly_stats['absent_days'],
                    weekly_stats['overtime_hours']
                ))

                # NOTE(review): when the upsert takes the UPDATE path,
                # lastrowid may not be the existing row's id (it can be 0
                # if no column changed) - confirm re-imports link details
                # to the correct weekly record.
                weekly_record_id = cursor.lastrowid

                self._insert_daily_details(
                    cursor, weekly_record_id, student_number,
                    daily_data, week_start, week_end
                )

            conn.commit()
            logger.info("数据导入成功")

        except Exception as e:
            conn.rollback()
            logger.error(f"数据导入失败: {e}")
            raise
        finally:
            try:
                if cursor is not None:
                    cursor.close()
            finally:
                conn.close()

    def _insert_daily_details(self, cursor, weekly_record_id: int,
                              student_number: str, daily_data: Dict,
                              week_start: str, week_end: str):
        """Upsert one detail row per day of the week present in *daily_data*.

        Days in ``[week_start, week_end]`` without an entry are skipped.
        """
        insert_daily_sql = """
            INSERT INTO daily_attendance_details
            (weekly_record_id, student_number, attendance_date, status, remarks)
            VALUES (%s, %s, %s, %s, %s)
            ON DUPLICATE KEY UPDATE
            status = VALUES(status),
            remarks = VALUES(remarks)
        """

        current_date = datetime.strptime(week_start, '%Y-%m-%d')
        end_date = datetime.strptime(week_end, '%Y-%m-%d')

        while current_date <= end_date:
            day_data = daily_data.get(current_date.strftime('%Y-%m-%d'))
            if day_data is not None:
                status = self._get_daily_status(day_data)
                remarks = self._generate_remarks(day_data)
                cursor.execute(insert_daily_sql, (
                    weekly_record_id, student_number, current_date.date(),
                    status, remarks
                ))
            current_date += timedelta(days=1)

    def _get_daily_status(self, day_data: Dict) -> str:
        """Map parsed day data to the status label stored in the database.

        Workdays are ``迟到`` if any record is late, otherwise ``正常``.
        """
        fixed_labels = {
            'absent': '缺勤',
            'weekend_rest': '休息',
            'weekend_work': '加班',
        }
        label = fixed_labels.get(day_data['status'])
        if label is not None:
            return label

        if any(rec.get('status') == 'late' for rec in day_data['records']):
            return '迟到'
        return '正常'

    def _generate_remarks(self, day_data: Dict) -> str:
        """Build the remarks text for a day.

        Workdays list every late / early-leave / missing punch joined by
        ``'; '``, or ``正常`` when there is nothing to report.
        """
        fixed_remarks = {
            'absent': '缺勤',
            'weekend_rest': '休息日',
            'weekend_work': '周末加班',
        }
        remark = fixed_remarks.get(day_data['status'])
        if remark is not None:
            return remark

        remarks = []
        for record in day_data['records']:
            status = record.get('status')
            if status == 'late':
                remarks.append(f"迟到{record.get('late_minutes', 0)}分钟")
            elif status == 'early_leave':
                remarks.append(f"早退{record.get('early_minutes', 0)}分钟")
            elif status == 'missing':
                remarks.append("缺卡")

        return '; '.join(remarks) if remarks else '正常'