CHM_attendance/app/utils/data_import.py
superlishunqin e7fa4bc030 first commit
2025-06-11 19:56:34 +08:00

367 lines
14 KiB
Python

import pandas as pd
import re
from datetime import datetime, timedelta, time
from typing import Dict, List, Tuple, Optional
from app.utils.database import get_db_connection
import logging
logger = logging.getLogger(__name__)
class AttendanceDataImporter:
    """Import weekly attendance data from an exported spreadsheet.

    The importer reads a sheet with one row per person (column ``姓名``) and
    one column per calendar day, converts every cell into a structured day
    record, aggregates weekly statistics and writes both to the database.

    A day record is a dict ``{'status': ..., 'records': [...]}`` where
    ``status`` is one of ``'absent'``, ``'workday'``, ``'weekend_rest'`` or
    ``'weekend_work'``.
    """

    # Punch slots in the order they appear in a comma-separated workday cell.
    _PUNCH_SLOTS: Tuple[str, ...] = (
        'morning_in', 'morning_out',
        'afternoon_in', 'afternoon_out',
        'evening_in', 'evening_out',
    )
    # (clock-in slot, clock-out slot) pairs that form one work session.
    _SESSIONS: Tuple[Tuple[str, str], ...] = (
        ('morning_in', 'morning_out'),
        ('afternoon_in', 'afternoon_out'),
        ('evening_in', 'evening_out'),
    )
    # Punch statuses whose time may be used when computing worked hours.
    _USABLE_IN_STATUSES = ('normal', 'late')
    _USABLE_OUT_STATUSES = ('normal', 'early_leave')

    # Patterns are compiled once instead of on every parsed cell.
    _DATE_COLUMN_RE = re.compile(r'(\d{4})-(\d{1,2})-(\d{1,2})')
    _PUNCH_TIME_RE = re.compile(r'\((\d{2}:\d{2})\)')
    _LATE_MINUTES_RE = re.compile(r'迟到(\d+)分钟')
    _EARLY_MINUTES_RE = re.compile(r'早退(\d+)分钟')
    _WEEKEND_PUNCH_RE = re.compile(r'休息打卡\((\d{2}:\d{2}),?(\d{2}:\d{2})?\)')

    # Labels stored for day records that are not ordinary workdays.
    _STATUS_LABELS = {'absent': '缺勤', 'weekend_rest': '休息', 'weekend_work': '加班'}
    _REMARK_LABELS = {'absent': '缺勤', 'weekend_rest': '休息日', 'weekend_work': '周末加班'}

    _SELECT_STUDENT_SQL = "SELECT student_number FROM students WHERE name = %s"
    _UPSERT_WEEKLY_SQL = """
        INSERT INTO weekly_attendance
        (student_number, name, week_start_date, week_end_date,
        actual_work_hours, class_work_hours, absent_days, overtime_hours)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
        ON DUPLICATE KEY UPDATE
        actual_work_hours = VALUES(actual_work_hours),
        class_work_hours = VALUES(class_work_hours),
        absent_days = VALUES(absent_days),
        overtime_hours = VALUES(overtime_hours),
        updated_at = CURRENT_TIMESTAMP
    """
    _UPSERT_DAILY_SQL = """
        INSERT INTO daily_attendance_details
        (weekly_record_id, student_number, attendance_date, status, remarks)
        VALUES (%s, %s, %s, %s, %s)
        ON DUPLICATE KEY UPDATE
        status = VALUES(status),
        remarks = VALUES(remarks)
    """

    def __init__(self):
        # Working-hour and card-swipe windows per session.
        # NOTE(review): no method of this class reads these rules; they are
        # kept because external code may access the attribute.
        self.work_time_rules = {
            'morning': {
                'work_start': time(9, 45),
                'work_end': time(11, 30),
                'card_start': time(6, 0),
                'card_end': time(12, 0),
            },
            'afternoon': {
                'work_start': time(13, 30),
                'work_end': time(18, 30),
                'card_start': time(13, 30),
                'card_end': time(18, 30),
            },
            'evening': {
                'work_start': time(19, 0),
                'work_end': time(23, 30),
                'card_start': time(19, 0),
                'card_end': time(23, 30),
            },
        }

    # ------------------------------------------------------------------
    # Parsing
    # ------------------------------------------------------------------
    def parse_xlsx_file(self, file_path: str) -> Dict:
        """Read an xlsx file and return ``{name: {date: day_record}}``.

        Any exception raised while reading or parsing is logged and re-raised.
        """
        try:
            df = pd.read_excel(file_path)
            logger.info(f"成功读取文件: {file_path}")
            return self._process_dataframe(df)
        except Exception as e:
            logger.error(f"读取文件失败: {e}")
            raise

    @classmethod
    def _column_date(cls, column) -> Optional[str]:
        """Return the ``YYYY-MM-DD`` date a column header starts with, else None.

        Only the first whitespace-delimited token of the header is examined,
        so ``'2025-06-02 星期一'`` and timestamp headers both qualify while
        statistic columns do not.
        """
        tokens = str(column).split()
        if not tokens:
            return None
        match = cls._DATE_COLUMN_RE.fullmatch(tokens[0])
        if match is None:
            return None
        year, month, day = match.groups()
        # Zero-pad so the key matches the '%Y-%m-%d' lookups done later.
        return f"{year}-{int(month):02d}-{int(day):02d}"

    def _process_dataframe(self, df: pd.DataFrame) -> Dict:
        """Convert the sheet into ``{name: {date: day_record}}``.

        Rows without a name are skipped. Day columns are recognised by a
        leading date of any year (not only 2025).
        """
        # Resolve the day columns once; they are the same for every row.
        day_columns = []
        for column in df.columns:
            date_str = self._column_date(column)
            if date_str is not None:
                day_columns.append((column, date_str))

        results = {}
        for _, row in df.iterrows():
            name = row['姓名']
            if pd.isna(name):
                continue
            if isinstance(name, str):
                # Stray spaces from the spreadsheet would break the later
                # database lookup by name.
                name = name.strip()
                if not name:
                    continue
            # Pass the raw cell so missing values (NaN/None) are detected
            # before any string conversion.
            results[name] = {
                date_str: self._parse_daily_attendance(row[column])
                for column, date_str in day_columns
            }
        return results

    def _parse_daily_attendance(self, attendance_str: str) -> Dict:
        """Parse one day cell into a day record.

        Missing values (NaN/None), the literal ``'nan'`` and blank strings are
        reported as ``'absent'``. Cells containing ``休息`` are handled as rest
        days. Anything else is split on ``,`` and mapped positionally onto the
        six punch slots; parts with no recognised status are dropped.
        """
        if not isinstance(attendance_str, str):
            if pd.isna(attendance_str):
                return {'status': 'absent', 'records': []}
            attendance_str = str(attendance_str)

        text = attendance_str.strip()
        if not text or text == 'nan':
            return {'status': 'absent', 'records': []}
        if '休息' in text:
            return self._parse_weekend_attendance(text)

        records = []
        # zip() stops at six parts, ignoring any surplus entries.
        for period, part in zip(self._PUNCH_SLOTS, text.split(',')):
            record = self._parse_punch(period, part.strip())
            if record is not None:
                records.append(record)
        return {'status': 'workday', 'records': records}

    def _parse_punch(self, period: str, part: str) -> Optional[Dict]:
        """Parse a single punch entry; return None if no status is recognised."""
        if '缺卡' in part:
            return {'period': period, 'status': 'missing', 'time': None}

        time_match = self._PUNCH_TIME_RE.search(part)
        card_time = time_match.group(1) if time_match else None

        if '正常' in part:
            return {'period': period, 'status': 'normal', 'time': card_time}
        if '迟到' in part:
            late_match = self._LATE_MINUTES_RE.search(part)
            return {
                'period': period,
                'status': 'late',
                'time': card_time,
                'late_minutes': int(late_match.group(1)) if late_match else 0,
            }
        if '早退' in part:
            early_match = self._EARLY_MINUTES_RE.search(part)
            return {
                'period': period,
                'status': 'early_leave',
                'time': card_time,
                'early_minutes': int(early_match.group(1)) if early_match else 0,
            }
        return None

    def _parse_weekend_attendance(self, attendance_str: str) -> Dict:
        """Parse a rest-day cell into ``'weekend_rest'`` or ``'weekend_work'``."""
        if '休息(-,-)' in attendance_str:
            return {'status': 'weekend_rest', 'records': []}

        # Overtime on a rest day: start time, optionally followed by end time.
        punch = self._WEEKEND_PUNCH_RE.search(attendance_str)
        if punch is None:
            return {'status': 'weekend_rest', 'records': []}
        return {
            'status': 'weekend_work',
            'records': [{'start': punch.group(1), 'end': punch.group(2) or None}],
        }

    # ------------------------------------------------------------------
    # Statistics
    # ------------------------------------------------------------------
    @staticmethod
    def _iter_days(week_start: str, week_end: str):
        """Yield a ``datetime`` for each day from week_start to week_end inclusive.

        Both bounds are ``'%Y-%m-%d'`` strings; nothing is yielded when
        ``week_end`` precedes ``week_start``.
        """
        current = datetime.strptime(week_start, '%Y-%m-%d')
        last = datetime.strptime(week_end, '%Y-%m-%d')
        while current <= last:
            yield current
            current += timedelta(days=1)

    def calculate_weekly_statistics(self, daily_data: Dict, week_start: str, week_end: str) -> Dict:
        """Aggregate one person's day records over an inclusive date range.

        Returns a dict with ``actual_work_hours``, ``class_work_hours``
        (hours on Monday-Friday workday records), ``absent_days`` (weekdays
        that are absent or have no record) and ``overtime_hours`` (weekend
        workday records plus rest-day overtime). Hour totals are rounded to
        one decimal.
        """
        actual_hours = 0.0
        class_hours = 0.0
        overtime_hours = 0.0
        absent_days = 0

        for day in self._iter_days(week_start, week_end):
            is_weekday = day.weekday() < 5  # 0-4 are Monday-Friday
            day_data = daily_data.get(day.strftime('%Y-%m-%d'))

            if day_data is None:
                # A weekday without any record counts as an absence.
                if is_weekday:
                    absent_days += 1
                continue

            status = day_data['status']
            if status == 'workday':
                hours = self._calculate_daily_hours(day_data['records'], is_weekday)['actual_hours']
                actual_hours += hours
                if is_weekday:
                    class_hours += hours
                else:
                    overtime_hours += hours
            elif status == 'weekend_work':
                hours = self._calculate_weekend_overtime(day_data['records'])
                actual_hours += hours
                overtime_hours += hours
            elif status == 'absent' and is_weekday:
                absent_days += 1

        # Every addend has one decimal; rounding the sums removes float noise
        # such as 1.1 + 2.2 == 3.3000000000000003.
        return {
            'actual_work_hours': round(actual_hours, 1),
            'class_work_hours': round(class_hours, 1),
            'absent_days': absent_days,
            'overtime_hours': round(overtime_hours, 1),
        }

    def _calculate_daily_hours(self, records: List[Dict], is_weekday: bool) -> Dict:
        """Sum the hours of every session that has both a usable in and out punch.

        A clock-in counts when its status is normal or late, a clock-out when
        it is normal or early_leave, and only if a time was captured.
        ``is_weekday`` is accepted for interface compatibility and not used.
        Returns ``{'actual_hours': float}``.
        """
        punches = {}
        for record in records:
            period = record.get('period')
            if period not in self._PUNCH_SLOTS or not record.get('time'):
                continue
            usable = (self._USABLE_IN_STATUSES if period.endswith('_in')
                      else self._USABLE_OUT_STATUSES)
            if record.get('status') in usable:
                punches[period] = datetime.strptime(record['time'], '%H:%M').time()

        total_hours = 0.0
        for slot_in, slot_out in self._SESSIONS:
            if slot_in in punches and slot_out in punches:
                total_hours += self._calculate_time_diff(punches[slot_in], punches[slot_out])
        return {'actual_hours': round(total_hours, 1)}

    def _calculate_weekend_overtime(self, records: List[Dict]) -> float:
        """Return rest-day overtime hours from the first record, or 0.0.

        Overtime is only counted when both a start and an end time exist.
        """
        if not records:
            return 0.0
        start_text = records[0].get('start')
        end_text = records[0].get('end')
        if not start_text or not end_text:
            return 0.0
        return self._calculate_time_diff(
            datetime.strptime(start_text, '%H:%M').time(),
            datetime.strptime(end_text, '%H:%M').time(),
        )

    def _calculate_time_diff(self, start_time: time, end_time: time) -> float:
        """Return the hours between two clock times, rounded to one decimal.

        An end time earlier than the start time is treated as the next day.
        """
        start_minutes = start_time.hour * 60 + start_time.minute
        end_minutes = end_time.hour * 60 + end_time.minute
        if end_minutes < start_minutes:  # crosses midnight
            end_minutes += 24 * 60
        return round((end_minutes - start_minutes) / 60.0, 1)

    # ------------------------------------------------------------------
    # Persistence
    # ------------------------------------------------------------------
    def import_to_database(self, data: Dict, week_start: str, week_end: str):
        """Write weekly summaries and daily details for every person in ``data``.

        People whose name is not found in ``students`` are skipped with a
        warning. All writes are committed together; on any error the
        transaction is rolled back, the error is logged and re-raised. The
        cursor and connection are always closed.
        """
        conn = get_db_connection()
        try:
            cursor = conn.cursor()
            try:
                for name, daily_data in data.items():
                    cursor.execute(self._SELECT_STUDENT_SQL, (name,))
                    student_row = cursor.fetchone()
                    if not student_row:
                        logger.warning(f"未找到学生: {name}")
                        continue
                    student_number = student_row[0]

                    weekly_stats = self.calculate_weekly_statistics(daily_data, week_start, week_end)
                    cursor.execute(self._UPSERT_WEEKLY_SQL, (
                        student_number, name, week_start, week_end,
                        weekly_stats['actual_work_hours'],
                        weekly_stats['class_work_hours'],
                        weekly_stats['absent_days'],
                        weekly_stats['overtime_hours'],
                    ))
                    # NOTE(review): after INSERT ... ON DUPLICATE KEY UPDATE the
                    # driver's lastrowid may not identify the existing row when
                    # the statement updates instead of inserts - confirm against
                    # the weekly_attendance schema before relying on it.
                    weekly_record_id = cursor.lastrowid

                    self._insert_daily_details(
                        cursor, weekly_record_id, student_number,
                        daily_data, week_start, week_end,
                    )
                conn.commit()
                logger.info("数据导入成功")
            except Exception as e:
                conn.rollback()
                # logger.exception keeps the traceback for diagnosis.
                logger.exception("数据导入失败: %s", e)
                raise
            finally:
                cursor.close()
        finally:
            # Runs even if creating the cursor failed, so the connection
            # cannot leak.
            conn.close()

    def _insert_daily_details(self, cursor, weekly_record_id: int, student_number: str,
                              daily_data: Dict, week_start: str, week_end: str):
        """Upsert one detail row per day of the range that has a day record."""
        for day in self._iter_days(week_start, week_end):
            day_data = daily_data.get(day.strftime('%Y-%m-%d'))
            if day_data is None:
                continue
            cursor.execute(self._UPSERT_DAILY_SQL, (
                weekly_record_id,
                student_number,
                day.date(),
                self._get_daily_status(day_data),
                self._generate_remarks(day_data),
            ))

    def _get_daily_status(self, day_data: Dict) -> str:
        """Return the status label stored for a day record.

        NOTE(review): workdays with only early-leave or missing punches are
        reported as '正常'; confirm whether that is intended.
        """
        label = self._STATUS_LABELS.get(day_data['status'])
        if label is not None:
            return label
        if any(record.get('status') == 'late' for record in day_data['records']):
            return '迟到'
        return '正常'

    def _generate_remarks(self, day_data: Dict) -> str:
        """Return a remark listing late, early-leave and missing punches."""
        label = self._REMARK_LABELS.get(day_data['status'])
        if label is not None:
            return label

        remarks = []
        for record in day_data['records']:
            status = record.get('status')
            if status == 'late':
                remarks.append(f"迟到{record.get('late_minutes', 0)}分钟")
            elif status == 'early_leave':
                remarks.append(f"早退{record.get('early_minutes', 0)}分钟")
            elif status == 'missing':
                remarks.append("缺卡")
        return '; '.join(remarks) if remarks else '正常'