这是一个快速而粗糙的示例实现,可以正确处理假期安排中出现的间隔(即不连续的假期)。它没有处理以下边界情况:某个用户完全没有假期记录,或者第一条记录的假期早于当前日期。这些就留给你来完成——基本思路都在这里了。
from datetime import datetime, timedelta
import pandas as pd
# Day-month layout of the raw date strings, e.g. '27-Jun'.
datetime_format = '%d-%b'


def str2dt(dts: str, year: int = 1900) -> datetime:
    """Parse a day-month string such as '27-Jun' into a datetime.

    The year is always passed to ``strptime`` explicitly. The default of
    1900 is the value ``strptime`` itself substitutes when the format has
    no year field, so single-argument calls return what they always did.
    Supplying a real year makes leap days parseable ('29-Feb' is invalid
    in 1900) and avoids parsing a day of month without a year, which
    newer Python versions deprecate.

    Args:
        dts: Date text in ``datetime_format`` layout.
        year: Calendar year to attach to the parsed day and month.

    Returns:
        A naive ``datetime`` at midnight on the given day.

    Raises:
        ValueError: If ``dts`` does not match ``datetime_format`` or the
            resulting date does not exist in ``year``.
    """
    # %Y only matches four digits, so zero-pad small years.
    return datetime.strptime(f'{dts}-{year:04d}', f'{datetime_format}-%Y')


# Column names shared by the data frames in this script.
current_date_col_name = 'curr_date'
name_col_name = 'name'
holiday_col_name = 'holiday'
# Per-person table of "current" dates. The current date is allowed to differ
# from person to person; if it is the same for everyone, df1 is not needed.
df1 = pd.DataFrame(
    {
        name_col_name: ['A', 'B', 'C', 'D'],
        current_date_col_name: ['27-Jun', '27-Jun', '27-Jun', '27-Jun'],
    }
)

# Lookup from person name to that person's parsed current date.
current_dates = {
    person: str2dt(date_text)
    for person, date_text in zip(df1[name_col_name], df1[current_date_col_name])
}
# Names of the derived columns.
holiday_dt_col_name = 'holiday_datetime'
last_day_worked_col_name = 'last_day_worked'

# A one-day step, used when comparing neighbouring dates.
one_day = timedelta(days=1)

# One row per (person, day off) pair, with the date still in text form.
holiday_records = [
    ('A', '27-Jun'),
    ('A', '26-Jun'),
    ('B', '27-Jun'),
    ('B', '26-Jun'),
    ('B', '25-Jun'),
    ('B', '22-Jun'),
    ('C', '27-Jun'),
]
holidays_df = pd.DataFrame(
    holiday_records,
    columns=[name_col_name, holiday_col_name],
)

# Add a parsed copy of the holiday column so dates can be sorted and subtracted.
holidays_df[holiday_dt_col_name] = holidays_df[holiday_col_name].apply(str2dt)
# For every person, find the day before their most recent unbroken run of
# days off.
#
# Holidays are walked from newest to oldest, starting at the person's current
# date. While each holiday is at most one day earlier than the previous date,
# the run continues. The first larger gap ends the run, and the day before the
# earliest date in the run is taken as the last day worked.
#
# Limitations (unchanged from the original logic):
#   * Only people with at least one row in holidays_df get an entry.
#   * A person in holidays_df with no entry in current_dates raises KeyError.
#   * Holidays dated after the current date are not filtered out; they are
#     treated as part of the run.
last_dates_worked = {}
for group_name, gdf in holidays_df.groupby(name_col_name):
    # Wrap in pd.Timestamp so the result has the same type whether or not any
    # holiday ends up extending the run.
    prev_date = pd.Timestamp(current_dates[group_name])
    newest_first = gdf[holiday_dt_col_name].sort_values(ascending=False)
    for holiday_date in newest_first:
        if holiday_date - prev_date < -one_day:
            # Gap of more than one day: this holiday is not part of the run.
            break
        prev_date = holiday_date
    # prev_date is now the earliest day of the run (or the current date itself
    # if no holiday was adjacent to it).
    last_dates_worked[group_name] = prev_date - one_day
# Report each person's computed last day worked, one per line, then finish
# with a single blank line.
print("Outcome:")
for person, last_date_worked in last_dates_worked.items():
    print(f'{person}: {last_date_worked}')
print()
Outcome:
A: 1900-06-25 00:00:00
B: 1900-06-24 00:00:00
C: 1900-06-26 00:00:00