import d6tjoin.top1
# fuzzy join on id, exact match on date
d6tjoin.top1.MergeTop1(df1,df2,fuzzy_left_on=['id'],fuzzy_right_on=['id'],exact_left_on=['date'],exact_right_on=['date']).merge()
|
date |
id |
v |
id_right |
v_right |
0 |
2010-01-01 |
e3e70682 |
0.271 |
3e7068 |
0.271 |
1 |
2010-01-01 |
f728b4fa |
0.576 |
728b4f |
0.576 |
2 |
2010-01-01 |
eb1167b3 |
0.038 |
b1167b |
0.038 |
3 |
2010-01-01 |
f7c1bd87 |
0.784 |
7c1bd8 |
0.784 |
4 |
2010-01-01 |
e443df78 |
0.387 |
443df7 |
0.387 |
# fuzzy join on both id and date
d6tjoin.top1.MergeTop1(df1,df2,fuzzy_left_on= ['date','id'],fuzzy_right_on= ['date','id']).merge()
|
date |
id |
v |
date_right |
v_right |
0 |
2010-01-01 |
e3e70682 |
0.271 |
2010-01-01 |
0.864 |
1 |
2010-01-02 |
e3e70682 |
0.010 |
2010-01-01 |
0.864 |
2 |
2010-01-01 |
f728b4fa |
0.576 |
2010-01-01 |
0.196 |
3 |
2010-01-02 |
f728b4fa |
0.524 |
2010-01-01 |
0.196 |
4 |
2010-01-01 |
eb1167b3 |
0.038 |
2010-01-01 |
0.950 |
# check top 1 merge results
result['top1']['date'].tail()
|
__top1left__ |
__top1right__ |
__top1diff__ |
__matchtype__ |
361 |
2010-12-28 |
2010-12-28 |
0 days |
exact |
362 |
2010-12-29 |
2010-12-29 |
0 days |
exact |
363 |
2010-12-30 |
2010-12-30 |
0 days |
exact |
364 |
2010-12-31 |
2010-12-31 |
0 days |
exact |
365 |
2011-01-01 |
2010-12-31 |
1 days |
top1 left |
|