import d6tstack.combine_csv as d6tc
# Build the combiner over the configured input files.
# all_strings=True makes reading faster.
c = d6tc.CombinerCSV(cfg_fnames, all_strings=True)
col_preview = c.preview_columns()

# Column-consistency report: every line except the last is followed by one
# empty line, produced here via end='\n\n' rather than separate print calls.
print('all columns equal?', c.is_all_equal(), end='\n\n')
print('which columns are unique?', col_preview['columns_unique'], end='\n\n')
print('which files have unique columns?', end='\n\n')
print(c.is_col_present_unique())
all columns equal? False

which columns are unique? ['profit2']

which files have unique columns?

                                         profit2
filename
test-data-input-csv-colmismatch-mar.csv     True
test-data-input-csv-colmismatch-feb.csv    False
test-data-input-csv-colmismatch-jan.csv    False
# Quickly read all data: preview the combined contents of the input files.
# Called without arguments, every column is kept; in the output shown below,
# files that lack a column (profit2) have NaN in that column.
c.preview_combine() # keep all columns
|   | cost | date | filename | profit | profit2 | sales |
|---|------|------|----------|--------|---------|-------|
| 0 | -100 | 2011-03-01 | test-data-input-csv-colmismatch-mar.csv | 200 | 400 | 300 |
| 1 | -100 | 2011-03-02 | test-data-input-csv-colmismatch-mar.csv | 200 | 400 | 300 |
| 2 | -100 | 2011-03-03 | test-data-input-csv-colmismatch-mar.csv | 200 | 400 | 300 |
| 0 | -90 | 2011-02-01 | test-data-input-csv-colmismatch-feb.csv | 110 | NaN | 200 |
| 1 | -90 | 2011-02-02 | test-data-input-csv-colmismatch-feb.csv | 110 | NaN | 200 |
| 2 | -90 | 2011-02-03 | test-data-input-csv-colmismatch-feb.csv | 110 | NaN | 200 |
| 0 | -80 | 2011-01-01 | test-data-input-csv-colmismatch-jan.csv | 20 | NaN | 100 |
| 1 | -80 | 2011-01-02 | test-data-input-csv-colmismatch-jan.csv | 20 | NaN | 100 |
| 2 | -80 | 2011-01-03 | test-data-input-csv-colmismatch-jan.csv | 20 | NaN | 100 |
# Preview the combined data again, restricted to the columns shared by all
# files; profit2, present in only one file, is absent from the output below.
c.preview_combine(is_col_common=True) # keep only common columns
Out[64]:
|   | cost | date | profit | sales | filename |
|---|------|------|--------|-------|----------|
| 0 | -100 | 2011-03-01 | 200 | 300 | test-data-input-csv-colmismatch-mar.csv |
| 1 | -100 | 2011-03-02 | 200 | 300 | test-data-input-csv-colmismatch-mar.csv |
| 2 | -100 | 2011-03-03 | 200 | 300 | test-data-input-csv-colmismatch-mar.csv |
| 0 | -90 | 2011-02-01 | 110 | 200 | test-data-input-csv-colmismatch-feb.csv |
| 1 | -90 | 2011-02-02 | 110 | 200 | test-data-input-csv-colmismatch-feb.csv |
| 2 | -90 | 2011-02-03 | 110 | 200 | test-data-input-csv-colmismatch-feb.csv |
| 0 | -80 | 2011-01-01 | 20 | 100 | test-data-input-csv-colmismatch-jan.csv |
| 1 | -80 | 2011-01-02 | 20 | 100 | test-data-input-csv-colmismatch-jan.csv |
| 2 | -80 | 2011-01-03 | 20 | 100 | test-data-input-csv-colmismatch-jan.csv |
|