Does your data science code look like this? Don't do it! There's a better way.
import pandas as pd
import sklearn.svm, sklearn.metrics
def get_data():
    """Fetch the source data, clean it, and cache the result on disk.

    Calls ``download_data()`` and feeds its result to ``clean_data()`` (both
    are defined outside this snippet), then pickles the cleaned object to the
    relative path ``data.pkl``. Nothing is returned; the pickle file is the
    only output.
    """
    cleaned = clean_data(download_data())
    cleaned.to_pickle('data.pkl')
def preprocess(data):
    """Return the result of passing ``data`` through ``apply_function``.

    ``apply_function`` is defined outside this snippet, so what the
    transformation does (and whether it modifies ``data`` in place) cannot be
    determined from here.
    """
    return apply_function(data)
# flow parameters: switches selecting which workflow stages run
reload_source = True
do_preprocess = True

# run workflow
if reload_source:
    # regenerate data.pkl before it is read back below
    get_data()

df_train = pd.read_pickle('data.pkl')
if do_preprocess:
    df_train = preprocess(df_train)

# every column except the last is used as a feature; the label is column 'y'
features = df_train.iloc[:, :-1]
labels = df_train['y']

model = sklearn.svm.SVC()
model.fit(features, labels)

# accuracy is computed on the same rows the model was fitted on
predictions = model.predict(features)
print(sklearn.metrics.accuracy_score(labels, predictions))
|