import pandas as pd
from xgboost import XGBRegressor
# Example pipeline: train an XGBoost regressor on the training CSV and write
# its predictions for the tournament CSV to "predictions.csv".

# Training data contains features and targets; rows are indexed by "id".
training_data = pd.read_csv("numerai_training_data.csv").set_index("id")

# Tournament data contains features only; rows are indexed by "id".
tournament_data = pd.read_csv("numerai_tournament_data.csv").set_index("id")

# Feature columns are those whose name contains "feature".
feature_names = [f for f in training_data.columns if "feature" in f]

# Train a model to make predictions on the tournament data.
model = XGBRegressor(
    max_depth=5,
    learning_rate=0.01,
    n_estimators=2000,
    colsample_bytree=0.1,
)
model.fit(training_data[feature_names], training_data["target"])

# model.predict() returns a plain NumPy array, which has no .to_csv() and
# carries no row ids. Wrap it in a Series aligned with the tournament index
# so each prediction is written next to its "id".
# NOTE(review): the "prediction" column name is an assumption about the
# expected submission format -- confirm before uploading.
predictions = pd.Series(
    model.predict(tournament_data[feature_names]),
    index=tournament_data.index,
    name="prediction",
)

# Write the predictions file that is to be uploaded to numer.ai.
predictions.to_csv("predictions.csv", header=True)