废话不多说,直接上代码
# -*- coding: utf-8 -*-
"""
Created on Fri Oct 25 09:24:15 2019

@author: zxh
"""
import pickle

import matplotlib.pyplot as plt
import numpy as np  # pandas depends on numpy
import xgboost as xgb
from sklearn.metrics import r2_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from xgboost import plot_importance
# Hyper-parameter grids, searched one stage at a time in this order.  The
# winners of each stage are frozen before the next stage starts.
TUNING_STAGES = (
    # 1. number of boosting rounds
    {'n_estimators': [475, 500, 525, 550, 575, 600]},
    # 2. tree depth and minimum child weight
    {'max_depth': [3, 4, 5, 6, 7, 8, 9, 10],
     'min_child_weight': [1, 2, 3, 4, 5, 6]},
    # 3. gamma
    {'gamma': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]},
    # 4. row / column subsampling
    {'subsample': [0.6, 0.7, 0.8, 0.9],
     'colsample_bytree': [0.6, 0.7, 0.8, 0.9]},
    # 5. regularisation terms
    {'reg_alpha': [0.05, 0.1, 1, 2, 3], 'reg_lambda': [0.05, 0.1, 1, 2, 3]},
    # 6. learning rate last
    {'learning_rate': [0.01, 0.05, 0.07, 0.1, 0.2]},
)

# NOTE(review): both file names below were lost from the source listing
# (only fragments survived); they are reconstructions -- confirm or adjust.
PREDICTIONS_FILE = 'xgb_predict.txt'
MODEL_FILE = 'xgb.model'


def trainandParameter(X_train, y_train, other_params):
    """Tune XGBRegressor hyper-parameters with a staged grid search.

    Each entry of ``TUNING_STAGES`` is searched with 5-fold cross-validation
    scored by R^2; the best values found are merged into the parameter set
    before the next stage runs.  The original author notes that this
    approach suits small datasets and that tuning on sample data does not
    work particularly well.

    Args:
        X_train: training feature matrix passed to ``GridSearchCV.fit``.
        y_train: training targets passed to ``GridSearchCV.fit``.
        other_params: starting keyword arguments for ``xgb.XGBRegressor``.
            The dict is copied, so the caller's object is left untouched.

    Returns:
        A new dict holding ``other_params`` updated with the best value
        found for every tuned parameter.
    """
    tuned = dict(other_params)
    for cv_params in TUNING_STAGES:
        model = xgb.XGBRegressor(**tuned)
        optimized_GBM = GridSearchCV(
            estimator=model,
            param_grid=cv_params,
            scoring='r2',
            cv=5,
            verbose=1,
            n_jobs=4,
        )
        optimized_GBM.fit(X_train, y_train)
        # best_params_ holds exactly the keys searched in this stage.
        tuned.update(optimized_GBM.best_params_)
    return tuned


def main():
    """Train, evaluate, visualise and save an XGBoost regressor.

    Reads ``concrete.data`` (comma-separated, last column is the target),
    splits it 50/50 into train and test sets, tunes the hyper-parameters,
    fits the final model and writes predictions, tree renderings and the
    model itself to disk before plotting the results.
    """
    # Initial parameters; the grid search refines them stage by stage.
    other_params = {
        'learning_rate': 0.1, 'n_estimators': 550, 'max_depth': 4,
        'min_child_weight': 5, 'seed': 0, 'subsample': 0.7,
        'colsample_bytree': 0.7, 'gamma': 0.1, 'reg_alpha': 1,
        'reg_lambda': 1,
    }
    Traindataset = np.loadtxt('concrete.data', delimiter=",")

    # Split the data into features X and target Y.
    X = Traindataset[:, 0:-1]
    Y = Traindataset[:, -1]

    # Split the dataset into a training set and a test set.
    seed = 0
    test_size = 0.5
    X_train, X_test, y_train, y_test = train_test_split(
        X, Y, test_size=test_size, random_state=seed)

    # Hyper-parameter tuning (assign other_params directly to skip it).
    best_params = trainandParameter(X_train, y_train, other_params)

    model = xgb.XGBRegressor(**best_params)
    model.fit(X_train, y_train)

    # Predict on the test set and store the predictions.
    xgbr_y_predict = model.predict(X_test)
    np_data = np.array(xgbr_y_predict)
    np.savetxt(PREDICTIONS_FILE, np_data)

    # Render the first and the last tree of the model.  The last index is
    # derived from the tuned n_estimators: the original hard-coded 549,
    # which is out of range whenever tuning selects fewer than 550 rounds.
    last_tree = best_params['n_estimators'] - 1
    digraph = xgb.to_graphviz(model, num_trees=0)
    digraph.format = 'png'
    digraph.view('./iris_xgb')
    digraph2 = xgb.to_graphviz(model, num_trees=last_tree)
    digraph2.format = 'png'
    digraph2.view('./2iris_xgb')

    # Save the model (pickle and native format); the context manager
    # guarantees the pickle file is flushed and closed.
    with open("pima.pickle.dat", "wb") as model_file:
        pickle.dump(model, model_file)
    model.save_model(MODEL_FILE)

    # Compare predictions with the true values on both sets.
    xgbr_y_predict_train = model.predict(X_train)
    plt.figure()
    plt.scatter(xgbr_y_predict, y_test, marker='X', s=5, c='blue')
    plt.scatter(xgbr_y_predict_train, y_train, marker='X', s=5, c='red')
    plt.title('XGBRegressor training set & test set prediction vs true')
    plt.xlabel('xgbr_y_predict')
    plt.ylabel('y_true value')
    plt.show()
    print('R-squared of XGBoostRegressor on test set is: %.4f'
          % (r2_score(y_test, xgbr_y_predict)))
    print('R-squared of XGBoostRegressor on training set is: %.4f'
          % (r2_score(y_train, xgbr_y_predict_train)))

    # Show feature importance.
    plot_importance(model)
    plt.show()


if '__main__' == __name__:
    main()
效果图
本文发布于:2024-02-01 11:44:33,感谢您对本站的认可!
本文链接:https://www.4u4v.net/it/170675907536362.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
留言与评论(共有 0 条评论) |