XGBoost的调用、参数调优、模型保存、模型显示、预测评价全套

阅读：评论：0

废话不多说，直接上代码

# -*- coding: utf-8 -*-
"""
Created on Fri Oct 25 09:24:15 2019

@author: zxh
"""
import pickle

import matplotlib.pyplot as plt
import numpy as np  # pandas depends on numpy
import xgboost as xgb
from sklearn.metrics import r2_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from xgboost import plot_importance
# Grids searched one stage at a time, in this order. The winners of each
# stage are fixed before the next stage is searched.
_TUNING_STAGES = (
    # 1. First tune n_estimators (candidate values at a fixed spacing).
    {'n_estimators': [475, 500, 525, 550, 575, 600]},
    # 2. Next tune max_depth together with min_child_weight.
    {'max_depth': [3, 4, 5, 6, 7, 8, 9, 10],
     'min_child_weight': [1, 2, 3, 4, 5, 6]},
    # 3. Then gamma.
    {'gamma': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]},
    # 4. Then subsample together with colsample_bytree.
    {'subsample': [0.6, 0.7, 0.8, 0.9],
     'colsample_bytree': [0.6, 0.7, 0.8, 0.9]},
    # 5. Then reg_alpha together with reg_lambda.
    {'reg_alpha': [0.05, 0.1, 1, 2, 3],
     'reg_lambda': [0.05, 0.1, 1, 2, 3]},
    # 6. Finally learning_rate; at this point one usually tries smaller
    #    learning rates.
    {'learning_rate': [0.01, 0.05, 0.07, 0.1, 0.2]},
)

# NOTE(review): the two output file names below were lost when this code was
# extracted from the web page (only fragments such as "...del'" survived).
# These are placeholders -- replace them with the intended names.
PREDICTIONS_FILE = 'xgb_predictions.txt'
MODEL_FILE = 'xgb_model.json'


def trainandParameter(X_train, y_train, other_params):
    """Tune XGBRegressor parameters stage by stage using cross-validation.

    Each stage runs a 5-fold ``GridSearchCV`` (scored by R^2, 4 parallel
    jobs) over one small grid from ``_TUNING_STAGES`` while every other
    parameter keeps its current value; the best values found are then fixed
    for the following stages.

    The original author notes that this approach suits small datasets and
    that tuning on sample data does not work especially well.

    Args:
        X_train: Training feature matrix passed to ``GridSearchCV.fit``.
        y_train: Training targets passed to ``GridSearchCV.fit``.
        other_params: Starting keyword arguments for ``xgb.XGBRegressor``.
            This dict is not modified.

    Returns:
        A new dict holding ``other_params`` updated with the best value found
        for every tuned parameter.
    """
    # Work on a copy so the caller's starting parameters stay untouched.
    tuned = dict(other_params)
    for grid in _TUNING_STAGES:
        search = GridSearchCV(
            estimator=xgb.XGBRegressor(**tuned),
            param_grid=grid,
            scoring='r2',
            cv=5,
            verbose=1,
            n_jobs=4,
        )
        search.fit(X_train, y_train)
        # best_params_ holds exactly the keys of this stage's grid.
        tuned.update(search.best_params_)
    return tuned


def main():
    """Tune, train, evaluate, visualise and save an XGBoost regressor.

    Reads the comma-separated file 'concrete.data' (all columns but the last
    are used as features, the last column as the target), splits it 50/50
    into training and test sets, tunes the parameters on the training set,
    fits the final model, writes the test-set predictions and the model to
    disk, renders the first and last trees, and plots predictions against
    true values as well as the feature importances.
    """
    # Initial parameters.
    other_params = {'learning_rate': 0.1, 'n_estimators': 550, 'max_depth': 4,
                    'min_child_weight': 5, 'seed': 0, 'subsample': 0.7,
                    'colsample_bytree': 0.7, 'gamma': 0.1, 'reg_alpha': 1,
                    'reg_lambda': 1}
    dataset = np.loadtxt('concrete.data', delimiter=",")

    # Split data into X and y.
    X = dataset[:, 0:-1]
    Y = dataset[:, -1]

    # Split the dataset into a training set and a test set.
    seed = 0
    test_size = 0.5
    X_train, X_test, y_train, y_test = train_test_split(
        X, Y, test_size=test_size, random_state=seed)

    # Parameter tuning. To skip it, use ``best_params = dict(other_params)``.
    best_params = trainandParameter(X_train, y_train, other_params)

    model = xgb.XGBRegressor(**best_params)
    model.fit(X_train, y_train)

    # Predict on the test set and store the predictions.
    xgbr_y_predict = model.predict(X_test)
    np_data = np.array(xgbr_y_predict)
    np.savetxt(PREDICTIONS_FILE, np_data)

    # Display the model: render the first and the last tree. The last index
    # is derived from the tuned n_estimators because tuning may select fewer
    # than the initial 550 rounds, in which case a fixed index of 549 would
    # not exist.
    # NOTE(review): newer XGBoost releases may expect ``tree_idx`` instead of
    # ``num_trees`` here -- confirm against the installed version.
    last_tree = int(best_params['n_estimators']) - 1
    digraph = xgb.to_graphviz(model, num_trees=0)
    digraph.format = 'png'
    digraph.view('./iris_xgb')
    digraph2 = xgb.to_graphviz(model, num_trees=last_tree)
    digraph2.format = 'png'
    digraph2.view('./2iris_xgb')

    # Save the model, both pickled and in XGBoost's own format.
    with open("pima.pickle.dat", "wb") as pickle_file:
        pickle.dump(model, pickle_file)
    model.save_model(MODEL_FILE)

    # Compare results: test set in blue, training set in red.
    xgbr_y_predict_train = model.predict(X_train)
    plt.figure()
    plt.scatter(xgbr_y_predict, y_test, marker='X', s=5, c='blue')
    plt.scatter(xgbr_y_predict_train, y_train, marker='X', s=5, c='red')
    plt.title('XGBRegressor training set & test set prediction vs true')
    plt.xlabel('xgbr_y_predict')
    plt.ylabel('y_true value')
    plt.show()

    print('R-squared of XGBoostRegressor on test set is: %.4f'
          % (r2_score(y_test, xgbr_y_predict)))
    print('R-squared of XGBoostRegressor on training set is: %.4f'
          % (r2_score(y_train, xgbr_y_predict_train)))

    # Show feature importances.
    plot_importance(model)
    plt.show()


if '__main__' == __name__:
    main()

效果图

本文发布于:2024-02-01 11:44:33，感谢您对本站的认可！

本文链接：https://www.4u4v.net/it/170675907536362.html

上一篇：【网络】HTTPS讲解（侧重于加密、秘钥、证书的讲解）

下一篇：分享一下最近写的仿支付宝支付键盘密码输入框demo

标签：模型全套参数评价 XGBoost

留言与评论（共有 0 条评论）