# RF_default.py (2.9 KB)

  1. ## 导入常用基本包
  2. import os
  3. import pandas as pd
  4. import numpy as np
  5. from PIL import Image
  6. from model_saver import save_model
  7. # 机器学习模型导入
  8. from sklearn.ensemble import RandomForestRegressor
  9. from sklearn.model_selection import cross_val_score,cross_val_predict
  10. from sklearn.model_selection import train_test_split
  11. from sklearn.metrics import mean_squared_error
  12. ## 导入常用辅助函数
  13. from sklearn.model_selection import train_test_split
  14. from sklearn.model_selection import GridSearchCV
  15. from sklearn.model_selection import cross_val_score
  16. from sklearn.model_selection import cross_val_predict
  17. ## 导入数据处理函数
  18. from sklearn.preprocessing import StandardScaler
  19. from sklearn.preprocessing import MinMaxScaler
  20. ## 导入评分函数
  21. from sklearn.metrics import r2_score
  22. from sklearn.metrics import mean_squared_error
  23. from sklearn.metrics import mean_absolute_error
  24. from sklearn.metrics import accuracy_score
  25. from sklearn.metrics import log_loss
  26. from sklearn.metrics import roc_auc_score
  27. import pickle
  28. import pandas as pd
  29. import numpy as np
  30. from sklearn.metrics import mean_squared_error
  31. from pathlib import Path
  32. # 导入数据
  33. data=pd.read_excel('model_optimize\data\data_filt.xlsx')
  34. x = data.iloc[:,1:10]
  35. y = data.iloc[:,-1]
  36. # 为 x 赋予列名
  37. x.columns = [
  38. 'organic_matter', # OM g/kg
  39. 'chloride', # CL g/kg
  40. 'cec', # CEC cmol/kg
  41. 'h_concentration', # H+ cmol/kg
  42. 'hn', # HN mg/kg
  43. 'al_concentration', # Al3+ cmol/kg
  44. 'free_alumina', # Free alumina g/kg
  45. 'free_iron', # Free iron oxides g/kg
  46. 'delta_ph' # ΔpH
  47. ]
  48. y.name = 'target_ph'
  49. Xtrain, Xtest, Ytrain, Ytest = train_test_split(x, y, test_size=0.2, random_state=42)
  50. ## 原始模型
  51. model_path = Path('model_optimize\pkl\RF_filt.pkl')
  52. # 确保路径存在
  53. if model_path.exists():
  54. with open(model_path, 'rb') as f:
  55. rfc = pickle.load(f)
  56. CV_score = cross_val_score(rfc, Xtrain, Ytrain, cv=5).mean()
  57. CV_predictions = cross_val_predict(rfc, Xtrain, Ytrain, cv=5)
  58. rmse1 = np.sqrt(mean_squared_error(Ytrain,CV_predictions))
  59. regressor = rfc.fit(Xtrain, Ytrain)
  60. test_predictions = regressor.predict(Xtest)
  61. score_test = regressor.score(Xtest,Ytest)
  62. rmse2 = np.sqrt(mean_squared_error(Ytest,test_predictions))
  63. print("5cv_score:",CV_score)
  64. print("rmse_5CV",rmse1)
  65. print("R2_test:",score_test)
  66. print("rmse_test",rmse2)
  67. # 指定参数
  68. # 确定参数进行训练
  69. rfc = RandomForestRegressor(random_state=1)
  70. CV_score = cross_val_score(rfc, Xtrain, Ytrain, cv=5).mean()
  71. CV_predictions = cross_val_predict(rfc, Xtrain, Ytrain, cv=5)
  72. rmse1 = np.sqrt(mean_squared_error(Ytrain,CV_predictions))
  73. regressor = rfc.fit(Xtrain, Ytrain)
  74. test_predictions = regressor.predict(Xtest)
  75. score_test = regressor.score(Xtest,Ytest)
  76. rmse2 = np.sqrt(mean_squared_error(Ytest,test_predictions))
  77. print("5cv:",CV_score)
  78. print("rmse_5CV",rmse1)
  79. print("R2_test:",score_test)
  80. print("rmse_test",rmse2)