KNN_filt.py 1.5 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950
  1. # 导入常用基本包
  2. import os
  3. import pandas as pd
  4. import numpy as np
  5. from PIL import Image
  6. import matplotlib.pyplot as plt
  7. import matplotlib
  8. from mpl_toolkits.mplot3d import Axes3D
  9. import matplotlib.cm as cm
  10. # 机器学习相关库
  11. from sklearn.ensemble import RandomForestRegressor
  12. from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict, GridSearchCV
  13. from sklearn.preprocessing import StandardScaler, MinMaxScaler
  14. from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, accuracy_score, log_loss, roc_auc_score
  15. # 导入XGBoost
  16. from xgboost import XGBRegressor as XGBR
  17. # 其他工具
  18. import pickle
  19. from pathlib import Path
  20. # 导入数据
  21. data=pd.read_excel('model_optimize\data\data_filt.xlsx')
  22. x = data.iloc[:,1:10]
  23. y = data.iloc[:,-1]
  24. # 为 x 赋予列名
  25. x.columns = [
  26. 'organic_matter', # OM g/kg
  27. 'chloride', # CL g/kg
  28. 'cec', # CEC cmol/kg
  29. 'h_concentration', # H+ cmol/kg
  30. 'hn', # HN mg/kg
  31. 'al_concentration', # Al3+ cmol/kg
  32. 'free_alumina', # Free alumina g/kg
  33. 'free_iron', # Free iron oxides g/kg
  34. 'delta_ph' # ΔpH
  35. ]
  36. y.name = 'target_ph'
  37. Xtrain, Xtest, Ytrain, Ytest = train_test_split(x, y, test_size=0.2, random_state=42)
  38. from sklearn.neighbors import KNeighborsRegressor
  39. KNN_model = KNeighborsRegressor(n_neighbors=10)
  40. r2_score=cross_val_score(KNN_model,Xtrain,Ytrain,cv=5)
  41. print(r2_score)
  42. print(r2_score.mean())