"""Baseline K-nearest-neighbours regression evaluated with 5-fold cross-validation.

The script loads ``model_optimize/data/data_filt.xlsx``, takes the columns at
positions 1-9 as features and the last column as the target, holds out 20 %
of the rows as a test set, and prints the per-fold and mean R^2 of a
``KNeighborsRegressor(n_neighbors=10)`` cross-validated on the training split.
"""

# Common base packages
import os
import pandas as pd
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.cm as cm

# Machine-learning libraries
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import (
    train_test_split,
    cross_val_score,
    cross_val_predict,
    GridSearchCV,
)
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import (
    mean_squared_error,
    r2_score,
    mean_absolute_error,
    accuracy_score,
    log_loss,
    roc_auc_score,
)

# XGBoost
from xgboost import XGBRegressor as XGBR

# Other utilities
import pickle
from pathlib import Path

# Load the data.
# The path is built with pathlib instead of a backslash-separated literal:
# "\d" is an invalid string escape, and backslash separators only resolve
# on Windows.  The path stays relative to the current working directory.
DATA_PATH = Path("model_optimize") / "data" / "data_filt.xlsx"
data = pd.read_excel(DATA_PATH)

x = data.iloc[:, 1:10]  # feature columns: positions 1..9
y = data.iloc[:, -1]    # target: last column

# Assign column names to x
x.columns = [
    'organic_matter',    # OM g/kg
    'chloride',          # CL g/kg
    'cec',               # CEC cmol/kg
    'h_concentration',   # H+ cmol/kg
    'hn',                # HN mg/kg
    'al_concentration',  # Al3+ cmol/kg
    'free_alumina',      # Free alumina g/kg
    'free_iron',         # Free iron oxides g/kg
    'delta_ph',          # ΔpH
]
y.name = 'target_ph'

# Fixed random_state keeps the 80/20 split reproducible between runs.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# NOTE(review): KNN is distance-based and the features above are recorded in
# different units (g/kg, cmol/kg, mg/kg); consider scaling them inside a
# Pipeline before the regressor -- left unchanged here so results stay
# comparable with earlier runs.
KNN_model = KNeighborsRegressor(n_neighbors=10)

# The result is stored under its own name so that the `r2_score` function
# imported from sklearn.metrics is not overwritten.  scoring="r2" spells out
# the metric the regressor would otherwise use by default.
cv_r2_scores = cross_val_score(KNN_model, Xtrain, Ytrain, cv=5, scoring="r2")
print(cv_r2_scores)
print(cv_r2_scores.mean())