1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950 |
- # 导入常用基本包
- import os
- import pandas as pd
- import numpy as np
- from PIL import Image
- import matplotlib.pyplot as plt
- import matplotlib
- from mpl_toolkits.mplot3d import Axes3D
- import matplotlib.cm as cm
- # 机器学习相关库
- from sklearn.ensemble import RandomForestRegressor
- from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict, GridSearchCV
- from sklearn.preprocessing import StandardScaler, MinMaxScaler
- from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, accuracy_score, log_loss, roc_auc_score
- # 导入XGBoost
- from xgboost import XGBRegressor as XGBR
- # 其他工具
- import pickle
- from pathlib import Path
# ---------------------------------------------------------------------------
# Load the data set, name the features, hold out a test split, and report the
# 5-fold cross-validated R^2 of a 10-nearest-neighbour regressor on the
# training split.
# ---------------------------------------------------------------------------

# Load the data.  The path is assembled with pathlib so it resolves on every
# OS; the previous backslash literal only worked on Windows and relied on the
# invalid escape sequence "\d" being passed through unchanged.
data = pd.read_excel(Path('model_optimize') / 'data' / 'data_filt.xlsx')

# Features are columns 1..9 (by position); the target is the last column.
# .copy() decouples the feature frame from `data` before it is relabelled.
x = data.iloc[:, 1:10].copy()
y = data.iloc[:, -1].rename('target_ph')

# Give the feature columns descriptive names.
x.columns = [
    'organic_matter',    # OM g/kg
    'chloride',          # CL g/kg
    'cec',               # CEC cmol/kg
    'h_concentration',   # H+ cmol/kg
    'hn',                # HN mg/kg
    'al_concentration',  # Al3+ cmol/kg
    'free_alumina',      # Free alumina g/kg
    'free_iron',         # Free iron oxides g/kg
    'delta_ph',          # delta pH
]

# 80/20 train/test split with a fixed seed for reproducibility.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    x, y, test_size=0.2, random_state=42
)

from sklearn.neighbors import KNeighborsRegressor

# NOTE(review): the features are fed to KNN unscaled although they are
# recorded in different units; distance-based models are usually sensitive to
# that -- consider wrapping the model in a Pipeline with StandardScaler.
KNN_model = KNeighborsRegressor(n_neighbors=10)

# Per-fold R^2 on the training split.  The result is deliberately NOT stored
# under the name `r2_score`: that would overwrite the metric function imported
# from sklearn.metrics and break every later call to it.
knn_cv_r2 = cross_val_score(KNN_model, Xtrain, Ytrain, cv=5, scoring='r2')
print(knn_cv_r2)
print(knn_cv_r2.mean())
|