"""Train, persist, reload and evaluate a random forest on UCI dataset 320.

Pipeline, executed top to bottom when the file is run:

1. Fetch the dataset via ``fetch_ucirepo(id=320)``.
2. One-hot encode the feature table with ``pd.get_dummies``.
3. Split 80/20 and fit a ``RandomForestRegressor`` on the ``G3`` target column.
4. Save the fitted model with joblib, load it back, and predict on the
   held-out split with the reloaded copy.
5. Print the mean squared error and show a true-vs-predicted scatter plot.
"""

from pathlib import Path

import joblib
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from ucimlrepo import fetch_ucirepo

# The plot labels below are Chinese. matplotlib's bundled default font
# (DejaVu Sans) does not cover CJK, so prefer commonly installed CJK-capable
# fonts and keep the existing defaults as a fallback for machines without them.
plt.rcParams["font.sans-serif"] = [
    "Microsoft YaHei",
    "SimHei",
    "Noto Sans CJK SC",
    *plt.rcParams["font.sans-serif"],
]
# Some CJK fonts lack the Unicode minus glyph; fall back to the ASCII hyphen.
plt.rcParams["axes.unicode_minus"] = False

# --- Load data ---------------------------------------------------------------
# Requires network access: the dataset is downloaded from the UCI repository.
student_performance = fetch_ucirepo(id=320)

X = student_performance.data.features
y = student_performance.data.targets

# Quick sanity check of what was downloaded.
print(X.head())
print(y.head())

# --- Preprocess --------------------------------------------------------------
# One-hot encode non-numeric columns; drop_first removes one redundant level
# per encoded column.
X = pd.get_dummies(X, drop_first=True)

# --- Split -------------------------------------------------------------------
# Only the 'G3' column of the targets table is used as the regression target.
# A fixed random_state keeps the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y['G3'], test_size=0.2, random_state=42
)

# --- Train -------------------------------------------------------------------
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# --- Persist -----------------------------------------------------------------
model_path = "C:/Users/baby7/Desktop/推理/model_checkpoints/random_forest_model.pkl"

# joblib.dump does not create missing directories; without this the script
# fails with FileNotFoundError the first time it runs on a fresh machine.
Path(model_path).parent.mkdir(parents=True, exist_ok=True)

joblib.dump(model, model_path)
print(f"模型已保存到 {model_path}")

# --- Reload ------------------------------------------------------------------
# NOTE: joblib files are pickles. Loading is fine here because the file was
# written a few lines above; never joblib.load a file from an untrusted source.
loaded_model = joblib.load(model_path)
print("模型已加载")

# --- Predict with the reloaded model -----------------------------------------
y_pred = loaded_model.predict(X_test)
print("预测结果:", y_pred)

# --- Evaluate ----------------------------------------------------------------
mse = mean_squared_error(y_test, y_pred)
print(f'均方误差: {mse:.2f}')

# --- Visualise ---------------------------------------------------------------
plt.scatter(y_test, y_pred)
plt.xlabel('真实值')
plt.ylabel('预测值')
plt.title('真实值与预测值对比')
# Dashed y = x reference line from 0 to 20: points on it are perfect predictions.
plt.plot([0, 20], [0, 20], color='red', linestyle='--')
plt.show()