features, labels =self.define_features_and_labels() print("\n特征定义:") for category, items in features.items(): print(f" {category}:{', '.join(items)}") print("\n标签定义:") for label_type, label_name in labels.items(): print(f" {label_type}:{label_name}")
# Usage example: instantiate ProblemDefinition and call print_definition().
problem = ProblemDefinition()
problem.print_definition()
# 数据准备示例 import pandas as pd import numpy as np from sklearn.preprocessingimport StandardScaler, LabelEncoder from sklearn.model_selectionimport train_test_split
class DataPreparer:
    """Keep a private working copy of a dataset for preprocessing."""

    def __init__(self, data):
        """Store a copy of *data* and reset the processed result.

        Args:
            data: Object exposing a ``copy()`` method. Presumably a pandas
                DataFrame, since the file imports pandas — TODO confirm.
        """
        # Work on a copy so the caller's object is never modified.
        self.data = data.copy()
        # No processed result exists until a later step assigns one.
        self.processed_data = None
# NOTE(review): these statements read `self.data`, so they presumably form the
# body of a DataPreparer method whose `def` line is not visible here — confirm
# and re-indent under that method.

# Fill missing values in numeric columns with the column mean.
numeric_columns = self.data.select_dtypes(include=[np.number]).columns
for col in numeric_columns:
    if self.data[col].isnull().any():
        # Assign the filled column back. Calling fillna(..., inplace=True) on
        # a column selection is a chained in-place update, which pandas warns
        # about and which does not reach the frame under Copy-on-Write.
        self.data[col] = self.data[col].fillna(self.data[col].mean())

# Fill missing values in categorical (object-dtype) columns with the mode.
categorical_columns = self.data.select_dtypes(include=["object"]).columns
for col in categorical_columns:
    if self.data[col].isnull().any():
        modes = self.data[col].mode()
        # mode() is empty when every value in the column is missing; there is
        # nothing to fill with, so such a column is left unchanged.
        if not modes.empty:
            self.data[col] = self.data[col].fillna(modes.iloc[0])
# 模型训练示例 from sklearn.linear_modelimport LogisticRegression, LinearRegression from sklearn.ensembleimport RandomForestClassifier, RandomForestRegressor from sklearn.svmimport SVC, SVR from sklearn.metricsimport accuracy_score, mean_squared_error, classification_report
class ModelTrainer:
    """Container for models and their trained counterparts."""

    def __init__(self):
        """Start with empty ``models`` and ``trained_models`` dictionaries."""
        # Both dictionaries start empty; this constructor does not show what
        # keys or values later code stores in them.
        self.models = {}
        self.trained_models = {}
# NOTE(review): these statements read `self.evaluation_results`, so they
# presumably form the body of a method whose `def` line is not visible here —
# confirm and re-indent under that method.

# Build one table row per model: the model name followed by each metric value
# formatted to four decimal places.
comparison_data = [
    [model_name, *(f"{value:.4f}" for value in results.values())]
    for model_name, results in self.evaluation_results.items()
]

# Print the table, tab-separated. The header takes its metric names from the
# first model's results. The keys are unpacked into the list because a list
# cannot be concatenated with a dict keys view (`list + dict_keys` raises
# TypeError). With no results at all, only the name column header is printed.
first_results = next(iter(self.evaluation_results.values()), {})
headers = ["模型名称", *first_results]
print("\t".join(headers))
for row in comparison_data:
    print("\t".join(row))
def predict_service(input_data): """预测服务函数""" try: # 数据预处理 if encoders: for col, encoder in encoders.items(): if col in input_data.columns:
input_data[col]= encoder.transform(input_data[col])
if scaler:
numeric_cols = input_data.select_dtypes(include=['number']).columns
input_data[numeric_cols]= scaler.transform(input_data[numeric_cols])
# 选择特征列
feature_columns =['age','gender','city','duration'] ifall(col in merged_data.columnsfor col in feature_columns):
data_for_ml = merged_data[feature_columns + ['purchased']].copy()