本文共 3041 字,大约阅读时间需要 10 分钟。
# joblib is distributed as a standalone package; the copy vendored under
# sklearn.externals was removed in scikit-learn 0.23, so prefer the standalone
# module and fall back to the vendored one only on old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib
# Training script: fit a logistic-regression classifier on the Wisconsin
# breast-cancer data set and persist the fitted model to disk.
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# sklearn.externals.joblib was removed in scikit-learn 0.23; use the
# standalone package and fall back to the vendored copy on old versions.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib


def logisticregression(data_path="breast-cancer-wisconsin.data",
                       model_path="test.pkl"):
    """Train a logistic-regression cancer classifier and save it.

    Reads the header-less CSV at ``data_path``, drops every row that has a
    missing value (encoded as ``"?"`` in the file), standardises the nine
    feature columns using a random 70% training split, fits a
    ``LogisticRegression`` on that split and dumps the fitted estimator to
    ``model_path`` with joblib.

    Args:
        data_path: Path of the ``breast-cancer-wisconsin.data`` file.
        model_path: Destination file for the pickled model.

    Returns:
        None. The fitted model is written to ``model_path``.
    """
    # The raw file has no header row, so column names are supplied here.
    columns = [
        "Sample code number",
        "Clump Thickness",
        "Uniformity of Cell Size",
        "Uniformity of Cell Shape",
        "Marginal Adhesion",
        "Single Epithelial Cell Size",
        "Bare Nuclei",
        "Bland Chromatin",
        "Normal Nucleoli",
        "Mitoses",
        "Class",
    ]

    # Treat "?" as NaN at parse time so every column is read as numeric
    # instead of being left as strings, then drop incomplete rows.
    data = pd.read_csv(data_path, names=columns, na_values="?")
    data = data.dropna(axis=0, how="any")

    # Target is the "Class" column; features are everything except the
    # sample identifier and the target.
    target = data["Class"]
    features = data.drop(columns=["Sample code number", "Class"])

    # Hold out 30% of the rows. The held-out part is not used for training
    # (and must never be used to fit the scaler), so it is discarded here.
    x_train, _x_test, y_train, _y_test = train_test_split(
        features, target, test_size=0.3
    )

    # Standardise using statistics estimated from the training split only.
    std = StandardScaler()
    x_train = std.fit_transform(x_train)

    # Fit the classifier.
    lr = LogisticRegression()
    lr.fit(x_train, y_train)
    # print("logistic regression weights:", lr.coef_)
    # print("logistic regression bias:", lr.intercept_)

    # Persist the trained model.
    joblib.dump(lr, model_path)


if __name__ == '__main__':
    logisticregression()
# Evaluation script: load the persisted logistic-regression model and report
# accuracy, precision/recall and AUC on a held-out split of the data set.
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# sklearn.externals.joblib was removed in scikit-learn 0.23; use the
# standalone package and fall back to the vendored copy on old versions.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib


def logisticregression(data_path="breast-cancer-wisconsin.data",
                       model_path="test.pkl"):
    """Evaluate a saved logistic-regression cancer classifier.

    Reads the header-less CSV at ``data_path``, drops rows with missing
    values (encoded as ``"?"``), takes a random 30% test split, standardises
    it with statistics from the matching training split, loads the model
    stored at ``model_path`` and prints, in order: the accuracy, the
    classification report, the binarised test labels and the AUC.

    Args:
        data_path: Path of the ``breast-cancer-wisconsin.data`` file.
        model_path: Path of the joblib file holding the fitted model.

    Returns:
        None. All results are printed to standard output.
    """
    # The raw file has no header row, so column names are supplied here.
    columns = [
        "Sample code number",
        "Clump Thickness",
        "Uniformity of Cell Size",
        "Uniformity of Cell Shape",
        "Marginal Adhesion",
        "Single Epithelial Cell Size",
        "Bare Nuclei",
        "Bland Chromatin",
        "Normal Nucleoli",
        "Mitoses",
        "Class",
    ]

    # Treat "?" as NaN at parse time so every column is read as numeric
    # instead of being left as strings, then drop incomplete rows.
    data = pd.read_csv(data_path, names=columns, na_values="?")
    data = data.dropna(axis=0, how="any")

    # Target is the "Class" column; features are everything except the
    # sample identifier and the target.
    target = data["Class"]
    features = data.drop(columns=["Sample code number", "Class"])

    x_train, x_test, y_train, y_test = train_test_split(
        features, target, test_size=0.3
    )

    # Fit the scaler on the training split and only *apply* it to the test
    # split; fitting on the test data would standardise it with its own
    # mean/variance rather than the ones the model was trained against.
    # NOTE(review): the split is random and the training-time scaler is not
    # persisted, so these statistics only approximate the ones used when the
    # model was fitted, and test rows may overlap the original training rows.
    std = StandardScaler()
    std.fit(x_train)
    x_test = std.transform(x_test)

    # SECURITY: joblib.load unpickles arbitrary objects; only load model
    # files that come from a trusted source.
    lr = joblib.load(model_path)

    # Predicted class labels for the test split.
    pre_result = lr.predict(x_test)
    # print(pre_result)

    # Accuracy.
    score = lr.score(x_test, y_test)
    print(score)

    # Precision and recall. Labels are passed explicitly so that 2 maps to
    # the first display name and 4 to the second.
    report = classification_report(
        y_test, pre_result, labels=[2, 4], target_names=["良性", "恶性"]
    )
    print(report)

    # AUC: binarise the true labels (class 4 -> 1, class 2 -> 0) and score
    # them against the predicted probability of class 4. Hard 2/4 predictions
    # would collapse the ROC curve to a single operating point.
    y_test = np.where(y_test > 2.5, 1, 0)
    print(y_test)
    positive_idx = list(lr.classes_).index(4)
    positive_proba = lr.predict_proba(x_test)[:, positive_idx]
    auc_score = roc_auc_score(y_test, positive_proba)
    print(auc_score)


if __name__ == '__main__':
    logisticregression()
转载地址:http://upili.baihongyu.com/