변수의 개수가 늘어나면 동일한 설명력을 유지하기 위해서는 데이터가 기하급수적으로 필요하다(지수적으로 증가)
All things being equal, the simplest solution tends to be the best one. -William of Ockham
차원 축소:
High Deimensionality에 의하여 발생하는 문제
해결방안
요약
차원 축소 방법
변수 선택법
Meta-Heuristic Algorithm이라고도 함
개미가 먹이를 최단 거리로 search 하는 방법
Local Optimum vs. golbal Optimum
Selection: 변수 선택 (Chromosomes)
Crossover: 대안 검색
Mutation(돌연변이): Local Optim을 벗어나기 위한 방법
처리 순서
# Install necessary packages
# glmnet: Ridge, Lasso, Elastic Net Logistic Regression
# GA: genetic algorithm
if (!requireNamespace("glmnet"))
install.packages("glmnet")
if (!requireNamespace("GA"))
install.packages("GA")
library(glmnet)
library(GA)
# Performance Evaluation Function -----------------------------------------
## confusion table기준으로
<- function(cm){
perf_eval # True positive rate: TPR (Recall) : 민감도(재현율)
<- cm[2,2]/sum(cm[2,])
TPR # Precision (정확도)
<- cm[2,2]/sum(cm[,2])
PRE # True negative rate: TNR
<- cm[1,1]/sum(cm[1,]) #: 특이도
TNR # Simple Accuracy : 단순 정확도
<- (cm[1,1]+cm[2,2])/sum(cm)
ACC # Balanced Correction Rate (균형 정합도)
<- sqrt(TPR*TNR)
BCR # F1-Measure
<- 2*TPR*PRE/(TPR+PRE)
F1
return(c(TPR, PRE, TNR, ACC, BCR, F1))
}
<- matrix(0, nrow = 12, ncol = 6)
Perf_Table rownames(Perf_Table) <- c("All", "Forward", "Backward", "Stepwise", "GA", "Ridge", "Lasso", "Elastic Net",
"TRAIN_Forward", "TRAIN_Backward", "TRAIN_Stepwise",
"PCA_Stepwise")
colnames(Perf_Table) <- c("TPR", "Precision", "TNR", "Accuracy", "BCR", "F1-Measure")
# 결과을 저장할 Matrix
Perf_Table
TPR Precision TNR Accuracy BCR F1-Measure
All 0 0 0 0 0 0
Forward 0 0 0 0 0 0
Backward 0 0 0 0 0 0
Stepwise 0 0 0 0 0 0
GA 0 0 0 0 0 0
Ridge 0 0 0 0 0 0
Lasso 0 0 0 0 0 0
Elastic Net 0 0 0 0 0 0
TRAIN_Forward 0 0 0 0 0 0
TRAIN_Backward 0 0 0 0 0 0
TRAIN_Stepwise 0 0 0 0 0 0
PCA_Stepwise 0 0 0 0 0 0
# Load the data & Preprocessing
<- read.csv("data/PersonalLoan.csv")
Ploan
<- Ploan[,-c(1,5,10)] #ID, ZIP OCDE, 종속 제외
Ploan_input <- scale(Ploan_input, center = TRUE, scale = TRUE)
Ploan_input_scaled <- factor(Ploan$Personal.Loan, labels = c("0","1"))
Ploan_target <- data.frame(Ploan_input_scaled, Ploan_target)
Ploan_data_scaled
<- 1:1500
trn_idx <- 1501:2500
tst_idx
table(Ploan_target)
Ploan_target
0 1
2244 256
<- Ploan_data_scaled[trn_idx,] # train용 데이터
Ploan_trn <- Ploan_data_scaled[tst_idx,] # Test데이터
Ploan_tst #str(Ploan_trn)
전체 변수로 로지스틱 회귀분석
<- glm(Ploan_target ~ ., family=binomial, Ploan_trn)
full_model summary(full_model)
Call:
glm(formula = Ploan_target ~ ., family = binomial, data = Ploan_trn)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.1781 -0.2189 -0.0906 -0.0365 3.5345
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -4.445483 0.272085 -16.339 < 2e-16 ***
Age -0.776031 1.276510 -0.608 0.543233
Experience 0.910984 1.272791 0.716 0.474154
Income 2.374701 0.206877 11.479 < 2e-16 ***
Family 0.739703 0.153237 4.827 1.38e-06 ***
CCAvg 0.264244 0.120946 2.185 0.028902 *
Education 1.328860 0.170227 7.806 5.88e-15 ***
Mortgage 0.009294 0.100392 0.093 0.926239
Securities.Account -0.501459 0.183861 -2.727 0.006384 **
CD.Account 0.982082 0.151231 6.494 8.36e-11 ***
Online -0.182069 0.139815 -1.302 0.192843
CreditCard -0.617374 0.181094 -3.409 0.000652 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 984.01 on 1499 degrees of freedom
Residual deviance: 401.11 on 1488 degrees of freedom
AIC: 425.11
Number of Fisher Scoring iterations: 7
<- as.matrix(x=full_model$coefficients, nrow=12, ncol=1)
full_model_coeff full_model_coeff
[,1]
(Intercept) -4.445483193
Age -0.776030543
Experience 0.910983523
Income 2.374700527
Family 0.739703391
CCAvg 0.264244401
Education 1.328860208
Mortgage 0.009294095
Securities.Account -0.501459121
CD.Account 0.982081783
Online -0.182069399
CreditCard -0.617373701
# Make prediction
<- predict(full_model, type = "response", newdata = Ploan_tst)
full_model_prob <- rep(0, nrow(Ploan_tst))
full_model_prey which(full_model_prob >= 0.5)] <- 1
full_model_prey[<- table(Ploan_tst$Ploan_target, full_model_prey)
full_model_cm full_model_cm
full_model_prey
0 1
0 881 15
1 36 68
# Peformance evaluation
1,] <- perf_eval(full_model_cm)
Perf_Table[ Perf_Table
TPR Precision TNR Accuracy BCR F1-Measure
All 0.6538462 0.8192771 0.9832589 0.949 0.8018105 0.7272727
Forward 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
Backward 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
Stepwise 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
GA 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
Ridge 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
Lasso 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
Elastic Net 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
TRAIN_Forward 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
TRAIN_Backward 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
TRAIN_Stepwise 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
PCA_Stepwise 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
# Variable selection method 1: Forward selection
<- paste(colnames(Ploan_trn)[-12], collapse=" + ")
tmp_x <- paste("Ploan_target ~ ", tmp_x, collapse = "")
tmp_xy as.formula(tmp_xy)
Ploan_target ~ Age + Experience + Income + Family + CCAvg + Education +
Mortgage + Securities.Account + CD.Account + Online + CreditCard
#colSums(is.na(Ploan_trn)) #NA 없음
<- step(glm(Ploan_target ~ 1, family = "binomial", data = Ploan_trn),
forward_model scope = list(upper = as.formula(tmp_xy),
lower = Ploan_target ~ 1),
direction="forward", trace = 0)
summary(forward_model)
Call:
glm(formula = Ploan_target ~ Income + Education + CD.Account +
Family + CreditCard + Securities.Account + CCAvg, family = "binomial",
data = Ploan_trn)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.1510 -0.2194 -0.0947 -0.0378 3.5566
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -4.4217 0.2685 -16.467 < 2e-16 ***
Income 2.3871 0.2021 11.814 < 2e-16 ***
Education 1.3033 0.1661 7.849 4.20e-15 ***
CD.Account 0.9373 0.1426 6.572 4.95e-11 ***
Family 0.7319 0.1507 4.858 1.18e-06 ***
CreditCard -0.5889 0.1769 -3.329 0.000871 ***
Securities.Account -0.4675 0.1809 -2.585 0.009751 **
CCAvg 0.2482 0.1197 2.073 0.038149 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 984.01 on 1499 degrees of freedom
Residual deviance: 404.27 on 1492 degrees of freedom
AIC: 420.27
Number of Fisher Scoring iterations: 7
<- as.matrix(forward_model$coefficients, 12, 1)
forward_model_coeff forward_model_coeff
[,1]
(Intercept) -4.4217370
Income 2.3871278
Education 1.3033304
CD.Account 0.9372616
Family 0.7319393
CreditCard -0.5889305
Securities.Account -0.4674934
CCAvg 0.2481614
#변수가 추가된 순서를 확인
$anova$Step forward_model
[1] "" "+ Income" "+ Education"
[4] "+ CD.Account" "+ Family" "+ CreditCard"
[7] "+ Securities.Account" "+ CCAvg"
$anova$AIC forward_model
[1] 986.0083 623.0218 507.7428 464.4016 438.1318 429.1807 422.5882 420.2715
# Make prediction
<- predict(forward_model, type = "response", newdata = Ploan_tst)
forward_model_prob <- rep(0, nrow(Ploan_tst))
forward_model_prey which(forward_model_prob >= 0.5)] <- 1
forward_model_prey[<- table(Ploan_tst$Ploan_target, forward_model_prey)
forward_model_cm forward_model_cm
forward_model_prey
0 1
0 881 15
1 37 67
# Peformance evaluation
2,] <- perf_eval(forward_model_cm)
Perf_Table[ Perf_Table
TPR Precision TNR Accuracy BCR F1-Measure
All 0.6538462 0.8192771 0.9832589 0.949 0.8018105 0.7272727
Forward 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
Backward 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
Stepwise 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
GA 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
Ridge 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
Lasso 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
Elastic Net 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
TRAIN_Forward 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
TRAIN_Backward 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
TRAIN_Stepwise 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
PCA_Stepwise 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
<- step(full_model,
backward_model scope = list(upper = as.formula(tmp_xy), lower = Ploan_target ~ 1),
direction = "backward", trace = 0)
summary(backward_model)
Call:
glm(formula = Ploan_target ~ Income + Family + CCAvg + Education +
Securities.Account + CD.Account + CreditCard, family = binomial,
data = Ploan_trn)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.1510 -0.2194 -0.0947 -0.0378 3.5566
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -4.4217 0.2685 -16.467 < 2e-16 ***
Income 2.3871 0.2021 11.814 < 2e-16 ***
Family 0.7319 0.1507 4.858 1.18e-06 ***
CCAvg 0.2482 0.1197 2.073 0.038149 *
Education 1.3033 0.1661 7.849 4.20e-15 ***
Securities.Account -0.4675 0.1809 -2.585 0.009751 **
CD.Account 0.9373 0.1426 6.572 4.95e-11 ***
CreditCard -0.5889 0.1769 -3.329 0.000871 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 984.01 on 1499 degrees of freedom
Residual deviance: 404.27 on 1492 degrees of freedom
AIC: 420.27
Number of Fisher Scoring iterations: 7
<- as.matrix(backward_model$coefficients, 12, 1)
backward_model_coeff backward_model_coeff
[,1]
(Intercept) -4.4217370
Income 2.3871278
Family 0.7319393
CCAvg 0.2481614
Education 1.3033304
Securities.Account -0.4674934
CD.Account 0.9372616
CreditCard -0.5889305
#변수가 추가된 순서를 확인
$anova$Step backward_model
[1] "" "- Mortgage" "- Age" "- Experience" "- Online"
$anova$AIC backward_model
[1] 425.1119 423.1204 421.5019 420.6152 420.2715
# Make prediction
<- predict(backward_model, type = "response", newdata = Ploan_tst)
backward_model_prob <- rep(0, nrow(Ploan_tst))
backward_model_prey which(backward_model_prob >= 0.5)] <- 1
backward_model_prey[<- table(Ploan_tst$Ploan_target, backward_model_prey)
backward_model_cm backward_model_cm
backward_model_prey
0 1
0 881 15
1 37 67
# Peformance evaluation
3,] <- perf_eval(backward_model_cm)
Perf_Table[ Perf_Table
TPR Precision TNR Accuracy BCR F1-Measure
All 0.6538462 0.8192771 0.9832589 0.949 0.8018105 0.7272727
Forward 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
Backward 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
Stepwise 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
GA 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
Ridge 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
Lasso 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
Elastic Net 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
TRAIN_Forward 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
TRAIN_Backward 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
TRAIN_Stepwise 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
PCA_Stepwise 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
<- step(full_model,
stepwise_model scope = list(upper = as.formula(tmp_xy), lower = Ploan_target ~ 1),
direction="both", trace = 0)
summary(stepwise_model)
Call:
glm(formula = Ploan_target ~ Income + Family + CCAvg + Education +
Securities.Account + CD.Account + CreditCard, family = binomial,
data = Ploan_trn)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.1510 -0.2194 -0.0947 -0.0378 3.5566
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -4.4217 0.2685 -16.467 < 2e-16 ***
Income 2.3871 0.2021 11.814 < 2e-16 ***
Family 0.7319 0.1507 4.858 1.18e-06 ***
CCAvg 0.2482 0.1197 2.073 0.038149 *
Education 1.3033 0.1661 7.849 4.20e-15 ***
Securities.Account -0.4675 0.1809 -2.585 0.009751 **
CD.Account 0.9373 0.1426 6.572 4.95e-11 ***
CreditCard -0.5889 0.1769 -3.329 0.000871 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 984.01 on 1499 degrees of freedom
Residual deviance: 404.27 on 1492 degrees of freedom
AIC: 420.27
Number of Fisher Scoring iterations: 7
<- as.matrix(stepwise_model$coefficients, 12, 1)
stepwise_model_coeff stepwise_model_coeff
[,1]
(Intercept) -4.4217370
Income 2.3871278
Family 0.7319393
CCAvg 0.2481614
Education 1.3033304
Securities.Account -0.4674934
CD.Account 0.9372616
CreditCard -0.5889305
#변수가 추가된 순서를 확인
$anova$Step #중간에 추가된 내역이 없음() stepwise_model
[1] "" "- Mortgage" "- Age" "- Experience" "- Online"
$anova$AIC stepwise_model
[1] 425.1119 423.1204 421.5019 420.6152 420.2715
# Make prediction
<- predict(stepwise_model, type = "response", newdata = Ploan_tst)
stepwise_model_prob <- rep(0, nrow(Ploan_tst))
stepwise_model_prey which(stepwise_model_prob >= 0.5)] <- 1
stepwise_model_prey[<- table(Ploan_tst$Ploan_target, stepwise_model_prey)
stepwise_model_cm stepwise_model_cm
stepwise_model_prey
0 1
0 881 15
1 37 67
# Peformance evaluation
4,] <- perf_eval(stepwise_model_cm)
Perf_Table[ Perf_Table
TPR Precision TNR Accuracy BCR F1-Measure
All 0.6538462 0.8192771 0.9832589 0.949 0.8018105 0.7272727
Forward 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
Backward 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
Stepwise 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
GA 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
Ridge 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
Lasso 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
Elastic Net 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
TRAIN_Forward 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
TRAIN_Backward 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
TRAIN_Stepwise 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
PCA_Stepwise 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
유전 알고리즘
# Fitness function: F1 for the training dataset
<- function(string){
fit_F1 <- which(string == 1)
sel_var_idx # Use variables whose gene value is 1
<- x[, sel_var_idx]
sel_x <- data.frame(sel_x, y)
xy # Training the model
<- glm(y ~ ., family = binomial, data = xy)
GA_lr <- predict(GA_lr, type = "response", newdata = xy)
GA_lr_prob <- rep(0, length(y))
GA_lr_prey which(GA_lr_prob >= 0.5)] <- 1
GA_lr_prey[<- matrix(0, nrow = 2, ncol = 2)
GA_lr_cm 1,1] <- length(which(y == 0 & GA_lr_prey == 0))
GA_lr_cm[1,2] <- length(which(y == 0 & GA_lr_prey == 1))
GA_lr_cm[2,1] <- length(which(y == 1 & GA_lr_prey == 0))
GA_lr_cm[2,2] <- length(which(y == 1 & GA_lr_prey == 1))
GA_lr_cm[<- perf_eval(GA_lr_cm)
GA_perf return(GA_perf[6])
}
<- as.matrix(Ploan_trn[,-12])
x <- Ploan_trn[,12]
y
# Variable selection by Genetic Algorithm
<- proc.time()
start_time <- ga(type = "binary", fitness = fit_F1, nBits = ncol(x),
GA_F1 names = colnames(x), popSize = 50, pcrossover = 0.5,
pmutation = 0.01, maxiter = 100, elitism = 2, seed = 123)
<- proc.time()
end_time - start_time end_time
user system elapsed
49.20 0.02 49.34
<- which(GA_F1@solution == 1)
best_var_idx
# Model training based on the best variable subset
<- Ploan_trn[,c(best_var_idx, 12)]
GA_trn_data <- Ploan_tst[,c(best_var_idx, 12)]
GA_tst_data
<- glm(Ploan_target ~ ., family=binomial, GA_trn_data)
GA_model summary(GA_model)
Call:
glm(formula = Ploan_target ~ ., family = binomial, data = GA_trn_data)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.1550 -0.2173 -0.0943 -0.0377 3.6214
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -4.4333 0.2705 -16.391 < 2e-16 ***
Age 0.1292 0.1353 0.954 0.33985
Income 2.3956 0.2063 11.611 < 2e-16 ***
Family 0.7508 0.1527 4.918 8.76e-07 ***
CCAvg 0.2572 0.1202 2.140 0.03237 *
Education 1.3081 0.1670 7.833 4.75e-15 ***
Mortgage 0.0135 0.1003 0.135 0.89298
Securities.Account -0.4726 0.1822 -2.595 0.00947 **
CD.Account 0.9276 0.1429 6.489 8.63e-11 ***
CreditCard -0.5733 0.1770 -3.240 0.00120 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 984.01 on 1499 degrees of freedom
Residual deviance: 403.32 on 1490 degrees of freedom
AIC: 423.32
Number of Fisher Scoring iterations: 7
<- as.matrix(GA_model$coefficients, 12, 1)
GA_model_coeff GA_model_coeff
[,1]
(Intercept) -4.43332725
Age 0.12916530
Income 2.39554937
Family 0.75075280
CCAvg 0.25721313
Education 1.30812212
Mortgage 0.01350098
Securities.Account -0.47264049
CD.Account 0.92755663
CreditCard -0.57332418
# Make prediction
<- predict(GA_model, type = "response", newdata = GA_tst_data)
GA_model_prob <- rep(0, nrow(Ploan_tst))
GA_model_prey which(GA_model_prob >= 0.5)] <- 1
GA_model_prey[<- table(GA_tst_data$Ploan_target, GA_model_prey)
GA_model_cm GA_model_cm
GA_model_prey
0 1
0 883 13
1 38 66
# Peformance evaluation
5,] <- perf_eval(GA_model_cm)
Perf_Table[ Perf_Table
TPR Precision TNR Accuracy BCR F1-Measure
All 0.6538462 0.8192771 0.9832589 0.949 0.8018105 0.7272727
Forward 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
Backward 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
Stepwise 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
GA 0.6346154 0.8354430 0.9854911 0.949 0.7908273 0.7213115
Ridge 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
Lasso 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
Elastic Net 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
TRAIN_Forward 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
TRAIN_Backward 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
TRAIN_Stepwise 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
PCA_Stepwise 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
<- as.matrix(Ploan_trn[,-12])
Ploan_trn_X <- as.factor(Ploan_trn[,12])
Ploan_trn_y
<- as.matrix(Ploan_tst[,-12])
Ploan_tst_X <- as.factor(Ploan_tst[,12])
Ploan_tst_y
<- glmnet(Ploan_trn_X, Ploan_trn_y, family = "binomial", alpha = 0)
Ridge_model #HTML에서 오류 발생
#plot(Ridge_model, xvar = "lambda")
# Find the best lambda based in 5-fold cross validation
<- cv.glmnet(Ploan_trn_X, Ploan_trn_y, family = "binomial", alpha = 0)
CV_Ridge #HTML에서 오류 발생
#plot(CV_Ridge)
<- CV_Ridge$lambda.min
best_lambda
# Check the coefficients
<- predict(Ridge_model, s = best_lambda, newx = Ploan_tst_X, type = "coefficient")
Ridge_model_coeff Ridge_model_coeff
12 x 1 sparse Matrix of class "dgCMatrix"
1
(Intercept) -3.30036803
Age 0.02040531
Experience 0.04731391
Income 1.39594425
Family 0.40749151
CCAvg 0.31921895
Education 0.72106151
Mortgage 0.07161853
Securities.Account -0.22267823
CD.Account 0.59570049
Online -0.07595584
CreditCard -0.29458911
# Make predictions
<- predict(Ridge_model, s = best_lambda, newx = Ploan_tst_X, type = "class")
Ridge_model_prey <- as.factor(Ridge_model_prey)
Ridge_model_prey <- table(Ploan_tst_y, Ridge_model_prey)
Ridge_model_cm Ridge_model_cm
Ridge_model_prey
Ploan_tst_y 0 1
0 889 7
1 48 56
# Peformance evaluation
6,] <- perf_eval(Ridge_model_cm)
Perf_Table[ Perf_Table
TPR Precision TNR Accuracy BCR F1-Measure
All 0.6538462 0.8192771 0.9832589 0.949 0.8018105 0.7272727
Forward 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
Backward 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
Stepwise 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
GA 0.6346154 0.8354430 0.9854911 0.949 0.7908273 0.7213115
Ridge 0.5384615 0.8888889 0.9921875 0.945 0.7309274 0.6706587
Lasso 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
Elastic Net 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
TRAIN_Forward 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
TRAIN_Backward 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
TRAIN_Stepwise 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
PCA_Stepwise 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
<- glmnet(Ploan_trn_X, Ploan_trn_y, family = "binomial", alpha = 1)
Lasso_model plot(Lasso_model, xvar = "lambda")
# Find the best lambda based in 5-fold cross validation
<- cv.glmnet(Ploan_trn_X, Ploan_trn_y, family = "binomial", alpha = 1)
CV_Lasso plot(CV_Lasso)
<- CV_Lasso$lambda.min
best_lambda
# Check the coefficients
<- predict(Lasso_model, s = best_lambda, newx = Ploan_tst_X, type = "coefficient")
Lasso_model_coeff Lasso_model_coeff
12 x 1 sparse Matrix of class "dgCMatrix"
1
(Intercept) -4.22300574
Age .
Experience 0.09476078
Income 2.25829042
Family 0.66916519
CCAvg 0.23622651
Education 1.20888249
Mortgage .
Securities.Account -0.38279462
CD.Account 0.86156707
Online -0.11820359
CreditCard -0.50420953
# Make predictions
<- predict(Lasso_model, s = best_lambda, newx = Ploan_tst_X, type = "class")
Lasso_model_prey <- as.factor(Lasso_model_prey)
Lasso_model_prey <- table(Ploan_tst_y, Lasso_model_prey)
Lasso_model_cm Lasso_model_cm
Lasso_model_prey
Ploan_tst_y 0 1
0 882 14
1 39 65
# Peformance evaluation
7,] <- perf_eval(Lasso_model_cm)
Perf_Table[ Perf_Table
TPR Precision TNR Accuracy BCR F1-Measure
All 0.6538462 0.8192771 0.9832589 0.949 0.8018105 0.7272727
Forward 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
Backward 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
Stepwise 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
GA 0.6346154 0.8354430 0.9854911 0.949 0.7908273 0.7213115
Ridge 0.5384615 0.8888889 0.9921875 0.945 0.7309274 0.6706587
Lasso 0.6250000 0.8227848 0.9843750 0.947 0.7843688 0.7103825
Elastic Net 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
TRAIN_Forward 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
TRAIN_Backward 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
TRAIN_Stepwise 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
PCA_Stepwise 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
<- glmnet(Ploan_trn_X, Ploan_trn_y, family = "binomial", alpha = 0.5)
Elastic_model plot(Elastic_model, xvar = "lambda")
# Find the best lambda based in 5-fold cross validation
<- cv.glmnet(Ploan_trn_X, Ploan_trn_y, family = "binomial", alpha = 0.5)
CV_Elastic plot(CV_Elastic)
<- CV_Elastic$lambda.min
best_lambda
# Check the coefficients
<- predict(Elastic_model, s = best_lambda, newx = Ploan_tst_X, type = "coefficient")
Elastic_model_coeff Elastic_model_coeff
12 x 1 sparse Matrix of class "dgCMatrix"
1
(Intercept) -4.17040114
Age .
Experience 0.10435387
Income 2.18935181
Family 0.65871820
CCAvg 0.25523583
Education 1.17994054
Mortgage 0.01211337
Securities.Account -0.39643764
CD.Account 0.86433111
Online -0.13353287
CreditCard -0.51142014
# Make predictions
<- predict(Elastic_model, s = best_lambda, newx = Ploan_tst_X, type = "class")
Elastic_model_prey <- as.factor(Elastic_model_prey)
Elastic_model_prey <- table(Ploan_tst_y, Elastic_model_prey)
Elastic_model_cm Elastic_model_cm
Elastic_model_prey
Ploan_tst_y 0 1
0 882 14
1 40 64
# Peformance evaluation
8,] <- perf_eval(Elastic_model_cm)
Perf_Table[ Perf_Table
TPR Precision TNR Accuracy BCR F1-Measure
All 0.6538462 0.8192771 0.9832589 0.949 0.8018105 0.7272727
Forward 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
Backward 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
Stepwise 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
GA 0.6346154 0.8354430 0.9854911 0.949 0.7908273 0.7213115
Ridge 0.5384615 0.8888889 0.9921875 0.945 0.7309274 0.6706587
Lasso 0.6250000 0.8227848 0.9843750 0.947 0.7843688 0.7103825
Elastic Net 0.6153846 0.8205128 0.9843750 0.946 0.7783118 0.7032967
TRAIN_Forward 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
TRAIN_Backward 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
TRAIN_Stepwise 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
PCA_Stepwise 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
library(caret)
#cctrl3 <- trainControl(method = "none")
<- dplyr::select(Ploan_trn, -Ploan_target)
temp_x
set.seed(2021)
#TRACE 결과는 일반적인 forward와는 다름: AIC: 420.27
<- train(y = Ploan_trn$Ploan_target, x = temp_x,
test_class_none_model method = "glmStepAIC", direction ="forward",
# trControl = cctrl3,
tuneLength = 1,
# preProcess = c("center", "scale"), #데이터에서 이미 수행했기 때문에 제외
trace = FALSE
) # preProcess을 적용 ==> Accuracy Kappa 0.9474599 0.6900772
# preProcess을 미적용==> Accuracy Kappa 0.9461401 0.6846301
summary(test_class_none_model)
Call:
NULL
Deviance Residuals:
Min 1Q Median 3Q Max
-2.1510 -0.2194 -0.0947 -0.0378 3.5566
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -4.4217 0.2685 -16.467 < 2e-16 ***
Income 2.3871 0.2021 11.814 < 2e-16 ***
Education 1.3033 0.1661 7.849 4.20e-15 ***
CD.Account 0.9373 0.1426 6.572 4.95e-11 ***
Family 0.7319 0.1507 4.858 1.18e-06 ***
CreditCard -0.5889 0.1769 -3.329 0.000871 ***
Securities.Account -0.4675 0.1809 -2.585 0.009751 **
CCAvg 0.2482 0.1197 2.073 0.038149 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 984.01 on 1499 degrees of freedom
Residual deviance: 404.27 on 1492 degrees of freedom
AIC: 420.27
Number of Fisher Scoring iterations: 7
# Make prediction
<- predict(test_class_none_model, newdata = Ploan_tst)
trainStepwise_model_prey <- table(Ploan_tst$Ploan_target, trainStepwise_model_prey)
trainStepwise_model_cm trainStepwise_model_cm
trainStepwise_model_prey
0 1
0 881 15
1 37 67
# Peformance evaluation
9,] <- perf_eval(trainStepwise_model_cm)
Perf_Table[ Perf_Table
TPR Precision TNR Accuracy BCR F1-Measure
All 0.6538462 0.8192771 0.9832589 0.949 0.8018105 0.7272727
Forward 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
Backward 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
Stepwise 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
GA 0.6346154 0.8354430 0.9854911 0.949 0.7908273 0.7213115
Ridge 0.5384615 0.8888889 0.9921875 0.945 0.7309274 0.6706587
Lasso 0.6250000 0.8227848 0.9843750 0.947 0.7843688 0.7103825
Elastic Net 0.6153846 0.8205128 0.9843750 0.946 0.7783118 0.7032967
TRAIN_Forward 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
TRAIN_Backward 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
TRAIN_Stepwise 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
PCA_Stepwise 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
<- trainControl(method = "none")
cctrl3
#Ploan_trn
set.seed(2021)
<- train(Ploan_target ~ ., data = Ploan_trn,
test_class_none_model method = "glmStepAIC", direction ="backward",
# trControl = cctrl3,
tuneLength = 1,
# preProc = c("center", "scale"),
trace = FALSE)
# Make prediction
<- predict(test_class_none_model, newdata = Ploan_tst)
trainBackward_model_prey <- table(Ploan_tst$Ploan_target, trainBackward_model_prey)
trainBackward_model_cm trainBackward_model_cm
trainBackward_model_prey
0 1
0 881 15
1 37 67
# Peformance evaluation
10,] <- perf_eval(trainBackward_model_cm)
Perf_Table[ Perf_Table
TPR Precision TNR Accuracy BCR F1-Measure
All 0.6538462 0.8192771 0.9832589 0.949 0.8018105 0.7272727
Forward 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
Backward 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
Stepwise 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
GA 0.6346154 0.8354430 0.9854911 0.949 0.7908273 0.7213115
Ridge 0.5384615 0.8888889 0.9921875 0.945 0.7309274 0.6706587
Lasso 0.6250000 0.8227848 0.9843750 0.947 0.7843688 0.7103825
Elastic Net 0.6153846 0.8205128 0.9843750 0.946 0.7783118 0.7032967
TRAIN_Forward 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
TRAIN_Backward 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
TRAIN_Stepwise 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
PCA_Stepwise 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
#Ploan_trn
set.seed(2021)
<- train(Ploan_target ~ ., data = Ploan_trn,
test_class_none_model method = "glmStepAIC", direction ="both",
# trControl = cctrl3,
tuneLength = 1,
# preProc = c("center", "scale"),
trace = FALSE)
# Make prediction
<- predict(test_class_none_model, newdata = Ploan_tst)
trainBackward_model_prey <- table(Ploan_tst$Ploan_target, trainBackward_model_prey)
trainBackward_model_cm trainBackward_model_cm
trainBackward_model_prey
0 1
0 881 15
1 37 67
# Peformance evaluation
11,] <- perf_eval(trainBackward_model_cm)
Perf_Table[ Perf_Table
TPR Precision TNR Accuracy BCR F1-Measure
All 0.6538462 0.8192771 0.9832589 0.949 0.8018105 0.7272727
Forward 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
Backward 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
Stepwise 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
GA 0.6346154 0.8354430 0.9854911 0.949 0.7908273 0.7213115
Ridge 0.5384615 0.8888889 0.9921875 0.945 0.7309274 0.6706587
Lasso 0.6250000 0.8227848 0.9843750 0.947 0.7843688 0.7103825
Elastic Net 0.6153846 0.8205128 0.9843750 0.946 0.7783118 0.7032967
TRAIN_Forward 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
TRAIN_Backward 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
TRAIN_Stepwise 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
PCA_Stepwise 0.0000000 0.0000000 0.0000000 0.000 0.0000000 0.0000000
PCA, ISOMAP 정ㄹ
par(mfrow = c(1,1))
<- preProcess(Ploan_trn, method = c("pca"))
pca_model #summary(pca_model)
# 12개의 변수가 10개로 줄었음 (별의미는 없는듯 )
<- predict(pca_model, newdata = Ploan_trn)
Ploan_trn_pca <- predict(pca_model, newdata = Ploan_tst)
Ploan_tst_pca summary(Ploan_trn_pca)
Ploan_target PC1 PC2 PC3
0:1348 Min. :-2.77868 Min. :-2.3013 Min. :-5.4877
1: 152 1st Qu.:-1.26352 1st Qu.:-0.9734 1st Qu.:-0.3705
Median : 0.05533 Median :-0.3796 Median : 0.2283
Mean : 0.00000 Mean : 0.0000 Mean : 0.0000
3rd Qu.: 1.16633 3rd Qu.: 0.6314 3rd Qu.: 0.6969
Max. : 3.38379 Max. : 5.2857 Max. : 2.3862
PC4 PC5 PC6 PC7
Min. :-3.2148 Min. :-1.58849 Min. :-2.93559 Min. :-3.483626
1st Qu.:-0.5666 1st Qu.:-0.84713 1st Qu.:-0.61709 1st Qu.:-0.617015
Median :-0.1628 Median :-0.06393 Median :-0.02256 Median : 0.008665
Mean : 0.0000 Mean : 0.00000 Mean : 0.00000 Mean : 0.000000
3rd Qu.: 0.7661 3rd Qu.: 0.66841 3rd Qu.: 0.62796 3rd Qu.: 0.649336
Max. : 2.3225 Max. : 3.03559 Max. : 3.48301 Max. : 2.581928
PC8 PC9
Min. :-2.744662 Min. :-3.0380
1st Qu.:-0.655235 1st Qu.:-0.4476
Median :-0.005629 Median :-0.0703
Mean : 0.000000 Mean : 0.0000
3rd Qu.: 0.647278 3rd Qu.: 0.4777
Max. : 3.242973 Max. : 2.2697
(참고: 일반적인 PCA 방식으로 진행하면 컴포넌트는 11개 생성됨)
<- dplyr::select(Ploan_trn, -Ploan_target)
temp_x <- prcomp(temp_x, scale. = F)
fit1 summary(fit1)
Importance of components:
PC1 PC2 PC3 PC4 PC5 PC6 PC7
Standard deviation 1.4354 1.3565 1.1662 1.03022 0.99475 0.97956 0.96695
Proportion of Variance 0.1885 0.1683 0.1244 0.09709 0.09052 0.08777 0.08553
Cumulative Proportion 0.1885 0.3568 0.4812 0.57828 0.66880 0.75657 0.84210
PC8 PC9 PC10 PC11
Standard deviation 0.93897 0.70314 0.58711 0.07344
Proportion of Variance 0.08065 0.04523 0.03153 0.00049
Cumulative Proportion 0.92275 0.96798 0.99951 1.00000
print(fit1)
Standard deviations (1, .., p=11):
[1] 1.43541491 1.35648788 1.16617585 1.03021653 0.99475038 0.97955962
[7] 0.96695223 0.93896588 0.70313571 0.58710600 0.07343963
Rotation (n x k) = (11 x 11):
PC1 PC2 PC3 PC4
Age 0.696180238 -0.04838940 0.07479750 -0.01350101
Experience 0.696384421 -0.03713509 0.07708192 -0.01085800
Income -0.001384212 0.61797784 0.13955305 -0.04370085
Family -0.102058838 -0.23027911 -0.21820862 -0.33250490
CCAvg 0.010393488 0.59433150 0.15500099 -0.09632198
Education 0.041769067 -0.23166315 -0.14160503 -0.17137425
Mortgage 0.007543432 0.24879820 0.01071920 -0.18130459
Securities.Account 0.069533152 0.08270822 -0.46971740 -0.50494268
CD.Account 0.102235714 0.28247474 -0.63892202 0.02104799
Online 0.042005643 0.04641450 -0.32562601 0.04247570
CreditCard 0.031667566 0.06139339 -0.37721795 0.74732713
PC5 PC6 PC7 PC8
Age -0.001656604 0.0044643197 -0.067527151 0.04461818
Experience 0.009870082 -0.0091653273 -0.084418273 0.03152169
Income -0.029472954 0.0612546532 -0.004957992 0.19421827
Family -0.048417470 0.0847178718 -0.699608601 0.52334581
CCAvg -0.021093587 -0.0008482191 0.055491970 0.39666268
Education -0.474604824 0.5215375871 0.534025678 0.30643878
Mortgage -0.337208306 0.4973624456 -0.382289438 -0.61492714
Securities.Account -0.052050089 -0.4588270262 0.163203995 -0.22509510
CD.Account -0.066471362 -0.0102810346 0.053652788 0.02640722
Online 0.751391514 0.5056755575 0.071898282 -0.01786276
CreditCard -0.292516582 -0.0570922997 -0.172468363 0.06586665
PC9 PC10 PC11
Age -0.013781268 0.006878171 0.7073923904
Experience -0.003409557 0.015877346 -0.7064311762
Income -0.033610160 0.743755466 0.0069797136
Family -0.043250675 0.060380364 -0.0004559003
CCAvg -0.202013712 -0.641603668 -0.0052640923
Education -0.118635095 0.065678460 -0.0209552782
Mortgage -0.074253015 -0.119054078 0.0030769663
Securities.Account -0.466953259 0.064364542 0.0000838058
CD.Account 0.696288551 -0.090297487 0.0044750488
Online -0.249814296 0.007102624 -0.0013009949
CreditCard -0.413679318 0.019822746 -0.0020118058
screeplot(fit1, npcs = 7, type = "lines")
PCA 결과를 stepwise 방식으로 수행
본 Sample Data에 PCA적용하는 것이 큰 의미는 없으나, 비교의 의미로 PCA 진행이후 Stepwise를 수행함함
<- glm(Ploan_target ~ ., family=binomial, Ploan_trn_pca)
full_model_pca
#Full 모델의 변수 리스트 생성
<- paste(colnames(Ploan_trn_pca)[-1], collapse=" + ")
tmp_x <- paste("Ploan_target ~ ", tmp_x, collapse = "")
tmp_xy as.formula(tmp_xy)
Ploan_target ~ PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 +
PC9
<- step(full_model_pca,
stepwise_model scope = list(upper = as.formula(tmp_xy), lower = Ploan_target ~ 1),
direction="both", trace = 0)
summary(stepwise_model)
Call:
glm(formula = Ploan_target ~ PC2 + PC4 + PC5 + PC6 + PC7 + PC8 +
PC9, family = binomial, data = Ploan_trn_pca)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.44229 -0.26162 -0.13572 -0.05914 3.02479
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -3.92753 0.22211 -17.682 < 2e-16 ***
PC2 1.11061 0.08642 12.852 < 2e-16 ***
PC4 -0.66901 0.15213 -4.398 1.09e-05 ***
PC5 0.63433 0.12778 4.964 6.89e-07 ***
PC6 0.85149 0.13076 6.512 7.43e-11 ***
PC7 0.62681 0.14022 4.470 7.81e-06 ***
PC8 1.27405 0.13907 9.161 < 2e-16 ***
PC9 -1.03175 0.18069 -5.710 1.13e-08 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 984.01 on 1499 degrees of freedom
Residual deviance: 480.83 on 1492 degrees of freedom
AIC: 496.83
Number of Fisher Scoring iterations: 7
<- as.matrix(stepwise_model$coefficients, 12, 1)
stepwise_model_coeff stepwise_model_coeff
[,1]
(Intercept) -3.9275272
PC2 1.1106103
PC4 -0.6690077
PC5 0.6343311
PC6 0.8514872
PC7 0.6268145
PC8 1.2740450
PC9 -1.0317498
#변수가 추가된 순서를 확인
$anova$Step #중간에 추가된 내역이 없음() stepwise_model
[1] "" "- PC3" "- PC1"
$anova$AIC stepwise_model
[1] 499.2019 497.2528 496.8326
# Make prediction
<- predict(stepwise_model, type = "response", newdata = Ploan_tst_pca)
stepwise_model_prob <- rep(0, nrow(Ploan_tst))
stepwise_model_prey which(stepwise_model_prob >= 0.5)] <- 1
stepwise_model_prey[<- table(Ploan_tst_pca$Ploan_target, stepwise_model_prey)
stepwise_model_cm stepwise_model_cm
stepwise_model_prey
0 1
0 879 17
1 48 56
# Peformance evaluation
12,] <- perf_eval(stepwise_model_cm)
Perf_Table[ Perf_Table
TPR Precision TNR Accuracy BCR F1-Measure
All 0.6538462 0.8192771 0.9832589 0.949 0.8018105 0.7272727
Forward 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
Backward 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
Stepwise 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
GA 0.6346154 0.8354430 0.9854911 0.949 0.7908273 0.7213115
Ridge 0.5384615 0.8888889 0.9921875 0.945 0.7309274 0.6706587
Lasso 0.6250000 0.8227848 0.9843750 0.947 0.7843688 0.7103825
Elastic Net 0.6153846 0.8205128 0.9843750 0.946 0.7783118 0.7032967
TRAIN_Forward 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
TRAIN_Backward 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
TRAIN_Stepwise 0.6442308 0.8170732 0.9832589 0.948 0.7958930 0.7204301
PCA_Stepwise 0.5384615 0.7671233 0.9810268 0.935 0.7268048 0.6327684
이 부분은 아직 공부 중 …