Databases Reference
In-Depth Information
fold
<-
c
()
# make a formula object
f
=
as.formula
(
paste
(
"Y"
,
"~"
,
paste
(
vars
,
collapse
=
"+"
)))
for
(
i
in
c
(
1
:
folds
))
{
train
<-
data
[(
data
$
fold
!=
i
),
]
test
<-
data
[(
data
$
fold
==
i
),
]
mod_x
<-
glm
(
f
,
data
=
train
,
family
=
binomial
(
logit
))
p
<-
predict
(
mod_x
,
newdata
=
test
,
type
=
"response"
)
# Get wMAE
wmae
<-
c
(
wmae
,
getmae
(
p
,
test
$
Y
,
mae_bins
,
"dummy"
,
0
))
fold
<-
c
(
fold
,
i
)
auc
<-
c
(
auc
,
get_auc
(
p
,
test
$
Y
))
}
return
(
data.frame
(
fold
,
wmae
,
auc
))
}
###############################################################
########## MAIN: MODELS AND PLOTS ##########
###############################################################
# Now build a model on all variables and look at coefficients
# and model fit

# Candidate predictor columns used on the right-hand side of the model.
vlist <- c(
  "AT_BUY_BOOLEAN",
  "AT_FREQ_BUY",
  "AT_FREQ_LAST24_BUY",
  "AT_FREQ_LAST24_SV",
  "AT_FREQ_SV",
  "EXPECTED_TIME_BUY",
  "EXPECTED_TIME_SV",
  "LAST_BUY",
  "LAST_SV",
  "num_checkins"
)

# Formula of the form: Y_BUY ~ v1+v2+...+vN, built from the names in vlist.
f <- as.formula(paste("Y_BUY", "~", paste(vlist, collapse = "+")))

# Logistic regression (binomial family, logit link) on all variables.
# NOTE(review): `train` is not assigned anywhere in this top-level section;
# confirm it is created earlier in the script before this line runs.
fit <- glm(f, data = train, family = binomial(logit))

# Print coefficient table and fit statistics.
summary(fit)
# Get performance metrics on each variable

# Variables to evaluate one at a time (the same ten names as the full-model
# list declared earlier in the script).
vlist <- c(
  "AT_BUY_BOOLEAN",
  "AT_FREQ_BUY",
  "AT_FREQ_LAST24_BUY",
  "AT_FREQ_LAST24_SV",
  "AT_FREQ_SV",
  "EXPECTED_TIME_BUY",
  "EXPECTED_TIME_SV",
  "LAST_BUY",
  "LAST_SV",
  "num_checkins"
)

# Create empty vectors to store the performance/evaluation metrics.
# Each starts empty and is appended to once per variable by the loop below.
auc_mu <- c()   # mean of the per-fold AUC values
auc_sig <- c()  # standard deviation of the per-fold AUC values
mae_mu <- c()   # mean of the per-fold wMAE values
mae_sig <- c()  # NOTE(review): presumably sd of per-fold wMAE -- confirm in loop
for
(
i
in
c
(
1
:
length
(
vlist
)))
{
a
<-
getxval
(
c
(
vlist
[
i
]),
set
,
10
,
100
)
auc_mu
<-
c
(
auc_mu
,
mean
(
a
$
auc
))
auc_sig
<-
c
(
auc_sig
,
sd
(
a
$
auc
))
mae_mu
<-
c
(
mae_mu
,
mean
(
a
$
wmae
))