# Databases Reference
# In-Depth Information
# Sample R Code
# Author: Brian Dalessandro
# Read in data, look at the variables and create a training
# and test set

# Tab-separated input with a header row; the `client_id` column supplies the
# row names of the resulting data frame.
file <- "binary_class_dataset.txt"
set <- read.table(file, header = TRUE, sep = "\t", row.names = "client_id")
names(set)

# Rows whose uniform draw is <= `split` go to `train`, the rest to `test`.
# No seed is set here, so the partition differs from run to run.
split <- 0.65
set["rand"] <- runif(nrow(set))
train <- set[set$rand <= split, ]
test <- set[set$rand > split, ]

# `Y` is added to `set` only, after the split: `train` and `test` were copied
# above and therefore do not carry this column.
set$Y <- set$Y_BUY
##########################################################
########### R FUNCTIONS ##########
##########################################################
# mgcv provides gam(), used by plotrel() below.
library(mgcv)
# GAM Smoothed plot
#
# Draws the fitted binomial response curve of `y` on `x` as a line and
# overlays, as red points, the empirical rate of the second outcome level of
# `y` for each (optionally binned) value of `x`. A jittered rug of `x` is
# added along the axis.
#
# Args:
#   x:     numeric predictor vector.
#   y:     two-valued response, same length as `x`, in a form accepted by
#          family = "binomial". The empirical rate is column 2 of
#          table(x, y) divided by the sum of columns 1 and 2.
#   b:     single positive number. When `x` has more than `b` distinct
#          values it is discretized into roughly `b` bins before the
#          empirical rates are computed.
#   title: main title of the plot.
#
# Side effects: draws on the active graphics device and consumes random
# numbers (rug jitter). The value of the final rug() call is returned
# invisibly, as before.
plotrel <- function(x, y, b, title) {
  stopifnot(is.numeric(b), length(b) == 1L, is.finite(b), b > 0)

  # Fit on the arguments themselves instead of the global `set`, so the
  # function neither needs that object to exist nor can pick up unrelated
  # columns that happen to be called `x` or `y`.
  # NOTE(review): the formula has no s() term, so this is a linear-logit
  # fit rather than a smooth -- confirm whether `y ~ s(x)` was intended.
  fit_data <- data.frame(x = x, y = y)
  g <- mgcv::gam(y ~ x, family = "binomial", data = fit_data)

  x_min <- min(x, na.rm = TRUE)
  x_max <- max(x, na.rm = TRUE)
  xs <- seq(x_min, x_max, length.out = 200)
  p <- predict(g, newdata = data.frame(x = xs), type = "response")

  # Now get empirical estimates (and discretize if non discrete)
  if (length(unique(x)) > b) {
    div <- floor(x_max / b)
    # floor() yields 0 (or a negative width) when max(x) < b, which used to
    # turn every bin label into NaN; fall back to an equal-width split of
    # the observed range in that case.
    if (div < 1) {
      div <- (x_max - x_min) / b
    }
    x_b <- floor(x / div) * div
    tab <- table(x_b, y)
  } else {
    tab <- table(x, y)
  }

  if (ncol(tab) < 2L) {
    stop("`y` must take two distinct values to compute empirical rates",
         call. = FALSE)
  }

  cnt <- tab[, 1] + tab[, 2]
  pact <- tab[, 2] / cnt
  # as.numeric (not as.integer) so fractional x values / bin labels keep
  # their position on the axis.
  xd <- as.numeric(rownames(tab))

  plot(xs, p, type = "l", main = title,
       ylab = "P(Conversion | Ad, X)", xlab = "X")
  points(xd, pact, type = "p", col = "red")
  # Uniform [0, 1) jitter spreads tied x values along the rug.
  rug(x + runif(length(x)))
}
library(plyr)