Databases Reference
In-Depth Information
might go about building your regression models, and the second
shows how you might clean and prepare your data and then build a
k-NN classifier.
Sample R code: Linear regression on the housing dataset
Author: Ben Reddy
## first attempt: regress log(sale.price.n) on log(gross.sqft) using the
## unfiltered bk.homes data
## NOTE(review): log(0) is -Inf, so this fit is expected to fail whenever
## gross.sqft contains zeros -- the zero rows are inspected right after this
model1 <- lm(formula = log(sale.price.n) ~ log(gross.sqft), data = bk.homes)
## what's going on here?
## print every row whose gross.sqft is exactly 0; which() drops rows where
## the comparison is NA instead of returning all-NA rows
bk.homes[which(bk.homes[["gross.sqft"]] == 0), ]
## keep only the rows where both gross.sqft and land.sqft are strictly
## positive, so that taking logs of them below stays finite
bk.homes <- bk.homes[
  which(bk.homes[["gross.sqft"]] > 0 & bk.homes[["land.sqft"]] > 0),
]
## refit the one-predictor log-log model on the filtered data
model1 <- lm(formula = log(sale.price.n) ~ log(gross.sqft), data = bk.homes)
summary(object = model1)

## scatter of the two logged variables with the fitted line drawn on top
plot(
  x = log(bk.homes$gross.sqft),
  y = log(bk.homes$sale.price.n)
)
abline(reg = model1, col = "red", lwd = 2)

## residuals plotted against observation index
plot(x = resid(model1))
## add log(land.sqft) and a neighborhood factor to the model
model2 <- lm(
  formula = log(sale.price.n) ~ log(gross.sqft) + log(land.sqft) +
    factor(neighborhood),
  data = bk.homes
)
summary(object = model2)

## residuals plotted against observation index
plot(x = resid(model2))
## leave out intercept for ease of interpretability
## (the leading "0 +" in the formula removes the intercept term)
model2a <- lm(
  formula = log(sale.price.n) ~ 0 + log(gross.sqft) + log(land.sqft) +
    factor(neighborhood),
  data = bk.homes
)
summary(object = model2a)

## residuals plotted against observation index
plot(x = resid(model2a))
## add building type
## (building.class.category enters as an additional factor term)
model3 <- lm(
  formula = log(sale.price.n) ~ log(gross.sqft) + log(land.sqft) +
    factor(neighborhood) + factor(building.class.category),
  data = bk.homes
)
summary(object = model3)

## residuals plotted against observation index
plot(x = resid(model3))
## interact neighborhood and building type
## ("*" expands to both main effects plus their interaction)
model4 <- lm(
  formula = log(sale.price.n) ~ log(gross.sqft) + log(land.sqft) +
    factor(neighborhood) * factor(building.class.category),
  data = bk.homes
)
summary(object = model4)

## residuals plotted against observation index
plot(x = resid(model4))
Sample R code: K-NN on the housing dataset
Author: Ben Reddy
## load the packages for the k-NN example; library() stops with an error
## when a package is not installed, whereas require() only returns FALSE
## (with a warning) and lets the script run on until a later call fails
library(gdata)
library(geoPlot)