Java Reference
In-Depth Information
If there is only one category, that is the category of the set; otherwise, the
category of the set cannot be determined.
private static
String findCategory(Map items){
// no category if the set is empty
if
(items.size()
##
0)
return
"?";
// computes the frequency of each category
Map catFreq
#
new
HashMap();
Iterator it
#
items.keySet().iterator();
String category
#
"";
while
(it.hasNext()){
Item item
#
(Item)it.next();
category
#
(String)items.get(item);
Integer count
#
(Integer)catFreq.get(category);
if
(count
##
null
)
catFreq.put(category,
new
Integer(1));
else
catFreq.put(category,
new
Integer(1
!
count.intValue()));
}
// if only one category is present it is the set's category
if
(catFreq.keySet().size()
##
1)
return
category;
// otherwise it is not possible to assign a category
return
"?";
}
The selectSplit() method determines the best split feature for a set of items.
The best split feature is the one that yields the highest information gain.
private static
String selectSplit(Map items,Map features){
Iterator attr
#
features.keySet().iterator();
String split
#
null
;
double
maxGain
#
0.0;
while
(attr.hasNext()){
String candidate
#
(String)attr.next();
FeatureType type
#
(FeatureType)features.get(candidate);
double
gain
#
evaluateSplitGain(items,
candidate,type.allowedValues());
if
(gain>maxGain){
maxGain
#
gain;
split
#
candidate;
}
}
return
split;
}