Skip to content

Random Forest

Asaf Schers edited this page Jun 24, 2018 · 2 revisions

Generate PMML - R

# Install the randomForest and pmml packages only when they are missing
# (so re-running the script does not reinstall them every time), then
# attach them.

for (pkg in c("randomForest", "pmml")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(randomForest)
library(pmml)

# Log in to Kaggle and download the Titanic dataset:
# https://www.kaggle.com/c/titanic/data
# Load the training CSV into a data frame.
#
# stringsAsFactors is set explicitly: since R 4.0.0 the default is FALSE,
# which would leave string columns such as Sex and Embarked as character
# vectors instead of the factors the model-fitting step expects.

titanic.train <- read.table("titanic_train.csv", header = TRUE, sep = ",",
                            stringsAsFactors = TRUE)

# Survived is coded 0/1; convert it to a factor so that randomForest fits a
# classification model rather than a regression.
titanic.train$Survived <- as.factor(titanic.train$Survived)

# Train the random forest classifier.
#
# Name, Cabin and Ticket are excluded from the predictors, and so is
# PassengerId: it is only a row identifier, and it is not among the
# features supplied when the exported model is used for scoring.
# na.roughfix fills in missing values before fitting instead of dropping
# incomplete rows.

titanic.rf <- randomForest(
  Survived ~ . - PassengerId - Name - Cabin - Ticket,
  data = titanic.train,
  na.action = na.roughfix
)

# Convert the fitted model to PMML and write it to disk.
#
# The document is stored under its own name so it does not mask the pmml()
# function, and saveXML() is called through the XML namespace so the script
# does not depend on the XML package being attached to the search path.

titanic.pmml <- pmml(titanic.rf)
XML::saveXML(titanic.pmml, file = "titanic_rf.pmml")

Classify by PMML - Ruby

require 'scoruby'

# Load the PMML file produced by the R script.
random_forest = Scoruby.get_model 'titanic_rf.pmml'

# Feature values for the single passenger to classify.
features = {
  Sex: 'male',
  Parch: 0,
  Age: 30,
  Fare: 9.6875,
  Pclass: 2,
  SibSp: 0,
  Embarked: 'Q'
}

# Predicted label together with its score.
random_forest.predict(features)
# => {:label=>"0", :score=>0.882}

# Decision counts per label (presumably one vote per tree in the forest).
random_forest.decisions_count(features)
# => {"0"=>441, "1"=>59}
Clone this wiki locally