1 : Install the rJava package in RStudio, then the rhdfs package.
# Load rJava from the user-level package library.
library(
  package = "rJava",
  lib.loc = "~/R/x86_64-pc-linux-gnu-library/3.2"
)
2 : Before installing it, set the Hadoop paths in RStudio (as shown below) and start Hadoop by typing the following in a terminal:
start-all.sh
# Tell rhdfs where the local Hadoop installation lives. These environment
# variables are set before the package is loaded.
Sys.setenv(
  HADOOP_CMD = "/usr/local/hadoop/bin/hadoop",
  HADOOP_HOME = "/usr/local/hadoop"
)
library("rhdfs", lib.loc = "~/R/x86_64-pc-linux-gnu-library/3.2")

# rhdfs must be initialised explicitly; the hdfs.* functions do not work
# until hdfs.init() has been called.
hdfs.init()
3 : Then import the data using the following commands:
# Read the diabetes data set from HDFS and parse it into a data frame.
# hdfs.line.reader() hands back raw text lines one chunk at a time, so keep
# reading until a chunk comes back empty, then parse the collected text.
reder <- hdfs.line.reader("/data/diabetes1")
chunks <- list()
repeat {
  chunk <- reder$read()
  # NOTE(review): assumes read() yields a zero-length result at end of file.
  if (length(chunk) == 0) {
    break
  }
  chunks[[length(chunks) + 1L]] <- chunk
}
reder$close()

# NOTE(review): assumes the file is comma-separated with a header row, as the
# column names used by the later modelling step suggest -- confirm.
diabetes <- read.csv(text = unlist(chunks), header = TRUE)
typeof(diabetes)
head(diabetes)
4 : Now build the decision tree using the rpart package:
# Fit and plot a classification tree for the diabetes data with rpart.
library(rpart)

# The formula interface below needs a data frame with named columns, and the
# split needs at least two rows; fail early with a clear error otherwise.
stopifnot(is.data.frame(diabetes), nrow(diabetes) >= 2)

# Remove every literal dot from the column names so they match the names
# used in the model formula.
names(diabetes) <- gsub(".", "", names(diabetes), fixed = TRUE)
str(diabetes)
attributes(diabetes)

# Reproducible 70/30 train/test split by row index. (An earlier flag-based
# split was dropped: its result was overwritten by this one before use.)
set.seed(564)
n_rows <- nrow(diabetes)
index <- sample(seq_len(n_rows), floor(n_rows * 0.7), replace = FALSE)
trainset <- diabetes[index, ]
testset <- diabetes[-index, ]
str(trainset)
str(testset)

# method = "class" forces a classification tree. Without it rpart guesses
# from the response type and fits a regression tree ("anova") when the class
# column is stored as a number.
dtree <- rpart(
  Classvariable ~ Number_of_times_pregnant +
    Plasma_glucose_concentration +
    Diastolic_blood_pressure +
    Triceps_skin_fold_thickness +
    Hour_serum_insulin +
    Body_mass_index +
    Diabetes_pedigree_function +
    Ageyears,
  data = trainset,
  method = "class",
  control = rpart.control(minsplit = 10)
)
str(dtree)
dtree

# Draw the tree skeleton, then label its nodes.
plot(dtree)
text(dtree)
5 : The mining results can now be accessed by the decision maker.
No comments:
Post a Comment