Friday, 25 March 2016











1 : Install the rJava package in RStudio, then the rhdfs package

        library("rJava", lib.loc="~/R/x86_64-pc-linux-gnu-library/3.2")

2 : Before installing rhdfs, set the Hadoop paths in RStudio and start Hadoop by typing

        start-all.sh
        Sys.setenv(HADOOP_CMD="/usr/local/hadoop/bin/hadoop")
        Sys.setenv(HADOOP_HOME="/usr/local/hadoop") 
        library("rhdfs", lib.loc="~/R/x86_64-pc-linux-gnu-library/3.2")








3 : Then import the data using the following commands

      reder=hdfs.line.reader("/data/diabetes1")
      diabetes = reder$read()
      typeof(diabetes)
      diabetes





4 : Now build the decision tree using the rpart package

      names(diabetes) <- gsub("\\.","",names(diabetes))
     str(diabetes)
      attributes(diabetes)
      library(rpart)


      set.seed(564)
      flags = sample(2,nrow(diabetes), replace = TRUE, prob =c(0.7,0.3))
      trainset = diabetes[which(flags==1),]
      testset = diabetes[which(flags==2),]
      str(trainset)
      str(testset)


      index = sample(1:nrow(diabetes), nrow(diabetes)*0.7, replace=FALSE)
      trainset = diabetes[index,]
      testset = diabetes[-index,]
     str(trainset)
     str(testset)


    ?rpart
    dtree = rpart(Classvariable ~ Number_of_times_pregnant
              +Plasma_glucose_concentration
              +Diastolic_blood_pressure
              +Triceps_skin_fold_thickness
              +Hour_serum_insulin
              +Body_mass_index
              +Diabetes_pedigree_function
              +Ageyears ,
              data=trainset,
              control=rpart.control(minsplit = 10))
    str(dtree)
    dtree
   plot(dtree)
   text(dtree)









5 : Now the mining results can be accessed by the decision maker


 

No comments:

Post a Comment