It runs on an individual machine, as well as on the distributed processing frameworks Apache Hadoop, Apache Spark, and Apache Flink. It has gained much popularity and attention recently as the algorithm of choice for many winning teams of machine learning contests. It can also be integrated into data-flow frameworks like Apache Spark, Apache Hadoop, and Apache Flink using the abstracted Rabit and XGBoost4J libraries. XGBoost is also available on OpenCL for FPGAs. Soon after its release, the Python and R packages were built, and XGBoost now has package implementations for Java, Scala, Julia, Perl, and other languages. XGBoost initially started as a research project by Tianqi Chen as part of the Distributed (Deep) Machine Learning Community (DMLC) group.
# XGBoost regression on the `diamonds` data set: predict `price` from the
# numeric columns plus one-hot encodings of the ordinal factors
# `cut`, `color`, and `clarity`. Reports held-out RMSE and a residual plot.
library(tidyverse)
library(xgboost)

# Seed the RNG so the 70/30 train/test split is reproducible
# (the original used an unseeded sample).
set.seed(42)
ind <- sample(2, nrow(diamonds), replace = TRUE, prob = c(0.7, 0.3))
train.set <- diamonds[ind == 1, ]
test.set <- diamonds[ind == 2, ]

# One-hot encode the three factor predictors and keep every numeric column.
# `model.matrix(~ f - 1, ...)` drops the intercept so each factor level gets
# its own indicator column. (Replaces the deprecated `as.tibble()` and the
# superseded `select_if()` of the original.)
encode_features <- function(df) {
  bind_cols(
    select(df, where(is.numeric)),
    model.matrix(~ cut - 1, df) %>% as_tibble(),
    model.matrix(~ color - 1, df) %>% as_tibble(),
    model.matrix(~ clarity - 1, df) %>% as_tibble()
  )
}

# NOTE: renamed from `xgb.train` / `xgb.test` so these data frames no longer
# mask the xgboost::xgb.train() function called below.
train.features <- encode_features(train.set)
test.features <- encode_features(test.set)

# xgboost requires plain numeric matrices; `price` is split off as the label.
xgboost.train <- xgb.DMatrix(
  data = as.matrix(select(train.features, -price)),
  label = train.features$price
)
xgboost.test <- xgb.DMatrix(
  data = as.matrix(select(test.features, -price)),
  label = test.features$price
)

# Training hyper-parameters; early stopping monitors test-set RMSE and halts
# after 60 rounds without improvement (at most 500 boosting rounds).
param <- list(eval_metric = "rmse", gamma = 1, max_depth = 6, nthread = 3)
xg.model <- xgb.train(
  data = xgboost.train,
  params = param,
  watchlist = list(test = xgboost.test),
  nrounds = 500,
  early_stopping_rounds = 60,
  print_every_n = 30
)

# Evaluate on the held-out set.
xg.predict <- predict(xg.model, xgboost.test)
# sqrt(mean(err^2)) is the RMSE, not the MSE; the global keeps its original
# name (`mse.xgb`) for backward compatibility with any downstream code.
mse.xgb <- sqrt(mean((test.set$price - xg.predict)^2))
# Residual plot: prediction errors on the held-out set.
plot(test.set$price - xg.predict)