Library Includes

library(tidyverse)      # General utility packages
library(party)          # Random Forest conditional inference tree utilization
library(missForest)     # Imputation (for both factor and numeric)
library(cowplot)        # Plotting ggplot's side by side
library(corrplot)       # Visual correlation plotting
library(fastDummies)    # Used for dummy coding
library(caret)          # Used for k-fold cross validation
library(ggthemes)       # Provides extra themes for styling ggplots.

Data Loading

train <- read_csv("data/train.csv") # Returns tibble
test <- read_csv("data/test.csv")   # Returns tibble
test$Survived <- NA
combi <- rbind(train, test)

Utility Functions

createSubmission <- function(submissionName = "submit.csv", prediction) {
  submit <- data.frame(PassengerId = test$PassengerId, Survived = prediction)
  write_csv(submit, submissionName)
}

Feature Eng. (Categorical / Factors)

# Creating a new Title feature by parsing it using a regular expression.
combi$Title <- sapply(combi$Name, FUN = function(x) { strsplit(x, split = "[,.]")[[1]][2]})
combi$Title <- sub(' ', '', combi$Title)

# Combining some rare Title values to simplify our factor counts.
combi$Title[combi$Title %in% c("Mme", "Mlle")] <- "Mlle"
combi$Title[combi$Title %in% c("Capt", "Don", "Major", "Sir")] <- "Sir"
combi$Title[combi$Title %in% c("Dona", "Lady", "the Countess", "Jonkheer")] <- "Lady"
combi$Title <- factor(combi$Title)

# Creating a new Surname feature by parsing it using a regular expression.
combi$Surname <- sapply(combi$Name, FUN = function(x) { strsplit(x, split = "[,.]")[[1]][1]})

# Creating a new FamilySize feature including self, parents, siblings, spouse, and children.
combi$FamilySize <- combi$SibSp + combi$Parch + 1

# Creating simple flag indicating whether person has any family on board.
combi$FamilyOnBoard <- ifelse(combi$SibSp + combi$Parch > 0, 1, 0)

# Creating a new CabinLetter feature by parsing it using a regular expression.
combi$CabinLetter <- sapply(combi$Cabin, 
                            FUN = function(x) { ifelse(is.na(x), NA_character_, substr(x, 1, 1)) })
combi$CabinLetter <- factor(combi$CabinLetter)

# Keeping track of whether an observation had a Cabin noted at all
combi$HasCabin <- ifelse(is.na(combi$Cabin), 0, 1)

# Creating socioeconomic feature
combi$SocioEconomic <- ifelse(
  is.na(combi$CabinLetter), 
  paste(combi$Pclass, "X", sep=""),
  paste(combi$Pclass, combi$CabinLetter, sep=""))

Feature Eng. (Continuous)

Normalization (Standardization)

Binning (Discretization)

# Binning Age into 3 factor bins (so that future imputation picks a factor before conversion).
combi$Age <- sapply(combi$Age, 
                    FUN = function(x) { 
                      if (is.na(x)) NA
                      else if (x < 18) "Child"
                      else if (x >= 18 & x < 50) "Adult"
                      else "Elder"
                    })
ggplot(combi) +
  aes(x = Age) +
  geom_bar(width = 0.2,
           stat = "count") +
  theme_clean(base_size = 10) +
  labs(x = "Age", y = "Frequency")

Data Imputation & Cleanup

# In order to leverage missForest for imputation, it only supports consuming / predicting factor 
# and numeric data types so all applicable passed-in features should be in one of those two formats.
combi$Age <- as.factor(combi$Age)
combi$Sex <- as.factor(combi$Sex)
combi$Embarked <- as.factor(combi$Embarked)
combi$SocioEconomic <- as.factor(combi$SocioEconomic)
glimpse(combi, width = 105)
Rows: 1,309
Columns: 19
$ PassengerId   <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 2…
$ Survived      <dbl> 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0,…
$ Pclass        <dbl> 3, 1, 3, 1, 3, 3, 1, 3, 3, 2, 3, 1, 3, 3, 3, 2, 3, 2, 3, 3, 2, 2, 3, 1, 3, 3, 3,…
$ Name          <chr> "Braund, Mr. Owen Harris", "Cumings, Mrs. John Bradley (Florence Briggs Thayer)"…
$ Sex           <fct> male, female, female, female, male, male, male, male, female, female, female, fe…
$ Age           <fct> Adult, Adult, Adult, Adult, Adult, NA, Elder, Child, Adult, Child, Child, Elder,…
$ SibSp         <dbl> 1, 1, 0, 1, 0, 0, 0, 3, 0, 1, 1, 0, 0, 1, 0, 0, 4, 0, 1, 0, 0, 0, 0, 0, 3, 1, 0,…
$ Parch         <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 1, 0, 0, 5, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 5, 0,…
$ Ticket        <chr> "A/5 21171", "PC 17599", "STON/O2. 3101282", "113803", "373450", "330877", "1746…
$ Fare          <dbl> 7.2500, 71.2833, 7.9250, 53.1000, 8.0500, 8.4583, 51.8625, 21.0750, 11.1333, 30.…
$ Cabin         <chr> NA, "C85", NA, "C123", NA, NA, "E46", NA, NA, NA, "G6", "C103", NA, NA, NA, NA, …
$ Embarked      <fct> S, C, S, S, S, Q, S, S, S, C, S, S, S, S, S, S, Q, S, S, C, S, S, Q, S, S, S, C,…
$ Title         <fct> Mr, Mrs, Miss, Mrs, Mr, Mr, Mr, Master, Mrs, Mrs, Miss, Miss, Mr, Mr, Miss, Mrs,…
$ Surname       <chr> "Braund", "Cumings", "Heikkinen", "Futrelle", "Allen", "Moran", "McCarthy", "Pal…
$ FamilySize    <dbl> 2, 2, 1, 2, 1, 1, 1, 5, 3, 2, 3, 1, 1, 7, 1, 1, 6, 1, 2, 1, 1, 1, 1, 1, 5, 7, 1,…
$ FamilyOnBoard <dbl> 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0,…
$ CabinLetter   <fct> NA, C, NA, C, NA, NA, E, NA, NA, NA, G, C, NA, NA, NA, NA, NA, NA, NA, NA, NA, D…
$ HasCabin      <dbl> 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,…
$ SocioEconomic <fct> 3X, 1C, 3X, 1C, 3X, 3X, 1E, 3X, 3X, 2X, 3G, 1C, 3X, 3X, 3X, 2X, 3X, 2X, 3X, 3X, …
# Examining how many NA's and blanks our dataset has.
sapply(combi, function(x) sum(is.na(x) | x == "")) %>%
  as.data.frame()
                 .
PassengerId      0
Survived       418
Pclass           0
Name             0
Sex              0
Age            263
SibSp            0
Parch            0
Ticket           0
Fare             1
Cabin         1014
Embarked         2
Title            0
Surname          0
FamilySize       0
FamilyOnBoard    0
CabinLetter   1014
HasCabin         0
SocioEconomic    0
# Imputation Step: excluding irrelevant / unsupported data type features, plus casting to dataframe 
# for missForest.
set.seed(420)
combi.imp <- combi %>%
  select(-c("Survived", "Name", "Ticket", "Surname", "Cabin", "CabinLetter")) %>%
  as.data.frame() %>%
  missForest()
  missForest iteration 1 in progress...done!
  missForest iteration 2 in progress...done!
  missForest iteration 3 in progress...done!
  missForest iteration 4 in progress...done!
# Observing results + error rates for imputation (~4.72% for numeric, and ~9.82% for factors).
combi.imp$OOBerror
     NRMSE        PFC 
0.04723969 0.09826029 
# Merging imputed features back into "combi".
combi$Age <- combi.imp$ximp$Age
combi$Fare <- combi.imp$ximp$Fare
combi$Embarked <- combi.imp$ximp$Embarked
# Examining how many NA's and blanks our dataset has.
sapply(combi, function(x) sum(is.na(x) | x == "")) %>%
  as.data.frame()
                 .
PassengerId      0
Survived       418
Pclass           0
Name             0
Sex              0
Age              0
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin         1014
Embarked         0
Title            0
Surname          0
FamilySize       0
FamilyOnBoard    0
CabinLetter   1014
HasCabin         0
SocioEconomic    0
ggplot(combi) +
  aes(x = Age) +
  geom_bar(width = 0.2,
           stat = "count") +
  theme_clean(base_size = 10) +
  labs(x = "Age", y = "Frequency")

Dimensional Reduction

Feature Selection

Feature Extraction

Dummy Coding

# All features selected for dummification must be character or factor columns. Also decided to remove 
# said selected columns after they have been dummified to conserve space and boost future correlation
# performance.
combi <- dummy_cols(combi, 
                    select_columns = c("Age", "Sex", "Embarked", "Title", "SocioEconomic"),
                    remove_selected_columns = TRUE)
glimpse(combi, width = 105)
Rows: 1,309
Columns: 48
$ PassengerId      <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22…
$ Survived         <dbl> 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1,…
$ Pclass           <dbl> 3, 1, 3, 1, 3, 3, 1, 3, 3, 2, 3, 1, 3, 3, 3, 2, 3, 2, 3, 3, 2, 2, 3, 1, 3, 3,…
$ Name             <chr> "Braund, Mr. Owen Harris", "Cumings, Mrs. John Bradley (Florence Briggs Thaye…
$ SibSp            <dbl> 1, 1, 0, 1, 0, 0, 0, 3, 0, 1, 1, 0, 0, 1, 0, 0, 4, 0, 1, 0, 0, 0, 0, 0, 3, 1,…
$ Parch            <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 1, 0, 0, 5, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 5,…
$ Ticket           <chr> "A/5 21171", "PC 17599", "STON/O2. 3101282", "113803", "373450", "330877", "1…
$ Fare             <dbl> 7.2500, 71.2833, 7.9250, 53.1000, 8.0500, 8.4583, 51.8625, 21.0750, 11.1333, …
$ Cabin            <chr> NA, "C85", NA, "C123", NA, NA, "E46", NA, NA, NA, "G6", "C103", NA, NA, NA, N…
$ Surname          <chr> "Braund", "Cumings", "Heikkinen", "Futrelle", "Allen", "Moran", "McCarthy", "…
$ FamilySize       <dbl> 2, 2, 1, 2, 1, 1, 1, 5, 3, 2, 3, 1, 1, 7, 1, 1, 6, 1, 2, 1, 1, 1, 1, 1, 5, 7,…
$ FamilyOnBoard    <dbl> 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1,…
$ CabinLetter      <fct> NA, C, NA, C, NA, NA, E, NA, NA, NA, G, C, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ HasCabin         <dbl> 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0,…
$ Age_Adult        <int> 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1,…
$ Age_Child        <int> 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0,…
$ Age_Elder        <int> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ Sex_female       <int> 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1,…
$ Sex_male         <int> 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0,…
$ Embarked_C       <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,…
$ Embarked_Q       <int> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,…
$ Embarked_S       <int> 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1,…
$ Title_Col        <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ Title_Dr         <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ Title_Lady       <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ Title_Master     <int> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ Title_Miss       <int> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,…
$ Title_Mlle       <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ Title_Mr         <int> 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0,…
$ Title_Mrs        <int> 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1,…
$ Title_Ms         <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ Title_Rev        <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ Title_Sir        <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ SocioEconomic_1A <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,…
$ SocioEconomic_1B <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ SocioEconomic_1C <int> 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ SocioEconomic_1D <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ SocioEconomic_1E <int> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ SocioEconomic_1T <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ SocioEconomic_1X <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ SocioEconomic_2D <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,…
$ SocioEconomic_2E <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ SocioEconomic_2F <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ SocioEconomic_2X <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0,…
$ SocioEconomic_3E <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ SocioEconomic_3F <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ SocioEconomic_3G <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ SocioEconomic_3X <int> 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,…

Data Splitting & Sampling

train <- combi[1:891,]
test <- combi[892:1309,]

Feature Correlation Analysis

# Examine correlations between dependent Survived feature and independent input features. Excluding
# specific features.
trainCor <- cor(
  train %>%
    select(-PassengerId, -Name, -Ticket, -Cabin, -Surname, -CabinLetter)
)
corrplot(trainCor, type = "upper")

# Examining frequency distribution for all dummy coding values to get a sense of which features
# have the widest usage.
trainDummies <- train[sapply(train, is.numeric)] %>%
  colSums(na.rm = TRUE) %>%
  t() %>%
  data.frame() %>%
  select(contains("_")) %>%
  t() %>%
  data.frame()
# Moving the row names into a new column, and updating the row names to simply be numbers.
trainDummies <- cbind(DummyCode = rownames(trainDummies), trainDummies)
rownames(trainDummies) <- 1:nrow(trainDummies)
# Plotting the dummy-coded features from most to least used.
ggplot(trainDummies) +
  aes(x = reorder(DummyCode,.), weight = .) +
  geom_bar() +
  coord_flip() +
  theme_clean(base_size = 10) +
  labs(x = "Dummy Code", y = "Frequency")

Model Creation & Tuning

#Defining our repeated k-fold cross validation to split into c chunks, and cycle process t times.
train_control <- trainControl(method = "repeatedcv", number = 5, repeats = 3)
survivalModel <- train(as.factor(Survived) ~ 
                         FamilyOnBoard + 
                         Title_Miss + 
                         Sex_female + 
                         Title_Mrs + 
                         Fare + 
                         HasCabin + 
                         Embarked_C +
                         Title_Mr + 
                         Sex_male + 
                         Age_Child +
                         Pclass + 
                         Embarked_S + 
                         Age_Adult +
                         Title_Master +
                         SocioEconomic_3X,
                       data = train,
                       method = "cforest",
                       trControl = train_control,
                       controls = party::cforest_unbiased(ntree = 1000))

Model Scoring & Prediction

# Examining the model scoring after cross validation (uses portions of the train set as validation).
survivalModel
Conditional Inference Random Forest 

891 samples
 15 predictor
  2 classes: '0', '1' 

No pre-processing
Resampling: Cross-Validated (5 fold, repeated 3 times) 
Summary of sample sizes: 713, 713, 713, 713, 712, 713, ... 
Resampling results across tuning parameters:

  mtry  Accuracy   Kappa    
   2    0.8125820  0.5919011
   8    0.8200433  0.6046258
  15    0.8207945  0.6070504

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was mtry = 15.
# Make predictions based on resulting trained model.
predictions <- predict(survivalModel, newdata = test, OOB = TRUE, type = "raw")

# Creating submission.
createSubmission("submit.csv", predictions)
LS0tCnRpdGxlOiAiVGl0YW5pYyBTdXJ2aXZvciBBbmFseXNpcyIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKIyMgTGlicmFyeSBJbmNsdWRlcwoKYGBge3IgbWVzc2FnZT1GQUxTRX0KbGlicmFyeSh0aWR5dmVyc2UpICAgICAgIyBHZW5lcmFsIHV0aWxpdHkgcGFja2FnZXMKbGlicmFyeShwYXJ0eSkgICAgICAgICAgIyBSYW5kb20gRm9yZXN0IGNvbmRpdGlvbmFsIGluZmVyZW5jZSB0cmVlIHV0aWxpemF0aW9uCmxpYnJhcnkobWlzc0ZvcmVzdCkgICAgICMgSW1wdXRhdGlvbiAoZm9yIGJvdGggZmFjdG9yIGFuZCBudW1lcmljKQpsaWJyYXJ5KGNvd3Bsb3QpICAgICAgICAjIFBsb3R0aW5nIGdncGxvdCdzIHNpZGUgYnkgc2lkZQpsaWJyYXJ5KGNvcnJwbG90KSAgICAgICAjIFZpc3VhbCBjb3JyZWxhdGlvbiBwbG90dGluZwpsaWJyYXJ5KGZhc3REdW1taWVzKSAgICAjIFVzZWQgZm9yIGR1bW15IGNvZGluZwpsaWJyYXJ5KGNhcmV0KSAgICAgICAgICAjIFVzZWQgZm9yIGstZm9sZCBjcm9zcyB2YWxpZGF0aW9uCmxpYnJhcnkoZ2d0aGVtZXMpICAgICAgICMgUHJvdmlkZXMgZXh0cmEgdGhlbWVzIGZvciBzdHlsaW5nIGdncGxvdHMuCmBgYAoKIyMgRGF0YSBMb2FkaW5nCgpgYGB7ciBtZXNzYWdlPUZBTFNFfQp0cmFpbiA8LSByZWFkX2NzdigiZGF0YS90cmFpbi5jc3YiKSAjIFJldHVybnMgdGliYmxlCnRlc3QgPC0gcmVhZF9jc3YoImRhdGEvdGVzdC5jc3YiKSAgICMgUmV0dXJucyB0aWJibGUKdGVzdCRTdXJ2aXZlZCA8LSBOQQpjb21iaSA8LSByYmluZCh0cmFpbiwgdGVzdCkKYGBgCgojIyBVdGlsaXR5IEZ1bmN0aW9ucwoKYGBge3J9CmNyZWF0ZVN1Ym1pc3Npb24gPC0gZnVuY3Rpb24oc3VibWlzc2lvbk5hbWUgPSAic3VibWl0LmNzdiIsIHByZWRpY3Rpb24pIHsKICBzdWJtaXQgPC0gZGF0YS5mcmFtZShQYXNzZW5nZXJJZCA9IHRlc3QkUGFzc2VuZ2VySWQsIFN1cnZpdmVkID0gcHJlZGljdGlvbikKICB3cml0ZV9jc3Yoc3VibWl0LCBzdWJtaXNzaW9uTmFtZSkKfQpgYGAKCiMjIEZlYXR1cmUgRW5nLiAoQ2F0ZWdvcmljYWwgLyBGYWN0b3JzKQoKYGBge3J9CiMgQ3JlYXRpbmcgYSBuZXcgVGl0bGUgZmVhdHVyZSBieSBwYXJzaW5nIGl0IHVzaW5nIGEgcmVndWxhciBleHByZXNzaW9uLgpjb21iaSRUaXRsZSA8LSBzYXBwbHkoY29tYmkkTmFtZSwgRlVOID0gZnVuY3Rpb24oeCkgeyBzdHJzcGxpdCh4LCBzcGxpdCA9ICJbLC5dIilbWzFdXVsyXX0pCmNvbWJpJFRpdGxlIDwtIHN1YignICcsICcnLCBjb21iaSRUaXRsZSkKCiMgQ29tYmluaW5nIHNvbWUgcmFyZSBUaXRsZSB2YWx1ZXMgdG8gc2ltcGxpZnkgb3VyIGZhY3RvciBjb3VudHMuCmNvbWJpJFRpdGxlW2NvbWJpJFRpdGxlICVpbiUgYygiTW1lIiwgIk1sbGUiKV0gPC0gIk1sbGUiCmNvbWJpJFRpdGxlW2NvbWJpJFRpdGxlICVpbiUgYygiQ2FwdCIsICJEb24iLCAiTWFqb3IiLCAiU2lyIildIDwtICJTaXIiCmNvbWJpJFRpdGxlW2NvbWJpJFRpdGxlICVpbiUgYygiRG9uYSIsICJMYWR5IiwgInRoZSBDb3VudGVzcyIsICJKb25raGVlciIpXSA8LSAiTGFkeSIKY29tYmkkVGl0bGUgPC0gZmFjdG9yKGNvbWJpJFRpdGxlKQoKIyBDcmVhdGluZyBhIG5ldyBTdXJuYW1lIGZlYXR1cmUgYnkgcGFyc2luZyBpdCB1c2luZyBhIHJlZ3VsYXIgZXhwcmVzc2lvbi4KY29tYmkkU3VybmFtZSA8LSBzYXBwbHkoY29tYmkkTmFtZSwgRlVOID0gZnVuY3Rpb24oeCkgeyBzdHJzcGxpdCh4LCBzcGxpdCA9ICJbLC5dIilbWzFdXVsxXX0pCgojIENyZWF0aW5nIGEgbmV3IEZhbWlseVNpemUgZmVhdHVyZSBpbmNsdWRpbmcgc2VsZiwgcGFyZW50cywgc2libGluZ3MsIHNwb3VzZSwgYW5kIGNoaWxkcmVuLgpjb21iaSRGYW1pbHlTaXplIDwtIGNvbWJpJFNpYlNwICsgY29tYmkkUGFyY2ggKyAxCgojIENyZWF0aW5nIHNpbXBsZSBmbGFnIGluZGljYXRpbmcgd2hldGhlciBwZXJzb24gaGFzIGFueSBmYW1pbHkgb24gYm9hcmQuCmNvbWJpJEZhbWlseU9uQm9hcmQgPC0gaWZlbHNlKGNvbWJpJFNpYlNwICsgY29tYmkkUGFyY2ggPiAwLCAxLCAwKQoKIyBDcmVhdGluZyBhIG5ldyBDYWJpbkxldHRlciBmZWF0dXJlIGJ5IHBhcnNpbmcgaXQgdXNpbmcgYSByZWd1bGFyIGV4cHJlc3Npb24uCmNvbWJpJENhYmluTGV0dGVyIDwtIHNhcHBseShjb21iaSRDYWJpbiwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICBGVU4gPSBmdW5jdGlvbih4KSB7IGlmZWxzZShpcy5uYSh4KSwgTkFfY2hhcmFjdGVyXywgc3Vic3RyKHgsIDEsIDEpKSB9KQpjb21iaSRDYWJpbkxldHRlciA8LSBmYWN0b3IoY29tYmkkQ2FiaW5MZXR0ZXIpCgojIEtlZXBpbmcgdHJhY2sgb2Ygd2hldGhlciBhbiBvYnNlcnZhdGlvbiBoYWQgYSBDYWJpbiBub3RlZCBhdCBhbGwKY29tYmkkSGFzQ2FiaW4gPC0gaWZlbHNlKGlzLm5hKGNvbWJpJENhYmluKSwgMCwgMSkKCiMgQ3JlYXRpbmcgc29jaW9lY29ub21pYyBmZWF0dXJlCmNvbWJpJFNvY2lvRWNvbm9taWMgPC0gaWZlbHNlKAogIGlzLm5hKGNvbWJpJENhYmluTGV0dGVyKSwgCiAgcGFzdGUoY29tYmkkUGNsYXNzLCAiWCIsIHNlcD0iIiksCiAgcGFzdGUoY29tYmkkUGNsYXNzLCBjb21iaSRDYWJpbkxldHRlciwgc2VwPSIiKSkKYGBgCgojIyBGZWF0dXJlIEVuZy4gKENvbnRpbnVvdXMpCgojIyMjIE5vcm1hbGl6YXRpb24gKFN0YW5kYXJkaXphdGlvbikKCiMjIyMgQmlubmluZyAoRGlzY3JldGl6YXRpb24pCgpgYGB7cn0KIyBCaW5uaW5nIEFnZSBpbnRvIDMgZmFjdG9yIGJpbnMgKHNvIHRoYXQgZnV0dXJlIGltcHV0YXRpb24gcGlja3MgYSBmYWN0b3IgYmVmb3JlIGNvbnZlcnNpb24pLgpjb21iaSRBZ2UgPC0gc2FwcGx5KGNvbWJpJEFnZSwgCiAgICAgICAgICAgICAgICAgICAgRlVOID0gZnVuY3Rpb24oeCkgeyAKICAgICAgICAgICAgICAgICAgICAgIGlmIChpcy5uYSh4KSkgTkEKICAgICAgICAgICAgICAgICAgICAgIGVsc2UgaWYgKHggPCAxOCkgIkNoaWxkIgogICAgICAgICAgICAgICAgICAgICAgZWxzZSBpZiAoeCA+PSAxOCAmIHggPCA1MCkgIkFkdWx0IgogICAgICAgICAgICAgICAgICAgICAgZWxzZSAiRWxkZXIiCiAgICAgICAgICAgICAgICAgICAgfSkKYGBgCgpgYGB7cn0KZ2dwbG90KGNvbWJpKSArCiAgYWVzKHggPSBBZ2UpICsKICBnZW9tX2Jhcih3aWR0aCA9IDAuMiwKICAgICAgICAgICBzdGF0ID0gImNvdW50IikgKwogIHRoZW1lX2NsZWFuKGJhc2Vfc2l6ZSA9IDEwKSArCiAgbGFicyh4ID0gIkFnZSIsIHkgPSAiRnJlcXVlbmN5IikKYGBgCgojIyBEYXRhIEltcHV0YXRpb24gJiBDbGVhbnVwCgpgYGB7cn0KIyBJbiBvcmRlciB0byBsZXZlcmFnZSBtaXNzRm9yZXN0IGZvciBpbXB1dGF0aW9uLCBpdCBvbmx5IHN1cHBvcnRzIGNvbnN1bWluZyAvIHByZWRpY3RpbmcgZmFjdG9yIAojIGFuZCBudW1lcmljIGRhdGEgdHlwZXMgc28gYWxsIGFwcGxpY2FibGUgcGFzc2VkLWluIGZlYXR1cmVzIHNob3VsZCBiZSBpbiBvbmUgb2YgdGhvc2UgdHdvIGZvcm1hdHMuCmNvbWJpJEFnZSA8LSBhcy5mYWN0b3IoY29tYmkkQWdlKQpjb21iaSRTZXggPC0gYXMuZmFjdG9yKGNvbWJpJFNleCkKY29tYmkkRW1iYXJrZWQgPC0gYXMuZmFjdG9yKGNvbWJpJEVtYmFya2VkKQpjb21iaSRTb2Npb0Vjb25vbWljIDwtIGFzLmZhY3Rvcihjb21iaSRTb2Npb0Vjb25vbWljKQpnbGltcHNlKGNvbWJpLCB3aWR0aCA9IDEwNSkKYGBgCgpgYGB7ciBwYWdlZC5wcmludD1GQUxTRX0KIyBFeGFtaW5pbmcgaG93IG1hbnkgTkEncyBhbmQgYmxhbmtzIG91ciBkYXRhc2V0IGhhcy4Kc2FwcGx5KGNvbWJpLCBmdW5jdGlvbih4KSBzdW0oaXMubmEoeCkgfCB4ID09ICIiKSkgJT4lCiAgYXMuZGF0YS5mcmFtZSgpCmBgYAoKYGBge3J9CiMgSW1wdXRhdGlvbiBTdGVwOiBleGNsdWRpbmcgaXJyZWxldmFudCAvIHVuc3VwcG9ydGVkIGRhdGEgdHlwZSBmZWF0dXJlcywgcGx1cyBjYXN0aW5nIHRvIGRhdGFmcmFtZSAKIyBmb3IgbWlzc0ZvcmVzdC4Kc2V0LnNlZWQoNDIwKQpjb21iaS5pbXAgPC0gY29tYmkgJT4lCiAgc2VsZWN0KC1jKCJTdXJ2aXZlZCIsICJOYW1lIiwgIlRpY2tldCIsICJTdXJuYW1lIiwgIkNhYmluIiwgIkNhYmluTGV0dGVyIikpICU+JQogIGFzLmRhdGEuZnJhbWUoKSAlPiUKICBtaXNzRm9yZXN0KCkKYGBgCgpgYGB7cn0KIyBPYnNlcnZpbmcgcmVzdWx0cyArIGVycm9yIHJhdGVzIGZvciBpbXB1dGF0aW9uICh+NC43MiUgZm9yIG51bWVyaWMsIGFuZCB+OS44MiUgZm9yIGZhY3RvcnMpLgpjb21iaS5pbXAkT09CZXJyb3IKYGBgCgpgYGB7cn0KIyBNZXJnaW5nIGltcHV0ZWQgZmVhdHVyZXMgYmFjayBpbnRvICJjb21iaSIuCmNvbWJpJEFnZSA8LSBjb21iaS5pbXAkeGltcCRBZ2UKY29tYmkkRmFyZSA8LSBjb21iaS5pbXAkeGltcCRGYXJlCmNvbWJpJEVtYmFya2VkIDwtIGNvbWJpLmltcCR4aW1wJEVtYmFya2VkCmBgYAoKYGBge3IgcGFnZWQucHJpbnQ9RkFMU0V9CiMgRXhhbWluaW5nIGhvdyBtYW55IE5BJ3MgYW5kIGJsYW5rcyBvdXIgZGF0YXNldCBoYXMuCnNhcHBseShjb21iaSwgZnVuY3Rpb24oeCkgc3VtKGlzLm5hKHgpIHwgeCA9PSAiIikpICU+JQogIGFzLmRhdGEuZnJhbWUoKQpgYGAKCmBgYHtyfQpnZ3Bsb3QoY29tYmkpICsKICBhZXMoeCA9IEFnZSkgKwogIGdlb21fYmFyKHdpZHRoID0gMC4yLAogICAgICAgICAgIHN0YXQgPSAiY291bnQiKSArCiAgdGhlbWVfY2xlYW4oYmFzZV9zaXplID0gMTApICsKICBsYWJzKHggPSAiQWdlIiwgeSA9ICJGcmVxdWVuY3kiKQpgYGAKCiMjIERpbWVuc2lvbmFsIFJlZHVjdGlvbgoKIyMjIyBGZWF0dXJlIFNlbGVjdGlvbgoKIyMjIyBGZWF0dXJlIEV4dHJhY3Rpb24KCiMjIER1bW15IENvZGluZwoKYGBge3J9CiMgQWxsIGZlYXR1cmVzIHNlbGVjdGVkIGZvciBkdW1taWZpY2F0aW9uIG11c3QgYmUgY2hhcmFjdGVyIG9yIGZhY3RvciBjb2x1bW5zLiBBbHNvIGRlY2lkZWQgdG8gcmVtb3ZlIAojIHNhaWQgc2VsZWN0ZWQgY29sdW1ucyBhZnRlciB0aGV5IGhhdmUgYmVlbiBkdW1taWZpZWQgdG8gY29uc2VydmUgc3BhY2UgYW5kIGJvb3N0IGZ1dHVyZSBjb3JyZWxhdGlvbgojIHBlcmZvcm1hbmNlLgpjb21iaSA8LSBkdW1teV9jb2xzKGNvbWJpLCAKICAgICAgICAgICAgICAgICAgICBzZWxlY3RfY29sdW1ucyA9IGMoIkFnZSIsICJTZXgiLCAiRW1iYXJrZWQiLCAiVGl0bGUiLCAiU29jaW9FY29ub21pYyIpLAogICAgICAgICAgICAgICAgICAgIHJlbW92ZV9zZWxlY3RlZF9jb2x1bW5zID0gVFJVRSkKYGBgCgpgYGB7cn0KZ2xpbXBzZShjb21iaSwgd2lkdGggPSAxMDUpCmBgYAoKIyMgRGF0YSBTcGxpdHRpbmcgJiBTYW1wbGluZwoKYGBge3J9CnRyYWluIDwtIGNvbWJpWzE6ODkxLF0KdGVzdCA8LSBjb21iaVs4OTI6MTMwOSxdCmBgYAoKIyMgRmVhdHVyZSBDb3JyZWxhdGlvbiBBbmFseXNpcwoKYGBge3IgZmlnLmhlaWdodD03LCBmaWcud2lkdGg9N30KIyBFeGFtaW5lIGNvcnJlbGF0aW9ucyBiZXR3ZWVuIGRlcGVuZGVudCBTdXJ2aXZlZCBmZWF0dXJlIGFuZCBpbmRlcGVuZGVudCBpbnB1dCBmZWF0dXJlcy4gRXhjbHVkaW5nCiMgc3BlY2lmaWMgZmVhdHVyZXMuCnRyYWluQ29yIDwtIGNvcigKICB0cmFpbiAlPiUKICAgIHNlbGVjdCgtUGFzc2VuZ2VySWQsIC1OYW1lLCAtVGlja2V0LCAtQ2FiaW4sIC1TdXJuYW1lLCAtQ2FiaW5MZXR0ZXIpCikKY29ycnBsb3QodHJhaW5Db3IsIHR5cGUgPSAidXBwZXIiKQpgYGAKCmBgYHtyfQojIEV4YW1pbmluZyBmcmVxdWVuY3kgZGlzdHJpYnV0aW9uIGZvciBhbGwgZHVtbXkgY29kaW5nIHZhbHVlcyB0byBnZXQgYSBzZW5zZSBvZiB3aGljaCBmZWF0dXJlcwojIGhhdmUgdGhlIHdpZGVzdCB1c2FnZS4KdHJhaW5EdW1taWVzIDwtIHRyYWluW3NhcHBseSh0cmFpbiwgaXMubnVtZXJpYyldICU+JQogIGNvbFN1bXMobmEucm0gPSBUUlVFKSAlPiUKICB0KCkgJT4lCiAgZGF0YS5mcmFtZSgpICU+JQogIHNlbGVjdChjb250YWlucygiXyIpKSAlPiUKICB0KCkgJT4lCiAgZGF0YS5mcmFtZSgpCmBgYAoKYGBge3J9CiMgTW92aW5nIHRoZSByb3cgbmFtZXMgaW50byBhIG5ldyBjb2x1bW4sIGFuZCB1cGRhdGluZyB0aGUgcm93IG5hbWVzIHRvIHNpbXBseSBiZSBudW1iZXJzLgp0cmFpbkR1bW1pZXMgPC0gY2JpbmQoRHVtbXlDb2RlID0gcm93bmFtZXModHJhaW5EdW1taWVzKSwgdHJhaW5EdW1taWVzKQpyb3duYW1lcyh0cmFpbkR1bW1pZXMpIDwtIDE6bnJvdyh0cmFpbkR1bW1pZXMpCmBgYAoKYGBge3J9CiMgUGxvdHRpbmcgdGhlIGR1bW15LWNvZGVkIGZlYXR1cmVzIGZyb20gbW9zdCB0byBsZWFzdCB1c2VkLgpnZ3Bsb3QodHJhaW5EdW1taWVzKSArCiAgYWVzKHggPSByZW9yZGVyKER1bW15Q29kZSwuKSwgd2VpZ2h0ID0gLikgKwogIGdlb21fYmFyKCkgKwogIGNvb3JkX2ZsaXAoKSArCiAgdGhlbWVfY2xlYW4oYmFzZV9zaXplID0gMTApICsKICBsYWJzKHggPSAiRHVtbXkgQ29kZSIsIHkgPSAiRnJlcXVlbmN5IikKYGBgCgojIyBNb2RlbCBDcmVhdGlvbiAmIFR1bmluZwoKYGBge3J9CiNEZWZpbmluZyBvdXIgcmVwZWF0ZWQgay1mb2xkIGNyb3NzIHZhbGlkYXRpb24gdG8gc3BsaXQgaW50byBjIGNodW5rcywgYW5kIGN5Y2xlIHByb2Nlc3MgdCB0aW1lcy4KdHJhaW5fY29udHJvbCA8LSB0cmFpbkNvbnRyb2wobWV0aG9kID0gInJlcGVhdGVkY3YiLCBudW1iZXIgPSA1LCByZXBlYXRzID0gMykKc3Vydml2YWxNb2RlbCA8LSB0cmFpbihhcy5mYWN0b3IoU3Vydml2ZWQpIH4gCiAgICAgICAgICAgICAgICAgICAgICAgICBGYW1pbHlPbkJvYXJkICsgCiAgICAgICAgICAgICAgICAgICAgICAgICBUaXRsZV9NaXNzICsgCiAgICAgICAgICAgICAgICAgICAgICAgICBTZXhfZmVtYWxlICsgCiAgICAgICAgICAgICAgICAgICAgICAgICBUaXRsZV9NcnMgKyAKICAgICAgICAgICAgICAgICAgICAgICAgIEZhcmUgKyAKICAgICAgICAgICAgICAgICAgICAgICAgIEhhc0NhYmluICsgCiAgICAgICAgICAgICAgICAgICAgICAgICBFbWJhcmtlZF9DICsKICAgICAgICAgICAgICAgICAgICAgICAgIFRpdGxlX01yICsgCiAgICAgICAgICAgICAgICAgICAgICAgICBTZXhfbWFsZSArIAogICAgICAgICAgICAgICAgICAgICAgICAgQWdlX0NoaWxkICsKICAgICAgICAgICAgICAgICAgICAgICAgIFBjbGFzcyArIAogICAgICAgICAgICAgICAgICAgICAgICAgRW1iYXJrZWRfUyArIAogICAgICAgICAgICAgICAgICAgICAgICAgQWdlX0FkdWx0ICsKICAgICAgICAgICAgICAgICAgICAgICAgIFRpdGxlX01hc3RlciArCiAgICAgICAgICAgICAgICAgICAgICAgICBTb2Npb0Vjb25vbWljXzNYLAogICAgICAgICAgICAgICAgICAgICAgIGRhdGEgPSB0cmFpbiwKICAgICAgICAgICAgICAgICAgICAgICBtZXRob2QgPSAiY2ZvcmVzdCIsCiAgICAgICAgICAgICAgICAgICAgICAgdHJDb250cm9sID0gdHJhaW5fY29udHJvbCwKICAgICAgICAgICAgICAgICAgICAgICBjb250cm9scyA9IHBhcnR5OjpjZm9yZXN0X3VuYmlhc2VkKG50cmVlID0gMTAwMCkpCmBgYAoKIyMgTW9kZWwgU2NvcmluZyAmIFByZWRpY3Rpb24KCmBgYHtyfQojIEV4YW1pbmluZyB0aGUgbW9kZWwgc2NvcmluZyBhZnRlciBjcm9zcyB2YWxpZGF0aW9uICh1c2VzIHBvcnRpb25zIG9mIHRoZSB0cmFpbiBzZXQgYXMgdmFsaWRhdGlvbikuCnN1cnZpdmFsTW9kZWwKYGBgCgpgYGB7cn0KIyBNYWtlIHByZWRpY3Rpb25zIGJhc2VkIG9uIHJlc3VsdGluZyB0cmFpbmVkIG1vZGVsLgpwcmVkaWN0aW9ucyA8LSBwcmVkaWN0KHN1cnZpdmFsTW9kZWwsIG5ld2RhdGEgPSB0ZXN0LCBPT0IgPSBUUlVFLCB0eXBlID0gInJhdyIpCgojIENyZWF0aW5nIHN1Ym1pc3Npb24uCmNyZWF0ZVN1Ym1pc3Npb24oInN1Ym1pdC5jc3YiLCBwcmVkaWN0aW9ucykKYGBgCg==