This journal includes code for applying lime to various models (rtrees and logistic regression with main effects only).

# Load libraries
library(bulletr)
library(lime)
library(furrr)
library(future)
library(gretchenalbrecht)
library(randomForest)
library(tidyverse)

# Source functions
source("../../code/helper_functions.R")

The training and testing datasets are loaded in below.

# Read the training data (Hamby Data 173 and 252) from its csv file
hamby173and252_train <- read.csv(file = "../../../data/hamby173and252_train.csv")

# Read the testing data (Hamby Data 224 Sets 1 and 11) from its csv file
hamby224_test <- read.csv(file = "../../../data/hamby224_test.csv")

A vector containing the features used in the random forest rtrees is created below.

# The features used to fit the rtrees random forest are the row names
# of the importance table stored in the model
rf_features <- row.names(rtrees$importance)

Random Forest

One Implementation

The code below applies the LIME algorithm to the bullet data one time for the following different input cases:

2 to 6 quantile bins
2 to 6 equally spaced bins
2 to 6 tree based bins with samesource as the response variable
2 to 6 tree based bins with rfscore as the response variable
kernel density estimation
normal density approximation

It applies LIME using my function run_lime to specify the different input values. The outputs from the lime function are combined into a list, and the outputs from the explain function are combined into a dataframe. Both are saved as .rds files.

# Apply the run_lime function unless all of the lime result files already exist
# (all three files are checked because all three are read in the else branch)
if (!all(file.exists("../../../data/hamby224_lime_inputs.rds",
                     "../../../data/hamby224_lime.rds",
                     "../../../data/hamby224_explain.rds"))) {
  
  # Specify the input options to use with lime (one element per input case)
  hamby224_lime_inputs <- list(bin_continuous = c(rep(TRUE, 20), 
                                                  rep(FALSE, 2)),
                               quantile_bins = c(rep(TRUE, 5), 
                                                 rep(FALSE, 5), 
                                                 rep(TRUE, 12)),
                               nbins = c(rep(2:6, 4), 
                                         rep(4, 2)),
                               use_density = c(rep(TRUE, 20), TRUE, FALSE),
                               bin_method = c(rep("quantile_bins", 5),
                                              rep("equally_spaced", 5),
                                              rep("tree", 10),
                                              rep("quantile_bins", 2)),
                               response = c(rep(NA, 10), 
                                            rep("samesource", 5), 
                                            rep("rfscore", 5), 
                                            rep(NA, 2)))
  
  # Tell R to run the upcoming code in parallel
  # NOTE(review): newer releases of the future package deprecate `multiprocess`
  # in favor of `multisession` -- confirm before upgrading future
  plan(multiprocess)
  
  # Apply lime to the full training data with the specified input options
  # (all_of() makes explicit that rf_features is an external character vector)
  hamby224_lime_explain <- future_pmap(.l = hamby224_lime_inputs,
             .f = run_lime, # run_lime is one of my helper functions
             features = rf_features,
             train = hamby173and252_train,
             test = hamby224_test %>% 
               arrange(case) %>% 
               select(all_of(rf_features)) %>% 
               na.omit(),
             rfmodel = as_classifier(rtrees),
             label = "TRUE",
             nfeatures = 3,
             seed = TRUE)
  
  # Separate the lime and explain function results from the full data
  hamby224_lime <- map(hamby224_lime_explain, function(list) list$lime)
  hamby224_explain <- map_df(hamby224_lime_explain, function(list) list$explain)
  
  # Name the items in the lime list; the indices are taken from the list itself
  # so the names stay in step with the number of input cases
  names(hamby224_lime) <- map_chr(seq_along(hamby224_lime), function(case) 
      sprintf("case: bin_continuous = %s, quantile_bins = %s, nbins = %0.f, use_density = %s, bin_method = %s, response = %s",
              hamby224_lime_inputs$bin_continuous[case],
              hamby224_lime_inputs$quantile_bins[case],
              hamby224_lime_inputs$nbins[case],
              hamby224_lime_inputs$use_density[case],
              hamby224_lime_inputs$bin_method[case],
              hamby224_lime_inputs$response[case]))

  # Turn the lime input options into a dataframe before saving it
  # (one row per input case; unlist() coerces every option to character)
  hamby224_lime_inputs <- hamby224_lime_inputs %>%
    unlist() %>%
    matrix(ncol = length(hamby224_lime_inputs), 
           dimnames = list(NULL, names(hamby224_lime_inputs))) %>%
    as.data.frame() %>%
    mutate(case = seq_len(n())) %>%
    select(case, bin_continuous:response)
  
  # Save the lime objects
  saveRDS(hamby224_lime_inputs, "../../../data/hamby224_lime_inputs.rds")
  saveRDS(hamby224_lime, "../../../data/hamby224_lime.rds")
  saveRDS(hamby224_explain, "../../../data/hamby224_explain.rds")
  
} else {
  
  # Load in the lime objects
  hamby224_lime_inputs <- readRDS("../../../data/hamby224_lime_inputs.rds")
  hamby224_lime <- readRDS("../../../data/hamby224_lime.rds")
  hamby224_explain <- readRDS("../../../data/hamby224_explain.rds")
  
}

This code creates a dataframe of the bins to use in the app based on the output from the lime function for each of the input settings. The object hamby224_bin_boundaries contains the boundaries for the bins, and hamby224_bins contains nice forms of the bin intervals (i.e. [lower, upper)).

# Create the bin dataframes unless both saved files already exist
# (the check targets the .rds files written below, so saved results are reused
# instead of being recomputed on every run)
if (!all(file.exists("../../../data/hamby224_bin_boundaries.rds",
                     "../../../data/hamby224_bins.rds"))) {
  
  # Create a list of dataframes with the bin boundaries and bins 
  # for the different evaluations of the lime functions
  hamby224_bin_list <- map(hamby224_lime, create_bin_data)
  
  # Save the bin boundaries and the bins as separate lists
  hamby224_bin_boundaries <- map(hamby224_bin_list, function(m) m$boundaries)
  hamby224_bins <- map(hamby224_bin_list, function(m) m$bins)
  
  # Save the bin boundaries and bins
  saveRDS(hamby224_bin_boundaries, "../../../data/hamby224_bin_boundaries.rds")
  saveRDS(hamby224_bins, "../../../data/hamby224_bins.rds")
  
} else {
  
  # Load in the bin boundaries and bins
  hamby224_bin_boundaries <- readRDS("../../../data/hamby224_bin_boundaries.rds")
  hamby224_bins <- readRDS("../../../data/hamby224_bins.rds")
  
}

This code combines the test data and the LIME explanations into one dataframe and saves the dataframe.

# Build the combined test and explanation data, or reload the saved copy
if (!file.exists("../../../data/hamby224_test_explain.rds")) {

  hamby224_test_explain <- hamby224_test %>%
    # Convert case to character and join the explanations on it
    mutate(case = as.character(case)) %>%
    full_join(hamby224_explain, by = "case") %>%
    mutate(
      case = factor(case),
      feature_desc = factor(feature_desc),
      # Feature bin labels come from my helper function bin_labeller
      feature_bin = pmap_chr(
        .l = list(
          feature = feature,
          feature_value = feature_value,
          b_c = bin_continuous,
          q_b = quantile_bins,
          n_b = nbins,
          u_d = use_density,
          b_m = bin_method,
          r_v = response
        ),
        .f = bin_labeller,
        bin_data = hamby224_bin_boundaries,
        case_info = hamby224_lime_inputs
      ),
      feature = factor(feature),
      nbins = factor(nbins),
      feature_number = readr::parse_number(as.character(feature_desc)),
      # TRUE for the feature descriptions that contain "< "
      strictly_less = grepl("< ", feature_desc)
    ) %>%
    arrange(nbins) %>%
    mutate(
      # Reorder the levels of feature_desc for plotting purposes
      feature_desc = reorder(feature_desc, strictly_less),
      feature_desc = reorder(feature_desc, feature_number),
      feature_desc = reorder(feature_desc, as.numeric(feature)),
      nbins = as.numeric(as.character(nbins)),
      # Label of the input setting, including the number of bins
      situation = fct_relevel(
        ifelse(
          bin_method == "quantile_bins" & bin_continuous == TRUE,
          sprintf("%.0f quantile", nbins),
          ifelse(
            bin_method == "equally_spaced" & bin_continuous == TRUE,
            sprintf("%.0f equally spaced", nbins),
            ifelse(
              bin_method == "tree" & bin_continuous == TRUE &
                response == "samesource",
              sprintf("%.0f samesource tree", nbins),
              ifelse(
                bin_method == "tree" & bin_continuous == TRUE &
                  response == "rfscore",
                sprintf("%.0f rfscore tree", nbins),
                ifelse(
                  use_density == TRUE & bin_continuous == FALSE,
                  "kernel density",
                  "normal approximation"
                )
              )
            )
          )
        ),
        "2 quantile", "3 quantile", "4 quantile",
        "5 quantile", "6 quantile", "2 equally spaced",
        "3 equally spaced", "4 equally spaced",
        "5 equally spaced", "6 equally spaced",
        "2 samesource tree", "3 samesource tree",
        "4 samesource tree", "5 samesource tree",
        "6 samesource tree"
      ),
      # Label of the input setting without the number of bins
      bin_situation = factor(
        ifelse(
          bin_continuous == TRUE & bin_method == "quantile_bins",
          "quantile",
          ifelse(
            bin_continuous == TRUE & bin_method == "equally_spaced",
            "equally spaced",
            ifelse(
              bin_continuous == TRUE & bin_method == "tree" &
                response == "samesource",
              "samesource tree",
              ifelse(
                bin_continuous == TRUE & bin_method == "tree" &
                  response == "rfscore",
                "rfscore tree",
                ifelse(
                  use_density == TRUE & bin_continuous == FALSE,
                  "kernel density",
                  "normal approximation"
                )
              )
            )
          )
        )
      )
    ) %>%
    select(situation, bin_situation, bin_continuous:response, case:feature_desc,
           feature_bin:strictly_less, data, prediction)

  # Write the combined test and explain data to disk
  saveRDS(hamby224_test_explain, "../../../data/hamby224_test_explain.rds")

} else {

  # Read the previously saved combined data
  hamby224_test_explain <- readRDS("../../../data/hamby224_test_explain.rds")

}

This code creates and saves a dataset with summaries of the explanations.

# Create the lime comparison data if the file does not already exist
if(!file.exists("../../../data/hamby224_lime_comparisons.rds")) {
  
  # Keep the first row for each combination of case and lime input setting,
  # retain the summary variables, and compute the difference and the mean of
  # rfscore and model_prediction
  # (all_of() makes explicit that rf_features is an external character vector)
  hamby224_lime_comparisons <- hamby224_test_explain %>%
    select(-data, -prediction) %>%
    group_by(case, bin_continuous, quantile_bins, nbins, use_density, bin_method, response) %>%
    slice(1) %>%
    ungroup() %>%
    select(situation, bin_situation, bin_method, bin_continuous, quantile_bins, response, 
           nbins, use_density, set, case, all_of(rf_features), rfscore, model_prediction, model_r2) %>%
    mutate(diff = rfscore - model_prediction,
           mean = (rfscore + model_prediction) / 2)
    
  
  # Save the lime comparison data frame
  saveRDS(hamby224_lime_comparisons, "../../../data/hamby224_lime_comparisons.rds")
  
} else {
  
  # Load in the lime comparison data frame
  hamby224_lime_comparisons <- readRDS("../../../data/hamby224_lime_comparisons.rds")
  
}

Multiple Implementations

I was interested in determining how the random permutations affect the results from LIME. That is, I wanted to do a sensitivity analysis. The code below applies the LIME algorithm to the bullet data ten times for each of the following different input cases:

2 to 6 quantile bins
2 to 6 equally spaced bins
2 to 6 tree based bins with samesource as the response variable
2 to 6 tree based bins with rfscore as the response variable
kernel density estimation
normal density approximation

It applies LIME using my function run_lime to specify the different input values. The outputs from the explain function are combined into a dataframe and saved as an .rds file. The input values are also saved as an .rds file.

# Perform the sensitivity analysis unless both result files are already saved
# (both files are checked because both are read in the else branch)
if (!all(file.exists("../../../data/hamby224_sensitivity_inputs.rds",
                     "../../../data/hamby224_sensitivity_outputs.rds"))) {
  
  # Specify the number of reps and input cases
  nreps <- 10
  noptions <- 22
  
  # Specify the inputs for the sensitivity analysis
  # (each of the input cases is repeated nreps times in a row)
  hamby224_sensitivity_inputs <- list(bin_continuous = c(rep(TRUE, nreps * 20),
                                                         rep(FALSE, nreps * 2)),
                                      quantile_bins = c(rep(c(TRUE, FALSE), each = nreps * 5), 
                                                        rep(TRUE, nreps * 12)),
                                      nbins = c(rep(rep(2:6, each = nreps), 4), 
                                                rep(4, nreps * 2)),
                                      use_density = c(rep(TRUE, nreps * 21), 
                                                      rep(FALSE, nreps)),
                                      bin_method = c(rep("quantile_bins", nreps * 5),
                                                      rep("equally_spaced", nreps * 5),
                                                      rep("tree", nreps * 10),
                                                      rep("quantile_bins", nreps * 2)),
                                      response = c(rep(NA, nreps * 10),
                                                   rep("samesource", nreps * 5),
                                                   rep("rfscore", nreps * 5),
                                                   rep(NA, nreps * 2)))
  # Tell R to run the upcoming code in parallel
  # NOTE(review): newer releases of the future package deprecate `multiprocess`
  # in favor of `multisession` -- confirm before upgrading future
  plan(multiprocess)
  
  # Run lime for the sensitivity analysis and organize the output
  # (all_of() makes explicit that rf_features is an external character vector)
  hamby224_sensitivity_outputs <- future_pmap(.l = hamby224_sensitivity_inputs,
             .f = run_lime, # run_lime is one of my helper functions
             features = rf_features,
             train = hamby173and252_train,
             test = hamby224_test %>% 
               arrange(case) %>% 
               select(all_of(rf_features)) %>% 
               na.omit(),
             rfmodel = as_classifier(rtrees),
             label = "TRUE",
             nfeatures = 3,
             seed = FALSE) %>%
    map_df(function(list) list$explain) %>%
    # Label the rep of every row. The number of rows per run is taken from the
    # combined output itself rather than from na.omit() applied to all columns
    # of the test data, since lime was only run on rows complete in rf_features
    # (assumes every run returns the same number of rows)
    mutate(rep = factor(rep(rep(seq_len(nreps), each = n() / (nreps * noptions)),
                            noptions)))
  
  # Turn the input options into a dataframe to be saved
  # (response is kept so that the two tree based settings can be told apart)
  hamby224_sensitivity_inputs <- hamby224_sensitivity_inputs %>%
    unlist() %>%
    matrix(ncol = length(hamby224_sensitivity_inputs), 
           dimnames = list(NULL, names(hamby224_sensitivity_inputs))) %>%
    as.data.frame() %>%
    mutate(case = seq_len(nreps * noptions)) %>%
    select(case, bin_continuous:response)

  # Save the sensitivity inputs and outputs
  saveRDS(hamby224_sensitivity_inputs, "../../../data/hamby224_sensitivity_inputs.rds")
  saveRDS(hamby224_sensitivity_outputs, "../../../data/hamby224_sensitivity_outputs.rds")
  
} else {
  
  # Load in the sensitivity inputs and outputs
  hamby224_sensitivity_inputs <- readRDS("../../../data/hamby224_sensitivity_inputs.rds")
  hamby224_sensitivity_outputs <- readRDS("../../../data/hamby224_sensitivity_outputs.rds")

}

The code below joins the sensitivity outputs with the test data into a dataframe. The dataframe is saved as an .rds file.

# Combine the sensitivity outputs with the test data unless already saved
if (!file.exists("../../../data/hamby224_sensitivity_joined.rds")) {

  hamby224_sensitivity_joined <- hamby224_sensitivity_outputs %>%
    # Join on case after dropping incomplete test rows
    full_join(
      hamby224_test %>%
        na.omit() %>%
        mutate(case = as.character(case)),
      by = "case"
    ) %>%
    mutate(case = factor(case)) %>%
    select(case, model_r2:feature_weight, bin_continuous:rep,
           set:land2, rfscore, samesource) %>%
    mutate(
      diff = rfscore - model_prediction,
      nbins = as.numeric(as.character(nbins)),
      # Label of the input setting, including the number of bins
      situation = fct_relevel(
        ifelse(
          bin_method == "quantile_bins" & bin_continuous == TRUE,
          sprintf("%.0f quantile", nbins),
          ifelse(
            bin_method == "equally_spaced" & bin_continuous == TRUE,
            sprintf("%.0f equally spaced", nbins),
            ifelse(
              bin_method == "tree" & bin_continuous == TRUE &
                response == "samesource",
              sprintf("%.0f samesource tree", nbins),
              ifelse(
                bin_method == "tree" & bin_continuous == TRUE &
                  response == "rfscore",
                sprintf("%.0f rfscore tree", nbins),
                ifelse(
                  use_density == TRUE & bin_continuous == FALSE,
                  "kernel density",
                  "normal approximation"
                )
              )
            )
          )
        ),
        "2 quantile", "3 quantile", "4 quantile",
        "5 quantile", "6 quantile", "2 equally spaced",
        "3 equally spaced", "4 equally spaced",
        "5 equally spaced", "6 equally spaced",
        "2 samesource tree", "3 samesource tree",
        "4 samesource tree", "5 samesource tree",
        "6 samesource tree"
      ),
      # Label of the input setting without the number of bins
      bin_situation = factor(
        ifelse(
          bin_continuous == TRUE & bin_method == "quantile_bins",
          "quantile",
          ifelse(
            bin_continuous == TRUE & bin_method == "equally_spaced",
            "equally spaced",
            ifelse(
              bin_continuous == TRUE & bin_method == "tree" &
                response == "samesource",
              "samesource tree",
              ifelse(
                bin_continuous == TRUE & bin_method == "tree" &
                  response == "rfscore",
                "rfscore tree",
                ifelse(
                  use_density == TRUE & bin_continuous == FALSE,
                  "kernel density",
                  "normal approximation"
                )
              )
            )
          )
        )
      )
    )

  # Write the joined sensitivity data to disk
  saveRDS(hamby224_sensitivity_joined, "../../../data/hamby224_sensitivity_joined.rds")

} else {

  # Read the previously saved joined sensitivity data
  hamby224_sensitivity_joined <- readRDS("../../../data/hamby224_sensitivity_joined.rds")

}

Logistic Regression

# Read the saved model object from its .rds file
logistic_mains <- readRDS(file = "../../../data/logistic_mains.rds")

One Implementation

The functions lime and explain from the lime package are applied below to the logistic regression model with only main effects.

# Create or load the lime and explain objects for the main effects logistic regression model
if(!file.exists("../../../data/lime_explain_mains.rds")) {
  
  # Set a seed
  set.seed(20190226)
  
  # Apply lime to the training features
  # (all_of() makes explicit that rf_features is an external character vector)
  lime_mains <- lime(x = hamby173and252_train %>%
                       select(all_of(rf_features)), 
                     model = logistic_mains)
  
  # Apply explain to the test rows that are complete in the features
  explain_mains <- explain(x = hamby224_test %>%
                             select(all_of(rf_features)) %>% 
                             na.omit(), 
                           explainer = lime_mains, 
                           n_labels = 1,
                           n_features = 3)
  
  # Join the lime and explain objects in a list
  lime_explain_mains <- list(lime = lime_mains, explain = explain_mains)
  
  # Save the lime and explain objects
  saveRDS(lime_explain_mains, "../../../data/lime_explain_mains.rds")

} else {
  
  # Load the lime and explain objects
  lime_explain_mains <- readRDS("../../../data/lime_explain_mains.rds")
  
  # Extract the individual objects so that both branches define the same variables
  lime_mains <- lime_explain_mains[["lime"]]
  explain_mains <- lime_explain_mains[["explain"]]
  
}

Multiple Implementations

The code below applies the LIME algorithm to the main effects logistic regression model one time for the following different input cases:

2 to 6 quantile bins
2 to 6 equally spaced bins
2 to 6 tree based bins with samesource as the response variable
2 to 6 tree based bins with rfscore as the response variable
kernel density estimation
normal density approximation

# Apply the run_lime function unless all of the lime result files already exist
# (all three files are checked because all three are read in the else branch)
if (!all(file.exists("../../../data/logistic_lime_inputs.rds",
                     "../../../data/logistic_lime.rds",
                     "../../../data/logistic_explain.rds"))) {
  
  # Specify the input options to use with lime (one element per input case)
  logistic_lime_inputs <- list(bin_continuous = c(rep(TRUE, 20), 
                                                  rep(FALSE, 2)),
                               quantile_bins = c(rep(TRUE, 5), 
                                                 rep(FALSE, 5), 
                                                 rep(TRUE, 12)),
                               nbins = c(rep(2:6, 4), 
                                         rep(4, 2)),
                               use_density = c(rep(TRUE, 20), TRUE, FALSE),
                               bin_method = c(rep("quantile_bins", 5),
                                              rep("equally_spaced", 5),
                                              rep("tree", 10),
                                              rep("quantile_bins", 2)),
                               response = c(rep(NA, 10), 
                                            rep("samesource", 5), 
                                            rep("rfscore", 5), 
                                            rep(NA, 2)))
  
  # Tell R to run the upcoming code in parallel
  # NOTE(review): newer releases of the future package deprecate `multiprocess`
  # in favor of `multisession` -- confirm before upgrading future
  plan(multiprocess)
  
  # Apply lime to the full training data with the specified input options
  # (all_of() makes explicit that rf_features is an external character vector)
  logistic_lime_explain <- future_pmap(.l = logistic_lime_inputs,
             .f = run_lime, # run_lime is one of my helper functions
             features = rf_features,
             train = hamby173and252_train,
             test = hamby224_test %>% 
               arrange(case) %>% 
               select(all_of(rf_features)) %>% 
               na.omit(),
             rfmodel = logistic_mains,
             label = "TRUE",
             nfeatures = 3,
             seed = TRUE)
  
  # Separate the lime and explain function results from the full data
  logistic_lime <- map(logistic_lime_explain, function(list) list$lime)
  logistic_explain <- map_df(logistic_lime_explain, function(list) list$explain)
  
  # Name the items in the lime list; the indices are taken from the list itself
  # so the names stay in step with the number of input cases
  names(logistic_lime) <- map_chr(seq_along(logistic_lime), function(case) 
      sprintf("case: bin_continuous = %s, quantile_bins = %s, nbins = %0.f, use_density = %s, bin_method = %s, response = %s",
              logistic_lime_inputs$bin_continuous[case],
              logistic_lime_inputs$quantile_bins[case],
              logistic_lime_inputs$nbins[case],
              logistic_lime_inputs$use_density[case],
              logistic_lime_inputs$bin_method[case],
              logistic_lime_inputs$response[case]))

  # Turn the lime input options into a dataframe before saving it
  # (one row per input case; unlist() coerces every option to character)
  logistic_lime_inputs <- logistic_lime_inputs %>%
    unlist() %>%
    matrix(ncol = length(logistic_lime_inputs), 
           dimnames = list(NULL, names(logistic_lime_inputs))) %>%
    as.data.frame() %>%
    mutate(case = seq_len(n())) %>%
    select(case, bin_continuous:response)
  
  # Save the lime objects
  saveRDS(logistic_lime_inputs, "../../../data/logistic_lime_inputs.rds")
  saveRDS(logistic_lime, "../../../data/logistic_lime.rds")
  saveRDS(logistic_explain, "../../../data/logistic_explain.rds")
  
} else {
  
  # Load in the lime objects
  logistic_lime_inputs <- readRDS("../../../data/logistic_lime_inputs.rds")
  logistic_lime <- readRDS("../../../data/logistic_lime.rds")
  logistic_explain <- readRDS("../../../data/logistic_explain.rds")
  
}

# Build the combined test and explanation data, or reload the saved copy
if (!file.exists("../../../data/logistic_test_explain.rds")) {

  logistic_test_explain <- hamby224_test %>%
    # Convert case to character and join the explanations on it
    mutate(case = as.character(case)) %>%
    full_join(logistic_explain, by = "case") %>%
    mutate(
      case = factor(case),
      feature_desc = factor(feature_desc),
      feature = factor(feature),
      nbins = factor(nbins)
    ) %>%
    arrange(nbins) %>%
    mutate(
      nbins = as.numeric(as.character(nbins)),
      # Label of the input setting, including the number of bins
      situation = fct_relevel(
        ifelse(
          bin_method == "quantile_bins" & bin_continuous == TRUE,
          sprintf("%.0f quantile", nbins),
          ifelse(
            bin_method == "equally_spaced" & bin_continuous == TRUE,
            sprintf("%.0f equally spaced", nbins),
            ifelse(
              bin_method == "tree" & bin_continuous == TRUE &
                response == "samesource",
              sprintf("%.0f samesource tree", nbins),
              ifelse(
                bin_method == "tree" & bin_continuous == TRUE &
                  response == "rfscore",
                sprintf("%.0f rfscore tree", nbins),
                ifelse(
                  use_density == TRUE & bin_continuous == FALSE,
                  "kernel density",
                  "normal approximation"
                )
              )
            )
          )
        ),
        "2 quantile", "3 quantile", "4 quantile",
        "5 quantile", "6 quantile", "2 equally spaced",
        "3 equally spaced", "4 equally spaced",
        "5 equally spaced", "6 equally spaced",
        "2 samesource tree", "3 samesource tree",
        "4 samesource tree", "5 samesource tree",
        "6 samesource tree"
      ),
      # Label of the input setting without the number of bins
      bin_situation = factor(
        ifelse(
          bin_continuous == TRUE & bin_method == "quantile_bins",
          "quantile",
          ifelse(
            bin_continuous == TRUE & bin_method == "equally_spaced",
            "equally spaced",
            ifelse(
              bin_continuous == TRUE & bin_method == "tree" &
                response == "samesource",
              "samesource tree",
              ifelse(
                bin_continuous == TRUE & bin_method == "tree" &
                  response == "rfscore",
                "rfscore tree",
                ifelse(
                  use_density == TRUE & bin_continuous == FALSE,
                  "kernel density",
                  "normal approximation"
                )
              )
            )
          )
        )
      )
    )

  # Write the combined test and explain data to disk
  saveRDS(logistic_test_explain, "../../../data/logistic_test_explain.rds")

} else {

  # Read the previously saved combined data
  logistic_test_explain <- readRDS("../../../data/logistic_test_explain.rds")

}

This code creates and saves a dataset with summaries of the explanations.

# Create the lime comparison data if the file does not already exist
if(!file.exists("../../../data/logistic_lime_comparisons.rds")) {
  
  # Keep the first row for each combination of case and lime input setting,
  # retain the summary variables, and compute the difference and the mean of
  # rfscore and model_prediction
  # (all_of() makes explicit that rf_features is an external character vector)
  logistic_lime_comparisons <- logistic_test_explain %>%
    select(-data, -prediction) %>%
    group_by(case, bin_continuous, quantile_bins, nbins, use_density, bin_method, response) %>%
    slice(1) %>%
    ungroup() %>%
    select(situation, bin_situation, bin_method, bin_continuous, quantile_bins, response, 
           nbins, use_density, set, case, all_of(rf_features), rfscore, model_prediction, model_r2) %>%
    mutate(diff = rfscore - model_prediction,
           mean = (rfscore + model_prediction) / 2)
    
  
  # Save the lime comparison data frame
  saveRDS(logistic_lime_comparisons, "../../../data/logistic_lime_comparisons.rds")
  
} else {
  
  # Load in the lime comparison data frame
  logistic_lime_comparisons <- readRDS("../../../data/logistic_lime_comparisons.rds")
  
}

Session Info

# Print the R session details (R version, platform, attached and loaded packages)
sessionInfo()

## R version 4.0.2 (2020-06-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] assertthat_0.2.1       tree_1.0-40            forcats_0.5.0         
##  [4] stringr_1.4.0          dplyr_1.0.2            purrr_0.3.4           
##  [7] readr_1.3.1            tidyr_1.1.2            tibble_3.0.3          
## [10] ggplot2_3.3.2.9000     tidyverse_1.3.0        randomForest_4.6-14   
## [13] gretchenalbrecht_0.1.0 furrr_0.1.0            future_1.18.0         
## [16] lime_0.5.1             bulletr_0.1.0.9000    
## 
## loaded via a namespace (and not attached):
##   [1] colorspace_1.4-1        ellipsis_0.3.1          class_7.3-17           
##   [4] fs_1.5.0                rstudioapi_0.11         listenv_0.8.0          
##   [7] prodlim_2019.11.13      fansi_0.4.1             lubridate_1.7.9        
##  [10] xml2_1.3.2              codetools_0.2-16        splines_4.0.2          
##  [13] robustbase_0.93-6       knitr_1.29              shinythemes_1.1.2      
##  [16] jsonlite_1.7.1          pROC_1.16.2             caret_6.0-86           
##  [19] broom_0.7.0             dbplyr_1.4.4            shiny_1.5.0            
##  [22] compiler_4.0.2          httr_1.4.2              backports_1.1.10       
##  [25] Matrix_1.2-18           fastmap_1.0.1           lazyeval_0.2.2         
##  [28] cli_2.0.2               later_1.1.0.1           htmltools_0.5.0        
##  [31] tools_4.0.2             gtable_0.3.0            glue_1.4.2             
##  [34] reshape2_1.4.4          Rcpp_1.0.5              cellranger_1.1.0       
##  [37] vctrs_0.3.4             nlme_3.1-148            iterators_1.0.12       
##  [40] crosstalk_1.1.0.1       timeDate_3043.102       gower_0.2.2            
##  [43] xfun_0.17               globals_0.12.5          rvest_0.3.6            
##  [46] mime_0.9                miniUI_0.1.1.1          lifecycle_0.2.0        
##  [49] DEoptimR_1.0-8          MASS_7.3-51.6           zoo_1.8-8              
##  [52] scales_1.1.1            ipred_0.9-9             hms_0.5.3              
##  [55] promises_1.1.1          parallel_4.0.2          yaml_2.2.1             
##  [58] curl_4.3                rpart_4.1-15            stringi_1.5.3          
##  [61] foreach_1.5.0           TTR_0.24.2              manipulateWidget_0.10.1
##  [64] lava_1.6.7              shape_1.4.4             rlang_0.4.7            
##  [67] pkgconfig_2.0.3         rgl_0.100.54            evaluate_0.14          
##  [70] lattice_0.20-41         recipes_0.1.13          htmlwidgets_1.5.1      
##  [73] tidyselect_1.1.0        plyr_1.8.6              magrittr_1.5           
##  [76] R6_2.4.1                generics_0.0.2          DBI_1.1.0              
##  [79] pillar_1.4.6            haven_2.3.1             withr_2.2.0            
##  [82] xts_0.12.1              survival_3.1-12         nnet_7.3-14            
##  [85] modelr_0.1.8            crayon_1.3.4            plotly_4.9.2.1         
##  [88] rmarkdown_2.3           grid_4.0.2              readxl_1.3.1           
##  [91] data.table_1.13.0       blob_1.2.1              ModelMetrics_1.2.2.2   
##  [94] reprex_0.3.0            digest_0.6.25           webshot_0.5.2          
##  [97] xtable_1.8-4            httpuv_1.5.4            stats4_4.0.2           
## [100] munsell_0.5.0           glmnet_4.0-2            viridisLite_0.3.0      
## [103] smoother_1.1

Applying lime to `rtrees`

Katherine Goode

September 29, 2020

Random Forest

One Implementation

Multiple Implementations

Logistic Regression

One Implementation

Multiple Implementations

Session Info

Applying lime to rtrees

Katherine Goode

September 29, 2020

Random Forest

One Implementation

Multiple Implementations

Logistic Regression

One Implementation

Multiple Implementations

Session Info

Applying lime to `rtrees`