This journal includes code for applying lime to various models (rtrees and logistic regression with main effects only).

# Load libraries
library(bulletr)
library(lime)
library(furrr)
library(future)
library(gretchenalbrecht)
library(randomForest)
library(tidyverse)

# Source functions
source("../../code/helper_functions.R")

The training and testing datasets are loaded in below.

# Training data (Hamby Data 173 and 252), read from the project data folder
hamby173and252_train <- read.csv(file = "../../../data/hamby173and252_train.csv")

# Testing data (Hamby Data 224 Sets 1 and 11), read from the project data folder
hamby224_test <- read.csv(file = "../../../data/hamby224_test.csv")

A vector containing the features used in the random forest rtrees is created below.

# Names of the features used by the rtrees random forest, taken from the row
# names of the model's importance table
# (rtrees is not defined in this file; presumably it is provided by bulletr)
rf_features <- rownames(rtrees[["importance"]])

Random Forest

One Implementation

The code below applies the LIME algorithm to the bullet data one time for the following different input cases:

  • 2 to 6 quantile bins
  • 2 to 6 equally spaced bins
  • 2 to 6 tree based bins with samesource as the response variable
  • 2 to 6 tree based bins with rfscore as the response variable
  • kernel density estimation
  • normal density approximation

It applies LIME using my function run_lime to specify the different input values. The outputs from the lime function are combined into a list, and the outputs from the explain function are combined into a dataframe. Both are saved as .rds files.

# Apply the run_lime function unless all of the saved lime results already exist.
# All three files are checked (not just the inputs file) so that a partially
# written cache is rebuilt instead of failing in readRDS() in the else branch.
if (!all(file.exists(c("../../../data/hamby224_lime_inputs.rds",
                       "../../../data/hamby224_lime.rds",
                       "../../../data/hamby224_explain.rds")))) {
  
  # Specify the input options to use with lime (one element per case):
  #   cases  1-5 : 2 to 6 quantile bins
  #   cases  6-10: 2 to 6 equally spaced bins
  #   cases 11-15: 2 to 6 tree based bins with samesource as the response
  #   cases 16-20: 2 to 6 tree based bins with rfscore as the response
  #   case  21   : no binning, use_density = TRUE (kernel density)
  #   case  22   : no binning, use_density = FALSE (normal approximation)
  hamby224_lime_inputs <- list(bin_continuous = c(rep(TRUE, 20), 
                                                  rep(FALSE, 2)),
                               quantile_bins = c(rep(TRUE, 5), 
                                                 rep(FALSE, 5), 
                                                 rep(TRUE, 12)),
                               nbins = c(rep(2:6, 4), 
                                         rep(4, 2)),
                               use_density = c(rep(TRUE, 20), TRUE, FALSE),
                               bin_method = c(rep("quantile_bins", 5),
                                              rep("equally_spaced", 5),
                                              rep("tree", 10),
                                              rep("quantile_bins", 2)),
                               response = c(rep(NA, 10), 
                                            rep("samesource", 5), 
                                            rep("rfscore", 5), 
                                            rep(NA, 2)))
  
  # Tell R to run the upcoming code in parallel
  # NOTE(review): the multiprocess strategy is deprecated in later releases of
  # the future package -- confirm which strategy should replace it
  plan(multiprocess)
  
  # Apply lime to the full training data with the specified input options.
  # pmap calls run_lime once per case; the remaining arguments are the same for
  # every case. all_of() makes it explicit that rf_features is an external
  # character vector of column names.
  hamby224_lime_explain <- future_pmap(.l = hamby224_lime_inputs,
             .f = run_lime, # run_lime is one of my helper functions
             features = rf_features,
             train = hamby173and252_train,
             test = hamby224_test %>% 
               arrange(case) %>% 
               select(all_of(rf_features)) %>% 
               na.omit(),
             rfmodel = as_classifier(rtrees),
             label = "TRUE",
             nfeatures = 3,
             seed = TRUE)
  
  # Separate the lime and explain function results from the full data
  hamby224_lime <- map(hamby224_lime_explain, function(res) res$lime)
  hamby224_explain <- map_df(hamby224_lime_explain, function(res) res$explain)
  
  # Name the items in the lime list after the input options of each case
  # (the number of cases is taken from the list rather than hard coded)
  names(hamby224_lime) <- map_chr(seq_along(hamby224_lime), function(case) 
      sprintf("case: bin_continuous = %s, quantile_bins = %s, nbins = %0.f, use_density = %s, bin_method = %s, response = %s",
              hamby224_lime_inputs$bin_continuous[case],
              hamby224_lime_inputs$quantile_bins[case],
              hamby224_lime_inputs$nbins[case],
              hamby224_lime_inputs$use_density[case],
              hamby224_lime_inputs$bin_method[case],
              hamby224_lime_inputs$response[case]))

  # Turn the lime input options into a dataframe before saving it
  # (unlist() combines the logical, numeric and character options into a single
  # character vector, which is then reshaped to one column per option)
  hamby224_lime_inputs <- hamby224_lime_inputs %>%
    unlist() %>%
    matrix(ncol = length(hamby224_lime_inputs), 
           dimnames = list(NULL, names(hamby224_lime_inputs))) %>%
    as.data.frame() %>%
    mutate(case = seq_along(hamby224_lime_inputs$quantile_bins)) %>%
    select(case, bin_continuous:response)
  
  # Save the lime objects
  saveRDS(hamby224_lime_inputs, "../../../data/hamby224_lime_inputs.rds")
  saveRDS(hamby224_lime, "../../../data/hamby224_lime.rds")
  saveRDS(hamby224_explain, "../../../data/hamby224_explain.rds")
  
} else {
  
  # Load in the lime objects
  hamby224_lime_inputs <- readRDS("../../../data/hamby224_lime_inputs.rds")
  hamby224_lime <- readRDS("../../../data/hamby224_lime.rds")
  hamby224_explain <- readRDS("../../../data/hamby224_explain.rds")
  
}

This code creates a dataframe of the bins to use in the app based on the output from the lime function for each of the input settings. The object hamby224_bin_boundaries contains the boundaries for the bins, and hamby224_bins contains nice forms of the bin intervals (i.e. [lower, upper)).

# Create the bin boundaries and bins unless both saved files already exist.
# The check is made on the .rds files that this chunk actually writes and reads;
# checking "hamby224_bins.csv" (which is never written here) meant the saved
# results were never reused.
if (!all(file.exists(c("../../../data/hamby224_bin_boundaries.rds",
                       "../../../data/hamby224_bins.rds")))) {
  
  # Create a list of dataframes with the bin boundaries and bins 
  # for the different evaluations of the lime functions
  # (create_bin_data is not defined in this file; presumably a helper function)
  hamby224_bin_list <- map(hamby224_lime, create_bin_data)
  
  # Separate the bin boundaries and the bins into two lists
  # (one element per evaluation of lime)
  hamby224_bin_boundaries <- map(hamby224_bin_list, function(m) m$boundaries)
  hamby224_bins <- map(hamby224_bin_list, function(m) m$bins)
  
  # Save the bin boundaries and bins
  saveRDS(hamby224_bin_boundaries, "../../../data/hamby224_bin_boundaries.rds")
  saveRDS(hamby224_bins, "../../../data/hamby224_bins.rds")
  
} else {
  
  # Load in the bin boundaries and bins
  hamby224_bin_boundaries <- readRDS("../../../data/hamby224_bin_boundaries.rds")
  hamby224_bins <- readRDS("../../../data/hamby224_bins.rds")
  
}

This code combines the test data and the LIME explanations into one dataframe and saves the dataframe.

# Create the test_explain combined data if the file does not already exist
# (otherwise the saved version is loaded in the else branch below)
if(!file.exists("../../../data/hamby224_test_explain.rds")) {
  
  # Join the data and the explanations and edit and add additional variables
  # Create the feature bin labels using my function "bin_labeller"
  # (case is converted to character before the join so that it can be matched
  # with the case variable in the explanations and is turned into a factor after)
  hamby224_test_explain <- hamby224_test %>%
    mutate(case = as.character(case)) %>%
    full_join(hamby224_explain, by = "case") %>%
    mutate(case = factor(case),
           feature_desc = factor(feature_desc),
           feature_bin = pmap_chr(list(feature = feature, 
                                  feature_value = feature_value,
                                  b_c = bin_continuous,
                                  q_b = quantile_bins,
                                  n_b = nbins,
                                  u_d = use_density,
                                  b_m = bin_method,
                                  r_v = response),
                            .f = bin_labeller, # bin_labeller is one of my helper functions
                            bin_data = hamby224_bin_boundaries,
                            case_info = hamby224_lime_inputs)) %>%
    # feature_number is the number parsed from the text of feature_desc, and
    # strictly_less starts as FALSE for every row (it is filled in below)
    mutate(feature = factor(feature),
           nbins = factor(nbins),
           feature_number = readr::parse_number(as.character(feature_desc)),
           strictly_less = FALSE) %>%
    arrange(nbins)

  # Finish creating the strictly less than variable
  # (set to TRUE for the rows where feature_desc contains "< ")
  hamby224_test_explain$strictly_less[grep("< ", hamby224_test_explain$feature_desc)] <- TRUE
  
  # Reorder the variables of feature_desc and feature_bin for plotting purposes and
  # create new variables of situation and bin_situation
  #   situation: the number of bins and the bin type (e.g. "3 quantile"), or
  #     "kernel density" / "normal approximation" when bin_continuous is FALSE
  #     (use_density TRUE gives "kernel density"); the levels listed in
  #     fct_relevel are moved to the front in the order given
  #   bin_situation: the same labels without the number of bins
  hamby224_test_explain <- hamby224_test_explain %>%
    mutate(feature_desc = reorder(feature_desc, strictly_less),
           feature_desc = reorder(feature_desc, feature_number),
           feature_desc = reorder(feature_desc, as.numeric(feature))) %>%
    mutate(nbins = as.numeric(as.character(nbins)),
           situation = ifelse(bin_continuous == TRUE & bin_method == "quantile_bins", 
                              sprintf("%.0f quantile", nbins),
                              ifelse(bin_continuous == TRUE & bin_method == "equally_spaced",
                                     sprintf("%.0f equally spaced", nbins),
                                     ifelse(bin_continuous == TRUE & bin_method == "tree" &
                                              response == "samesource",
                                            sprintf("%.0f samesource tree", nbins),
                                            ifelse(bin_continuous == TRUE & bin_method == "tree" &
                                              response == "rfscore",
                                              sprintf("%.0f rfscore tree", nbins),
                                              ifelse(bin_continuous == FALSE & 
                                                     use_density == TRUE, 
                                                     "kernel density", 
                                                     "normal approximation"))))) %>%
             fct_relevel("2 quantile", "3 quantile", "4 quantile",
                         "5 quantile", "6 quantile", "2 equally spaced",
                         "3 equally spaced", "4 equally spaced",
                         "5 equally spaced", "6 equally spaced",
                         "2 samesource tree", "3 samesource tree",
                         "4 samesource tree", "5 samesource tree",
                         "6 samesource tree")) %>%
    mutate(bin_situation = ifelse(bin_method == "quantile_bins" & 
                                  bin_continuous == TRUE,
                                  "quantile",
                                  ifelse(bin_method == "equally_spaced" & 
                                         bin_continuous == TRUE,
                                         "equally spaced", 
                                         ifelse(bin_method == "tree" & 
                                                bin_continuous == TRUE & 
                                                response == "samesource",
                                                "samesource tree", 
                                                ifelse(bin_method == "tree" & 
                                                       bin_continuous == TRUE & 
                                                       response == "rfscore",
                                                       "rfscore tree", 
                                                       ifelse(bin_continuous == FALSE & 
                                                       use_density == TRUE, 
                                                       "kernel density", 
                                                       "normal approximation")))))) %>%
    mutate(bin_situation = factor(bin_situation)) %>%
    # Put the situation variables first; the column ranges used here depend on
    # the order of the variables in the joined data
    select(situation, bin_situation, bin_continuous:response, case:feature_desc,
           feature_bin:strictly_less, data, prediction)

  # Save the combined test and explain data
  saveRDS(hamby224_test_explain, "../../../data/hamby224_test_explain.rds")
   
} else {
  
  # Load in the data
  hamby224_test_explain <- readRDS("../../../data/hamby224_test_explain.rds")
  
}

This code creates and saves a dataset with summaries of the explanations.

# Create the lime comparison data if the file does not already exist
if (!file.exists("../../../data/hamby224_lime_comparisons.rds")) {
  
  # Create a data frame with the interesting information relating to the different
  # evaluations of lime and compute the difference and mean between the rf and rr
  # model predictions
  #   - slice(1) keeps the first row for each case within each set of lime
  #     input options
  #   - all_of() makes it explicit that rf_features is an external character
  #     vector of column names and not a column of the data
  hamby224_lime_comparisons <- hamby224_test_explain %>%
    select(-data, -prediction) %>%
    group_by(case, bin_continuous, quantile_bins, nbins, use_density, bin_method, response) %>%
    slice(1) %>%
    ungroup() %>%
    select(situation, bin_situation, bin_method, bin_continuous, quantile_bins, response, 
           nbins, use_density, set, case, all_of(rf_features), rfscore, model_prediction, 
           model_r2) %>%
    mutate(diff = rfscore - model_prediction,
           mean = (rfscore + model_prediction) / 2)
  
  # Save the lime comparison data frame
  saveRDS(hamby224_lime_comparisons, "../../../data/hamby224_lime_comparisons.rds")
  
} else {
  
  # Load in the lime comparison data frame
  hamby224_lime_comparisons <- readRDS("../../../data/hamby224_lime_comparisons.rds")
  
}

Multiple Implementations

I was interested in determining how the random permutations affect the results from LIME. That is, I wanted to do a sensitivity analysis. The code below applies the LIME algorithm to the bullet data ten times for each of the following different input cases:

  • 2 to 6 quantile bins
  • 2 to 6 equally spaced bins
  • 2 to 6 tree based bins with samesource as the response variable
  • 2 to 6 tree based bins with rfscore as the response variable
  • kernel density estimation
  • normal density approximation

It applies LIME using my function run_lime to specify the different input values. The outputs from the explain function are combined into a dataframe and saved as an .rds file. The input values are also saved as an .rds file.

# Perform the sensitivity analysis unless both saved files already exist
# (both files are checked so that a partially written cache is rebuilt instead
# of failing in readRDS() in the else branch)
if (!all(file.exists(c("../../../data/hamby224_sensitivity_inputs.rds",
                       "../../../data/hamby224_sensitivity_outputs.rds")))) {
  
  # Specify the number of reps and input cases
  nreps <- 10
  noptions <- 22
  
  # Specify the inputs for the sensitivity analysis: the same 22 input options
  # as the single implementation, with each option repeated nreps times in a row
  hamby224_sensitivity_inputs <- list(bin_continuous = c(rep(TRUE, nreps * 20),
                                                         rep(FALSE, nreps * 2)),
                                      quantile_bins = c(rep(c(TRUE, FALSE), each = nreps * 5), 
                                                        rep(TRUE, nreps * 12)),
                                      nbins = c(rep(rep(2:6, each = nreps), 4), 
                                                rep(4, nreps * 2)),
                                      use_density = c(rep(TRUE, nreps * 21), 
                                                      rep(FALSE, nreps)),
                                      bin_method = c(rep("quantile_bins", nreps * 5),
                                                      rep("equally_spaced", nreps * 5),
                                                      rep("tree", nreps * 10),
                                                      rep("quantile_bins", nreps * 2)),
                                      response = c(rep(NA, nreps * 10),
                                                   rep("samesource", nreps * 5),
                                                   rep("rfscore", nreps * 5),
                                                   rep(NA, nreps * 2)))
  
  # Prepare the test data that is explained in every run. It is stored so that
  # the rep variable below is based on exactly the rows given to run_lime.
  # all_of() makes it explicit that rf_features is an external character vector.
  hamby224_sensitivity_test <- hamby224_test %>% 
    arrange(case) %>% 
    select(all_of(rf_features)) %>% 
    na.omit()
  
  # Tell R to run the upcoming code in parallel
  # NOTE(review): the multiprocess strategy is deprecated in later releases of
  # the future package -- confirm which strategy should replace it
  plan(multiprocess)
  
  # Run lime for the sensitivity analysis and organize the output
  # The rep variable assumes each run returns 3 rows (nfeatures) per test case,
  # stacked in the order of the inputs (reps within input options)
  hamby224_sensitivity_outputs <- future_pmap(.l = hamby224_sensitivity_inputs,
             .f = run_lime, # run_lime is one of my helper functions
             features = rf_features,
             train = hamby173and252_train,
             test = hamby224_sensitivity_test,
             rfmodel = as_classifier(rtrees),
             label = "TRUE",
             nfeatures = 3,
             seed = FALSE) %>%
    map_df(function(res) res$explain) %>%
    mutate(rep = factor(rep(rep(seq_len(nreps), 
                                each = nrow(hamby224_sensitivity_test) * 3),
                            noptions)))
  
  # Turn the input options into a dataframe to be saved
  # (the response option is kept so that the two sets of tree based cases can
  # be told apart in the saved inputs)
  hamby224_sensitivity_inputs <- hamby224_sensitivity_inputs %>%
    unlist() %>%
    matrix(ncol = length(hamby224_sensitivity_inputs), 
           dimnames = list(NULL, names(hamby224_sensitivity_inputs))) %>%
    as.data.frame() %>%
    mutate(case = seq_len(nreps * noptions)) %>%
    select(case, bin_continuous:response)

  # Save the sensitivity inputs and outputs
  saveRDS(hamby224_sensitivity_inputs, "../../../data/hamby224_sensitivity_inputs.rds")
  saveRDS(hamby224_sensitivity_outputs, "../../../data/hamby224_sensitivity_outputs.rds")
  
} else {
  
  # Load in the sensitivity inputs and outputs
  hamby224_sensitivity_inputs <- readRDS("../../../data/hamby224_sensitivity_inputs.rds")
  hamby224_sensitivity_outputs <- readRDS("../../../data/hamby224_sensitivity_outputs.rds")

}

The code below joins the sensitivity outputs with the test data into a dataframe. The dataframe is saved as an .rds file.

# Join the sensitivity outputs with the test data if not already saved
# (otherwise the saved version is loaded in the else branch below)
if(!file.exists("../../../data/hamby224_sensitivity_joined.rds")) {

# Join on case (converted to character in the test data for the join), keep the
# variables of interest, and add the following variables:
#   diff: rfscore minus the model_prediction from the explanation
#   situation: the number of bins and the bin type (e.g. "3 quantile"), or
#     "kernel density" / "normal approximation" when bin_continuous is FALSE;
#     the levels listed in fct_relevel are moved to the front in the order given
#   bin_situation: the same labels without the number of bins
# The column ranges used in select depend on the order of the variables in the
# joined data
hamby224_sensitivity_joined <- hamby224_sensitivity_outputs %>%
  full_join(hamby224_test %>% na.omit() %>% 
                mutate(case = as.character(case)), by = "case") %>%
    mutate(case = factor(case)) %>%
    select(case, model_r2:feature_weight, bin_continuous:rep, 
           set:land2, rfscore, samesource) %>%
    mutate(diff = rfscore - model_prediction,
         nbins = as.numeric(as.character(nbins)),
         situation = ifelse(bin_continuous == TRUE & bin_method == "quantile_bins", 
                            sprintf("%.0f quantile", nbins),
                            ifelse(bin_continuous == TRUE & bin_method == "equally_spaced",
                                   sprintf("%.0f equally spaced", nbins),
                                   ifelse(bin_continuous == TRUE & bin_method == "tree" &
                                            response == "samesource",
                                          sprintf("%.0f samesource tree", nbins),
                                          ifelse(bin_continuous == TRUE & bin_method == "tree" &
                                            response == "rfscore",
                                            sprintf("%.0f rfscore tree", nbins),
                                            ifelse(bin_continuous == FALSE & 
                                                   use_density == TRUE, 
                                                   "kernel density", 
                                                   "normal approximation"))))) %>%
           fct_relevel("2 quantile", "3 quantile", "4 quantile",
                       "5 quantile", "6 quantile", "2 equally spaced",
                       "3 equally spaced", "4 equally spaced",
                       "5 equally spaced", "6 equally spaced",
                       "2 samesource tree", "3 samesource tree",
                       "4 samesource tree", "5 samesource tree",
                       "6 samesource tree")) %>%
    mutate(bin_situation = ifelse(bin_method == "quantile_bins" & 
                                  bin_continuous == TRUE,
                                  "quantile",
                                  ifelse(bin_method == "equally_spaced" & 
                                         bin_continuous == TRUE,
                                         "equally spaced", 
                                         ifelse(bin_method == "tree" & 
                                                bin_continuous == TRUE & 
                                                response == "samesource",
                                                "samesource tree", 
                                                ifelse(bin_method == "tree" & 
                                                       bin_continuous == TRUE & 
                                                       response == "rfscore",
                                                       "rfscore tree", 
                                                       ifelse(bin_continuous == FALSE & 
                                                       use_density == TRUE, 
                                                       "kernel density", 
                                                       "normal approximation")))))) %>%
    mutate(bin_situation = factor(bin_situation))

  # Save the joined sensitivity outputs and test data
  saveRDS(hamby224_sensitivity_joined, "../../../data/hamby224_sensitivity_joined.rds")

} else {

  # Load in the joined sensitivity outputs and test data
  hamby224_sensitivity_joined <- readRDS("../../../data/hamby224_sensitivity_joined.rds")

}

Logistic Regression

# Read in the saved main effects logistic regression model
logistic_mains <- readRDS(file = "../../../data/logistic_mains.rds")

One Implementation

The functions lime and explain from the lime package are applied below to the logistic regression model with only main effects.

# Create or load the lime and explain objects for the main effects logistic regression model
if (!file.exists("../../../data/lime_explain_mains.rds")) {
  
  # Set a seed so that the results are reproducible
  set.seed(20190226)
  
  # Apply lime to the training data restricted to the random forest features
  # (all_of() makes it explicit that rf_features is an external character vector
  # of column names and not a column of the data)
  lime_mains <- lime(x = hamby173and252_train %>%
                       select(all_of(rf_features)), 
                     model = logistic_mains)
  
  # Apply explain to the complete cases of the test data, keeping 3 features for
  # 1 label per case
  # NOTE(review): this uses n_labels = 1 while the run_lime calls in this file
  # pass label = "TRUE" -- confirm that the difference is intended
  explain_mains <- explain(x = hamby224_test %>%
                             select(all_of(rf_features)) %>% 
                             na.omit(), 
                           explainer = lime_mains, 
                           n_labels = 1,
                           n_features = 3)
  
  # Join the lime and explain objects in a list
  lime_explain_mains <- list(lime = lime_mains, explain = explain_mains)
  
  # Save the lime and explain objects
  saveRDS(lime_explain_mains, "../../../data/lime_explain_mains.rds")

} else {
  
  # Load the lime and explain objects
  lime_explain_mains <- readRDS("../../../data/lime_explain_mains.rds")
  
}

Multiple Implementations

The code below applies the LIME algorithm to the main effects logistic regression model one time for the following different input cases:

  • 2 to 6 quantile bins
  • 2 to 6 equally spaced bins
  • 2 to 6 tree based bins with samesource as the response variable
  • 2 to 6 tree based bins with rfscore as the response variable
  • kernel density estimation
  • normal density approximation

It applies LIME using my function run_lime to specify the different input values. The outputs from the lime function are combined into a list, and the outputs from the explain function are combined into a dataframe. Both are saved as .rds files.

# Apply the run_lime function unless all of the saved lime results already exist.
# All three files are checked (not just the inputs file) so that a partially
# written cache is rebuilt instead of failing in readRDS() in the else branch.
if (!all(file.exists(c("../../../data/logistic_lime_inputs.rds",
                       "../../../data/logistic_lime.rds",
                       "../../../data/logistic_explain.rds")))) {
  
  # Specify the input options to use with lime (one element per case):
  #   cases  1-5 : 2 to 6 quantile bins
  #   cases  6-10: 2 to 6 equally spaced bins
  #   cases 11-15: 2 to 6 tree based bins with samesource as the response
  #   cases 16-20: 2 to 6 tree based bins with rfscore as the response
  #   case  21   : no binning, use_density = TRUE (kernel density)
  #   case  22   : no binning, use_density = FALSE (normal approximation)
  logistic_lime_inputs <- list(bin_continuous = c(rep(TRUE, 20), 
                                                  rep(FALSE, 2)),
                               quantile_bins = c(rep(TRUE, 5), 
                                                 rep(FALSE, 5), 
                                                 rep(TRUE, 12)),
                               nbins = c(rep(2:6, 4), 
                                         rep(4, 2)),
                               use_density = c(rep(TRUE, 20), TRUE, FALSE),
                               bin_method = c(rep("quantile_bins", 5),
                                              rep("equally_spaced", 5),
                                              rep("tree", 10),
                                              rep("quantile_bins", 2)),
                               response = c(rep(NA, 10), 
                                            rep("samesource", 5), 
                                            rep("rfscore", 5), 
                                            rep(NA, 2)))
  
  # Tell R to run the upcoming code in parallel
  # NOTE(review): the multiprocess strategy is deprecated in later releases of
  # the future package -- confirm which strategy should replace it
  plan(multiprocess)
  
  # Apply lime to the full training data with the specified input options.
  # pmap calls run_lime once per case; the remaining arguments are the same for
  # every case. The logistic regression model is passed through the rfmodel
  # argument of run_lime. all_of() makes it explicit that rf_features is an
  # external character vector of column names.
  logistic_lime_explain <- future_pmap(.l = logistic_lime_inputs,
             .f = run_lime, # run_lime is one of my helper functions
             features = rf_features,
             train = hamby173and252_train,
             test = hamby224_test %>% 
               arrange(case) %>% 
               select(all_of(rf_features)) %>% 
               na.omit(),
             rfmodel = logistic_mains,
             label = "TRUE",
             nfeatures = 3,
             seed = TRUE)
  
  # Separate the lime and explain function results from the full data
  logistic_lime <- map(logistic_lime_explain, function(res) res$lime)
  logistic_explain <- map_df(logistic_lime_explain, function(res) res$explain)
  
  # Name the items in the lime list after the input options of each case
  # (the number of cases is taken from the list rather than hard coded)
  names(logistic_lime) <- map_chr(seq_along(logistic_lime), function(case) 
      sprintf("case: bin_continuous = %s, quantile_bins = %s, nbins = %0.f, use_density = %s, bin_method = %s, response = %s",
              logistic_lime_inputs$bin_continuous[case],
              logistic_lime_inputs$quantile_bins[case],
              logistic_lime_inputs$nbins[case],
              logistic_lime_inputs$use_density[case],
              logistic_lime_inputs$bin_method[case],
              logistic_lime_inputs$response[case]))

  # Turn the lime input options into a dataframe before saving it
  # (unlist() combines the logical, numeric and character options into a single
  # character vector, which is then reshaped to one column per option)
  logistic_lime_inputs <- logistic_lime_inputs %>%
    unlist() %>%
    matrix(ncol = length(logistic_lime_inputs), 
           dimnames = list(NULL, names(logistic_lime_inputs))) %>%
    as.data.frame() %>%
    mutate(case = seq_along(logistic_lime_inputs$quantile_bins)) %>%
    select(case, bin_continuous:response)
  
  # Save the lime objects
  saveRDS(logistic_lime_inputs, "../../../data/logistic_lime_inputs.rds")
  saveRDS(logistic_lime, "../../../data/logistic_lime.rds")
  saveRDS(logistic_explain, "../../../data/logistic_explain.rds")
  
} else {
  
  # Load in the lime objects
  logistic_lime_inputs <- readRDS("../../../data/logistic_lime_inputs.rds")
  logistic_lime <- readRDS("../../../data/logistic_lime.rds")
  logistic_explain <- readRDS("../../../data/logistic_explain.rds")
  
}
# Create the test_explain combined data if the file does not already exist
# (otherwise the saved version is loaded in the else branch below)
if(!file.exists("../../../data/logistic_test_explain.rds")) {
  
  # Join the data and the explanations and edit and add additional variables
  # (case is converted to character before the join so that it can be matched
  # with the case variable in the explanations and is turned into a factor after)
  #   situation: the number of bins and the bin type (e.g. "3 quantile"), or
  #     "kernel density" / "normal approximation" when bin_continuous is FALSE;
  #     the levels listed in fct_relevel are moved to the front in the order given
  #   bin_situation: the same labels without the number of bins
  logistic_test_explain <- hamby224_test %>%
    mutate(case = as.character(case)) %>%
    full_join(logistic_explain, by = "case") %>%
    mutate(case = factor(case),
           feature_desc = factor(feature_desc),
           feature = factor(feature),
           nbins = factor(nbins)) %>%
    arrange(nbins) %>%
    mutate(nbins = as.numeric(as.character(nbins)),
           situation = ifelse(bin_continuous == TRUE & bin_method == "quantile_bins", 
                              sprintf("%.0f quantile", nbins),
                              ifelse(bin_continuous == TRUE & bin_method == "equally_spaced",
                                     sprintf("%.0f equally spaced", nbins),
                                     ifelse(bin_continuous == TRUE & bin_method == "tree" &
                                              response == "samesource",
                                            sprintf("%.0f samesource tree", nbins),
                                            ifelse(bin_continuous == TRUE & bin_method == "tree" &
                                              response == "rfscore",
                                              sprintf("%.0f rfscore tree", nbins),
                                              ifelse(bin_continuous == FALSE & 
                                                     use_density == TRUE, 
                                                     "kernel density", 
                                                     "normal approximation"))))) %>%
             fct_relevel("2 quantile", "3 quantile", "4 quantile",
                         "5 quantile", "6 quantile", "2 equally spaced",
                         "3 equally spaced", "4 equally spaced",
                         "5 equally spaced", "6 equally spaced",
                         "2 samesource tree", "3 samesource tree",
                         "4 samesource tree", "5 samesource tree",
                         "6 samesource tree")) %>%
    mutate(bin_situation = ifelse(bin_method == "quantile_bins" & 
                                  bin_continuous == TRUE,
                                  "quantile",
                                  ifelse(bin_method == "equally_spaced" & 
                                         bin_continuous == TRUE,
                                         "equally spaced", 
                                         ifelse(bin_method == "tree" & 
                                                bin_continuous == TRUE & 
                                                response == "samesource",
                                                "samesource tree", 
                                                ifelse(bin_method == "tree" & 
                                                       bin_continuous == TRUE & 
                                                       response == "rfscore",
                                                       "rfscore tree", 
                                                       ifelse(bin_continuous == FALSE & 
                                                       use_density == TRUE, 
                                                       "kernel density", 
                                                       "normal approximation")))))) %>%
    mutate(bin_situation = factor(bin_situation))

  # Save the combined test and explain data
  saveRDS(logistic_test_explain, "../../../data/logistic_test_explain.rds")
   
} else {
  
  # Load in the data
  logistic_test_explain <- readRDS("../../../data/logistic_test_explain.rds")
  
}

This code creates and saves a dataset with summaries of the explanations.

# Create the lime comparison data if the file does not already exist
if (!file.exists("../../../data/logistic_lime_comparisons.rds")) {
  
  # Create a data frame with the interesting information relating to the different
  # evaluations of lime and compute the difference and mean between rfscore and
  # the model_prediction from the explanations
  #   - slice(1) keeps the first row for each case within each set of lime
  #     input options
  #   - all_of() makes it explicit that rf_features is an external character
  #     vector of column names and not a column of the data
  # NOTE(review): rfscore comes from the test data, while these explanations
  # were created for the logistic regression model -- confirm that rfscore is
  # the intended value to compare with model_prediction here
  logistic_lime_comparisons <- logistic_test_explain %>%
    select(-data, -prediction) %>%
    group_by(case, bin_continuous, quantile_bins, nbins, use_density, bin_method, response) %>%
    slice(1) %>%
    ungroup() %>%
    select(situation, bin_situation, bin_method, bin_continuous, quantile_bins, response, 
           nbins, use_density, set, case, all_of(rf_features), rfscore, model_prediction, 
           model_r2) %>%
    mutate(diff = rfscore - model_prediction,
           mean = (rfscore + model_prediction) / 2)
  
  # Save the lime comparison data frame
  saveRDS(logistic_lime_comparisons, "../../../data/logistic_lime_comparisons.rds")
  
} else {
  
  # Load in the lime comparison data frame
  logistic_lime_comparisons <- readRDS("../../../data/logistic_lime_comparisons.rds")
  
}

Session Info

# Print the R version, platform, and package versions for this session
sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] assertthat_0.2.1       tree_1.0-40            forcats_0.5.0         
##  [4] stringr_1.4.0          dplyr_1.0.2            purrr_0.3.4           
##  [7] readr_1.3.1            tidyr_1.1.2            tibble_3.0.3          
## [10] ggplot2_3.3.2.9000     tidyverse_1.3.0        randomForest_4.6-14   
## [13] gretchenalbrecht_0.1.0 furrr_0.1.0            future_1.18.0         
## [16] lime_0.5.1             bulletr_0.1.0.9000    
## 
## loaded via a namespace (and not attached):
##   [1] colorspace_1.4-1        ellipsis_0.3.1          class_7.3-17           
##   [4] fs_1.5.0                rstudioapi_0.11         listenv_0.8.0          
##   [7] prodlim_2019.11.13      fansi_0.4.1             lubridate_1.7.9        
##  [10] xml2_1.3.2              codetools_0.2-16        splines_4.0.2          
##  [13] robustbase_0.93-6       knitr_1.29              shinythemes_1.1.2      
##  [16] jsonlite_1.7.1          pROC_1.16.2             caret_6.0-86           
##  [19] broom_0.7.0             dbplyr_1.4.4            shiny_1.5.0            
##  [22] compiler_4.0.2          httr_1.4.2              backports_1.1.10       
##  [25] Matrix_1.2-18           fastmap_1.0.1           lazyeval_0.2.2         
##  [28] cli_2.0.2               later_1.1.0.1           htmltools_0.5.0        
##  [31] tools_4.0.2             gtable_0.3.0            glue_1.4.2             
##  [34] reshape2_1.4.4          Rcpp_1.0.5              cellranger_1.1.0       
##  [37] vctrs_0.3.4             nlme_3.1-148            iterators_1.0.12       
##  [40] crosstalk_1.1.0.1       timeDate_3043.102       gower_0.2.2            
##  [43] xfun_0.17               globals_0.12.5          rvest_0.3.6            
##  [46] mime_0.9                miniUI_0.1.1.1          lifecycle_0.2.0        
##  [49] DEoptimR_1.0-8          MASS_7.3-51.6           zoo_1.8-8              
##  [52] scales_1.1.1            ipred_0.9-9             hms_0.5.3              
##  [55] promises_1.1.1          parallel_4.0.2          yaml_2.2.1             
##  [58] curl_4.3                rpart_4.1-15            stringi_1.5.3          
##  [61] foreach_1.5.0           TTR_0.24.2              manipulateWidget_0.10.1
##  [64] lava_1.6.7              shape_1.4.4             rlang_0.4.7            
##  [67] pkgconfig_2.0.3         rgl_0.100.54            evaluate_0.14          
##  [70] lattice_0.20-41         recipes_0.1.13          htmlwidgets_1.5.1      
##  [73] tidyselect_1.1.0        plyr_1.8.6              magrittr_1.5           
##  [76] R6_2.4.1                generics_0.0.2          DBI_1.1.0              
##  [79] pillar_1.4.6            haven_2.3.1             withr_2.2.0            
##  [82] xts_0.12.1              survival_3.1-12         nnet_7.3-14            
##  [85] modelr_0.1.8            crayon_1.3.4            plotly_4.9.2.1         
##  [88] rmarkdown_2.3           grid_4.0.2              readxl_1.3.1           
##  [91] data.table_1.13.0       blob_1.2.1              ModelMetrics_1.2.2.2   
##  [94] reprex_0.3.0            digest_0.6.25           webshot_0.5.2          
##  [97] xtable_1.8-4            httpuv_1.5.4            stats4_4.0.2           
## [100] munsell_0.5.0           glmnet_4.0-2            viridisLite_0.3.0      
## [103] smoother_1.1