We have finally identified the data that was most likely used to train rtrees (CCFs_withlands). This data is different from the one we previously believed to be the one that trained rtrees (hamby_comparisons; previously called features-hamby173and252.csv). It is of interest to keep a record of how the LIME explanations on a testing dataset (Hamby 224 sets 1 and 11) vary across the various training datasets and models. This journal applies LIME to various versions of a random forest trained on the Hamby bullet signature comparisons data.

library(dplyr)
library(forcats)
library(ggplot2)
library(gretchenalbrecht)
library(limeaid)

Data

Load the filtered version of the hamby_comparisons data: - LEAs with tank rash in Eric’s paper filtered out - Contains same number of rows as rtrees - Prepared in journal 09-identifying-rtrees-training-data.Rmd

hamby_comparisons_filtered <- read.csv("../../../data/hamby_comparisons_filtered.csv")

Load the cleaned version of the CCFs_withlands data (prepared in 09-identifying-rtrees-training-data.Rmd):

CCFs_withlands_clean <- read.csv("../../../data/CCFs_withlands_clean.csv")

Load the bullet testing data using in the LIME diagnostics paper (prepared in https://github.com/goodekat/LIME-diagnostics-paper/blob/master/code/01-raw-file-compression.Rmd):

bullet_test <- read.csv("../../../diagnostics-paper/data/bullet-test.csv")

Obtain the features used to train rtrees for use throughout this journal:

rtrees_features <- rownames(bulletxtrctr::rtrees$importance)

Apply LIME

rtrees with hamby_comparisons

Wrong training data for rtrees

# Create a file path
file_rtrees_hc_explanations = "../../../data/rtrees_explanations_hc.rds"

# Implement and save or load LIME explanations
if (!file.exists(file_rtrees_hc_explanations)) {
  
  # Apply LIME
  rtrees_hc_explanations <- apply_lime(
    train = hamby_comparisons_filtered %>% select(all_of(rtrees_features)),
    test = bullet_test %>% select(all_of(rtrees_features)),
    model = bulletxtrctr::rtrees,
    label = as.character(TRUE),
    n_features = 3,
    sim_method = c('quantile_bins', 'equal_bins',
                   'kernel_density', 'normal_approx'),
    nbins = 2:6,
    feature_select = "auto",
    dist_fun = "gower",
    kernel_width = NULL,
    gower_pow = c(0.5, 1, 10),
    return_perms = FALSE,
    all_fs = FALSE,
    seed = 20190914)

  # Save the explanations
  saveRDS(object = rtrees_hc_explanations, file = file_rtrees_hc_explanations)
  
} else {

  # Load the explanations
  rtrees_hc_explanations <- readRDS(file_rtrees_hc_explanations)

}

rtrees with CCFs_withlands

Probably the training data for rtrees

# Create a file path
file_rtrees_ccfwl_explanations = "../../../data/rtrees_explanations_ccfwl.rds"

# Implement and save or load LIME explanations
if (!file.exists(file_rtrees_ccfwl_explanations)) {
  
  # Apply LIME
  rtrees_ccfwl_explanations <- apply_lime(
    train = CCFs_withlands_clean %>% select(all_of(rtrees_features)),
    test = bullet_test %>% select(all_of(rtrees_features)),
    model = bulletxtrctr::rtrees,
    label = as.character(TRUE),
    n_features = 3,
    sim_method = c('quantile_bins', 'equal_bins',
                   'kernel_density', 'normal_approx'),
    nbins = 2:6,
    feature_select = "auto",
    dist_fun = "gower",
    kernel_width = NULL,
    gower_pow = c(0.5, 1, 10),
    return_perms = FALSE,
    all_fs = FALSE,
    seed = 20190914)

  # Save the explanations
  saveRDS(object = rtrees_ccfwl_explanations, file = file_rtrees_ccfwl_explanations)
  
} else {

  # Load the explanations
  rtrees_ccfwl_explanations <- readRDS(file_rtrees_ccfwl_explanations)

}

New RF with CCFs_withlands

Using the data that was probably used to train rtrees to train a new random forest (see the Rnw of the LIME diagnostics paper for the code):

bullet_explain <- readRDS("../../../diagnostics-paper/data/bullet-explain.rds")

Feature Heatmaps

rtrees with hamby_comparisons

# Create a feature heatmap
plot_feature_heatmap(
  rtrees_hc_explanations$explain %>%
    mutate(
      label = as.factor(label),
      feature = fct_recode(
        feature,
        "Rough Correlation" = "rough_cor",
        "Consecutively Matching Striae" = "cms",
        "Distance" = "D",
        "Matches" = "matches",
        "Mismatches" = "mismatches",
        "Non-Consecutively Matching Striae" = "non_cms",
        "Cross Correlation Function" = "ccf",
        "Sum of Peaks" = "sum_peaks",
        "Distance Standard Deviation" = "sd_D"
      )
    ),
  facet_var = bullet_test %>%
    mutate(samesource = fct_recode(
      factor(samesource),
      "Match" = "TRUE",
      "Non-Match" = "FALSE"
    )) %>%
    pull(samesource),
  order_method = "PCA"
) +
  scale_fill_gretchenalbrecht(palette = "last_rays", discrete = TRUE) +
  scale_color_gretchenalbrecht(palette = "last_rays",
                               discrete = TRUE,
                               reverse = TRUE) +
  theme_bw() +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    strip.background = element_rect(color = "white", fill = "white"),
    strip.text.y.right = element_text(angle = 0),
    legend.position = "bottom",
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  guides(fill = guide_legend(nrow = 3)) +
  labs(y = "Case", color = "Complex Model Feature", fill = "Complex Model Feature")

rtrees with CCFs_withlands

# Create a feature heatmap
plot_feature_heatmap(
  rtrees_ccfwl_explanations$explain %>%
    mutate(
      label = as.factor(label),
      feature = fct_recode(
        feature,
        "Rough Correlation" = "rough_cor",
        "Consecutively Matching Striae" = "cms",
        "Distance" = "D",
        "Matches" = "matches",
        "Mismatches" = "mismatches",
        "Non-Consecutively Matching Striae" = "non_cms",
        "Cross Correlation Function" = "ccf",
        "Sum of Peaks" = "sum_peaks",
        "Distance Standard Deviation" = "sd_D"
      )
    ),
  facet_var = bullet_test %>%
    mutate(samesource = fct_recode(
      factor(samesource),
      "Match" = "TRUE",
      "Non-Match" = "FALSE"
    )) %>%
    pull(samesource),
  order_method = "PCA"
) +
  scale_fill_gretchenalbrecht(palette = "last_rays", discrete = TRUE) +
  scale_color_gretchenalbrecht(palette = "last_rays",
                               discrete = TRUE,
                               reverse = TRUE) +
  theme_bw() +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    strip.background = element_rect(color = "white", fill = "white"),
    strip.text.y.right = element_text(angle = 0),
    legend.position = "bottom",
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  guides(fill = guide_legend(nrow = 3)) +
  labs(y = "Case", color = "Complex Model Feature", fill = "Complex Model Feature")

New RF with CCFs_withlands

# Create a feature heatmap
plot_feature_heatmap(
  bullet_explain %>%
    mutate(
      label = as.factor(label),
      feature = fct_recode(
        feature,
        "Rough Correlation" = "rough_cor",
        "Consecutively Matching Striae" = "cms",
        "Distance" = "D",
        "Matches" = "matches",
        "Mismatches" = "mismatches",
        "Non-Consecutively Matching Striae" = "non_cms",
        "Cross Correlation Function" = "ccf",
        "Sum of Peaks" = "sum_peaks",
        "Distance Standard Deviation" = "sd_D"
      )
    ),
  facet_var = bullet_test %>%
    mutate(samesource = fct_recode(
      factor(samesource),
      "Match" = "TRUE",
      "Non-Match" = "FALSE"
    )) %>%
    pull(samesource),
  order_method = "PCA"
) +
  scale_fill_gretchenalbrecht(palette = "last_rays", discrete = TRUE) +
  scale_color_gretchenalbrecht(palette = "last_rays",
                               discrete = TRUE,
                               reverse = TRUE) +
  theme_bw() +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    strip.background = element_rect(color = "white", fill = "white"),
    strip.text.y.right = element_text(angle = 0),
    legend.position = "bottom",
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  guides(fill = guide_legend(nrow = 3)) +
  labs(y = "Case", color = "Complex Model Feature", fill = "Complex Model Feature")

Assessment Metric Plots

rtrees with hamby_comparisons

plot_metrics(
  rtrees_hc_explanations$explain %>% mutate(label = as.factor(label)),
  add_lines = TRUE,
  line_alpha = 0.75
) +
  theme_bw() +
  theme(
    strip.background = element_rect(color = "white", fill = "white"),
    strip.placement = "outside"
  )

rtrees with CCFs_withlands

plot_metrics(
  rtrees_ccfwl_explanations$explain %>% mutate(label = as.factor(label)),
  add_lines = TRUE,
  line_alpha = 0.75
) +
  theme_bw() +
  theme(
    strip.background = element_rect(color = "white", fill = "white"),
    strip.placement = "outside"
  )

New RF with CCFs_withlands

plot_metrics(
  bullet_explain %>% mutate(label = as.factor(label)),
  add_lines = TRUE,
  line_alpha = 0.75
) +
  theme_bw() +
  theme(
    strip.background = element_rect(color = "white", fill = "white"),
    strip.placement = "outside"
  )

Session Info

sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] limeaid_0.0.1          gretchenalbrecht_0.1.0 ggplot2_3.3.2.9000    
## [4] forcats_0.5.0          dplyr_1.0.2           
## 
## loaded via a namespace (and not attached):
##   [1] colorspace_1.4-1        ellipsis_0.3.1          listenv_0.8.0          
##   [4] furrr_0.1.0             farver_2.0.3            x3ptools_0.0.2.9000    
##   [7] fansi_0.4.1             mvtnorm_1.1-1           xml2_1.3.2             
##  [10] codetools_0.2-16        splines_4.0.2           lime_0.5.1             
##  [13] knitr_1.29              shinythemes_1.1.2       readbitmap_0.1.5       
##  [16] jsonlite_1.7.1          cluster_2.1.0           png_0.1-7              
##  [19] shiny_1.5.0             readr_1.3.1             compiler_4.0.2         
##  [22] backports_1.1.10        assertthat_0.2.1        bmp_0.3                
##  [25] Matrix_1.2-18           fastmap_1.0.1           cli_2.0.2              
##  [28] later_1.1.0.1           htmltools_0.5.0         tools_4.0.2            
##  [31] igraph_1.2.5            gtable_0.3.0            glue_1.4.2             
##  [34] Rcpp_1.0.5              grooveFinder_0.0.1      vctrs_0.3.4            
##  [37] gdata_2.18.0            iterators_1.0.12        crosstalk_1.1.0.1      
##  [40] gbRd_0.4-11             gower_0.2.2             xfun_0.17              
##  [43] stringr_1.4.0           globals_0.12.5          testthat_2.3.2         
##  [46] mime_0.9                miniUI_0.1.1.1          lifecycle_0.2.0        
##  [49] gtools_3.8.2            dendextend_1.14.0       future_1.18.0          
##  [52] MASS_7.3-51.6           zoo_1.8-8               scales_1.1.1           
##  [55] TSP_1.1-10              hms_0.5.3               promises_1.1.1         
##  [58] parallel_4.0.2          yaml_2.2.1              curl_4.3               
##  [61] gridExtra_2.3           stringi_1.5.3           imager_0.42.3          
##  [64] gclus_1.3.2             foreach_1.5.0           checkmate_2.0.0        
##  [67] seriation_1.2-8         TTR_0.24.2              tiff_0.1-5             
##  [70] caTools_1.18.0          bibtex_0.4.2.2          manipulateWidget_0.10.1
##  [73] shape_1.4.4             Rdpack_1.0.0            rlang_0.4.7            
##  [76] pkgconfig_2.0.3         bitops_1.0-6            rgl_0.100.54           
##  [79] evaluate_0.14           lattice_0.20-41         purrr_0.3.4            
##  [82] bulletxtrctr_0.2.0      htmlwidgets_1.5.1       labeling_0.3           
##  [85] tidyselect_1.1.0        magrittr_1.5            R6_2.4.1               
##  [88] gplots_3.0.4            generics_0.0.2          pillar_1.4.6           
##  [91] withr_2.2.0             xts_0.12.1              survival_3.1-12        
##  [94] tibble_3.0.3            crayon_1.3.4            KernSmooth_2.23-17     
##  [97] rmarkdown_2.3           viridis_0.5.1           jpeg_0.1-8.1           
## [100] locfit_1.5-9.4          grid_4.0.2              digest_0.6.25          
## [103] webshot_0.5.2           xtable_1.8-4            tidyr_1.1.2            
## [106] httpuv_1.5.4            munsell_0.5.0           bulletcp_1.0.0         
## [109] glmnet_4.0-2            registry_0.5-1          viridisLite_0.3.0      
## [112] smoother_1.1