We have finally identified the data that was most likely used to train rtrees (CCFs_withlands). This data is different from the one we previously believed to be the one that trained rtrees (hamby_comparisons; previously called features-hamby173and252.csv). It is of interest to keep a record of how the LIME explanations on a testing dataset (Hamby 224 sets 1 and 11) vary across the various training datasets and models. This journal applies LIME to various versions of a random forest trained on the Hamby bullet signature comparisons data.
library(dplyr)
library(forcats)
library(ggplot2)
library(gretchenalbrecht)
library(limeaid)
Load the filtered version of the hamby_comparisons data: - LEAs with tank rash in Eric’s paper filtered out - Contains same number of rows as rtrees - Prepared in journal 09-identifying-rtrees-training-data.Rmd
hamby_comparisons_filtered <- read.csv("../../../data/hamby_comparisons_filtered.csv")
Load the cleaned version of the CCFs_withlands data (prepared in 09-identifying-rtrees-training-data.Rmd):
CCFs_withlands_clean <- read.csv("../../../data/CCFs_withlands_clean.csv")
Load the bullet testing data using in the LIME diagnostics paper (prepared in https://github.com/goodekat/LIME-diagnostics-paper/blob/master/code/01-raw-file-compression.Rmd):
bullet_test <- read.csv("../../../diagnostics-paper/data/bullet-test.csv")
Obtain the features used to train rtrees for use throughout this journal:
rtrees_features <- rownames(bulletxtrctr::rtrees$importance)
Wrong training data for rtrees
# Create a file path
file_rtrees_hc_explanations = "../../../data/rtrees_explanations_hc.rds"
# Implement and save or load LIME explanations
if (!file.exists(file_rtrees_hc_explanations)) {
# Apply LIME
rtrees_hc_explanations <- apply_lime(
train = hamby_comparisons_filtered %>% select(all_of(rtrees_features)),
test = bullet_test %>% select(all_of(rtrees_features)),
model = bulletxtrctr::rtrees,
label = as.character(TRUE),
n_features = 3,
sim_method = c('quantile_bins', 'equal_bins',
'kernel_density', 'normal_approx'),
nbins = 2:6,
feature_select = "auto",
dist_fun = "gower",
kernel_width = NULL,
gower_pow = c(0.5, 1, 10),
return_perms = FALSE,
all_fs = FALSE,
seed = 20190914)
# Save the explanations
saveRDS(object = rtrees_hc_explanations, file = file_rtrees_hc_explanations)
} else {
# Load the explanations
rtrees_hc_explanations <- readRDS(file_rtrees_hc_explanations)
}
Probably the training data for rtrees
# Create a file path
file_rtrees_ccfwl_explanations = "../../../data/rtrees_explanations_ccfwl.rds"
# Implement and save or load LIME explanations
if (!file.exists(file_rtrees_ccfwl_explanations)) {
# Apply LIME
rtrees_ccfwl_explanations <- apply_lime(
train = CCFs_withlands_clean %>% select(all_of(rtrees_features)),
test = bullet_test %>% select(all_of(rtrees_features)),
model = bulletxtrctr::rtrees,
label = as.character(TRUE),
n_features = 3,
sim_method = c('quantile_bins', 'equal_bins',
'kernel_density', 'normal_approx'),
nbins = 2:6,
feature_select = "auto",
dist_fun = "gower",
kernel_width = NULL,
gower_pow = c(0.5, 1, 10),
return_perms = FALSE,
all_fs = FALSE,
seed = 20190914)
# Save the explanations
saveRDS(object = rtrees_ccfwl_explanations, file = file_rtrees_ccfwl_explanations)
} else {
# Load the explanations
rtrees_ccfwl_explanations <- readRDS(file_rtrees_ccfwl_explanations)
}
Using the data that was probably used to train rtrees to train a new random forest (see the Rnw of the LIME diagnostics paper for the code):
bullet_explain <- readRDS("../../../diagnostics-paper/data/bullet-explain.rds")
# Create a feature heatmap
plot_feature_heatmap(
rtrees_hc_explanations$explain %>%
mutate(
label = as.factor(label),
feature = fct_recode(
feature,
"Rough Correlation" = "rough_cor",
"Consecutively Matching Striae" = "cms",
"Distance" = "D",
"Matches" = "matches",
"Mismatches" = "mismatches",
"Non-Consecutively Matching Striae" = "non_cms",
"Cross Correlation Function" = "ccf",
"Sum of Peaks" = "sum_peaks",
"Distance Standard Deviation" = "sd_D"
)
),
facet_var = bullet_test %>%
mutate(samesource = fct_recode(
factor(samesource),
"Match" = "TRUE",
"Non-Match" = "FALSE"
)) %>%
pull(samesource),
order_method = "PCA"
) +
scale_fill_gretchenalbrecht(palette = "last_rays", discrete = TRUE) +
scale_color_gretchenalbrecht(palette = "last_rays",
discrete = TRUE,
reverse = TRUE) +
theme_bw() +
theme(
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
strip.background = element_rect(color = "white", fill = "white"),
strip.text.y.right = element_text(angle = 0),
legend.position = "bottom",
axis.text.x = element_text(angle = 45, hjust = 1)
) +
guides(fill = guide_legend(nrow = 3)) +
labs(y = "Case", color = "Complex Model Feature", fill = "Complex Model Feature")
# Create a feature heatmap
plot_feature_heatmap(
rtrees_ccfwl_explanations$explain %>%
mutate(
label = as.factor(label),
feature = fct_recode(
feature,
"Rough Correlation" = "rough_cor",
"Consecutively Matching Striae" = "cms",
"Distance" = "D",
"Matches" = "matches",
"Mismatches" = "mismatches",
"Non-Consecutively Matching Striae" = "non_cms",
"Cross Correlation Function" = "ccf",
"Sum of Peaks" = "sum_peaks",
"Distance Standard Deviation" = "sd_D"
)
),
facet_var = bullet_test %>%
mutate(samesource = fct_recode(
factor(samesource),
"Match" = "TRUE",
"Non-Match" = "FALSE"
)) %>%
pull(samesource),
order_method = "PCA"
) +
scale_fill_gretchenalbrecht(palette = "last_rays", discrete = TRUE) +
scale_color_gretchenalbrecht(palette = "last_rays",
discrete = TRUE,
reverse = TRUE) +
theme_bw() +
theme(
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
strip.background = element_rect(color = "white", fill = "white"),
strip.text.y.right = element_text(angle = 0),
legend.position = "bottom",
axis.text.x = element_text(angle = 45, hjust = 1)
) +
guides(fill = guide_legend(nrow = 3)) +
labs(y = "Case", color = "Complex Model Feature", fill = "Complex Model Feature")
# Create a feature heatmap
plot_feature_heatmap(
bullet_explain %>%
mutate(
label = as.factor(label),
feature = fct_recode(
feature,
"Rough Correlation" = "rough_cor",
"Consecutively Matching Striae" = "cms",
"Distance" = "D",
"Matches" = "matches",
"Mismatches" = "mismatches",
"Non-Consecutively Matching Striae" = "non_cms",
"Cross Correlation Function" = "ccf",
"Sum of Peaks" = "sum_peaks",
"Distance Standard Deviation" = "sd_D"
)
),
facet_var = bullet_test %>%
mutate(samesource = fct_recode(
factor(samesource),
"Match" = "TRUE",
"Non-Match" = "FALSE"
)) %>%
pull(samesource),
order_method = "PCA"
) +
scale_fill_gretchenalbrecht(palette = "last_rays", discrete = TRUE) +
scale_color_gretchenalbrecht(palette = "last_rays",
discrete = TRUE,
reverse = TRUE) +
theme_bw() +
theme(
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
strip.background = element_rect(color = "white", fill = "white"),
strip.text.y.right = element_text(angle = 0),
legend.position = "bottom",
axis.text.x = element_text(angle = 45, hjust = 1)
) +
guides(fill = guide_legend(nrow = 3)) +
labs(y = "Case", color = "Complex Model Feature", fill = "Complex Model Feature")
plot_metrics(
rtrees_hc_explanations$explain %>% mutate(label = as.factor(label)),
add_lines = TRUE,
line_alpha = 0.75
) +
theme_bw() +
theme(
strip.background = element_rect(color = "white", fill = "white"),
strip.placement = "outside"
)
plot_metrics(
rtrees_ccfwl_explanations$explain %>% mutate(label = as.factor(label)),
add_lines = TRUE,
line_alpha = 0.75
) +
theme_bw() +
theme(
strip.background = element_rect(color = "white", fill = "white"),
strip.placement = "outside"
)
plot_metrics(
bullet_explain %>% mutate(label = as.factor(label)),
add_lines = TRUE,
line_alpha = 0.75
) +
theme_bw() +
theme(
strip.background = element_rect(color = "white", fill = "white"),
strip.placement = "outside"
)
sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] limeaid_0.0.1 gretchenalbrecht_0.1.0 ggplot2_3.3.2.9000
## [4] forcats_0.5.0 dplyr_1.0.2
##
## loaded via a namespace (and not attached):
## [1] colorspace_1.4-1 ellipsis_0.3.1 listenv_0.8.0
## [4] furrr_0.1.0 farver_2.0.3 x3ptools_0.0.2.9000
## [7] fansi_0.4.1 mvtnorm_1.1-1 xml2_1.3.2
## [10] codetools_0.2-16 splines_4.0.2 lime_0.5.1
## [13] knitr_1.29 shinythemes_1.1.2 readbitmap_0.1.5
## [16] jsonlite_1.7.1 cluster_2.1.0 png_0.1-7
## [19] shiny_1.5.0 readr_1.3.1 compiler_4.0.2
## [22] backports_1.1.10 assertthat_0.2.1 bmp_0.3
## [25] Matrix_1.2-18 fastmap_1.0.1 cli_2.0.2
## [28] later_1.1.0.1 htmltools_0.5.0 tools_4.0.2
## [31] igraph_1.2.5 gtable_0.3.0 glue_1.4.2
## [34] Rcpp_1.0.5 grooveFinder_0.0.1 vctrs_0.3.4
## [37] gdata_2.18.0 iterators_1.0.12 crosstalk_1.1.0.1
## [40] gbRd_0.4-11 gower_0.2.2 xfun_0.17
## [43] stringr_1.4.0 globals_0.12.5 testthat_2.3.2
## [46] mime_0.9 miniUI_0.1.1.1 lifecycle_0.2.0
## [49] gtools_3.8.2 dendextend_1.14.0 future_1.18.0
## [52] MASS_7.3-51.6 zoo_1.8-8 scales_1.1.1
## [55] TSP_1.1-10 hms_0.5.3 promises_1.1.1
## [58] parallel_4.0.2 yaml_2.2.1 curl_4.3
## [61] gridExtra_2.3 stringi_1.5.3 imager_0.42.3
## [64] gclus_1.3.2 foreach_1.5.0 checkmate_2.0.0
## [67] seriation_1.2-8 TTR_0.24.2 tiff_0.1-5
## [70] caTools_1.18.0 bibtex_0.4.2.2 manipulateWidget_0.10.1
## [73] shape_1.4.4 Rdpack_1.0.0 rlang_0.4.7
## [76] pkgconfig_2.0.3 bitops_1.0-6 rgl_0.100.54
## [79] evaluate_0.14 lattice_0.20-41 purrr_0.3.4
## [82] bulletxtrctr_0.2.0 htmlwidgets_1.5.1 labeling_0.3
## [85] tidyselect_1.1.0 magrittr_1.5 R6_2.4.1
## [88] gplots_3.0.4 generics_0.0.2 pillar_1.4.6
## [91] withr_2.2.0 xts_0.12.1 survival_3.1-12
## [94] tibble_3.0.3 crayon_1.3.4 KernSmooth_2.23-17
## [97] rmarkdown_2.3 viridis_0.5.1 jpeg_0.1-8.1
## [100] locfit_1.5-9.4 grid_4.0.2 digest_0.6.25
## [103] webshot_0.5.2 xtable_1.8-4 tidyr_1.1.2
## [106] httpuv_1.5.4 munsell_0.5.0 bulletcp_1.0.0
## [109] glmnet_4.0-2 registry_0.5-1 viridisLite_0.3.0
## [112] smoother_1.1