knitr::opts_chunk$set(echo = TRUE, include = TRUE, message=FALSE, warning=FALSE)
library(tidyverse) # for data manipulation
library(googlesheets4) # Read raw data stored in a google sheet document
library(vcd) # Calculation reliability statistics, e.g. Kappa
library(gt) # This packages supports the creation of tables
fct_label_inorder <- c("Equivalence of meaning; lexical, as well as conceptual",
"Equivalence of meaning, but with synonymy.",
"Source concept is broader and has a less specific meaning than the target concept",
"Source concept is narrower and has a more specific meaning than the target concept",
"No map is possible")
# Data preprocessing
# Among other tasks: relabels factors
# concepts <- read_csv(file = "./data/mapping-results.csv", col_types = cols(.default = "c")) %>%
concepts <- read_csv(file = "~/Downloads/überarbeitet-snomed-mapping - Tabellenblatt1 (2).csv", col_types = cols(.default = "c")) %>%
mutate_at(.vars = c("equ_jens", "equ_mareike", "equi_final"),
.funs = ~ factor(x = .x, levels = fct_label_inorder, labels = fct_label_inorder, ordered = TRUE))
Reliabilität Mapping
# Restructure/ Pivot Data
mapping <- concepts %>%
select(id, id_mapper, kapitel, kapitelbezeichnung, bezeichnung, snomed_code, kapitel) %>%
pivot_wider(id_cols = c("id", "kapitelbezeichnung", "bezeichnung", "kapitel"),
names_from = "id_mapper",
values_from = "snomed_code")
# Caluclate Kappa values
# Kappa Overall
kappa_overall <- irr::kappam.fleiss(mapping[, c("9f6", "83f", "605")])
# Kappa by Chapter
by_chapter <- mapping %>%
select(-id, -bezeichnung) %>%
mutate_at("kapitelbezeichnung", stringr::str_to_title) %>%
nest(data = c(`9f6`, `83f`, `605`)) %>%
mutate(kappa = map(data, ~ irr::kappam.fleiss(ratings = .x))) %>%
mutate(kappa_val = map_dbl(kappa, pluck, "value")) %>%
mutate(kappa_subjects = map_dbl(kappa, pluck, "subjects")) %>%
mutate(group = "By Chapter")
overall <- tibble(group = "Overall",
kappa_val = kappa_overall$value,
kappa_subjects = kappa_overall$subjects)
# Calculate Agreement
rater <- mapping %>%
set_names(c("id", "kapitelbezeichnung", "bezeichnung", "kapitel", "phil", "lisa", "anne"))
agreement <- rater %>%
mutate(phil_anne = phil == anne) %>%
mutate(anne_lisa = anne == lisa) %>%
mutate(phil_lisa = phil == lisa) %>%
mutate(agreement = (phil_anne + anne_lisa + phil_lisa) / 3)
overall <- agreement %>%
summarise(agreement = mean(agreement)) %>%
bind_cols(overall)
by_chapter <- agreement %>%
mutate_at("kapitelbezeichnung", stringr::str_to_title) %>%
group_by(kapitelbezeichnung) %>%
summarise(agreement = mean(agreement)) %>%
full_join(by_chapter, by = "kapitelbezeichnung") %>%
select(kapitelbezeichnung, data, kappa, agreement, everything())
mapping_results <- by_chapter %>%
select(-data, -kappa) %>%
bind_rows(overall)
mapping_results %>%
mutate_at("kapitelbezeichnung", stringr::str_to_title) %>%
set_names(c("Chapter", "Percent", "Chapter_Nr", "Fleiss-Kappa", "Number of Items", "Group")) %>%
mutate(Chapter = glue::glue("{Chapter_Nr} - {Chapter}", .na = NULL)) %>%
select(-Chapter_Nr) %>%
arrange(Chapter) %>%
mutate_at("Chapter", ~ if_else(condition = is.na(.x), true = "", false = as.character(.x))) %>%
gt::gt(rowname_col = "Chapter", groupname_col = "Group") %>%
gt::fmt_percent(columns = vars("Percent"), decimals = 2) %>%
gt::fmt(columns = vars("Fleiss-Kappa"), fns = function(x) sprintf(x, fmt = '%#.3f')) %>%
gt::tab_style(
style = list(
gt::cell_text(align = "center")
),
locations = gt::cells_body()
) %>%
gt::tab_spanner(label = "Agreement", columns = matches("Percent|Fleiss")) %>%
gt::tab_header(
title = gt::md("Reliability of the Mapping between three Mappers"),
)
Reliability of the Mapping between three Mappers | |||
---|---|---|---|
Agreement | Number of Items | ||
Percent | Fleiss-Kappa | ||
By Chapter | |||
1 - 01 Stammdaten | 60.78% | 0.575 | 34 |
2 - 02 Allgemeinstatus | 75.76% | 0.754 | 66 |
3 - 03 Wundanamnese | 58.33% | 0.568 | 24 |
4 - 04 Wundstatus | 38.60% | 0.366 | 57 |
5 - 05 Diagnostik | 33.33% | 0.280 | 14 |
6 - 06 Therapie | 39.73% | 0.367 | 73 |
Overall | |||
52.36% | 0.512 | 268 |
Reliability of the Equivalence Testing ======================================
overall_equ <- irr::kappam.fleiss(concepts[, c("equ_jens", "equ_mareike")])
by_chapter_equ <- concepts %>%
select(id, kapitelbezeichnung, equ_jens, equ_mareike) %>%
nest(data = c(id, equ_jens, equ_mareike)) %>%
mutate(kappa = map(data, ~ irr::kappam.fleiss(ratings = .x[, c("equ_jens", "equ_mareike")]))) %>%
mutate(kappa_val = map_dbl(kappa, pluck, "value")) %>%
mutate(kappa_subjects = map_dbl(kappa, pluck, "subjects")) %>%
unnest(data) %>%
mutate(agreement = equ_jens == equ_mareike) %>%
group_by(kapitelbezeichnung) %>%
mutate(agreement = sum(agreement) / length(agreement)) %>%
summarise_at(c("kappa_val", "kappa_subjects", "agreement"), .funs = mean) %>%
mutate(Group = "By Chapter")
overall_equ <- tibble(
Chapter = "",
agreement = sum(concepts$equ_jens == concepts$equ_mareike) / nrow(concepts),
kappa_val = overall_equ$value,
kappa_subjects = overall_equ$subjects,
Group = "Overall"
)
by_chapter_equ %>%
rename(Chapter = kapitelbezeichnung) %>%
mutate_at("Chapter", as.character) %>%
bind_rows(overall_equ) %>%
select(Chapter, Percent = agreement, `Fleiss-Kappa` = kappa_val, `Number of Items` = kappa_subjects, Group) %>%
gt::gt(rowname_col = "Chapter", groupname_col = "Group") %>%
gt::fmt_percent(columns = vars("Percent"), decimals = 2) %>%
gt::fmt(columns = vars("Fleiss-Kappa"), fns = function(x) sprintf(x, fmt = '%#.3f')) %>%
gt::tab_style(
style = list(
gt::cell_text(align = "center")
),
locations = gt::cells_body()
) %>%
gt::tab_spanner(label = "Agreement", columns = matches("Percent|Fleiss")) %>%
gt::tab_header(
title = gt::md("Reliability of the Equivalence Rating between two Rater"),
)
Reliability of the Equivalence Rating between two Rater | |||
---|---|---|---|
Agreement | Number of Items | ||
Percent | Fleiss-Kappa | ||
By Chapter | |||
01 Stammdaten | 83.33% | 0.772 | 102 |
02 Allgemeinstatus | 89.90% | 0.835 | 198 |
03 Wundanamnese | 70.83% | 0.583 | 72 |
04 Wundstatus | 74.27% | 0.641 | 171 |
05 Diagnostik | 59.52% | 0.408 | 42 |
06 Therapie | 75.34% | 0.667 | 219 |
Overall | |||
78.48% | 0.702 | 804 |
Coverage ========
coverage_results <- concepts %>%
group_by(id) %>%
filter(!is.na(finales_konzept)) %>%
distinct(id, .keep_all = TRUE) %>%
group_by(equi_final) %>%
count() %>%
ungroup() %>%
mutate(proportion = n / sum(n)) %>%
mutate(map = c("Map", "Map", "Map", "Map", "No Map")) %>%
group_by(map) %>%
mutate(proportion_map = sum(proportion)) %>%
mutate(n_map = sum(n))
# Vielviel Prozent der Konzepte können insgesamt abgedeckt werden?
coverage_results %>%
rename("Number of Items" = n, "Proportion" = proportion) %>%
select(-n_map, -proportion_map) %>%
gt(rowname_col = "equi_final", groupname_col = "map") %>%
fmt_percent(columns = starts_with("proportion"), decimals = 2) %>%
tab_spanner(label = "Equivalence Rating", columns = vars("Number of Items", "Proportion")) %>%
gt::tab_style(
style = list(
gt::cell_text(align = "center")
),
locations = gt::cells_body()
)
Equivalence Rating | ||
---|---|---|
Number of Items | Proportion | |
Map | ||
Equivalence of meaning; lexical, as well as conceptual | 117 | 43.66% |
Equivalence of meaning, but with synonymy. | 63 | 23.51% |
Source concept is broader and has a less specific meaning than the target concept | 6 | 2.24% |
Source concept is narrower and has a more specific meaning than the target concept | 26 | 9.70% |
No Map | ||
No map is possible | 56 | 20.90% |
In this study, three mappers researched SNOMED CT for 268 NKDUC items. Among them, 117 concepts had a direct match in SNOMED CT, 63 concepts had a match but which was classified as a match through synonymy. For 48 NKDUK items, a corresponding concept in the target terminology was missing. For the remainding 39 NKDUC items, the matach was asymmetrical, i.e., the meannig of the source concept was broader or narrower that its corresponding target concept.
results_equivalence <- concepts %>%
filter(!is.na(finales_konzept)) %>%
distinct(id, .keep_all = TRUE) %>%
group_by(equi_final, kapitelbezeichnung) %>%
count() %>%
group_by(kapitelbezeichnung) %>%
mutate(proportion = n / sum(n)) %>%
arrange(kapitelbezeichnung, equi_final) %>%
ungroup()
overall_equ_results <- results_equivalence %>%
group_by(equi_final) %>%
select(-proportion) %>%
summarize(n = sum(n)) %>%
ungroup %>%
mutate(proportion = n / sum(n)) %>%
mutate(kapitelbezeichnung = "Overall") %>%
mutate(section = "Overall") %>%
mutate_at("proportion", ~ scales::percent(x = .x, accuracy = 0.1)) %>%
mutate(n_prop = glue::glue("{proportion} (n={n})")) %>%
select(Overall = n_prop)
results_equivalence %>%
mutate_at("proportion", ~ scales::percent(x = .x, accuracy = 0.1)) %>%
mutate(n_prop = glue::glue("{proportion} (n={n})")) %>%
select(-n, -proportion) %>%
pivot_wider(id_cols = equi_final, names_from = kapitelbezeichnung, values_from = n_prop, values_fill = list(n_prop = "-")) %>%
mutate_all(~ stringr::str_replace(string = .x, pattern = "\\% \\(", replacement = "%\n(")) %>%
bind_cols(overall_equ_results) %>%
select(equi_final, Overall, dplyr::everything()) %>%
gt::gt(rowname_col = "equi_final") %>%
tab_spanner(label = "Chapter", columns = matches("^\\d\\d")) %>%
gt::tab_stubhead(label = "Equivalence Categories") %>%
gt::tab_style(
style = list(
gt::cell_text(align = "center")
),
locations = gt::cells_body()
)
Equivalence Categories | Overall | Chapter | |||||
---|---|---|---|---|---|---|---|
01 Stammdaten | 02 Allgemeinstatus | 03 Wundanamnese | 04 Wundstatus | 05 Diagnostik | 06 Therapie | ||
Equivalence of meaning; lexical, as well as conceptual | 43.7% (n=117) | 23.5% (n=8) | 59.1% (n=39) | 50.0% (n=12) | 43.9% (n=25) | 35.7% (n=5) | 38.4% (n=28) |
Equivalence of meaning, but with synonymy. | 23.5% (n=63) | 26.5% (n=9) | 24.2% (n=16) | 25.0% (n=6) | 21.1% (n=12) | 21.4% (n=3) | 23.3% (n=17) |
Source concept is broader and has a less specific meaning than the target concept | 2.2% (n=6) | 2.9% (n=1) | 3.0% (n=2) | 4.2% (n=1) | 1.8% (n=1) | \- | 1.4% (n=1) |
Source concept is narrower and has a more specific meaning than the target concept | 9.7% (n=26) | 11.8% (n=4) | 3.0% (n=2) | \- | 10.5% (n=6) | 21.4% (n=3) | 15.1% (n=11) |
No map is possible | 20.9% (n=56) | 35.3% (n=12) | 10.6% (n=7) | 20.8% (n=5) | 22.8% (n=13) | 21.4% (n=3) | 21.9% (n=16) |
results_equivalence <- results_equivalence %>%
mutate(equi_simple = as.integer(equi_final)) %>%
mutate(equi_simple = case_when(
equi_simple == 1 ~ "1",
equi_simple == 2 ~ "1",
equi_simple == 3 ~ "2",
equi_simple == 4 ~ "2",
equi_simple == 5 ~ "3"
)) %>%
mutate(equi_simple = factor(x = equi_simple,
levels = 1:3,
labels = c("Semantic Match present (Degree 1 and 2)",
"Semantic Asymmetry present (Degree 3 and 4)",
"Semantic Match absent (Degree 5)"),
ordered = TRUE
))
overall_equ_results_simple <- results_equivalence %>%
group_by(equi_simple) %>%
select(-proportion) %>%
summarize(n = sum(n)) %>%
ungroup %>%
mutate(proportion = n / sum(n)) %>%
mutate(kapitelbezeichnung = "Overall") %>%
mutate(section = "Overall") %>%
mutate_at("proportion", ~ scales::percent(x = .x, accuracy = 0.1)) %>%
mutate(n_prop = glue::glue("{proportion} (n={n})")) %>%
select(Overall = n_prop)
results_equivalence %>%
group_by(equi_simple, kapitelbezeichnung) %>%
summarise(n = sum(n), proportion = sum(proportion)) %>%
mutate_at("proportion", ~ scales::percent(x = .x, accuracy = 0.1)) %>%
mutate(n_prop = glue::glue("{proportion} (n={n})")) %>%
select(-n, -proportion) %>%
pivot_wider(id_cols = equi_simple, names_from = kapitelbezeichnung, values_from = n_prop, values_fill = list(n_prop = "-")) %>%
ungroup() %>%
mutate_all(~ stringr::str_replace(string = .x, pattern = "\\% \\(", replacement = "%\n(")) %>%
bind_cols(overall_equ_results_simple) %>%
select(equi_simple, Overall, dplyr::everything()) %>%
gt::gt(rowname_col = "equi_simple") %>%
tab_spanner(label = "Chapter", columns = matches("^\\d\\d")) %>%
gt::tab_stubhead(label = "Equivalence Categories") %>%
gt::tab_style(
style = list(
gt::cell_text(align = "center")
),
locations = gt::cells_body()
)
Equivalence Categories | Overall | Chapter | |||||
---|---|---|---|---|---|---|---|
01 Stammdaten | 02 Allgemeinstatus | 03 Wundanamnese | 04 Wundstatus | 05 Diagnostik | 06 Therapie | ||
Semantic Match present (Degree 1 and 2) | 67.2% (n=180) | 50.0% (n=17) | 83.3% (n=55) | 75.0% (n=18) | 64.9% (n=37) | 57.1% (n=8) | 61.6% (n=45) |
Semantic Asymmetry present (Degree 3 and 4) | 11.9% (n=32) | 14.7% (n=5) | 6.1% (n=4) | 4.2% (n=1) | 12.3% (n=7) | 21.4% (n=3) | 16.4% (n=12) |
Semantic Match absent (Degree 5) | 20.9% (n=56) | 35.3% (n=12) | 10.6% (n=7) | 20.8% (n=5) | 22.8% (n=13) | 21.4% (n=3) | 21.9% (n=16) |
concepts %>%
group_by(id) %>%
filter(finales_konzept == min(finales_konzept)) %>%
distinct(id, .keep_all = TRUE)
## # A tibble: 0 x 15
## # Groups: id [0]
## # … with 15 variables: id <chr>, id_mapper <chr>, chapter <chr>,
## # kapitelbezeichnung <chr>, bezeichnung <chr>, finaler_beschluss <chr>,
## # snomed_code <chr>, descriptor <chr>, equi_final <ord>,
## # finales_konzept <chr>, equ_jens <ord>, equ_mareike <ord>, agreement <chr>,
## # map <chr>, kapitel <chr>
results_equivalence_agg <- results_equivalence %>%
mutate_at(.vars = "equi_final",
.funs = ~ if_else(
condition = .x == "No map is possible",
true = "No",
false = "Yes"
))
coverage_overall <- results_equivalence_agg %>%
group_by(equi_final) %>%
summarise(n = sum(n), proportion = sum(proportion)) %>%
mutate(kapitelbezeichnung = "") %>%
mutate(group = "Overall") %>%
mutate(proportion = n / sum(n)) %>%
mutate(proportion = glue::glue("{scales::percent(proportion, accuracy = .01)} (n={n})")) %>%
select(-n) %>%
pivot_wider(id_cols = c("kapitelbezeichnung", "group"), names_from = equi_final, values_from = proportion)
coverage_by_chapter <- results_equivalence_agg %>%
group_by(equi_final, kapitelbezeichnung) %>%
summarise(n = sum(n), proportion = sum(proportion)) %>%
mutate(group = "By Chapter") %>%
arrange(kapitelbezeichnung) %>%
mutate(proportion = glue::glue("{scales::percent(proportion, accuracy = .01)} (n={n})")) %>%
mutate_at(c("kapitelbezeichnung", "proportion"), as.character) %>%
select(-n) %>%
pivot_wider(id_cols = c("kapitelbezeichnung", "group"), names_from = equi_final, values_from = proportion)
coverage_by_chapter %>%
bind_rows(coverage_overall) %>%
gt::gt(rowname_col = "kapitelbezeichnung", groupname_col = "group") %>%
gt::tab_spanner(label = "SNOMED CT identified", columns = c("No", "Yes"))
SNOMED CT identified | ||
---|---|---|
No | Yes | |
By Chapter | ||
01 Stammdaten | 35.29% (n=12) | 64.71% (n=22) |
02 Allgemeinstatus | 10.61% (n=7) | 89.39% (n=59) |
03 Wundanamnese | 20.83% (n=5) | 79.17% (n=19) |
04 Wundstatus | 22.81% (n=13) | 77.19% (n=44) |
05 Diagnostik | 21.43% (n=3) | 78.57% (n=11) |
06 Therapie | 21.92% (n=16) | 78.08% (n=57) |
Overall | ||
20.90% (n=56) | 79.10% (n=212) |