View on GitHub

snomed-nkduc

The data and the analysis of our SNOMED CT mapping project. In this project, we mapped clinical concepts of the NKDUC (National Consensus for the Documentation of Leg Ulcers) with the international clinical terminology SNOMED CT.

knitr::opts_chunk$set(echo = TRUE, include = TRUE, message=FALSE, warning=FALSE)
library(tidyverse) # for data manipulation
library(googlesheets4) # Read raw data stored in a google sheet document
library(vcd) # Calculation reliability statistics, e.g. Kappa
library(gt) # This packages supports the creation of tables

fct_label_inorder <- c("Equivalence of meaning; lexical, as well as conceptual",
                       "Equivalence of meaning, but with synonymy.",
                       "Source concept is broader and has a less specific meaning than the target concept",
                       "Source concept is narrower and has a more specific meaning than the target concept",
                       "No map is possible")

# Data preprocessing
# Among other tasks: relabels factors
# concepts <- read_csv(file = "./data/mapping-results.csv", col_types = cols(.default = "c")) %>% 
concepts <- read_csv(file = "~/Downloads/überarbeitet-snomed-mapping - Tabellenblatt1 (2).csv", col_types = cols(.default = "c")) %>% 
  mutate_at(.vars = c("equ_jens", "equ_mareike", "equi_final"), 
            .funs = ~ factor(x = .x, levels = fct_label_inorder, labels = fct_label_inorder, ordered = TRUE))

Reliabilität Mapping

# Restructure/ Pivot Data

mapping <- concepts %>% 
  select(id, id_mapper, kapitel, kapitelbezeichnung, bezeichnung, snomed_code, kapitel) %>% 
  pivot_wider(id_cols = c("id", "kapitelbezeichnung", "bezeichnung", "kapitel"),
              names_from = "id_mapper",
              values_from = "snomed_code")

# Caluclate Kappa values

# Kappa Overall
kappa_overall <- irr::kappam.fleiss(mapping[, c("9f6", "83f", "605")])

# Kappa by Chapter
by_chapter <- mapping %>% 
  select(-id, -bezeichnung) %>% 
  mutate_at("kapitelbezeichnung", stringr::str_to_title) %>% 
  nest(data = c(`9f6`, `83f`, `605`)) %>% 
  mutate(kappa = map(data, ~ irr::kappam.fleiss(ratings = .x))) %>% 
  mutate(kappa_val = map_dbl(kappa, pluck, "value")) %>% 
  mutate(kappa_subjects = map_dbl(kappa, pluck, "subjects")) %>% 
  mutate(group = "By Chapter")

overall <- tibble(group = "Overall",
       kappa_val = kappa_overall$value,
       kappa_subjects = kappa_overall$subjects) 

# Calculate Agreement
rater <- mapping %>% 
  set_names(c("id", "kapitelbezeichnung", "bezeichnung", "kapitel", "phil", "lisa", "anne")) 
  
agreement <- rater %>% 
  mutate(phil_anne = phil == anne) %>% 
  mutate(anne_lisa = anne == lisa) %>% 
  mutate(phil_lisa = phil == lisa) %>% 
  mutate(agreement = (phil_anne + anne_lisa + phil_lisa) / 3) 

overall <- agreement %>% 
  summarise(agreement = mean(agreement)) %>% 
  bind_cols(overall)

by_chapter <- agreement %>% 
  mutate_at("kapitelbezeichnung", stringr::str_to_title) %>% 
  group_by(kapitelbezeichnung) %>% 
  summarise(agreement = mean(agreement)) %>% 
  full_join(by_chapter, by = "kapitelbezeichnung") %>% 
  select(kapitelbezeichnung, data, kappa, agreement, everything())

mapping_results <- by_chapter %>% 
  select(-data, -kappa) %>% 
  bind_rows(overall)
  
mapping_results %>% 
  mutate_at("kapitelbezeichnung", stringr::str_to_title) %>% 
  set_names(c("Chapter", "Percent", "Chapter_Nr", "Fleiss-Kappa", "Number of Items", "Group")) %>% 
  mutate(Chapter = glue::glue("{Chapter_Nr} - {Chapter}", .na =  NULL)) %>% 
  select(-Chapter_Nr) %>% 
  arrange(Chapter) %>% 
  mutate_at("Chapter", ~ if_else(condition = is.na(.x), true = "", false = as.character(.x))) %>% 
  gt::gt(rowname_col = "Chapter", groupname_col = "Group") %>%
  gt::fmt_percent(columns = vars("Percent"), decimals = 2) %>% 
  gt::fmt(columns = vars("Fleiss-Kappa"), fns = function(x) sprintf(x, fmt = '%#.3f')) %>% 
  gt::tab_style(
    style = list(
      gt::cell_text(align = "center") 
    ),
    locations = gt::cells_body()  
  ) %>% 
  gt::tab_spanner(label = "Agreement", columns = matches("Percent|Fleiss")) %>% 
  gt::tab_header(
    title = gt::md("Reliability of the Mapping between three Mappers"),
  )
Reliability of the Mapping between three Mappers
Agreement Number of Items
Percent Fleiss-Kappa
By Chapter
1 - 01 Stammdaten 60.78% 0.575 34
2 - 02 Allgemeinstatus 75.76% 0.754 66
3 - 03 Wundanamnese 58.33% 0.568 24
4 - 04 Wundstatus 38.60% 0.366 57
5 - 05 Diagnostik 33.33% 0.280 14
6 - 06 Therapie 39.73% 0.367 73
Overall
52.36% 0.512 268

Reliability of the Equivalence Testing ======================================

overall_equ <- irr::kappam.fleiss(concepts[, c("equ_jens", "equ_mareike")])

by_chapter_equ <- concepts %>% 
  select(id, kapitelbezeichnung, equ_jens, equ_mareike) %>% 
  nest(data = c(id, equ_jens, equ_mareike)) %>% 
  mutate(kappa = map(data, ~ irr::kappam.fleiss(ratings = .x[, c("equ_jens", "equ_mareike")]))) %>% 
  mutate(kappa_val = map_dbl(kappa, pluck, "value")) %>% 
  mutate(kappa_subjects = map_dbl(kappa, pluck, "subjects")) %>% 
  unnest(data) %>% 
  mutate(agreement = equ_jens == equ_mareike) %>% 
  group_by(kapitelbezeichnung) %>% 
  mutate(agreement = sum(agreement) / length(agreement)) %>% 
  summarise_at(c("kappa_val", "kappa_subjects", "agreement"), .funs = mean) %>% 
  mutate(Group = "By Chapter")

overall_equ <- tibble(
  Chapter = "",
  agreement = sum(concepts$equ_jens == concepts$equ_mareike) / nrow(concepts),
  kappa_val = overall_equ$value,
  kappa_subjects = overall_equ$subjects,
  Group = "Overall"
)

by_chapter_equ %>% 
  rename(Chapter = kapitelbezeichnung) %>% 
  mutate_at("Chapter", as.character) %>% 
  bind_rows(overall_equ) %>% 
  select(Chapter, Percent = agreement, `Fleiss-Kappa` = kappa_val, `Number of Items` = kappa_subjects, Group) %>% 
  gt::gt(rowname_col = "Chapter", groupname_col = "Group") %>%
  gt::fmt_percent(columns = vars("Percent"), decimals = 2) %>% 
  gt::fmt(columns = vars("Fleiss-Kappa"), fns = function(x) sprintf(x, fmt = '%#.3f')) %>% 
  gt::tab_style(
    style = list(
      gt::cell_text(align = "center") 
    ),
    locations = gt::cells_body()  
  ) %>% 
  gt::tab_spanner(label = "Agreement", columns = matches("Percent|Fleiss")) %>% 
  gt::tab_header(
    title = gt::md("Reliability of the Equivalence Rating between two Rater"),
  )
Reliability of the Equivalence Rating between two Rater
Agreement Number of Items
Percent Fleiss-Kappa
By Chapter
01 Stammdaten 83.33% 0.772 102
02 Allgemeinstatus 89.90% 0.835 198
03 Wundanamnese 70.83% 0.583 72
04 Wundstatus 74.27% 0.641 171
05 Diagnostik 59.52% 0.408 42
06 Therapie 75.34% 0.667 219
Overall
78.48% 0.702 804

Coverage ========

coverage_results <- concepts %>% 
  group_by(id) %>% 
  filter(!is.na(finales_konzept)) %>% 
  distinct(id, .keep_all = TRUE) %>% 
  group_by(equi_final) %>% 
  count() %>% 
  ungroup() %>% 
  mutate(proportion = n / sum(n)) %>% 
  mutate(map = c("Map", "Map", "Map", "Map", "No Map")) %>% 
  group_by(map) %>% 
  mutate(proportion_map = sum(proportion)) %>% 
  mutate(n_map = sum(n)) 
 
# Vielviel Prozent der Konzepte können insgesamt abgedeckt werden?
coverage_results %>% 
  rename("Number of Items" = n, "Proportion" = proportion) %>% 
  select(-n_map, -proportion_map) %>% 
  gt(rowname_col = "equi_final", groupname_col = "map") %>% 
  fmt_percent(columns = starts_with("proportion"), decimals = 2) %>%
  tab_spanner(label = "Equivalence Rating", columns = vars("Number of Items", "Proportion")) %>% 
  gt::tab_style(
    style = list(
      gt::cell_text(align = "center") 
    ),
    locations = gt::cells_body()  
  )
Equivalence Rating
Number of Items Proportion
Map
Equivalence of meaning; lexical, as well as conceptual 117 43.66%
Equivalence of meaning, but with synonymy. 63 23.51%
Source concept is broader and has a less specific meaning than the target concept 6 2.24%
Source concept is narrower and has a more specific meaning than the target concept 26 9.70%
No Map
No map is possible 56 20.90%

In this study, three mappers researched SNOMED CT for 268 NKDUC items. Among them, 117 concepts had a direct match in SNOMED CT, 63 concepts had a match but which was classified as a match through synonymy. For 48 NKDUK items, a corresponding concept in the target terminology was missing. For the remainding 39 NKDUC items, the matach was asymmetrical, i.e., the meannig of the source concept was broader or narrower that its corresponding target concept.

results_equivalence <- concepts %>% 
  filter(!is.na(finales_konzept)) %>% 
  distinct(id, .keep_all = TRUE) %>% 
  group_by(equi_final, kapitelbezeichnung) %>% 
  count() %>% 
  group_by(kapitelbezeichnung) %>% 
  mutate(proportion = n / sum(n)) %>% 
  arrange(kapitelbezeichnung, equi_final) %>% 
  ungroup()
  
overall_equ_results <- results_equivalence %>% 
  group_by(equi_final) %>% 
  select(-proportion) %>% 
  summarize(n = sum(n)) %>% 
  ungroup %>% 
  mutate(proportion = n / sum(n)) %>% 
  mutate(kapitelbezeichnung = "Overall") %>% 
  mutate(section = "Overall") %>% 
  mutate_at("proportion", ~ scales::percent(x = .x, accuracy = 0.1)) %>% 
  mutate(n_prop = glue::glue("{proportion} (n={n})")) %>% 
  select(Overall = n_prop)
  

results_equivalence %>% 
  mutate_at("proportion", ~ scales::percent(x = .x, accuracy = 0.1)) %>% 
  mutate(n_prop = glue::glue("{proportion} (n={n})")) %>% 
  select(-n, -proportion) %>% 
  pivot_wider(id_cols = equi_final, names_from = kapitelbezeichnung, values_from = n_prop, values_fill = list(n_prop = "-")) %>%
  mutate_all(~ stringr::str_replace(string = .x, pattern = "\\% \\(", replacement = "%\n(")) %>% 
  bind_cols(overall_equ_results) %>% 
  select(equi_final, Overall, dplyr::everything()) %>% 
  gt::gt(rowname_col = "equi_final") %>% 
  tab_spanner(label = "Chapter", columns = matches("^\\d\\d")) %>% 
  gt::tab_stubhead(label = "Equivalence Categories") %>% 
  gt::tab_style(
    style = list(
      gt::cell_text(align = "center") 
    ),
    locations = gt::cells_body()  
  )
Equivalence Categories Overall Chapter
01 Stammdaten 02 Allgemeinstatus 03 Wundanamnese 04 Wundstatus 05 Diagnostik 06 Therapie
Equivalence of meaning; lexical, as well as conceptual 43.7% (n=117) 23.5% (n=8) 59.1% (n=39) 50.0% (n=12) 43.9% (n=25) 35.7% (n=5) 38.4% (n=28)
Equivalence of meaning, but with synonymy. 23.5% (n=63) 26.5% (n=9) 24.2% (n=16) 25.0% (n=6) 21.1% (n=12) 21.4% (n=3) 23.3% (n=17)
Source concept is broader and has a less specific meaning than the target concept 2.2% (n=6) 2.9% (n=1) 3.0% (n=2) 4.2% (n=1) 1.8% (n=1) \- 1.4% (n=1)
Source concept is narrower and has a more specific meaning than the target concept 9.7% (n=26) 11.8% (n=4) 3.0% (n=2) \- 10.5% (n=6) 21.4% (n=3) 15.1% (n=11)
No map is possible 20.9% (n=56) 35.3% (n=12) 10.6% (n=7) 20.8% (n=5) 22.8% (n=13) 21.4% (n=3) 21.9% (n=16)
results_equivalence <- results_equivalence %>% 
  mutate(equi_simple = as.integer(equi_final)) %>% 
  mutate(equi_simple = case_when(
    equi_simple == 1 ~ "1",
    equi_simple == 2 ~ "1",
    equi_simple == 3 ~ "2",
    equi_simple == 4 ~ "2",
    equi_simple == 5 ~ "3"
  )) %>% 
  mutate(equi_simple = factor(x = equi_simple,
                              levels = 1:3, 
                              labels = c("Semantic Match present (Degree 1 and 2)",
                                         "Semantic Asymmetry present (Degree 3 and 4)",
                                         "Semantic Match absent (Degree 5)"),
                              ordered = TRUE
                              )) 
overall_equ_results_simple <- results_equivalence %>% 
  group_by(equi_simple) %>% 
  select(-proportion) %>% 
  summarize(n = sum(n)) %>% 
  ungroup %>% 
  mutate(proportion = n / sum(n)) %>% 
  mutate(kapitelbezeichnung = "Overall") %>% 
  mutate(section = "Overall") %>% 
  mutate_at("proportion", ~ scales::percent(x = .x, accuracy = 0.1)) %>% 
  mutate(n_prop = glue::glue("{proportion} (n={n})")) %>% 
  select(Overall = n_prop)
  

results_equivalence %>% 
  group_by(equi_simple, kapitelbezeichnung) %>% 
  summarise(n = sum(n), proportion = sum(proportion)) %>% 
  mutate_at("proportion", ~ scales::percent(x = .x, accuracy = 0.1)) %>% 
  mutate(n_prop = glue::glue("{proportion} (n={n})")) %>% 
  select(-n, -proportion) %>% 
  pivot_wider(id_cols = equi_simple, names_from = kapitelbezeichnung, values_from = n_prop, values_fill = list(n_prop = "-")) %>%
  ungroup() %>% 
  mutate_all(~ stringr::str_replace(string = .x, pattern = "\\% \\(", replacement = "%\n(")) %>% 
  bind_cols(overall_equ_results_simple) %>% 
  select(equi_simple, Overall, dplyr::everything()) %>% 
  gt::gt(rowname_col = "equi_simple") %>% 
  tab_spanner(label = "Chapter", columns = matches("^\\d\\d")) %>% 
  gt::tab_stubhead(label = "Equivalence Categories") %>% 
  gt::tab_style(
    style = list(
      gt::cell_text(align = "center") 
    ),
    locations = gt::cells_body()  
  )
Equivalence Categories Overall Chapter
01 Stammdaten 02 Allgemeinstatus 03 Wundanamnese 04 Wundstatus 05 Diagnostik 06 Therapie
Semantic Match present (Degree 1 and 2) 67.2% (n=180) 50.0% (n=17) 83.3% (n=55) 75.0% (n=18) 64.9% (n=37) 57.1% (n=8) 61.6% (n=45)
Semantic Asymmetry present (Degree 3 and 4) 11.9% (n=32) 14.7% (n=5) 6.1% (n=4) 4.2% (n=1) 12.3% (n=7) 21.4% (n=3) 16.4% (n=12)
Semantic Match absent (Degree 5) 20.9% (n=56) 35.3% (n=12) 10.6% (n=7) 20.8% (n=5) 22.8% (n=13) 21.4% (n=3) 21.9% (n=16)
concepts %>% 
  group_by(id) %>% 
  filter(finales_konzept == min(finales_konzept)) %>% 
  distinct(id, .keep_all = TRUE)
## # A tibble: 0 x 15
## # Groups:   id [0]
## # … with 15 variables: id <chr>, id_mapper <chr>, chapter <chr>,
## #   kapitelbezeichnung <chr>, bezeichnung <chr>, finaler_beschluss <chr>,
## #   snomed_code <chr>, descriptor <chr>, equi_final <ord>,
## #   finales_konzept <chr>, equ_jens <ord>, equ_mareike <ord>, agreement <chr>,
## #   map <chr>, kapitel <chr>
results_equivalence_agg <- results_equivalence %>% 
  mutate_at(.vars = "equi_final", 
            .funs =  ~ if_else(
              condition = .x == "No map is possible",
              true = "No",
              false = "Yes"
            )) 

coverage_overall <- results_equivalence_agg %>% 
  group_by(equi_final) %>% 
  summarise(n = sum(n), proportion = sum(proportion)) %>% 
  mutate(kapitelbezeichnung = "") %>% 
  mutate(group = "Overall") %>% 
  mutate(proportion = n / sum(n)) %>% 
  mutate(proportion = glue::glue("{scales::percent(proportion, accuracy = .01)} (n={n})")) %>% 
  select(-n) %>% 
  pivot_wider(id_cols = c("kapitelbezeichnung", "group"), names_from = equi_final, values_from = proportion)

coverage_by_chapter <- results_equivalence_agg %>% 
  group_by(equi_final, kapitelbezeichnung) %>% 
  summarise(n = sum(n), proportion = sum(proportion)) %>% 
  mutate(group = "By Chapter") %>% 
  arrange(kapitelbezeichnung) %>% 
  mutate(proportion = glue::glue("{scales::percent(proportion, accuracy = .01)} (n={n})")) %>% 
  mutate_at(c("kapitelbezeichnung", "proportion"), as.character) %>% 
  select(-n) %>% 
  pivot_wider(id_cols = c("kapitelbezeichnung", "group"), names_from = equi_final, values_from = proportion)

coverage_by_chapter %>% 
  bind_rows(coverage_overall) %>% 
  gt::gt(rowname_col = "kapitelbezeichnung", groupname_col = "group") %>% 
  gt::tab_spanner(label = "SNOMED CT identified", columns = c("No", "Yes"))
SNOMED CT identified
No Yes
By Chapter
01 Stammdaten 35.29% (n=12) 64.71% (n=22)
02 Allgemeinstatus 10.61% (n=7) 89.39% (n=59)
03 Wundanamnese 20.83% (n=5) 79.17% (n=19)
04 Wundstatus 22.81% (n=13) 77.19% (n=44)
05 Diagnostik 21.43% (n=3) 78.57% (n=11)
06 Therapie 21.92% (n=16) 78.08% (n=57)
Overall
20.90% (n=56) 79.10% (n=212)