knitr::opts_chunk$set(echo = TRUE, include = TRUE, message=FALSE, warning=FALSE)

library(tidyverse) # for data manipulation
library(googlesheets4) # Read raw data stored in a google sheet document
library(vcd) # Calculation reliability statistics, e.g. Kappa
library(gt) # This packages supports the creation of tables

fct_label_inorder <- c("Equivalence of meaning; lexical, as well as conceptual",
                       "Equivalence of meaning, but with synonymy.",
                       "Source concept is broader and has a less specific meaning than the target concept",
                       "Source concept is narrower and has a more specific meaning than the target concept",
                       "No map is possible")

# Data preprocessing
# Among other tasks: relabels factors
# concepts <- read_csv(file = "./data/mapping-results.csv", col_types = cols(.default = "c")) %>% 
concepts <- read_csv(file = "~/Downloads/überarbeitet-snomed-mapping - Tabellenblatt1 (2).csv", col_types = cols(.default = "c")) %>% 
  mutate_at(.vars = c("equ_jens", "equ_mareike", "equi_final"), 
            .funs = ~ factor(x = .x, levels = fct_label_inorder, labels = fct_label_inorder, ordered = TRUE))

Reliabilität Mapping

# Restructure/ Pivot Data

mapping <- concepts %>% 
  select(id, id_mapper, kapitel, kapitelbezeichnung, bezeichnung, snomed_code, kapitel) %>% 
  pivot_wider(id_cols = c("id", "kapitelbezeichnung", "bezeichnung", "kapitel"),
              names_from = "id_mapper",
              values_from = "snomed_code")

# Caluclate Kappa values

# Kappa Overall
kappa_overall <- irr::kappam.fleiss(mapping[, c("9f6", "83f", "605")])

# Kappa by Chapter
by_chapter <- mapping %>% 
  select(-id, -bezeichnung) %>% 
  mutate_at("kapitelbezeichnung", stringr::str_to_title) %>% 
  nest(data = c(`9f6`, `83f`, `605`)) %>% 
  mutate(kappa = map(data, ~ irr::kappam.fleiss(ratings = .x))) %>% 
  mutate(kappa_val = map_dbl(kappa, pluck, "value")) %>% 
  mutate(kappa_subjects = map_dbl(kappa, pluck, "subjects")) %>% 
  mutate(group = "By Chapter")

overall <- tibble(group = "Overall",
       kappa_val = kappa_overall$value,
       kappa_subjects = kappa_overall$subjects) 

# Calculate Agreement
rater <- mapping %>% 
  set_names(c("id", "kapitelbezeichnung", "bezeichnung", "kapitel", "phil", "lisa", "anne")) 
  
agreement <- rater %>% 
  mutate(phil_anne = phil == anne) %>% 
  mutate(anne_lisa = anne == lisa) %>% 
  mutate(phil_lisa = phil == lisa) %>% 
  mutate(agreement = (phil_anne + anne_lisa + phil_lisa) / 3) 

overall <- agreement %>% 
  summarise(agreement = mean(agreement)) %>% 
  bind_cols(overall)

by_chapter <- agreement %>% 
  mutate_at("kapitelbezeichnung", stringr::str_to_title) %>% 
  group_by(kapitelbezeichnung) %>% 
  summarise(agreement = mean(agreement)) %>% 
  full_join(by_chapter, by = "kapitelbezeichnung") %>% 
  select(kapitelbezeichnung, data, kappa, agreement, everything())

mapping_results <- by_chapter %>% 
  select(-data, -kappa) %>% 
  bind_rows(overall)
  
mapping_results %>% 
  mutate_at("kapitelbezeichnung", stringr::str_to_title) %>% 
  set_names(c("Chapter", "Percent", "Chapter_Nr", "Fleiss-Kappa", "Number of Items", "Group")) %>% 
  mutate(Chapter = glue::glue("{Chapter_Nr} - {Chapter}", .na =  NULL)) %>% 
  select(-Chapter_Nr) %>% 
  arrange(Chapter) %>% 
  mutate_at("Chapter", ~ if_else(condition = is.na(.x), true = "", false = as.character(.x))) %>% 
  gt::gt(rowname_col = "Chapter", groupname_col = "Group") %>%
  gt::fmt_percent(columns = vars("Percent"), decimals = 2) %>% 
  gt::fmt(columns = vars("Fleiss-Kappa"), fns = function(x) sprintf(x, fmt = '%#.3f')) %>% 
  gt::tab_style(
    style = list(
      gt::cell_text(align = "center") 
    ),
    locations = gt::cells_body()  
  ) %>% 
  gt::tab_spanner(label = "Agreement", columns = matches("Percent|Fleiss")) %>% 
  gt::tab_header(
    title = gt::md("Reliability of the Mapping between three Mappers"),
  )

Reliability of the Mapping between three Mappers

	Agreement		Number of Items
	Percent	Fleiss-Kappa	Number of Items
By Chapter
1 - 01 Stammdaten	60.78%	0.575	34
2 - 02 Allgemeinstatus	75.76%	0.754	66
3 - 03 Wundanamnese	58.33%	0.568	24
4 - 04 Wundstatus	38.60%	0.366	57
5 - 05 Diagnostik	33.33%	0.280	14
6 - 06 Therapie	39.73%	0.367	73
Overall
	52.36%	0.512	268

Reliability of the Equivalence Testing ======================================

overall_equ <- irr::kappam.fleiss(concepts[, c("equ_jens", "equ_mareike")])

by_chapter_equ <- concepts %>% 
  select(id, kapitelbezeichnung, equ_jens, equ_mareike) %>% 
  nest(data = c(id, equ_jens, equ_mareike)) %>% 
  mutate(kappa = map(data, ~ irr::kappam.fleiss(ratings = .x[, c("equ_jens", "equ_mareike")]))) %>% 
  mutate(kappa_val = map_dbl(kappa, pluck, "value")) %>% 
  mutate(kappa_subjects = map_dbl(kappa, pluck, "subjects")) %>% 
  unnest(data) %>% 
  mutate(agreement = equ_jens == equ_mareike) %>% 
  group_by(kapitelbezeichnung) %>% 
  mutate(agreement = sum(agreement) / length(agreement)) %>% 
  summarise_at(c("kappa_val", "kappa_subjects", "agreement"), .funs = mean) %>% 
  mutate(Group = "By Chapter")

overall_equ <- tibble(
  Chapter = "",
  agreement = sum(concepts$equ_jens == concepts$equ_mareike) / nrow(concepts),
  kappa_val = overall_equ$value,
  kappa_subjects = overall_equ$subjects,
  Group = "Overall"
)

by_chapter_equ %>% 
  rename(Chapter = kapitelbezeichnung) %>% 
  mutate_at("Chapter", as.character) %>% 
  bind_rows(overall_equ) %>% 
  select(Chapter, Percent = agreement, `Fleiss-Kappa` = kappa_val, `Number of Items` = kappa_subjects, Group) %>% 
  gt::gt(rowname_col = "Chapter", groupname_col = "Group") %>%
  gt::fmt_percent(columns = vars("Percent"), decimals = 2) %>% 
  gt::fmt(columns = vars("Fleiss-Kappa"), fns = function(x) sprintf(x, fmt = '%#.3f')) %>% 
  gt::tab_style(
    style = list(
      gt::cell_text(align = "center") 
    ),
    locations = gt::cells_body()  
  ) %>% 
  gt::tab_spanner(label = "Agreement", columns = matches("Percent|Fleiss")) %>% 
  gt::tab_header(
    title = gt::md("Reliability of the Equivalence Rating between two Rater"),
  )

Reliability of the Equivalence Rating between two Rater

	Agreement		Number of Items
	Percent	Fleiss-Kappa	Number of Items
By Chapter
01 Stammdaten	83.33%	0.772	102
02 Allgemeinstatus	89.90%	0.835	198
03 Wundanamnese	70.83%	0.583	72
04 Wundstatus	74.27%	0.641	171
05 Diagnostik	59.52%	0.408	42
06 Therapie	75.34%	0.667	219
Overall
	78.48%	0.702	804

Coverage ========

coverage_results <- concepts %>% 
  group_by(id) %>% 
  filter(!is.na(finales_konzept)) %>% 
  distinct(id, .keep_all = TRUE) %>% 
  group_by(equi_final) %>% 
  count() %>% 
  ungroup() %>% 
  mutate(proportion = n / sum(n)) %>% 
  mutate(map = c("Map", "Map", "Map", "Map", "No Map")) %>% 
  group_by(map) %>% 
  mutate(proportion_map = sum(proportion)) %>% 
  mutate(n_map = sum(n)) 
 
# Vielviel Prozent der Konzepte können insgesamt abgedeckt werden?
coverage_results %>% 
  rename("Number of Items" = n, "Proportion" = proportion) %>% 
  select(-n_map, -proportion_map) %>% 
  gt(rowname_col = "equi_final", groupname_col = "map") %>% 
  fmt_percent(columns = starts_with("proportion"), decimals = 2) %>%
  tab_spanner(label = "Equivalence Rating", columns = vars("Number of Items", "Proportion")) %>% 
  gt::tab_style(
    style = list(
      gt::cell_text(align = "center") 
    ),
    locations = gt::cells_body()  
  )

	Equivalence Rating
	Number of Items	Proportion
Map
Equivalence of meaning; lexical, as well as conceptual	117	43.66%
Equivalence of meaning, but with synonymy.	63	23.51%
Source concept is broader and has a less specific meaning than the target concept	6	2.24%
Source concept is narrower and has a more specific meaning than the target concept	26	9.70%
No Map
No map is possible	56	20.90%

In this study, three mappers researched SNOMED CT for 268 NKDUC items. Among them, 117 concepts had a direct match in SNOMED CT, 63 concepts had a match but which was classified as a match through synonymy. For 48 NKDUK items, a corresponding concept in the target terminology was missing. For the remainding 39 NKDUC items, the matach was asymmetrical, i.e., the meannig of the source concept was broader or narrower that its corresponding target concept.

results_equivalence <- concepts %>% 
  filter(!is.na(finales_konzept)) %>% 
  distinct(id, .keep_all = TRUE) %>% 
  group_by(equi_final, kapitelbezeichnung) %>% 
  count() %>% 
  group_by(kapitelbezeichnung) %>% 
  mutate(proportion = n / sum(n)) %>% 
  arrange(kapitelbezeichnung, equi_final) %>% 
  ungroup()
  
overall_equ_results <- results_equivalence %>% 
  group_by(equi_final) %>% 
  select(-proportion) %>% 
  summarize(n = sum(n)) %>% 
  ungroup %>% 
  mutate(proportion = n / sum(n)) %>% 
  mutate(kapitelbezeichnung = "Overall") %>% 
  mutate(section = "Overall") %>% 
  mutate_at("proportion", ~ scales::percent(x = .x, accuracy = 0.1)) %>% 
  mutate(n_prop = glue::glue("{proportion} (n={n})")) %>% 
  select(Overall = n_prop)
  

results_equivalence %>% 
  mutate_at("proportion", ~ scales::percent(x = .x, accuracy = 0.1)) %>% 
  mutate(n_prop = glue::glue("{proportion} (n={n})")) %>% 
  select(-n, -proportion) %>% 
  pivot_wider(id_cols = equi_final, names_from = kapitelbezeichnung, values_from = n_prop, values_fill = list(n_prop = "-")) %>%
  mutate_all(~ stringr::str_replace(string = .x, pattern = "\\% \\(", replacement = "%\n(")) %>% 
  bind_cols(overall_equ_results) %>% 
  select(equi_final, Overall, dplyr::everything()) %>% 
  gt::gt(rowname_col = "equi_final") %>% 
  tab_spanner(label = "Chapter", columns = matches("^\\d\\d")) %>% 
  gt::tab_stubhead(label = "Equivalence Categories") %>% 
  gt::tab_style(
    style = list(
      gt::cell_text(align = "center") 
    ),
    locations = gt::cells_body()  
  )

Equivalence Categories	Overall	Chapter
Equivalence Categories	Overall	01 Stammdaten	02 Allgemeinstatus	03 Wundanamnese	04 Wundstatus	05 Diagnostik	06 Therapie
Equivalence of meaning; lexical, as well as conceptual	43.7% (n=117)	23.5% (n=8)	59.1% (n=39)	50.0% (n=12)	43.9% (n=25)	35.7% (n=5)	38.4% (n=28)
Equivalence of meaning, but with synonymy.	23.5% (n=63)	26.5% (n=9)	24.2% (n=16)	25.0% (n=6)	21.1% (n=12)	21.4% (n=3)	23.3% (n=17)
Source concept is broader and has a less specific meaning than the target concept	2.2% (n=6)	2.9% (n=1)	3.0% (n=2)	4.2% (n=1)	1.8% (n=1)	\-	1.4% (n=1)
Source concept is narrower and has a more specific meaning than the target concept	9.7% (n=26)	11.8% (n=4)	3.0% (n=2)	\-	10.5% (n=6)	21.4% (n=3)	15.1% (n=11)
No map is possible	20.9% (n=56)	35.3% (n=12)	10.6% (n=7)	20.8% (n=5)	22.8% (n=13)	21.4% (n=3)	21.9% (n=16)

results_equivalence <- results_equivalence %>% 
  mutate(equi_simple = as.integer(equi_final)) %>% 
  mutate(equi_simple = case_when(
    equi_simple == 1 ~ "1",
    equi_simple == 2 ~ "1",
    equi_simple == 3 ~ "2",
    equi_simple == 4 ~ "2",
    equi_simple == 5 ~ "3"
  )) %>% 
  mutate(equi_simple = factor(x = equi_simple,
                              levels = 1:3, 
                              labels = c("Semantic Match present (Degree 1 and 2)",
                                         "Semantic Asymmetry present (Degree 3 and 4)",
                                         "Semantic Match absent (Degree 5)"),
                              ordered = TRUE
                              )) 
overall_equ_results_simple <- results_equivalence %>% 
  group_by(equi_simple) %>% 
  select(-proportion) %>% 
  summarize(n = sum(n)) %>% 
  ungroup %>% 
  mutate(proportion = n / sum(n)) %>% 
  mutate(kapitelbezeichnung = "Overall") %>% 
  mutate(section = "Overall") %>% 
  mutate_at("proportion", ~ scales::percent(x = .x, accuracy = 0.1)) %>% 
  mutate(n_prop = glue::glue("{proportion} (n={n})")) %>% 
  select(Overall = n_prop)
  

results_equivalence %>% 
  group_by(equi_simple, kapitelbezeichnung) %>% 
  summarise(n = sum(n), proportion = sum(proportion)) %>% 
  mutate_at("proportion", ~ scales::percent(x = .x, accuracy = 0.1)) %>% 
  mutate(n_prop = glue::glue("{proportion} (n={n})")) %>% 
  select(-n, -proportion) %>% 
  pivot_wider(id_cols = equi_simple, names_from = kapitelbezeichnung, values_from = n_prop, values_fill = list(n_prop = "-")) %>%
  ungroup() %>% 
  mutate_all(~ stringr::str_replace(string = .x, pattern = "\\% \\(", replacement = "%\n(")) %>% 
  bind_cols(overall_equ_results_simple) %>% 
  select(equi_simple, Overall, dplyr::everything()) %>% 
  gt::gt(rowname_col = "equi_simple") %>% 
  tab_spanner(label = "Chapter", columns = matches("^\\d\\d")) %>% 
  gt::tab_stubhead(label = "Equivalence Categories") %>% 
  gt::tab_style(
    style = list(
      gt::cell_text(align = "center") 
    ),
    locations = gt::cells_body()  
  )

Equivalence Categories	Overall	Chapter
Equivalence Categories	Overall	01 Stammdaten	02 Allgemeinstatus	03 Wundanamnese	04 Wundstatus	05 Diagnostik	06 Therapie
Semantic Match present (Degree 1 and 2)	67.2% (n=180)	50.0% (n=17)	83.3% (n=55)	75.0% (n=18)	64.9% (n=37)	57.1% (n=8)	61.6% (n=45)
Semantic Asymmetry present (Degree 3 and 4)	11.9% (n=32)	14.7% (n=5)	6.1% (n=4)	4.2% (n=1)	12.3% (n=7)	21.4% (n=3)	16.4% (n=12)
Semantic Match absent (Degree 5)	20.9% (n=56)	35.3% (n=12)	10.6% (n=7)	20.8% (n=5)	22.8% (n=13)	21.4% (n=3)	21.9% (n=16)

concepts %>% 
  group_by(id) %>% 
  filter(finales_konzept == min(finales_konzept)) %>% 
  distinct(id, .keep_all = TRUE)

## # A tibble: 0 x 15
## # Groups:   id [0]
## # … with 15 variables: id <chr>, id_mapper <chr>, chapter <chr>,
## #   kapitelbezeichnung <chr>, bezeichnung <chr>, finaler_beschluss <chr>,
## #   snomed_code <chr>, descriptor <chr>, equi_final <ord>,
## #   finales_konzept <chr>, equ_jens <ord>, equ_mareike <ord>, agreement <chr>,
## #   map <chr>, kapitel <chr>

results_equivalence_agg <- results_equivalence %>% 
  mutate_at(.vars = "equi_final", 
            .funs =  ~ if_else(
              condition = .x == "No map is possible",
              true = "No",
              false = "Yes"
            )) 

coverage_overall <- results_equivalence_agg %>% 
  group_by(equi_final) %>% 
  summarise(n = sum(n), proportion = sum(proportion)) %>% 
  mutate(kapitelbezeichnung = "") %>% 
  mutate(group = "Overall") %>% 
  mutate(proportion = n / sum(n)) %>% 
  mutate(proportion = glue::glue("{scales::percent(proportion, accuracy = .01)} (n={n})")) %>% 
  select(-n) %>% 
  pivot_wider(id_cols = c("kapitelbezeichnung", "group"), names_from = equi_final, values_from = proportion)

coverage_by_chapter <- results_equivalence_agg %>% 
  group_by(equi_final, kapitelbezeichnung) %>% 
  summarise(n = sum(n), proportion = sum(proportion)) %>% 
  mutate(group = "By Chapter") %>% 
  arrange(kapitelbezeichnung) %>% 
  mutate(proportion = glue::glue("{scales::percent(proportion, accuracy = .01)} (n={n})")) %>% 
  mutate_at(c("kapitelbezeichnung", "proportion"), as.character) %>% 
  select(-n) %>% 
  pivot_wider(id_cols = c("kapitelbezeichnung", "group"), names_from = equi_final, values_from = proportion)

coverage_by_chapter %>% 
  bind_rows(coverage_overall) %>% 
  gt::gt(rowname_col = "kapitelbezeichnung", groupname_col = "group") %>% 
  gt::tab_spanner(label = "SNOMED CT identified", columns = c("No", "Yes"))

	SNOMED CT identified
	No	Yes
By Chapter
01 Stammdaten	35.29% (n=12)	64.71% (n=22)
02 Allgemeinstatus	10.61% (n=7)	89.39% (n=59)
03 Wundanamnese	20.83% (n=5)	79.17% (n=19)
04 Wundstatus	22.81% (n=13)	77.19% (n=44)
05 Diagnostik	21.43% (n=3)	78.57% (n=11)
06 Therapie	21.92% (n=16)	78.08% (n=57)
Overall
	20.90% (n=56)	79.10% (n=212)

snomed-nkduc

The data and the analysis of our SNOMED CT mapping project. In this project, we mapped clinical concepts of the NKDUC (National Consensus for the Documentation of Leg Ulcers) with the international clinical terminology SNOMED CT.

Reliabilität Mapping