Group-based trajectory modelling

Author

Kuan Liu

1. Hands-on example of fitting a group-based trajectory model

This tutorial aims to provide a quick example for implementing flexmix R package to run a group-based trajectory model.

1.1 The ICTUS (Impact of Cholinergic Treatment USe) cohort

  • Design: Prospective, multicenter longitudinal observational cohort of Alzheimer’s disease (AD) patients in Europe

    • A cohort of 1380 patients with Alzheimer disease followed-up in 12 European countries.

    • These patients were included between February 2003 and July 2005 in 29 centres specialized in neurology, geriatrics, psychiatry, or psycho-geriatrics with a recognized experience in the diagnosis and management of Alzheimer disease.

    • These patients were examined at six-month intervals over two years. Each examination included (though not exclusively) an Mini Mental Score (MMS) assessment.

  • Recruitment period: 2003–2005

  • Study aims including to describe natural history and clinical course of mild–moderate AD

  • Reynish, E., Cortes, F., Andrieu, S., Cantet, C., Olde Rikkert, M., Melis, R., Froelich, L., Frisoni, G., Jonsson, L., Visser, P., et al., 2007. The ictus study: A prospective longitudinal observational study of 1,380 ad patients in europe. Neuroepidemiology 29 (1-2), 29-38

  • Vellas, B., Hausner, L., Frolich, L., Cantet, C., Gardette, V., Reynish, E., Gillette, S., Aguera-Morales, E., Auriacombe, S., Boada, M., et al., 2012. Progression of alzheimer disease in europe: Data from the european ictus study. Current Alzheimer Research 9 (8), 902-912.

library(tidyverse)
library(DT)

ictusShort <- read.csv("ictusShort.csv", header = TRUE)

# look at new data;
datatable(ictusShort,
          rownames = FALSE,
          options = list(dom = 't')) 
str(ictusShort)
'data.frame':   1374 obs. of  16 variables:
 $ id    : chr  "I-1" "I-2" "I-3" "I-4" ...
 $ MMS.1 : int  18 22 26 19 22 20 15 26 24 23 ...
 $ MMS.2 : int  17 20 26 20 22 20 15 27 25 24 ...
 $ MMS.3 : int  16 22 26 19 21 19 13 26 25 23 ...
 $ MMS.4 : int  16 20 25 20 23 18 10 25 23 25 ...
 $ MMS.5 : int  16 20 28 19 21 20 11 24 21 22 ...
 $ MMS.6 : int  17 19 27 18 23 20 9 26 21 22 ...
 $ MMS.7 : int  17 21 27 19 23 20 7 22 23 21 ...
 $ MMS.8 : int  17 21 26 20 21 21 7 23 21 20 ...
 $ MMS.9 : int  17 20 27 18 21 17 7 22 22 21 ...
 $ MMS.10: int  18 20 26 19 22 17 6 24 22 19 ...
 $ MMS.11: int  18 21 27 18 22 20 5 25 21 18 ...
 $ MMS.12: int  17 14 27 19 22 21 5 23 22 17 ...
 $ MMS.13: int  15 12 27 17 20 21 7 23 23 16 ...
 $ MMS.14: int  13 12 29 16 22 21 3 24 22 16 ...
 $ MMS.15: int  14 14 28 16 22 7 5 23 23 17 ...

1.2 Descriptive analysis

library(gtsummary)
library(stringr)

# reshape to long: one row per person per visit
ictus_long <- ictusShort %>%
  pivot_longer(
    cols = starts_with("MMS."),
    names_to = "visit_raw",
    values_to = "MMS"
  ) %>%
  mutate(
    # extract the integer after "MMS."
    visit = as.integer(str_extract(visit_raw, "\\d+"))
  ) %>%
  select(id, visit, MMS) %>%
  arrange(id, visit)

mms_summary_by_visit <- ictus_long %>%
  group_by(visit) %>%
  summarise(
    n          = sum(!is.na(MMS)),
    mean_MMS   = mean(MMS, na.rm = TRUE),
    sd_MMS     = sd(MMS, na.rm = TRUE),
    median_MMS = median(MMS, na.rm = TRUE),
    p25_MMS    = quantile(MMS, 0.25, na.rm = TRUE),
    p75_MMS    = quantile(MMS, 0.75, na.rm = TRUE),
    .groups    = "drop"
  ) %>%
  mutate(
    across(
      c(mean_MMS, sd_MMS, median_MMS, p25_MMS, p75_MMS),
      ~ round(.x, 1)
    )
  )

datatable(
  mms_summary_by_visit,
  rownames = FALSE,
  options = list(dom = "t", pageLength = 15)
)
ictus_long %>%
  ggplot(aes(x = visit, y = MMS, group = id)) +
  geom_line(alpha = 0.05) +
  geom_point(alpha = 0.05, size = 0.4) +
  scale_x_continuous(breaks = 1:15) +
  labs(
    x = "Visit",
    y = "MMS score",
    title = "Individual MMS trajectories"
  ) +
  theme_minimal()

1.3 Group-based trajectory modelling with flexmix R package

Model 1 Three classes

library(flexmix)

set.seed(123)

# 3-class model
mod3 <- flexmix(
  MMS ~ poly(visit, 2, raw = TRUE) | id,
  data = ictus_long,
  k = 3,
  control = list(minprior=0) # by default flexmix drop a class if miniprior threshold less than 0.05;
)

# BIC for 3-class
BIC(mod3)
[1] 103097.3

Model 2 Four classes

set.seed(123)

mod4 <- flexmix(
  MMS ~ poly(visit, 2, raw = TRUE) | id,
  data = ictus_long,
  k = 4,
  control = list(minprior=0)
)

BIC(mod4)
[1] 98081.2

Model 3 Five classes

set.seed(123)

mod5 <- flexmix(
  MMS ~ poly(visit, 2, raw = TRUE) | id,
  data = ictus_long,
  k = 5,
  control = list(minprior=0)
)

BIC(mod5)
[1] 96094.5

Compare models

# comparing BIC (lower better);
# Five clusters best model;
bic_comp <- tibble(
  k   = c(3, 4, 5),
  BIC = c(BIC(mod3), BIC(mod4), BIC(mod5))
)

bic_comp
# A tibble: 3 × 2
      k     BIC
  <dbl>   <dbl>
1     3 103097.
2     4  98081.
3     5  96095.
# we can also compare using likelihood ratio test Lo–Mendell–Rubin test;
library(tidyLPA)
# LMR for k vs k–1 needs the k–1 model, so we add a 2-class model
mod2 <- flexmix(
  MMS ~ poly(visit, 2, raw = TRUE) | id,
  data = ictus_long,
  k = 2
)

BIC(mod2)
[1] 110131
# log-likelihoods
ll2 <- as.numeric(logLik(mod2))
ll3 <- as.numeric(logLik(mod3))
ll4 <- as.numeric(logLik(mod4))
ll5 <- as.numeric(logLik(mod5))

# number of parameters (flexmix stores this in attr(logLik, "df"))
p2 <- attr(logLik(mod2), "df")
p3 <- attr(logLik(mod3), "df")
p4 <- attr(logLik(mod4), "df")
p5 <- attr(logLik(mod5), "df")

# 2-class (null) vs 3-class (alt)
lmr_2_vs_3 <- calc_lrt(
  n            = 1374,
  null_ll      = ll2,
  null_param   = p2,
  null_classes = 2,
  alt_ll       = ll3,
  alt_param    = p3,
  alt_classes  = 3
)

lmr_2_vs_3
Lo-Mendell-Rubin ad-hoc adjusted likelihood ratio rest:

LR = 7083.361, LMR LR (df = 5) = 6770.995, p < 0.001
# 3-class (null) vs 4-class (alt)
lmr_3_vs_4 <- calc_lrt(
  n            = 1374,
  null_ll      = ll3,
  null_param   = p3,
  null_classes = 3,
  alt_ll       = ll4,
  alt_param    = p4,
  alt_classes  = 4
)

lmr_3_vs_4
Lo-Mendell-Rubin ad-hoc adjusted likelihood ratio rest:

LR = 5065.769, LMR LR (df = 5) = 4842.375, p < 0.001
# 4-class (null) vs 5-class (alt)
lmr_4_vs_5 <- calc_lrt(
  n            = 1374,
  null_ll      = ll4,
  null_param   = p4,
  null_classes = 4,
  alt_ll       = ll5,
  alt_param    = p5,
  alt_classes  = 5
)

lmr_4_vs_5
Lo-Mendell-Rubin ad-hoc adjusted likelihood ratio rest:

LR = 2036.369, LMR LR (df = 5) = 1946.568, p < 0.001

Final model: five classes

# assess class certainty;
# For everyone assigned to class k, what is the mean posterior probability of that class?;

post5_mat <- posterior(mod5)
head(post5_mat)
             [,1]         [,2]          [,3]      [,4]       [,5]
[1,] 4.427551e-38 0.0004083933 4.856156e-116 0.9297276 0.06986404
[2,] 4.427551e-38 0.0004083933 4.856156e-116 0.9297276 0.06986404
[3,] 4.427551e-38 0.0004083933 4.856156e-116 0.9297276 0.06986404
[4,] 4.427551e-38 0.0004083933 4.856156e-116 0.9297276 0.06986404
[5,] 4.427551e-38 0.0004083933 4.856156e-116 0.9297276 0.06986404
[6,] 4.427551e-38 0.0004083933 4.856156e-116 0.9297276 0.06986404
# hard class label for each row by highest proability;
class_vec <- clusters(mod5)   

# posterior probability *for the assigned class* for each row
assigned_prob <- post5_mat[cbind(seq_len(nrow(post5_mat)), class_vec)]

# mean posterior for assigned class by class
mean_assigned_by_class <- tapply(assigned_prob, class_vec, mean)
mean_assigned_by_class
        1         2         3         4         5 
0.9852494 0.9835952 0.9860250 0.9811287 0.9822495 
ictus_long5 <- ictus_long %>%
  mutate(class5 = factor(clusters(mod5)))

n_per_class <- ictus_long5 %>%
  distinct(id, class5) %>%
  count(class5, name = "n_patients")

n_per_class
# A tibble: 5 × 2
  class5 n_patients
  <fct>       <int>
1 1             427
2 2             122
3 3             198
4 4             341
5 5             286
mms_class_mean <- ictus_long5 %>%
  group_by(class5, visit) %>%
  summarise(
    mean_MMS = mean(MMS, na.rm = TRUE),
    .groups  = "drop"
  )

ggplot() +
  # raw spaghetti in faint black
  geom_line(
    data  = ictus_long5,
    aes(x = visit, y = MMS, group = id),
    color = "black",
    alpha = 0.01
  ) +
  # mean trajectories per class as dashed coloured lines
  geom_line(
    data  = mms_class_mean,
    aes(x = visit, y = mean_MMS, color = class5),
    linetype = "dashed",
    size = 1.1
  ) +
  scale_x_continuous(breaks = 1:15) +
  scale_color_brewer(palette = "Dark2") +
  labs(
    x = "Visit",
    y = "MMS",
    color = "Class",
    title = "Latent trajectory classes (k = 5)"
  ) +
  theme_minimal()