NHANES Mortality Analysis

Code
# load packages 
library(tidyverse)
library(gtsummary)
library(gt)
library(tidymodels)
library(censored)
library(paletteer)
library(survey)
library(patchwork)
# paletteer_d("colorBlindness::PairedColor12Steps")
col1 = "#FF9932"; col2 = "#65CCFF"
col1 = "#CC5151"; col2 = "#422CB2"
# paletteer_d("ggthemes::colorblind")
col_vec = c("#000000FF", "#009E73FF", "#CC79A7FF", "#E69F00FF", "#D55E00FF", "#56B4E9FF", "#0072B2FF")

# load data 
# results from univariate
wt_single_male = readRDS(here::here("results", "metrics_wtd_100_singlevar_male.rds"))
wt_single_female = readRDS(here::here("results", "metrics_wtd_100_singlevar_female.rds"))
wt_single = readRDS(here::here("results", "metrics_wtd_100_singlevar.rds")) %>% 
  group_by(variable) %>% 
  mutate(ind = row_number(),
         rep = floor((row_number()-1)/10)) %>% 
  group_by(variable, rep) %>% 
  summarize(concordance = mean(concordance))


# results from multivariable
wt_male = readRDS(here::here("results", "metrics_wtd_100_male.rds"))
wt_female = readRDS(here::here("results", "metrics_wtd_100_female.rds"))
wt_all = readRDS(here::here("results", "metrics_wtd_100.rds")) %>% 
   group_by(model) %>% 
  mutate(ind = row_number(),
         rep = floor((row_number()-1)/10)) %>% 
  group_by(model, rep) %>% 
  summarize(concordance = mean(concordance))


# covariate/pa df 
df_all = readRDS(here::here("data", "covariates_accel_mortality_df.rds")) 

Loading and preparing data

See code_README.md for description of the pipeline to process the data. For this analysis, we have one estimate for each physical activity variable, along with demographic variables for all participants who received an accelerometer. We create a separate dataset for the mortality analysis restricted to just individuals with valid accelerometry data and who are between 50 and 79 years old, and who are not missing any covariate data that will be used in the models. Finally, we winsorize the PA variables at the 99th percentile, to remove any extremely high outliers.

Code
df_mortality =
  df_all %>%
  filter(num_valid_days >= 3) %>%
  filter(age_in_years_at_screening >= 50 &
           age_in_years_at_screening < 80) 

df_accel = 
  df_all %>% 
  filter(num_valid_days >= 3 & age_in_years_at_screening >= 18)

df_mortality_win =
  df_mortality %>%
  filter(if_all(.cols = c(age_in_years_at_screening, gender,
                          race_hispanic_origin, cat_education,
                          cat_bmi, chd, chf, heartattack, stroke, cancer,
                          bin_diabetes, cat_alcohol, cat_smoke, bin_mobilityproblem,
                          general_health_condition, mortstat, permth_exm, total_PAXMTSM),
                ~!is.na(.x))) %>%
  mutate(event_time = permth_exm / 12) %>% 
  ungroup() %>%
  mutate(across(c(contains("total"), contains("peak")), ~DescTools::Winsorize(.x, quantile(.x, probs = c(0, 0.99))))) 

df_accel_win =
  df_accel %>%
  mutate(across(c(contains("total"), contains("peak")), ~DescTools::Winsorize(.x, quantile(.x, probs = c(0, 0.99))))) 
Code
# TO DO: check these models 
# sample sizes for exclusion diagram

df_all %>% 
  count(data_release_cycle)

df_all %>% 
  group_by(data_release_cycle) %>% 
  count(has_accel)

acc_subset = df_all %>% 
  filter(has_accel)

acc_subset %>% 
  group_by(data_release_cycle) %>% 
  count()

acc_subset %>% 
  group_by(data_release_cycle) %>% 
  count(valid_accel)

acc_subset %>% 
  group_by(data_release_cycle) %>% 
  count(inclusion_type)

acc_subset %>% 
  filter(valid_accel) %>% 
  mutate(adult = age_in_years_at_screening >= 18) %>% 
  group_by(data_release_cycle) %>% 
  count(adult)

acc_subset %>% 
  filter(valid_accel) %>% 
  mutate(b50 = age_in_years_at_screening < 50,
         o80 = age_in_years_at_screening >= 80) %>% 
  group_by(data_release_cycle) %>% 
  count(b50, o80)

acc_subset %>% 
  filter(num_valid_days >= 3 & age_in_years_at_screening >= 50 & age_in_years_at_screening < 80) %>% 
  filter(!(if_all(.cols = c(age_in_years_at_screening, gender,
                          race_hispanic_origin, cat_education,
                          cat_bmi, chd, chf, heartattack, stroke, cancer,
                          bin_diabetes, cat_alcohol, cat_smoke, bin_mobilityproblem,
                          general_health_condition, mortstat, permth_exm),
                ~!is.na(.x)))) %>% 
  group_by(data_release_cycle) %>%
  count()

4351+269+34
6497-(4351+269+34)
3913+264+48

6020-4225

Figures

Distribution of PA Variables

Probably supplemental

Code
df_accel %>%
  select(contains("steps") & contains("total"), SEQN) %>% 
  pivot_longer(cols = -c(SEQN)) %>% 
  mutate(type = "Raw") %>% 
  bind_rows(df_accel_win %>%
              select(contains("steps") & contains("total"), SEQN) %>% 
              pivot_longer(cols = -c(SEQN)) %>% 
              mutate(type = "Winsorized")) %>%
  mutate(name = factor(name, labels = c("Actilife steps", "ADEPT", "Oak", "Stepcount RF", "Stepcount SSL", "Verisense", "Verisense rev."))) %>% 
  ggplot(aes(x = value / 1000, fill = type, col = type))+
  geom_density() + 
  facet_wrap(type~name, scales = "free", nrow = 2)+
  theme_bw() + 
  scale_fill_manual(values = c(col1, col2))+
  scale_color_manual(values = c(col1, col2))+
  theme(legend.position = "none",
        panel.grid.minor.x = element_blank(),
        axis.title = element_text(size = 14)) + 
  labs(x = "Mean Daily Steps x 1000", y = "Density", title = "Distribution of Mean Daily Step Counts",
       subtitle = "All Individuals aged 18+ with Valid Accelerometry Data")

Code
dir.create(here::here("manuscript", "figures"), showWarnings = FALSE,
           recursive = TRUE)
ggsave(here::here("manuscript", "figures", "step_distributions.png"), dpi = 400, width = 10, height = 8)

df_accel %>%
  select(!contains("steps") & contains("total"), SEQN) %>% 
  pivot_longer(cols = -c(SEQN)) %>% 
  mutate(type = "Raw") %>% 
  bind_rows(df_accel_win %>%
              select(!contains("steps") & contains("total"), SEQN) %>% 
              pivot_longer(cols = -c(SEQN)) %>% 
              mutate(type = "Winsorized")) %>%
  mutate(name = factor(name, labels = c("AC", "log10 AC", "MIMS", "log10 MIMS"))) %>% 
  ggplot(aes(x = value / 1000, fill = type, col = type))+
  geom_density() + 
  facet_wrap(type~name, scales = "free", nrow = 2)+
  theme_bw() + 
  scale_fill_manual(values = c(col1, col2))+
  scale_color_manual(values = c(col1, col2))+
  theme(legend.position = "none",
        panel.grid.minor.x = element_blank(),
        axis.title = element_text(size = 14)) + 
  labs(x = "Mean Daily Value x 1000", y = "Density", title = "Distribution of Mean PA",
       subtitle = "All Individuals aged 18+ with Valid Accelerometry Data")

Code
ggsave(here::here("manuscript", "figures", "pa_distributions.png"), dpi = 400, width = 10, height = 8)

Mean Step Counts and Peak Cadence by Age and Sex

Tent. Figure 1

Code
df_means = df_accel %>%
  filter(age_in_years_at_screening >= 18 & age_in_years_at_screening < 80) %>% 
  mutate(weight = full_sample_2_year_mec_exam_weight / 2,
         weight_norm = weight / mean(weight)) %>% 
  ungroup() 

# survey_design <- svydesign(ids = ~1, weights = ~weight, data = df)
survey_design = survey::svydesign(
  id = ~ masked_variance_pseudo_psu,
  strata = ~ masked_variance_pseudo_stratum,
  weights = ~ weight_norm,
  data = df_means,
  nest = TRUE
)

# Calculate mean estimate by age
calc_by_age =
  function(var, df) {
    formula = as.formula(paste("~", var))
    total_by_age_gender = svyby(formula,
                               ~ age_in_years_at_screening,
                               df,
                               svymean) %>%
      rename(mean = contains(var)) %>%
      mutate(metric = var)
  }

means_df = 
  map_dfr(.x = df_means %>% select(contains("total") | contains("peak")) %>% colnames(),
          .f = calc_by_age, df = survey_design)

models = means_df %>%
   mutate(lb = mean - 1.96 * se,
         ub = mean + 1.96 * se) %>% 
        tidyr::nest(data = -c(metric)) %>%
        dplyr::mutate(
                # Perform loess calculation on each group
                m = purrr::map(data, loess,
                               formula = mean ~ age_in_years_at_screening, span = .75),
                # Retrieve the fitted values from each model
                fitted_mean = purrr::map(m, `[[`, "fitted"),
                l = purrr::map(data, loess,
                               formula = lb ~ age_in_years_at_screening, span = .75),
                # Retrieve the fitted values from each model
                fitted_lb = purrr::map(l, `[[`, "fitted"),
                u = purrr::map(data, loess,
                               formula = ub ~ age_in_years_at_screening, span = .75),
                # Retrieve the fitted values from each model
                fitted_ub = purrr::map(u, `[[`, "fitted")
        )

# Apply fitted y's as a new column
results = models %>%
        dplyr::select(-m, -l, -u) %>%
        tidyr::unnest(cols = c(data, contains("fitted")))
Code
names(col_vec) = c("Actilife", "ADEPT", "Oak", "Stepcount RF","Stepcount SSL", "Verisense", "Verisense rev.")
p1 = results %>% 
  filter(grepl("step", metric) & grepl("total", metric)) %>%
  mutate(across(contains("fitted"), ~.x / 1000),
                metric = factor(metric, levels = c("total_actisteps", 
                                            "total_adeptsteps",
                                            "total_oaksteps",
                                              "total_scrfsteps",
                                            "total_scsslsteps",
                                            "total_vssteps",
                                            "total_vsrevsteps"),
                         labels = c("Actilife", "ADEPT", "Oak", "Stepcount RF", "Stepcount SSL",
                                    "Verisense", "Verisense rev."))) %>% 
  ggplot(aes(x = age_in_years_at_screening, y = fitted_mean,
             ymin = fitted_lb, ymax = fitted_ub, color = metric, fill = metric)) +
  facet_grid(. ~ metric) +
  geom_line(linewidth = 1) + 
  geom_ribbon(alpha = .2, linetype = 0) +
  scale_fill_manual(values = col_vec, name = "Algorithm") + 
  theme_light() + 
  scale_color_manual(values = col_vec, name = "Algorithm") + 
  geom_hline(aes(yintercept = 10), col = "darkgrey", linetype = "dashed") +
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        strip.text = element_text(size = 12)) +
  labs(x = "Age (years)", y = "Mean Daily Steps x 1000",
       title = "Smoothed Survey Weighted Mean Daily Steps by Age ")+
  scale_y_continuous(breaks=seq(0,16,1))+
    scale_x_continuous(breaks=seq(20,80,10))

p1

Code
p2 = results %>% 
  filter(grepl("peak", metric) & grepl("step", metric)) %>% 
  mutate(type = sub("_.*", "", metric),
         method = sub("^[^_]*_", "", metric),
         type = factor(type, labels = c("1-minute", "30-minute"))) %>% 
  filter(grepl("step", metric)) %>%
  mutate(method = factor(method, levels = c("actisteps", 
                                            "adeptsteps",
                                            "oaksteps",
                                            "scrfsteps",
                                            "scsslsteps",
                                            "vssteps",
                                            "vsrevsteps"),
                         labels = c("Actilife", "ADEPT", "Oak", "Stepcount RF", "Stepcount SSL", "Verisense", "Verisense rev."))) %>% 
  ggplot(aes(x = age_in_years_at_screening, y = fitted_mean,
             ymin = fitted_lb, ymax = fitted_ub, fill = method, color = method)) +
  facet_grid(. ~ type) +
  # geom_line(aes(linetype = type)) + 
  geom_line() + 
  geom_ribbon(alpha = 0.2, color = NA) + 
  # geom_ribbon(alpha = .2, aes(linetype = type), color = NA) +
  scale_fill_manual(values = col_vec, name = "Algorithm") + 
  scale_color_manual(values = col_vec, name = "Algorithm") + 
  theme_light() +
  # scale_linetype_discrete(labels = c("1 Minute", "30 Minute"), guide = F) + 
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        strip.text =  element_text(size = 12)) +
  labs(x = "Age (years)", y = "Peak Cadence (steps/min)",
       title = "Smoothed Survey Weighted Peak Cadence by Age")+
    scale_y_continuous(breaks=seq(20, 120,10))+
  scale_x_continuous(breaks=seq(20,80,10))
p2

Code
p3 = means_df %>% 
  filter(grepl("step", metric) & grepl("total", metric)) %>% 
  mutate(metric = factor(metric, levels = c("total_actisteps", 
                                            "total_adeptsteps",
                                            "total_oaksteps",
                                              "total_scrfsteps",
                                            "total_scsslsteps",
                                            "total_vssteps",
                                            "total_vsrevsteps"),
                         labels = c("Actilife", "ADEPT", "Oak", "Stepcount RF", "Stepcount SSL",
                                    "Verisense", "Verisense rev."))) %>% 
    group_by(metric) %>% 
  mutate(pct_chg = (mean - lag(mean))/lag(mean)*100) %>% 
  ggplot(aes(x = age_in_years_at_screening, y = pct_chg, color = metric))+
  geom_smooth(method = "loess", se = FALSE) +
  # geom_line() +
  scale_color_manual(values = col_vec, name = "Algorithm") + 
  geom_hline(aes(yintercept = 0), col = "darkgrey", linetype = "dashed") + 
  theme_light() + 
  labs(x = "Age (years)", y = "% change from previous year",
       title = "Survey weighted estimated per-year difference in mean daily steps")+
  theme(legend.position = c(.1, .4))+
    scale_x_continuous(breaks=seq(20,80,10))+
  scale_y_continuous(breaks=seq(-5, 5, 1))

p3 

Code
means_df %>% 
  filter(grepl("total", metric) & grepl("step", metric)) %>% 
  mutate(metric = factor(metric, levels = c("total_actisteps",
                                            "total_adeptsteps",
                                            "total_oaksteps",
                                            "total_vssteps",
                                            "total_vsrevsteps",
                                            "total_scsslsteps",
                                            "total_scrfsteps"),
                         labels = c("Actilife", "ADEPT", "Oak", "Verisense", "Verisense rev.", "Stepcount SSL", "Stepcount RF"))) %>%
    group_by(metric) %>% 
  mutate(pct_chg = (mean - lag(mean))/lag(mean)*100) %>% 
  ggplot(aes(x = age_in_years_at_screening, y = pct_chg, color = metric))+
  geom_smooth(method = "loess", se = FALSE) + 
  scale_color_brewer(palette = "Dark2", name = "") + 
  geom_hline(aes(yintercept = 0), col = "darkgrey", linetype = "dashed") + 
  theme_light() + 
  labs(x = "Age (yrs)", y = "Percent Change in Mean Daily Steps",
       title = "Percent Change in Mean Daily Steps by Age")+
  theme(legend.position = c(.1, .4),
        legend.title = element_blank())

Code
means_df %>% 
  filter(grepl("total", metric) & grepl("step", metric)) %>% 
  mutate(metric = factor(metric, levels = c("total_actisteps",
                                            "total_adeptsteps",
                                            "total_oaksteps",
                                            "total_vssteps",
                                            "total_vsrevsteps",
                                            "total_scsslsteps",
                                            "total_scrfsteps"),
                         labels = c("Actilife", "ADEPT", "Oak", "Verisense", "Verisense rev.", "Stepcount SSL", "Stepcount RF"))) %>%
    group_by(metric) %>% 
  mutate(pct_chg = (mean - lag(mean))/lag(mean)*100,
         chg = mean - lag(mean)) %>% 
  ggplot(aes(x = age_in_years_at_screening, y = chg, color = metric))+
  geom_smooth(method = "loess", se = FALSE) + 
  scale_color_brewer(palette = "Dark2", name = "") + 
  geom_hline(aes(yintercept = 0), col = "darkgrey", linetype = "dashed") + 
  theme_light() + 
  labs(x = "Age (yrs)", y = "Percent Change in Mean Daily Steps",
       title = "Change in Mean Daily Steps by Age")+
  theme(legend.position = c(.1, .27),
        legend.title = element_blank())

Code
p1 / (p2 + p3 ) + plot_layout(guides = "collect", nrow = 2, axis_titles = "collect") + plot_annotation(tag_levels = 'A') & theme(legend.position = 'bottom')

Code
ggsave(here::here("manuscript", "figures", "distributions.png"), dpi = 400, width = 10, height = 8)
Code
# calculate in age grps of 5 
df_means = df_accel %>%
  filter(age_in_years_at_screening >= 18 & age_in_years_at_screening < 80) %>% 
  mutate(weight = full_sample_2_year_mec_exam_weight / 2,
         weight_norm = weight / mean(weight)) %>% 
  ungroup() %>% 
  mutate(age_grp = cut(age_in_years_at_screening, breaks = c(seq(18, 73, 5), 80), include.lowest = TRUE))

# survey_design <- svydesign(ids = ~1, weights = ~weight, data = df)
survey_design = survey::svydesign(
  id = ~ masked_variance_pseudo_psu,
  strata = ~ masked_variance_pseudo_stratum,
  weights = ~ weight_norm,
  data = df_means,
  nest = TRUE
)

# Calculate mean estimate by age
calc_by_age_group =
  function(var, df) {
    # var = "total_oaksteps"
    formula = as.formula(paste("~", var))
    total_by_age_gender = svyby(formula,
                               ~ age_grp,
                               survey_design,
                               svymean) %>%
      rename(mean = contains(var)) %>%
      mutate(metric = var)
  }

cats_df = 
  map_dfr(.x = df_means %>% select(contains("total") | contains("peak")) %>% colnames(),
          .f = calc_by_age_group, df = df)

chg_df = 
  cats_df %>% 
  group_by(metric) %>% 
  mutate(pct_chg = (mean - lag(mean)) / lag(mean) * 100)

chg_df %>% 
  filter(grepl("step", metric) & grepl("total", metric)) %>% 
  ggplot(aes(x = age_grp, y = pct_chg, color = metric, group = metric)) + 
  geom_step()+
  facet_wrap(.~metric, nrow = 1)

means_df %>% 
  filter(grepl("step", metric) & grepl("total", metric)) %>% 
    group_by(metric) %>% 
  mutate(pct_chg = (mean - lag(mean))/lag(mean)*100) %>% 
  ggplot(aes(x = age_in_years_at_screening, y = pct_chg, color = metric))+
  geom_smooth(method = "loess", se = FALSE)
  # facet_wrap(.~metric, nrow  = 1)

Correlations between Step Counts and AC/MIMS

Figure 2?

Code
mean_mat <- function(df, abs = FALSE) {
  # Ensure the input is a dataframe
  if (!is.data.frame(df)) {
    stop("Input must be a dataframe")
  }
  
  # Check if all columns are numeric
  if (!all(sapply(df, is.numeric))) {
    stop("All columns in the dataframe must be numeric")
  }
  
  # Get the number of columns
  num_cols <- ncol(df)
  
  # Initialize a matrix to store the mean absolute differences
  mad_matrix <- matrix(NA, nrow = num_cols, ncol = num_cols)
  rownames(mad_matrix) <- colnames(df)
  colnames(mad_matrix) <- colnames(df)
  
  # Calculate the mean absolute difference for each pair of columns
  for (i in 1:num_cols) {
    for (j in 1:num_cols) {
      if (i <= j) {
        # Calculate mean absolute difference, handling potential NAs
        if(abs){
          d = abs(df[, i] - df[, j])
        }
        else{
          d = df[, i] - df[, j]
        }
        # Debug output
        # print(paste("Calculating MAD for columns", colnames(df)[i], "and", colnames(df)[j]))
        # print(head(abs_diff))
        
        # Calculate mean
        # mad_matrix[i, j] <- mean(abs_diff %>% unlist, na.rm = TRUE)
        mad_matrix[i, j] <- mean(d %>% unlist, na.rm = TRUE)
        mad_matrix[j, i] <- mad_matrix[i, j]  # Symmetric matrix
      }
    }
  }
  
  return(mad_matrix)
}
Code
cor_mat = 
  df_accel %>% select(contains("total")) %>% 
  select(contains("acti"), contains("adept"), contains("oak"), contains("sc"), contains("vss"), contains("vsr"),
         total_AC, total_PAXMTSM, total_log10AC, total_log10PAXMTSM) %>% 
  cor(., use = "complete", method = "spearman") 
pvals = df_accel %>% select(contains("total")) %>% 
  select(contains("acti"), contains("adept"), contains("oak"), contains("sc"), contains("vss"), contains("vsr"),
        total_AC, total_PAXMTSM, total_log10AC, total_log10PAXMTSM) %>% 
  rstatix::cor_pmat(.) %>% 
  select(-rowname) %>% 
  as.matrix()


colnames(cor_mat) = rownames(cor_mat) =colnames(pvals) = rownames(pvals) = c("Actilife steps", "ADEPT", "Oak", 
                                                                             "Stepcount RF", "Stepcount SSL", 
                                                                             "Verisense", "Verisense rev.", "AC", 
                                                                            "MIMS",  "log10 AC", "log10 MIMS")

r = c("ADEPT","Actilife steps", "Verisense rev.",  "Verisense")

corrplot::corrplot(cor_mat,  
         method="color", 
         type="upper", 
        col= paletteer_d("colorBlindness::Blue2Orange8Steps"),
         tl.col="black", 
         tl.srt=30, 
         p.mat = pvals,
         col.lim=c(0.2,1),
         sig.level = 0.05, 
         insig = "blank",
         is.corr = FALSE,
        addgrid.col = "white",
        diag = FALSE,
        title = "Spearman Correlations",
        addCoef.col = 'grey50') %>% 
  corrplot::corrRect(namesMat  = r)

Code
# min(diff_mat[diff_mat != 0], na.rm = TRUE)

diff_mat = 
  df_accel %>% select(contains("total")) %>% 
  select(contains("acti"), contains("adept"), contains("oak"), contains("sc"), contains("vss"), contains("vsr"),
         total_AC, total_PAXMTSM, total_log10AC, total_log10PAXMTSM) %>% 
  mean_mat()

abs_diff_mat = 
  df_accel %>% select(contains("total")) %>% 
  select(contains("acti"), contains("adept"), contains("oak"), contains("sc"), contains("vss"), contains("vsr"),
         total_AC, total_PAXMTSM, total_log10AC, total_log10PAXMTSM) %>% 
  mean_mat(., abs = TRUE)

# mean_diff_mat = round(mean_diff_mat, digits = 0)

diff_mat[,c("total_AC", "total_log10AC", "total_PAXMTSM", "total_log10PAXMTSM")] <- NA
diff_mat[c("total_AC", "total_log10AC", "total_PAXMTSM", "total_log10PAXMTSM"),] <- NA
colnames(diff_mat) = rownames(diff_mat) = c("Actilife steps", "ADEPT", "Oak", 
                                                                             "Stepcount RF", "Stepcount SSL", 
                                                                             "Verisense", "Verisense rev.", "AC", 
                                                                            "MIMS",  "log10 AC", "log10 MIMS")
diff_mat = diff_mat / 1000
corrplot::corrplot(diff_mat,  
         method="color", 
         type="lower", 
        col= paletteer_c("grDevices::Purple-Green", 10),
        col.lim=c(-10, 10),
         tl.col="black", 
         tl.srt=30, 
         insig = "blank",
         is.corr = FALSE,
         diag = FALSE,
        title = "Mean Difference in Steps (x1000)",
         addgrid.col = "white",
        # addCoef.col = 'black',
        na.label = "-")

Code
abs_diff_mat[,c("total_AC", "total_log10AC", "total_PAXMTSM", "total_log10PAXMTSM")] <- NA
abs_diff_mat[c("total_AC", "total_log10AC", "total_PAXMTSM", "total_log10PAXMTSM"),] <- NA
colnames(abs_diff_mat) =rownames(abs_diff_mat) = c("Actilife steps", "ADEPT", "Oak", 
                                                                             "Stepcount RF", "Stepcount SSL", 
                                                                             "Verisense", "Verisense rev.", "AC", 
                                                                            "MIMS",  "log10 AC", "log10 MIMS")
abs_diff_mat = abs_diff_mat / 1000
p = corrplot::corrplot(abs_diff_mat,  
         method="color", 
         type="lower", 
        col= paletteer_c("grDevices::Purple-Green", 10, direction = -1),
        col.lim=c(0, 10),
         tl.col="black", 
         tl.srt=30, 
         insig = "blank",
         is.corr = FALSE,
         diag = FALSE,
        title = "Mean Absolute Difference",
         addgrid.col = "white",
        # addCoef.col = 'black',
        na.label = "-")

Code
mean_diff_mat = df_accel %>% 
  select((contains("total"))) %>% 
  filter(!is.na(total_vssteps)) %>% 
  mean_mat(abs = TRUE)

# mean_diff_mat = round(mean_diff_mat, digits = 0)

mean_diff_mat[,c("total_AC", "total_log10AC", "total_PAXMTSM", "total_log10PAXMTSM")] <- NA
mean_diff_mat[c("total_AC", "total_log10AC", "total_PAXMTSM", "total_log10PAXMTSM"),] <- NA
cor_mat = 
  df_accel %>% select(contains("total")) %>%
  cor(., use = "complete", method = "spearman") 
pvals = df_accel %>% select(contains("total")) %>%
  rstatix::cor_pmat(.) %>% 
  select(-rowname) %>% 
  as.matrix()
colnames(cor_mat) = rownames(cor_mat) =colnames(pvals) = rownames(pvals) = c("AC", "Actilife", "ADEPT", "log10 AC", "log10 MIMS", "Oak", "MIMS", "Sc. RF", "Sc. SSL", "Vs rev.", "Vs")


colnames(mean_diff_mat) = rownames(mean_diff_mat) = c("AC", "Actilife", "ADEPT", "log10 AC", "log10 MIMS", "Oak", "MIMS", "Sc. RF", "Sc. SSL", "Vs rev.", "Vs")

corrplot::corrplot(cor_mat,  
         method="circle", 
         type="lower", 
         # col=paletteer_d("colorBlindness::Blue2DarkRed12Steps"), 
        col= paletteer_d("colorBlindness::Blue2Orange8Steps"),
         tl.col="black", 
         tl.srt=45, 
         p.mat = pvals,
         col.lim=c(0.2,1),
         sig.level = 0.05, 
         insig = "blank",
         is.corr = FALSE,
        diag = FALSE,
         order = "AOE",
        title = "Spearman Correlations",
        addCoef.col = 'black') 

Code
corrplot::corrplot(mean_diff_mat,  
         method="color", 
         type="lower", 
         # col=paletteer_d("colorBlindness::Blue2DarkRed12Steps"), 
        col= paletteer_d("colorBlindness::Blue2Orange8Steps"),
         tl.col="black", 
         tl.srt=45, 
         # p.mat = pvals,
         # col.lim=c(0.2,1),
         # sig.level = 0.05, 
         insig = "blank",
         is.corr = FALSE,
         diag = FALSE,
         # order = "AOE",
        title = "Mean Absolute Difference",
        addCoef.col = 'black',
        na.label = "-") 

Code
corrplot::corrplot(mean_diff_mat,  
         method="color", 
         type="upper", 
         # col=paletteer_d("colorBlindness::Blue2DarkRed12Steps"), 
        col= paletteer_d("colorBlindness::Blue2Orange8Steps"),
         tl.col="black", 
         tl.srt=45, 
         # p.mat = pvals,
         # col.lim=c(0.2,1),
         # sig.level = 0.05, 
         insig = "blank",
         is.corr = FALSE,
         diag = FALSE,
         # order = "AOE",
        title = "Mean Absolute Difference",
        addCoef.col = 'black',
        na.label = "-") 

Single variable mortality prediction

Figure 3?

Code
# make label df 
var_labels = 
  tibble(names = 
           unique(wt_single$variable),
         labels = c("Age at screening", "Diabetes", "Mobility problem",
                    "Cancer", "Alcohol use", 
                    "BMI Category", "Education level", "Smoking status",
                    "CHF", "CHD", "Gender", "Self-reported health", "Heart attack", "Peak1 AC", "Peak1 MIMS", "Peak1 Actilife steps",
                    "Peak1 ADEPT steps", "Peak1 log10 AC", "Peak1 log10 MIMS", "Peak1 Oak steps",
                    "Peak1 Stepcount RF steps", "Peak1 Stepcount SSL steps", "Peak1 Verisense rev. steps",
                    "Peak1 Verisense steps", 
                    "Peak30 AC", "Peak30 MIMS", "Peak30 Actilife steps",
                    "Peak30 ADEPT steps", "Peak30 log10 AC", "Peak30 log10 MIMS", "Peak30 Oak steps",
                    "Peak30 Stepcount RF steps", "Peak30 Stepcount SSL steps", "Peak30 Verisense rev. steps",
                    "Peak30 Verisense steps",
                    "Race/ethnicity", "Stroke",
                    "AC", "MIMS", "Actilife steps",
                    "ADEPT steps", "log10 AC", "log10 MIMS", "Oak steps",
                    "Stepcount RF steps", "Stepcount SSL steps", "Verisense rev. steps",
                    "Verisense steps"))
                    

# paletteer_d("ggthemes::colorblind")
wt_single %>% 
  filter(!grepl("peak", variable)) %>% 
  mutate(var_group = case_when(
    grepl("steps", variable) ~ "Step variable",
    grepl("total", variable) ~ "Non-step accelerometry variable",
    TRUE ~ "Non-accelerometry variable"
  ),
  var_group = factor(var_group, levels = c("Step variable", "Non-step accelerometry variable", "Non-accelerometry variable"))) %>% 
  group_by(variable, var_group) %>% 
  summarize(mean = mean(concordance),
            sd = sd(concordance),
            se = sd(concordance)/sqrt(n())) %>% 
  mutate(ci_low = mean - 1.96*se,
         ci_high = mean + 1.96*se) %>% 
  ungroup() %>% 
  left_join(var_labels, by = c("variable" = "names")) %>%
  mutate(labels = factor(labels),
         labels = fct_reorder(labels, mean)) %>% 
  ggplot(aes(y = labels, x = mean, xmin = ci_low, xmax = ci_high, color = var_group))+
  geom_point(size = 1.3) + 
  # geom_errorbarh(height = .3) + 
  theme_bw() + 
  # scale_color_manual(values = c("#CC79A7FF", "#009E73FF", "#0072B2FF"), name = "")+
    scale_color_manual(values = c("#FF6DB6", "#009292", "#006DDB"), name = "")+
  scale_x_continuous(limits=c(0.5, 0.75), breaks=seq(0.5, 0.75, .05))+
  theme(legend.position = c(.3, .75),
        legend.title = element_blank(),
        legend.text = element_text(size = 14),
        axis.title = element_text(size = 14),
      axis.text.y = element_text(size = 12),
      strip.text = element_text(size = 14))+
  labs(x = "Mean 100-times repeated 10-fold Cross-Validated Survey-Weighted Concordance", y = "")

Code
# ggsave(here::here("manuscript", "figures", "single_concordance.svg"), width = 8, height = 8)
ggsave(here::here("manuscript", "figures", "single_concordance.png"), dpi = 400, width = 10, height = 8)

Comparison with cadence estimates

Supplement?

Code
wt_single %>% 
  filter(grepl("peak", variable) | grepl("steps", variable)) %>% 
  mutate(var_group = factor(case_when(
    grepl("peak1", variable) ~ "Peak 1-min variable",
    grepl("peak30", variable) ~ "Peak 30-min variable",
    TRUE ~ "Mean daily total variable"
  ), levels = c("Peak 1-min variable", "Peak 30-min variable", "Mean daily total variable"))) %>% 
  group_by(variable, var_group) %>% 
  summarize(mean = mean(concordance),
            sd = sd(concordance),
            se = sd(concordance)/sqrt(n())) %>% 
  mutate(ci_low = mean - 1.96*se,
         ci_high = mean + 1.96*se) %>% 
  ungroup() %>% 
  left_join(var_labels, by = c("variable" = "names")) %>%
  mutate(labels = factor(labels),
         labels = fct_reorder(labels, mean)) %>% 
  ggplot(aes(y = labels, x = mean, xmin = ci_low, xmax = ci_high, color = var_group))+
  geom_point() + 
  geom_errorbarh() + 
  theme_bw() + 
  # facet_wrap(.~grp) +
  # scale_color_manual(values = c("#E69F00FF", "#D55E00FF", "#56B4E9FF"), name = "")+
  scale_color_manual(values = c("#FF7F00", "#FFBF7F", "#19B2FF"), name = "")+
  scale_x_continuous(limits=c(0.687, 0.738), breaks=seq(0.675, 0.775, .0125))+
  # scale_x_continuous(limits=c(0.687, 0.738), breaks=seq(0.675, 0.775, .0125))+
  theme(legend.position = c(.3, .7),
        legend.title = element_blank(),
        axis.text.y = element_text(size = 12),
        axis.title= element_text(size = 14),
        legend.text = element_text(size = 14),
        strip.text = element_text(size = 14))+
  labs(x = "Mean 100-times repeated 10-fold Cross-Validated Survey-Weighted Concordance", y = "")

Code
#   
# ggsave(here::here("manuscript", "figures", "single_concordance_cadence.png"), width = 8, height = 8,
#        dpi = 350)

Hazard ratio forest plots

Figure 4?

Code
pa_vars = df_mortality_win %>% 
  select(contains("total") & contains("steps")) %>% 
  colnames()

df_mortality_win_scaled = 
  df_mortality_win %>% 
  mutate(across(c(contains("total")), ~scale(.x)))


sds = 
  df_mortality_win %>% 
  summarize(across(c(contains("total")), ~sd(.x)/1000)) %>% 
  pivot_longer(cols = contains("total")) %>% 
  mutate(value = round(value, digits = 1),
         value = sprintf("%.1f",value)) %>% 
  left_join(var_labels, by = c("name" = "names")) 

fit_model = function(var, df){
  
  df = df %>% 
    mutate(weight = full_sample_2_year_mec_exam_weight / 2, weight_norm = weight / mean(weight))
  


  formula = as.formula(paste0("Surv(event_time, mortstat) ~", var, "+ 
      age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi +
      race_hispanic_origin +
      gender + 
      cat_bmi +
      cat_education +
      chf +
      chd +
      heartattack +
      stroke +
      cancer +
      bin_diabetes +
      cat_alcohol +
      cat_smoke +
      bin_mobilityproblem +
      general_health_condition"))
    coxph(formula, data = df, weights = weight_norm) %>% 
      tidy() %>% 
      filter(grepl(var, term))
    
}
names(col_vec) = paste("total_", c("acti", "adept", "oak", "scrf", "scssl", "vsrev",
                                  "vs"), "steps", sep = "")
  

steps_res = 
  map_dfr(.x = pa_vars,
          .f = fit_model,
          df = df_mortality_win) %>% 
  mutate(hr = exp(estimate * 500),
         ci_low = exp(500*(estimate - (1.96 * std.error))),
         ci_high = exp(500*(estimate + (1.96 * std.error))))

steps_res %>% 
  left_join(var_labels, by = c("term" = "names")) %>%
  mutate(labels = factor(labels),
         labels = fct_reorder(labels, hr, mean, .desc = TRUE),
         grp = "Raw") %>%
  ggplot(aes(color = term)) +
  geom_point(aes(y = labels, x = hr), size = 2)+
  geom_errorbarh(aes(xmin = ci_low, xmax = ci_high, y = labels), linewidth = 1.1,
                 height = .3)+
  theme_bw() + 
  scale_color_manual(values = col_vec) + 
  theme(legend.position = "none",
        strip.text = element_text(size = 14),
        axis.text = element_text(size = 14)) + 
    scale_x_continuous(limits=c(0.725, 1), breaks=seq(0.7, 1, .05))+
  facet_wrap(.~grp)+
  geom_vline(aes(xintercept =  1), linetype = "dashed") +
  labs(x = "Adjusted HR Associated with 500-step Increase in Mean Steps per Day", y = "")

Code
# ggsave(here::here("manuscript", "figures", "forest_raw.png"), width = 8, height = 8, dpi = 400)


steps_res_scaled = 
  map_dfr(.x = c(pa_vars, "total_PAXMTSM", "total_AC"),
          .f = fit_model,
          df = df_mortality_win_scaled) %>% 
  mutate(hr = exp(estimate),
         ci_low = exp(estimate - (1.96 * std.error)),
         ci_high = exp(estimate + (1.96 * std.error))) %>% 
  left_join(var_labels, by = c("term" = "names"))



steps_res_scaled %>% 
  filter(grepl("steps", term)) %>% 
  mutate(term2 = factor(term),
         term2 = fct_reorder(term2, hr, .desc = TRUE),
         labels = factor(labels),
         labels = fct_reorder(labels, hr, .desc = TRUE),
         grp = "Scaled") %>%
  ggplot(aes(color = term)) +
  geom_point(aes(y = labels, x = hr), size = 2)+
  geom_errorbarh(aes(xmin = ci_low, xmax = ci_high, y = labels), linewidth = 1.1)+
  theme_bw() + 
  facet_wrap(.~grp) + 
   scale_color_manual(values = c("#000000FF", "#265DABFF", "#DF5C24FF", "#059748FF", "#E5126FFF", "#9D722AFF", 
                                "#7B3A96FF", "#C7B42EFF", "#CB2027FF"),
                                labels = c("Actilife", "ADEPT", "Oak", "Verisense", "Verisense rev.",
                                           "Stepcount SSL", "Stepcount RF")) + 
  # scale_color_paletteer_d("ggthemes::few_Dark")+
  theme(legend.position = "none",
        strip.text = element_text(size = 14)) + 
  scale_x_continuous(limits=c(0.45, 1), breaks=seq(0.35, 1, .05))+
  geom_vline(aes(xintercept = 1), linetype = "dashed")+
  labs(x = "Adjusted HR Associated with One SD Increase in Mean Steps per Day", y = "")

Code
# ggsave(here::here("manuscript", "figures", "forest_scaled.png"), width = 8, height = 8, dpi = 400)

tb = steps_res %>% 
  select(term, hr, p.value, ci_low, ci_high) %>% 
  mutate(type = "raw") %>% 
  bind_rows(steps_res_scaled %>% select(term, hr, p.value, ci_low, ci_high) %>% mutate(type = "scaled")) %>% 
  pivot_wider(names_from = type, values_from = -term) %>% 
  filter(grepl("step", term)) %>% 
  left_join(sds, by = c("term" = "name")) %>% 
    arrange(desc(hr_scaled)) %>% 
    mutate(variable_fac = forcats::fct_reorder(term, hr_scaled, mean), 
           ci_raw = paste0(sprintf("%0.3f",round(hr_raw, 3)),
                       " (", sprintf("%0.3f",round(ci_low_raw, 3)), ", ", sprintf("%0.3f",round(ci_high_raw, 3)), ")"),
           ci_sc = paste0(sprintf("%0.3f",round(hr_scaled, 3)),
                       " (", sprintf("%0.3f",round(ci_low_scaled, 3)), ", ", sprintf("%0.3f",round(ci_high_scaled, 3)), ")")) %>%
    select(variable_fac, ci_raw, ci_sc, value) %>% 
    gt::gt() %>%
    cols_align(columns = everything(), align = "left") %>% 
    as.data.frame() %>% 
    kableExtra::kbl("latex", booktabs = TRUE)


steps_res %>% 
  left_join(var_labels, by = c("term" = "names")) %>% 
  mutate(type = "raw") %>% 
  bind_rows(steps_res_scaled %>% mutate(type = "scaled")) %>%
  filter(grepl("steps", term)) %>% 
  mutate(term2 = factor(term),
         term2 = fct_reorder(term2, hr, .desc = TRUE),
         labels = factor(labels),
         labels = fct_reorder(labels, hr, .desc = TRUE)) %>% 
  ggplot(aes(color = term, shape = type, linetype = type)) +
  geom_point(aes(y = labels, x = hr), size = 2)+
  geom_errorbarh(aes(xmin = ci_low, xmax = ci_high, y = labels), linewidth = 1.1,
                 height = .4)+
  theme_bw() + 
  scale_color_manual(values = col_vec)+
  theme(legend.position = "none",
        axis.text = element_text(size = 12),
        axis.title = element_text(size = 14)) + 
  scale_x_continuous(limits=c(0.40, 1), breaks=seq(0.35, 1, .05))+
  geom_vline(aes(xintercept = 1), linetype = "dashed")+
  labs(x = "Hazard Ratio", y = "")+
  geom_label(data = sds %>% filter(grepl("steps", name)), aes(x = 0.40, y = labels, label = paste0("SD=", value)), inherit.aes = F, hjust = 0)

Tables

Table 1: Demographics

Code
# add labels 

name_vec = colnames(df_accel)
labs = c("SEQN", "Data release cycle",
         "Interview Examination Status",
         "Sex", "Age (yrs)", "Age (mos)", "Race/ethnicity",
         "Six month time period",
         "Educ. level adults", "Marital status",
         "2 yr int weight", "2 yr exam weight", "Pseudo PSU",
         "Psueudo stratum", 
         "Annual HH income", 
         "Weight (kg)", "Height (cm)", "BMI (kg/m2)", 
         "Overweight", "Diabetes orig.", "Diabetes", "Arthritis", 
         "Coronary Heart Failure", "Congenital Heart Disease", "Angina", 
         "Heart attack", "Stroke", "Cancer", colnames(df_all)[29:33],
         "Alcohol use", "BMI Category", "Smoking status", "Mobility problem", "General health condition",
         "Eligbility", "Died by 5 years follow up", "COD", "COD Diabetes", "COD Hypertension", 
         "Person-months follow up from interview", "Person-months follow up from exam", "Eligibility category", "COD category",  "AC", "MIMS",
                     "Actilife steps", "ADEPT steps",
                    "log10 AC", "log10 MIMS", "Oak steps",
                     "Sc. RF steps", "Sc. SSL steps",
                    "Verisense rev. steps", "Verisense steps",
                    "Peak1 AC", "Peak1 MIMS",
                     "Peak1 Actilife steps", "Peak1 ADEPT steps",
                    "Peak1 log10 AC", "Peak1 log10 MIMS", "Peak1 Oak steps",
                     "Peak1 Sc. RF steps", "Peak1 Sc. SSL steps",
                    "Peak1 Verisense rev. steps", "Peak1 Verisense steps",
                     "Peak30 AC", "Peak30 MIMS",
                     "Peak30 Actilife steps", "Peak30 ADEPT steps",
                    "Peak30 log10 AC", "Peak30 log10 MIMS", "Peak30 Oak steps",
                     "Peak30 Sc. RF steps", "Peak30 Sc. SSL steps",
                    "Peak30 Verisense rev. steps", "Peak30 Verisense steps", "No. valid days", "Received accelerometer", 
         "Valid accelerometry", "Inclusion category", "Education level")



names(labs) = name_vec

df_all = 
  df_accel %>% 
  labelled::set_variable_labels(!!!labs)
# survey weighted table 
# question about this!! 
df_svy =
  df_all %>% 
  filter(has_accel) %>% 
  filter(valid_accel) %>% ### do we want to do this? 
  select(
    gender,
    age_in_years_at_screening,
    race_hispanic_origin,
    cat_education,
    cat_bmi,
    bin_diabetes,
    chf,
    chd,
    stroke,
    cat_alcohol,
    cat_smoke,
    bin_mobilityproblem,
    general_health_condition,
    mortstat,
    data_release_cycle,
    masked_variance_pseudo_psu, masked_variance_pseudo_stratum,
    full_sample_2_year_mec_exam_weight
  )  %>%
  mutate(WTMEC4YR = full_sample_2_year_mec_exam_weight/2,
         WTMEC4YR_norm = WTMEC4YR/mean(WTMEC4YR, na.rm = TRUE)) %>%
  select(-full_sample_2_year_mec_exam_weight) %>%
  svydesign(ids = ~masked_variance_pseudo_psu, weights = ~WTMEC4YR_norm, 
            strata = ~masked_variance_pseudo_stratum, data=., nest=TRUE)

df_svy %>%
  tbl_svysummary(
    by = data_release_cycle,
    include = -c(masked_variance_pseudo_psu, masked_variance_pseudo_stratum, WTMEC4YR,
                 WTMEC4YR_norm),
    statistic = list(
      all_continuous() ~ "{mean} ({sd})",
      all_categorical() ~ "{n} ({p}%)"
    ),
    digits = all_continuous() ~ 2,
    missing_text = "Missing",
  ) %>%
  add_overall() %>% 
  modify_caption("Demographic Characteristics, All Adults")
Demographic Characteristics, All Adults
Characteristic Overall
N = 8,6641
7
N = 4,3671
8
N = 4,2971
Sex


    Female 4,552 (53%) 2,281 (52%) 2,271 (53%)
    Male 4,112 (47%) 2,086 (48%) 2,027 (47%)
Age (yrs) 48.01 (17.41) 47.99 (17.29) 48.04 (17.53)
Race/ethnicity


    Non-Hispanic White 5,785 (67%) 2,918 (67%) 2,867 (67%)
    Non-Hispanic Black 979 (11%) 505 (12%) 475 (11%)
    Other Race - Including Multi-Rac 640 (7.4%) 312 (7.2%) 327 (7.6%)
    Mexican American 736 (8.5%) 344 (7.9%) 393 (9.1%)
    Other Hispanic 524 (6.0%) 288 (6.6%) 236 (5.5%)
Education level


    More than HS 5,279 (63%) 2,664 (63%) 2,615 (63%)
    Less than HS 1,330 (16%) 703 (17%) 627 (15%)
    HS/HS equivalent 1,788 (21%) 877 (21%) 911 (22%)
    Missing 267 123 144
BMI Category


    Normal 2,436 (28%) 1,258 (29%) 1,178 (28%)
    Obese 3,176 (37%) 1,538 (36%) 1,639 (38%)
    Overweight 2,825 (33%) 1,445 (33%) 1,380 (32%)
    Underweight 156 (1.8%) 84 (2.0%) 71 (1.7%)
    Missing 70 41 29
Diabetes 887 (10%) 430 (9.9%) 457 (11%)
    Missing 2 0 2
Coronary Heart Failure 247 (2.9%) 135 (3.2%) 113 (2.7%)
    Missing 270 127 143
Congenital Heart Disease 316 (3.8%) 145 (3.4%) 171 (4.1%)
    Missing 283 133 150
Stroke 263 (3.1%) 140 (3.3%) 124 (3.0%)
    Missing 267 124 143
Alcohol use


    Never drinker 1,057 (12%) 477 (11%) 581 (14%)
    Former drinker 1,233 (14%) 619 (14%) 614 (14%)
    Moderate drinker 2,657 (31%) 1,401 (32%) 1,256 (29%)
    Heavy drinker 669 (7.7%) 374 (8.6%) 296 (6.9%)
    Missing alcohol 3,048 (35%) 1,496 (34%) 1,552 (36%)
Smoking status


    Never smoker 4,820 (56%) 2,351 (55%) 2,470 (57%)
    Former smoker 2,087 (24%) 1,071 (25%) 1,016 (24%)
    Current smoker 1,630 (19%) 820 (19%) 810 (19%)
    Missing 127 126 1
Mobility problem 1,411 (17%) 638 (15%) 773 (19%)
    Missing 270 123 146
General health condition


    Poor 232 (2.7%) 106 (2.4%) 126 (2.9%)
    Fair 1,323 (15%) 612 (14%) 711 (17%)
    Good 3,408 (39%) 1,670 (38%) 1,738 (40%)
    Very good 2,742 (32%) 1,442 (33%) 1,300 (30%)
    Excellent 959 (11%) 536 (12%) 423 (9.8%)
Died by 5 years follow up 648 (7.5%) 354 (8.1%) 294 (6.8%)
    Missing 15 12 4
1 n (%); Mean (SD)
Code
df_svy =
  df_all %>%
  filter(has_accel) %>%
  filter(valid_accel) %>%
  mutate(
    mortstat_fac = factor(
      mortstat,
      levels = c(0, 1),
      labels = c("Alive", "Deceased")
    ),
    svy_year_fac = factor(
      data_release_cycle,
      levels = c(7, 8),
      labels = c("2011-2012", "2013-2014")
    ),
    WTMEC4YR = full_sample_2_year_mec_exam_weight / 2,
    WTMEC4YR_norm = WTMEC4YR / mean(WTMEC4YR)
  ) %>%
  svydesign(
    ids = ~ masked_variance_pseudo_psu,
    weights = ~ WTMEC4YR_norm,
    strata = ~ masked_variance_pseudo_stratum,
    data = .,
    nest = TRUE
  )
vars_table1 = c("age_years_interview","gender","race","BMI_cat","education_adult",
                 "overall_health_combined","diabetes","heart_attack","CHF","CHD","stroke","cancer",
                 "mobility_problem", "alcohol_consumption_fac","cigarette_smoking","TMIMS_mean","TAT_mean","TMVT_mean","ASTP_mean","RA_MIMS_mean",
                 "mortstat_fac")

vars_table1 = c( "gender",
   "age_in_years_at_screening",
    "race_hispanic_origin",
    "cat_education",
    "cat_bmi",
    "bin_diabetes",
    "chf",
    "chd",
    "stroke",
    "cat_alcohol",
    "cat_smoke",
    "bin_mobilityproblem",
    "general_health_condition",
    "mortstat_fac")
  
table_1 = tableone::svyCreateTableOne(vars=vars_table1, strata="svy_year_fac", test=TRUE, data=df_svy, addOverall = TRUE)

Table 2: Physical Activity

Option 1: by wave and age

Code
# with PA

df_adult = df_accel %>% 
  filter(age_in_years_at_screening >= 18) %>%
  mutate(weight = full_sample_2_year_mec_exam_weight / 2,
         weight_norm = weight / mean(weight)) %>%
  mutate(group = "All adults")

df_mort = df_accel %>% 
  filter(age_in_years_at_screening >= 50) %>%
  mutate(weight = full_sample_2_year_mec_exam_weight / 2,
         weight_norm = weight / mean(weight)) %>%
  mutate(group = "50-80y.o") 


df = bind_rows(df_adult, df_mort) 

# df = 
#   df %>% 
#     labelled::set_variable_labels(!!!labs)

df = df %>% 
  rename(Verisense = "total_vssteps",
         "Verisense rev" = "total_vsrevsteps",
         Oak = "total_oaksteps",
         ADEPT = "total_adeptsteps",
         SCRF =  "total_scrfsteps",
         SCSSL =  "total_scsslsteps",
         MIMS = "total_PAXMTSM",
         Actilife = "total_actisteps",
         log10MIMS = "total_log10PAXMTSM",
         log10AC = "total_log10AC",
         AC = "total_AC") %>% 
  select(!contains("peak"))

df_analysis_svy = survey::svydesign(
  id = ~ masked_variance_pseudo_psu,
  strata = ~ masked_variance_pseudo_stratum,
  weights = ~ weight_norm,
  data = df,
  nest = TRUE
)


df_analysis_svy %>% 
  tbl_strata(
    strata = data_release_cycle,
    .tbl_fun =
      ~ .x %>%
      tbl_svysummary(.,
                     include = c(contains("Verisense"), contains("AC", ignore.case = F),
                                 contains("Oak"), contains("ADEPT"), contains("SCRF"), contains("SCSSL"), contains("MIMS"), contains("actilife")),
                     by = group,
                     statistic = list(
                       all_continuous() ~ "{mean} ({sd})",
                       all_categorical() ~ "{n} ({p}%)"
                     )),
    .header = "**Wave {strata}**, N = {n}"
  ) %>% 
  modify_caption("Physical Activity Mean Totals Stratified by Age and Wave")
Physical Activity Mean Totals Stratified by Age and Wave
Characteristic
Wave 7, N = 6513
Wave 8, N = 6404
50-80y.o
N = 2,1461
All adults
N = 4,3671
50-80y.o
N = 2,1071
All adults
N = 4,2971
Verisense rev 7,532 (4,521) 9,102 (4,756) 7,122 (4,402) 8,725 (4,730)
Verisense 8,337 (4,027) 9,497 (4,062) 7,974 (4,019) 9,163 (4,065)
AC 2,333,927 (791,711) 2,573,262 (815,192) 2,291,942 (766,303) 2,549,846 (816,305)
log10AC 2,878 (383) 2,955 (366) 2,847 (394) 2,933 (370)
Oak 10,254 (5,027) 11,794 (5,061) 9,733 (4,886) 11,381 (5,065)
ADEPT 2,342 (1,593) 2,659 (1,569) 2,151 (1,680) 2,453 (1,575)
SCRF 9,888 (5,542) 11,509 (5,722) 9,502 (5,442) 11,263 (5,764)
SCSSL 8,358 (4,402) 9,144 (4,400) 8,027 (4,388) 8,846 (4,399)
log10MIMS 960 (158) 998 (154) 952 (160) 994 (155)
MIMS 12,435 (3,642) 13,572 (3,758) 12,243 (3,574) 13,467 (3,784)
Actilife 11,195 (4,001) 12,169 (3,995) 10,850 (4,008) 11,902 (4,048)
1 Mean (SD)
Code
df %>% 
  tbl_strata(
    strata = data_release_cycle,
    .tbl_fun =
      ~ .x %>%
      tbl_summary(.,
                     include = c(contains("Verisense"), contains("AC", ignore.case = F),
                                 contains("Oak"), contains("ADEPT"), contains("SCRF"), contains("SCSSL"), contains("MIMS"), contains("actilife")),
                     by = group,
                     statistic = list(
                       all_continuous() ~ "{mean} ({sd})",
                       all_categorical() ~ "{n} ({p}%)"
                     )),
    .header = "**Wave {strata}**, N = {n}"
  ) %>% 
  modify_caption("Physical Activity Mean Totals Stratified by Age and Wave - Unweighted")
Physical Activity Mean Totals Stratified by Age and Wave - Unweighted
Characteristic
Wave 7, N = 6410
Wave 8, N = 6507
50-80y.o
N = 2,1071
All adults
N = 4,3031
50-80y.o
N = 2,1461
All adults
N = 4,3611
Verisense rev 7,165 (4,746) 8,895 (4,967) 7,006 (4,582) 8,740 (4,951)
Verisense 8,074 (4,318) 9,359 (4,291) 7,920 (4,216) 9,204 (4,258)
AC 2,288,434 (832,666) 2,547,733 (850,056) 2,286,525 (807,467) 2,554,633 (851,047)
log10AC 2,874 (413) 2,958 (384) 2,858 (406) 2,942 (380)
Oak 9,791 (5,290) 11,532 (5,314) 9,610 (5,125) 11,393 (5,316)
ADEPT 2,152 (1,633) 2,538 (1,626) 2,070 (1,626) 2,426 (1,583)
SCRF 9,381 (5,807) 11,220 (5,987) 9,378 (5,720) 11,264 (6,059)
SCSSL 7,931 (4,639) 8,893 (4,599) 7,891 (4,603) 8,807 (4,610)
log10MIMS 956 (170) 997 (162) 954 (166) 996 (160)
MIMS 12,238 (3,849) 13,467 (3,923) 12,231 (3,757) 13,497 (3,939)
Actilife 10,971 (4,297) 12,057 (4,216) 10,841 (4,187) 11,953 (4,216)
1 Mean (SD)

Supplement: peak values

Code
df_adult = df_accel %>% 
  mutate(weight = full_sample_2_year_mec_exam_weight / 2,
         weight_norm = weight / mean(weight)) %>%
  mutate(group = "All adults")

df_mort = df_mortality %>%
  mutate(weight = full_sample_2_year_mec_exam_weight / 2,
         weight_norm = weight / mean(weight)) %>%
  mutate(group = "50-79y.o") 


df = bind_rows(df_adult, df_mort) 

df_analysis_svy = survey::svydesign(
  id = ~ masked_variance_pseudo_psu,
  strata = ~ masked_variance_pseudo_stratum,
  weights = ~ weight_norm,
  data = df,
  nest = TRUE
)
df_analysis_svy %>% 
  tbl_strata(
    strata = gender,
    .tbl_fun =
      ~ .x %>%
      tbl_svysummary(.,
                     include = contains("peak1"),
                     by = group,
                     statistic = list(
                       all_continuous() ~ "{mean} ({sd})",
                       all_categorical() ~ "{n} ({p}%)"
                     )),
    .header = "**{strata}**, N = {n}"
  ) %>% 
  modify_caption("Physical Activity Peak 1 Min Values Stratified by Age and Sex")
Physical Activity Peak 1 Min Values Stratified by Age and Sex
Characteristic
Female, N = 6552
Male, N = 5832
50-79y.o
N = 2,0001
All adults
N = 4,5521
50-79y.o
N = 1,7201
All adults
N = 4,1121
peak1_AC 12,348 (2,710) 13,379 (3,459) 12,056 (3,418) 13,383 (3,867)
peak1_actisteps 71 (18) 74 (18) 82 (18) 84 (17)
peak1_adeptsteps 63 (26) 66 (25) 68 (23) 70 (21)
peak1_log10AC 4.08 (0.08) 4.11 (0.10) 4.06 (0.11) 4.10 (0.11)
peak1_log10PAXMTSM 1.73 (0.08) 1.76 (0.10) 1.73 (0.10) 1.77 (0.12)
peak1_oaksteps 95 (18) 98 (18) 97 (15) 100 (14)
peak1_PAXMTSM 54 (11) 59 (16) 55 (16) 62 (20)
peak1_scrfsteps 102 (14) 104 (14) 101 (11) 104 (11)
peak1_scsslsteps 101 (17) 103 (17) 101 (15) 104 (15)
peak1_vsrevsteps 87 (20) 91 (20) 91 (18) 95 (17)
peak1_vssteps 82 (20) 84 (19) 86 (17) 88 (16)
1 Mean (SD)
Code
df_analysis_svy %>% 
  tbl_strata(
    strata = gender,
    .tbl_fun =
      ~ .x %>%
      tbl_svysummary(.,
                     include = contains("peak30"),
                     by = group,
                     statistic = list(
                       all_continuous() ~ "{mean} ({sd})",
                       all_categorical() ~ "{n} ({p}%)"
                     )),
    .header = "**{strata}**, N = {n}"
  ) %>% 
  modify_caption("Physical Activity Peak 30 Min Values Stratified by Age and Sex")
Physical Activity Peak 30 Min Values Stratified by Age and Sex
Characteristic
Female, N = 6552
Male, N = 5832
50-79y.o
N = 2,0001
All adults
N = 4,5521
50-79y.o
N = 1,7201
All adults
N = 4,1121
peak30_AC 8,957 (1,693) 9,618 (2,216) 8,424 (2,111) 9,255 (2,381)
peak30_actisteps 47 (13) 49 (13) 56 (16) 58 (15)
peak30_adeptsteps 31 (19) 32 (18) 37 (18) 38 (17)
peak30_log10AC 3.94 (0.08) 3.96 (0.10) 3.91 (0.10) 3.94 (0.10)
peak30_log10PAXMTSM 1.60 (0.07) 1.63 (0.09) 1.58 (0.09) 1.62 (0.10)
peak30_oaksteps 66 (18) 69 (18) 71 (18) 74 (17)
peak30_PAXMTSM 40 (7) 43 (10) 39 (10) 43 (12)
peak30_scrfsteps 79 (18) 82 (18) 82 (16) 85 (15)
peak30_scsslsteps 76 (20) 79 (20) 80 (18) 84 (18)
peak30_vsrevsteps 58 (19) 62 (20) 64 (20) 68 (19)
peak30_vssteps 53 (17) 55 (17) 59 (18) 61 (16)
1 Mean (SD)

Single variable mortality prediction

Table 3 or supplement since duplicative of figure

Code
vlabs = c(
      "Age (yrs)",
      "Diabetes",
      "Mobility problem",
      "Cancer",
      "Alcohol use",
      "BMI Category",
      "Education category",
      "Smoking status",
      "Congenital Heart Disease",
      "Coronary Heart Failure",
      "Gender",
      "General health condition",
      "Heart attack",
      "Peak1 AC",
      "Peak1 Actilife steps",
      "Peak1 ADEPT steps",
      "Peak1 log10 AC",
      "Peak1 log10 MIMS",
      "Peak1 Oak steps",
      "Peak1 MIMS",
      "Peak1 Stepcount RF steps",
      "Peak1 Stepcount SSL steps",
      "Peak1 Verisense rev. steps",
      "Peak1 Verisense steps",
       "Peak30 AC",
      "Peak30 Actilife steps",
      "Peak30 ADEPT steps",
      "Peak30 log10 AC",
      "Peak30 log10 MIMS",
      "Peak30 Oak steps",
      "Peak30 MIMS",
      "Peak30 Stepcount RF steps",
      "Peak30 Stepcount SSL steps",
      "Peak30 Verisense rev. steps",
      "Peak30 Verisense steps",
      "Race/ethnicity",
      "Stroke",
      "AC",
      "Actilife steps",
      "ADEPT steps",
      "log10 AC",
      "log10 MIMS",
      "Oak steps",
      "MIMS",
      "Stepcount RF steps",
      "Stepcount SSL steps",
      "Verisense rev. steps",
      "Verisense steps"
    )

 
data_summary = wt_single %>% 
  group_by(variable) %>% 
  summarize(across(concordance, list(
    mean = ~ mean(.x), se = ~ sd(.x) / sqrt(n())
  ))) %>%
  mutate(variable_fac = factor(
    variable,
    labels = vlabs
  )) %>% 
  filter(!grepl("peak", variable))

df_means =
  df_mortality_win %>%
  group_by(mortstat) %>%
  summarize(across(c(contains("total"), contains("peak")), ~ mean(.x))) %>%
  pivot_longer(cols = -mortstat) %>%
  pivot_wider(names_from = mortstat,
              values_from = value,
              names_prefix = 'died_') %>%
  rename(variable = name)

# get the best variable based on concodance
best_var = data_summary %>%
  arrange(desc(concordance_mean)) %>%
  slice(1) %>%
  pull(variable)

best_var_vec = wt_single %>%
  filter(variable == best_var) %>%
  pull(concordance)

t_tests = wt_single %>%
  group_by(variable) %>%
  filter(variable != best_var) %>%
  nest() %>%
  mutate(t_test = map(
    data,
    ~ t.test(
      .x$concordance,
      best_var_vec,
      var.eq = FALSE,
      paired = FALSE,
      alternative = "less"
    )
  ),
  res = map(t_test, tidy)) %>%
  unnest(res) %>%
  ungroup() %>%
  select(variable, p.value)

# join t tests with summary data and make table
data_summary %>%
  left_join(t_tests) %>%
  left_join(df_means) %>%
  arrange(desc(concordance_mean)) %>%
  mutate(
    variable_fac = forcats::fct_reorder(variable_fac, concordance_mean),
    lb = concordance_mean - 1.96 * concordance_se,
    ub = concordance_mean + 1.96 * concordance_se,
    ci = paste0(
      sprintf("%0.3f", round(concordance_mean, 3)),
      " (",
      sprintf("%0.3f", round(lb, 3)),
      ", ",
      sprintf("%0.3f", round(ub, 3)),
      ")"
    )
  ) %>%
  select(variable_fac,
         ci,
         p.value,
         total_alive = died_0,
         total_deceased = died_1) %>%
  mutate(pvalue = style_pvalue(p.value, digits = 3)) %>%
  mutate(pvalue = ifelse(is.na(pvalue), "---", pvalue),
         across(starts_with("mean"), ~ if_else(is.na(.x), "---", as.character(round(
           .x, 0
         ))))) %>%
  rename(
    Variable = variable_fac,
    "Mean (95% CI)" = ci,
    "p-value" = pvalue,
    "Mean value among alive" = total_alive,
    "Mean value among deceased" = total_deceased
  ) %>%
  gt::gt() %>%
  gt::fmt_number(columns = starts_with("mean"), decimals = 0) %>%
  gt::tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_body(columns = everything(), rows = p.value > 0.05 |
                             is.na(p.value))
  ) %>%
  gt::cols_hide(p.value) %>%
  tab_header(title = "100x 10-fold Cross-Validated Single Variable Concordance", subtitle = "") %>%
  tab_footnote(footnote = "p-value for one sided t-test comparing variable to variable with higest mean concordance",
               locations = cells_column_labels(columns = "p-value")) %>%
  cols_align(columns = everything(), align = "left")
100x 10-fold Cross-Validated Single Variable Concordance
Variable Mean (95% CI) Mean value among alive Mean value among deceased p-value1
Stepcount RF steps 0.732 (0.731, 0.733) 10,591 6,502 ---
ADEPT steps 0.725 (0.724, 0.726) 2,391 1,380 <0.001
Oak steps 0.723 (0.722, 0.724) 10,845 7,067 <0.001
Verisense rev. steps 0.720 (0.719, 0.721) 8,021 4,878 <0.001
Verisense steps 0.716 (0.715, 0.717) 8,885 5,885 <0.001
Actilife steps 0.710 (0.709, 0.711) 11,790 8,740 <0.001
Stepcount SSL steps 0.702 (0.701, 0.703) 8,786 5,784 <0.001
AC 0.688 (0.687, 0.689) 2,454,243 1,900,197 <0.001
MIMS 0.682 (0.681, 0.683) 13,002 10,438 <0.001
Mobility problem 0.675 (0.675, 0.676) NA NA <0.001
Age (yrs) 0.673 (0.672, 0.674) NA NA <0.001
General health condition 0.662 (0.662, 0.663) NA NA <0.001
log10 MIMS 0.647 (0.646, 0.648) 987 884 <0.001
log10 AC 0.630 (0.629, 0.631) 2,939 2,708 <0.001
Diabetes 0.594 (0.593, 0.594) NA NA <0.001
Smoking status 0.589 (0.588, 0.590) NA NA <0.001
Education category 0.572 (0.571, 0.573) NA NA <0.001
Alcohol use 0.551 (0.549, 0.553) NA NA <0.001
Congenital Heart Disease 0.551 (0.551, 0.552) NA NA <0.001
Coronary Heart Failure 0.549 (0.548, 0.549) NA NA <0.001
Gender 0.547 (0.546, 0.547) NA NA <0.001
Heart attack 0.546 (0.545, 0.546) NA NA <0.001
Cancer 0.539 (0.538, 0.540) NA NA <0.001
Stroke 0.533 (0.532, 0.533) NA NA <0.001
BMI Category 0.524 (0.523, 0.526) NA NA <0.001
Race/ethnicity 0.524 (0.523, 0.525) NA NA <0.001
1 p-value for one sided t-test comparing variable to variable with higest mean concordance
Code
  tb = data_summary %>%
    left_join(t_tests) %>%
    left_join(df_means) %>% 
    arrange(desc(concordance_mean)) %>% 
    mutate(variable_fac = forcats::fct_reorder(variable_fac, concordance_mean), 
          lb = concordance_mean - 1.96 * concordance_se,
           ub = concordance_mean + 1.96 * concordance_se,
           ci = paste0(sprintf("%0.3f",round(concordance_mean, 3)),
                       " (", sprintf("%0.3f",round(lb, 3)), ", ", sprintf("%0.3f",round(ub, 3)), ")")) %>%
    select(variable_fac, ci, p.value, total_alive = died_0, total_deceased = died_1) %>%
    mutate(pvalue = style_pvalue(p.value, digits = 3)) %>%
    mutate(pvalue = ifelse(is.na(pvalue), "---", pvalue),
           across(starts_with("mean"), ~if_else(is.na(.x), "---", as.character(round(.x, 0))))) %>% 
    select(-p.value) %>% 
    rename(Variable = variable_fac,
           "Mean (95% CI)" = ci,
           "p-value" = pvalue,
           "Mean value among alive" = total_alive,
           "Mean value among deceased" = total_deceased) %>%
    gt::gt() %>%
    gt::fmt_number(columns = starts_with("mean"), decimals = 0) %>% 
    tab_header(title = "100x 10-fold Cross-Validated Single Variable Concordance") %>%
    tab_footnote(
      footnote = "p-value for one sided t-test comparing variable to variable with higest mean concordance",
      locations = cells_column_labels(columns = "p-value")
    ) %>%
    cols_align(columns = everything(), align = "left") %>% 
    as.data.frame() %>% 
    kableExtra::kbl("latex", booktabs = TRUE)

Multivariable mortality prediction - concordance and model summaries

Still trying to figure out how to present this

Code
wt_small = 
  wt_all %>% 
  filter(model %in% c("Demo only", "scrfsteps+PAXMTSM", "scrfsteps", "PAXMTSM"))

summary = 
  wt_small %>% 
  group_by(model) %>% 
  summarize(across(concordance, list(mean = ~mean(.x),
                                       se = ~sd(.x)/sqrt(n())))) %>% 
  mutate(model_fac = factor(model, labels = c("Demographics only",
                                              "Demographics + MIMS", 
                                              "Demographics + SCRF steps", 
                                              "Demographics + SCRF steps + MIMS")))

coef_scrf = 
  df_mortality_win %>%  
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ total_scrfsteps + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) %>% 
  tidy() %>% 
  select(term, estimate, std.error, robust.se, p.value) %>% 
  filter(grepl("steps", term)) %>% 
  mutate(hr = exp(estimate * 500), 
         lb = exp((estimate - 1.96 * std.error) * 500),
         ub = exp((estimate + 1.96 * std.error) * 500),
         model = "scrfsteps")
  
coef_mims = 
  df_mortality_win %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ total_PAXMTSM + total_scrfsteps + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) %>% 
  tidy() %>% 
  select(term, estimate, std.error, robust.se, p.value) %>% 
  filter(grepl("steps", term)) %>% 
  mutate(hr = exp(estimate * 500), 
         lb = exp((estimate - 1.96 * std.error) * 500),
         ub = exp((estimate + 1.96 * std.error) * 500),
         model = "scrfsteps+PAXMTSM")
  
coefs = bind_rows(coef_scrf, coef_mims)
summary %>% 
  left_join(coefs)  %>% 
  mutate(conc_lb = concordance_mean - 1.96 * concordance_se,
         conc_ub = concordance_mean + 1.96 * concordance_se,
         conc_ci = paste0(sprintf("%0.3f",round(concordance_mean, 3)),
                       " (", sprintf("%0.3f",round(conc_lb, 3)), ", ", sprintf("%0.3f",round(conc_ub, 3)), ")"),
         hr_ci = case_when(!is.na(hr) ~ paste0(sprintf("%0.3f",round(hr, 3)),
                       " (", sprintf("%0.3f",round(lb, 3)), ", ", sprintf("%0.3f",round(ub, 3)), ")"),
                       TRUE ~ "---"),
         p.value = format.pval(p.value, digits = 3)) %>% 
  select(model_fac, hr_ci,p.value, conc_ci) %>% 
  rename(Model = model_fac,
         "Steps HR" = hr_ci,
         "Steps p-value" = p.value,
         "Model concordance" = conc_ci) %>% 
  gt::gt() %>%
    # gt::fmt_number(columns = starts_with("mean"), decimals = 0) %>% 
    tab_header(title = "Multivariable Model Comparison") %>%
    cols_align(columns = everything(), align = "left")
Multivariable Model Comparison
Model Steps HR Steps p-value Model concordance
Demographics only --- NA 0.769 (0.769, 0.770)
Demographics + MIMS --- NA 0.773 (0.772, 0.774)
Demographics + SCRF steps 0.955 (0.940, 0.970) 5.85e-05 0.776 (0.775, 0.777)
Demographics + SCRF steps + MIMS 0.961 (0.939, 0.983) 0.0361 0.774 (0.773, 0.775)
Code
tb = 
  summary %>% 
  left_join(coefs)  %>% 
  mutate(conc_lb = concordance_mean - 1.96 * concordance_se,
         conc_ub = concordance_mean + 1.96 * concordance_se,
         conc_ci = paste0(sprintf("%0.3f",round(concordance_mean, 3)),
                       " (", sprintf("%0.3f",round(conc_lb, 3)), ", ", sprintf("%0.3f",round(conc_ub, 3)), ")"),
         hr_ci = case_when(!is.na(hr) ~ paste0(sprintf("%0.3f",round(hr, 3)),
                       " (", sprintf("%0.3f",round(lb, 3)), ", ", sprintf("%0.3f",round(ub, 3)), ")"),
                       TRUE ~ "---"),
         p.value = format.pval(p.value, digits = 3)) %>% 
  select(model_fac, hr_ci,p.value, conc_ci) %>% 
  rename(Model = model_fac,
         "Steps HR" = hr_ci,
         "Steps p-value" = p.value,
         "Model concordance" = conc_ci) %>% 
  gt::gt() %>%
    # gt::fmt_number(columns = starts_with("mean"), decimals = 0) %>% 
    tab_header(title = "Multivariable Model Comparison",
               subtitle = "Male") %>%
    cols_align(columns = everything(), align = "left") %>% 
  data.frame() %>%
  kableExtra::kbl(format = "latex")
Code
model_steps_vs = 
  df_mortality_win %>% 
  filter(gender == "Male") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ total_vsrevsteps + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .)

model_steps_vs %>% 
  tidy() %>% 
  select(term, estimate, robust.se, p.value) %>% 
  arrange(p.value) %>% 
  mutate(sig = case_when(
           p.value < 0.001 ~ "***",
           p.value < 0.01 ~ "**", 
           p.value < 0.05 ~ "*",
           p.value < 0.1 ~ ".",
           TRUE ~ ""),
         p.value = style_pvalue(p.value, digits = 2)) %>% 
  
  gt() %>% 
  tab_style(
    style = list(
      cell_fill(color = "lightcyan"),
      cell_text(weight = "bold")
      ),
    locations = cells_body(
      columns = everything(),
      rows = grepl("mean", term)
    )
  ) %>% 
  fmt_number(columns = c(estimate, robust.se), n_sigfig = 2) 

model_steps_mims = 
  df_mortality_win %>% 
  filter(gender == "Male") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ total_PAXMTSM + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) 

model_steps_mims %>% 
  tidy() %>% 
  select(term, estimate, robust.se, p.value) %>% 
  arrange(p.value) %>% 
  mutate(sig = case_when(
           p.value < 0.001 ~ "***",
           p.value < 0.01 ~ "**", 
           p.value < 0.05 ~ "*",
           p.value < 0.1 ~ ".",
           TRUE ~ ""),
         p.value = style_pvalue(p.value, digits = 2)) %>% 
  gt() %>% 
  tab_style(
    style = list(
      cell_fill(color = "lightcyan"),
      cell_text(weight = "bold")
      ),
    locations = cells_body(
      columns = everything(),
      rows = grepl("mean", term)
    )
  ) %>% 
  fmt_number(columns = c(estimate, robust.se), n_sigfig = 2) 

model_steps_vs_mims = 
  df_mortality_win %>% 
  filter(gender == "Male") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ total_vsrevsteps + total_PAXMTSM + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) 

model_steps_vs_mims %>% 
  tidy() %>% 
  select(term, estimate, robust.se, p.value) %>% 
  arrange(p.value) %>% 
  mutate(sig = case_when(
           p.value < 0.001 ~ "***",
           p.value < 0.01 ~ "**", 
           p.value < 0.05 ~ "*",
           p.value < 0.1 ~ ".",
           TRUE ~ ""),
         p.value = style_pvalue(p.value, digits = 2)) %>% 
  gt() %>% 
  tab_style(
    style = list(
      cell_fill(color = "lightcyan"),
      cell_text(weight = "bold")
      ),
    locations = cells_body(
      columns = everything(),
      rows = grepl("mean", term)
    )
  ) %>% 
  fmt_number(columns = c(estimate, robust.se), n_sigfig = 2) 

model_steps_vs_ac = 
  df_mortality_win %>% 
  filter(gender == "Male") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ total_vsrevsteps + total_AC + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) 

model_steps_vs_ac %>% 
  tidy() %>% 
  select(term, estimate, robust.se, p.value) %>% 
  arrange(p.value) %>% 
  mutate(sig = case_when(
           p.value < 0.001 ~ "***",
           p.value < 0.01 ~ "**", 
           p.value < 0.05 ~ "*",
           p.value < 0.1 ~ ".",
           TRUE ~ ""),
         p.value = style_pvalue(p.value, digits = 2)) %>% 
  gt() %>% 
  tab_style(
    style = list(
      cell_fill(color = "lightcyan"),
      cell_text(weight = "bold")
      ),
    locations = cells_body(
      columns = everything(),
      rows = grepl("mean", term)
    )
  ) %>% 
  fmt_number(columns = c(estimate, robust.se), n_sigfig = 2) 
Code
model_steps_sc = 
  df_mortality_win %>% 
  filter(gender == "Female") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ total_scrfsteps + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .)

model_steps_sc %>% 
  tidy() %>% 
  select(term, estimate, robust.se, p.value) %>% 
  arrange(p.value) %>% 
  mutate(sig = case_when(
           p.value < 0.001 ~ "***",
           p.value < 0.01 ~ "**", 
           p.value < 0.05 ~ "*",
           p.value < 0.1 ~ ".",
           TRUE ~ ""),
         p.value = style_pvalue(p.value, digits = 2)) %>% 
  
  gt() %>% 
  tab_style(
    style = list(
      cell_fill(color = "lightcyan"),
      cell_text(weight = "bold")
      ),
    locations = cells_body(
      columns = everything(),
      rows = grepl("mean", term)
    )
  ) %>% 
  fmt_number(columns = c(estimate, robust.se), n_sigfig = 2) 

model_steps_mims = 
  df_mortality_win %>% 
  filter(gender == "Female") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ total_PAXMTSM + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) 

model_steps_mims %>% 
  tidy() %>% 
  select(term, estimate, robust.se, p.value) %>% 
  arrange(p.value) %>% 
  mutate(sig = case_when(
           p.value < 0.001 ~ "***",
           p.value < 0.01 ~ "**", 
           p.value < 0.05 ~ "*",
           p.value < 0.1 ~ ".",
           TRUE ~ ""),
         p.value = style_pvalue(p.value, digits = 2)) %>% 
  gt() %>% 
  tab_style(
    style = list(
      cell_fill(color = "lightcyan"),
      cell_text(weight = "bold")
      ),
    locations = cells_body(
      columns = everything(),
      rows = grepl("mean", term)
    )
  ) %>% 
  fmt_number(columns = c(estimate, robust.se), n_sigfig = 2) 

model_steps_sc_mims = 
  df_mortality_win %>% 
  filter(gender == "Female") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ total_scrfsteps + total_PAXMTSM + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) 

model_steps_sc_mims %>% 
  tidy() %>% 
  select(term, estimate, robust.se, p.value) %>% 
  arrange(p.value) %>% 
  mutate(sig = case_when(
           p.value < 0.001 ~ "***",
           p.value < 0.01 ~ "**", 
           p.value < 0.05 ~ "*",
           p.value < 0.1 ~ ".",
           TRUE ~ ""),
         p.value = style_pvalue(p.value, digits = 2)) %>% 
  gt() %>% 
  tab_style(
    style = list(
      cell_fill(color = "lightcyan"),
      cell_text(weight = "bold")
      ),
    locations = cells_body(
      columns = everything(),
      rows = grepl("mean", term)
    )
  ) %>% 
  fmt_number(columns = c(estimate, robust.se), n_sigfig = 2) 

model_steps_sc_ac = 
  df_mortality_win %>% 
  filter(gender == "Female") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ total_scrfsteps + total_AC + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) 

model_steps_sc_ac %>% 
  tidy() %>% 
  select(term, estimate, robust.se, p.value) %>% 
  arrange(p.value) %>% 
  mutate(sig = case_when(
           p.value < 0.001 ~ "***",
           p.value < 0.01 ~ "**", 
           p.value < 0.05 ~ "*",
           p.value < 0.1 ~ ".",
           TRUE ~ ""),
         p.value = style_pvalue(p.value, digits = 2)) %>% 
  gt() %>% 
  tab_style(
    style = list(
      cell_fill(color = "lightcyan"),
      cell_text(weight = "bold")
      ),
    locations = cells_body(
      columns = everything(),
      rows = grepl("mean", term)
    )
  ) %>% 
  fmt_number(columns = c(estimate, robust.se), n_sigfig = 2) 

With step cadence

Code
wt_female_cad = readRDS(here::here("results", "metrics_wtd_100_female_cadence.rds"))


wt_female_small = 
  wt_female_cad %>% 
  filter(model %in% c("scrfsteps", "scrfsteps+peak1_scrfsteps",
                      "peak1_scrfsteps"))

summary = 
  wt_female_small %>% 
  group_by(model) %>% 
  summarize(across(concordance, list(mean = ~mean(.x),
                                       se = ~sd(.x)/sqrt(n())))) %>% 
  mutate(model_fac = factor(model, labels = c("Demographics + Peak1 Cadence", 
                                                "Demographics + Total Steps", 
                                                "Demographics + Steps + Peak1 Cadence")))

coef_steps = 
  df_mortality_win %>% 
  filter(gender == "Female") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ total_scrfsteps + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) %>% 
  tidy() %>% 
  select(term, estimate, std.error, robust.se, p.value) %>% 
  filter(grepl("steps", term)) %>% 
  mutate(hr = exp(estimate * 500), 
         lb = exp((estimate - 1.96 * std.error) * 500),
         ub = exp((estimate + 1.96 * std.error) * 500),
         step_ci = paste0(sprintf("%0.3f",round(hr, 3)),
                       " (", sprintf("%0.3f",round(lb, 3)), ", ", sprintf("%0.3f",round(ub, 3)), ")"), model = "scrfsteps") %>% 
  rename(step_p = p.value) %>% 
  select(model, step_ci, step_p)

  
coef_cad = 
  df_mortality_win %>% 
  filter(gender == "Female") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ total_scrfsteps + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) %>% 
  tidy() %>% 
  select(term, estimate, std.error, robust.se, p.value) %>% 
  filter(grepl("steps", term)) %>% 
  mutate(hr = exp(estimate *10), 
         lb = exp((estimate - 1.96 * std.error)*10 ),
         ub = exp((estimate + 1.96 * std.error)*10),
         cad_ci = paste0(sprintf("%0.3f",round(hr, 3)),
                       " (", sprintf("%0.3f",round(lb, 3)), ", ", sprintf("%0.3f",round(ub, 3)), ")"), model = "peak1_scrfsteps") %>% 
  rename(cad_p = p.value) %>% 
  select(model, cad_ci, cad_p)


coef_both = 
  df_mortality_win %>% 
  filter(gender == "Female") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ peak1_scrfsteps + total_scrfsteps +
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) %>% 
  tidy() %>% 
  select(term, estimate, std.error, robust.se, p.value) %>% 
  filter(grepl("steps", term))
  
coef_cad_steps = 
  coef_both %>% 
  filter(grepl("peak", term)) %>% 
  mutate(hr = exp(estimate*10), 
         lb = exp((estimate - 1.96 * std.error)*10 ),
         ub = exp((estimate + 1.96 * std.error)*10),
         cad_ci = paste0(sprintf("%0.3f",round(hr, 3)),
                       " (", sprintf("%0.3f",round(lb, 3)), ", ", sprintf("%0.3f",round(ub, 3)), ")"), model = "scrfsteps+peak1_scrfsteps") %>% 
  rename(cad_p = p.value)

coef_cad_steps2 = 
  coef_both %>% 
  filter(!grepl("peak", term)) %>% 
  mutate(hr = exp(estimate *500), 
         lb = exp((estimate - 1.96 * std.error) *500),
         ub = exp((estimate + 1.96 * std.error)*500 ),
         step_ci = paste0(sprintf("%0.3f",round(hr, 3)),
                       " (", sprintf("%0.3f",round(lb, 3)), ", ", sprintf("%0.3f",round(ub, 3)), ")"), model = "scrfsteps+peak1_scrfsteps") %>% 
  rename(step_p = p.value)

coef2 = coef_cad_steps %>% 
  left_join(coef_cad_steps2, by = "model") %>% 
  select(model, cad_ci, cad_p, step_ci, step_p) %>% 
  bind_rows(coef_steps) %>% 
  bind_rows(coef_cad)


summary %>% 
  left_join(coef2)  %>% 
  mutate(conc_lb = concordance_mean - 1.96 * concordance_se,
         conc_ub = concordance_mean + 1.96 * concordance_se,
         conc_ci = paste0(sprintf("%0.3f",round(concordance_mean, 3)),
                       " (", sprintf("%0.3f",round(conc_lb, 3)), ", ", sprintf("%0.3f",round(conc_ub, 3)), ")"),
         across(contains("_p"), ~format.pval(.x, digits = 3))) %>% 
  select(model_fac, step_ci, step_p, cad_ci, cad_p, conc_ci)   %>% 
  rename(Model = model_fac,
         "Steps HR" = step_ci,
         "Steps p-value" = step_p,
         "Cadence HR" = cad_ci,
         "Cadence p-value" = cad_p,
         "Model concordance" = conc_ci) %>% 
  gt::gt() %>%
    # gt::fmt_number(columns = starts_with("mean"), decimals = 0) %>% 
    tab_header(title = "Comparison of Addition of Cadence Variables",
               subtitle = "Female") %>%
    cols_align(columns = everything(), align = "left")
Code
wt_male_cad = readRDS(here::here("results", "metrics_wtd_100_male_cadence.rds"))
make_multivar_tab(wt_male_cad, subt = "Male")

model_steps_sc_cad = 
  df_mortality_win %>% 
  filter(gender == "Male") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ total_vsrevsteps + peak1_vsrevsteps + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) 

model_steps_sc_cad %>% 
  tidy() %>% 
  select(term, estimate, robust.se, p.value) %>% 
  arrange(p.value) %>% 
  mutate(sig = case_when(
           p.value < 0.001 ~ "***",
           p.value < 0.01 ~ "**", 
           p.value < 0.05 ~ "*",
           p.value < 0.1 ~ ".",
           TRUE ~ ""),
         p.value = style_pvalue(p.value, digits = 2)) %>% 
  gt() %>% 
  tab_style(
    style = list(
      cell_fill(color = "lightcyan"),
      cell_text(weight = "bold")
      ),
    locations = cells_body(
      columns = everything(),
      rows = grepl("revised", term)
    )
  ) %>% 
  fmt_number(columns = c(estimate, robust.se), n_sigfig = 2) 

model_steps_sc_cad = 
  df_mortality_win %>% 
  filter(gender == "Male") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ total_vsrevsteps + peak30_vsrevsteps +
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) 


model_steps_sc_cad %>% 
  tidy() %>% 
  select(term, estimate, robust.se, p.value) %>% 
  arrange(p.value) %>% 
  mutate(sig = case_when(
           p.value < 0.001 ~ "***",
           p.value < 0.01 ~ "**", 
           p.value < 0.05 ~ "*",
           p.value < 0.1 ~ ".",
           TRUE ~ ""),
         p.value = style_pvalue(p.value, digits = 2)) %>% 
  gt() %>% 
  tab_style(
    style = list(
      cell_fill(color = "lightcyan"),
      cell_text(weight = "bold")
      ),
    locations = cells_body(
      columns = everything(),
      rows = grepl("revised", term)
    )
  ) %>% 
  fmt_number(columns = c(estimate, robust.se), n_sigfig = 2) 
Code
wt_female_cad = readRDS(here::here("results", "metrics_wtd_100_female_cadence.rds"))
make_multivar_tab(wt_female_cad, subt = "Female")

model_steps_sc_cad = 
  df_mortality_win %>% 
  filter(gender == "Female") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ total_scrfsteps + peak1_scrfsteps + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) 

model_steps_sc_cad %>% 
  tidy() %>% 
  select(term, estimate, robust.se, p.value) %>% 
  arrange(p.value) %>% 
  mutate(sig = case_when(
           p.value < 0.001 ~ "***",
           p.value < 0.01 ~ "**", 
           p.value < 0.05 ~ "*",
           p.value < 0.1 ~ ".",
           TRUE ~ ""),
         p.value = style_pvalue(p.value, digits = 2)) %>% 
  gt() %>% 
  tab_style(
    style = list(
      cell_fill(color = "lightcyan"),
      cell_text(weight = "bold")
      ),
    locations = cells_body(
      columns = everything(),
      rows = grepl("scrf", term)
    )
  ) %>% 
  fmt_number(columns = c(estimate, robust.se), n_sigfig = 2) 

model_steps_sc_cad = 
  df_mortality_win %>% 
  filter(gender == "Female") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ total_scrfsteps + peak30_scrfsteps + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) 


model_steps_sc_cad %>% 
  tidy() %>% 
  select(term, estimate, robust.se, p.value) %>% 
  arrange(p.value) %>% 
  mutate(sig = case_when(
           p.value < 0.001 ~ "***",
           p.value < 0.01 ~ "**", 
           p.value < 0.05 ~ "*",
           p.value < 0.1 ~ ".",
           TRUE ~ ""),
         p.value = style_pvalue(p.value, digits = 2)) %>% 
  gt() %>% 
  tab_style(
    style = list(
      cell_fill(color = "lightcyan"),
      cell_text(weight = "bold")
      ),
    locations = cells_body(
      columns = everything(),
      rows = grepl("scrf", term)
    )
  ) %>% 
  fmt_number(columns = c(estimate, robust.se), n_sigfig = 2) 

Sensivity analysis

Use anyone with at least one valid day, compute results

Single variable

Code
sens_single = readRDS(here::here("results", "metrics_wtd_100_singlevar_sens.rds"))  %>% 
  filter(!grepl("peak", variable))


df_mortality_sens =
  df_all %>%
  filter(num_valid_days > 0) %>%
  filter(age_in_years_at_screening >= 50 &
           age_in_years_at_screening < 80)  %>% 
  filter(if_all(.cols = c(age_in_years_at_screening, gender,
                          race_hispanic_origin, cat_education,
                          cat_bmi, chd, chf, heartattack, stroke, cancer,
                          bin_diabetes, cat_alcohol, cat_smoke, bin_mobilityproblem,
                          general_health_condition, mortstat, permth_exm, total_PAXMTSM),
                ~!is.na(.x))) %>%
  mutate(event_time = permth_exm / 12) %>% 
  ungroup() %>%
  mutate(across(c(contains("total"), contains("peak")), ~DescTools::Winsorize(.x, quantile(.x, probs = c(0, 0.99))))) 

data_summary = sens_single %>%
  group_by(variable) %>%
  mutate(ind = row_number()) %>%
  summarize(across(concordance, list(
    mean = ~ mean(.x), se = ~ sd(.x) / sqrt(n())
  ))) %>%
  mutate(variable_fac = factor(
    variable
    # labels = vlabs
  ))

df_means =
  df_mortality_sens %>%
  group_by(mortstat) %>%
  summarize(across(c(contains("total"), contains("peak")), ~ mean(.x))) %>%
  pivot_longer(cols = -mortstat) %>%
  pivot_wider(names_from = mortstat,
              values_from = value,
              names_prefix = 'died_') %>%
  rename(variable = name)

# get the best variable based on concodance
best_var = data_summary %>%
  arrange(desc(concordance_mean)) %>%
  slice(1) %>%
  pull(variable)

best_var_vec = sens_single %>%
  filter(variable == best_var) %>%
  pull(concordance)

t_tests = sens_single %>%
  group_by(variable) %>%
  filter(variable != best_var) %>%
  nest() %>%
  mutate(t_test = map(
    data,
    ~ t.test(
      .x$concordance,
      best_var_vec,
      var.eq = FALSE,
      paired = FALSE,
      alternative = "less"
    )
  ),
  res = map(t_test, tidy)) %>%
  unnest(res) %>%
  ungroup() %>%
  select(variable, p.value)

# join t tests with summary data and make table
data_summary %>%
  left_join(t_tests) %>%
  left_join(df_means) %>%
  arrange(desc(concordance_mean)) %>%
  mutate(
    variable_fac = forcats::fct_reorder(variable_fac, concordance_mean),
    lb = concordance_mean - 1.96 * concordance_se,
    ub = concordance_mean + 1.96 * concordance_se,
    ci = paste0(
      sprintf("%0.3f", round(concordance_mean, 3)),
      " (",
      sprintf("%0.3f", round(lb, 3)),
      ", ",
      sprintf("%0.3f", round(ub, 3)),
      ")"
    )
  ) %>%
  select(variable_fac,
         ci,
         p.value,
         total_alive = died_0,
         total_deceased = died_1) %>%
  mutate(pvalue = style_pvalue(p.value, digits = 3)) %>%
  mutate(pvalue = ifelse(is.na(pvalue), "---", pvalue),
         across(starts_with("mean"), ~ if_else(is.na(.x), "---", as.character(round(
           .x, 0
         ))))) %>%
  rename(
    Variable = variable_fac,
    "Mean (95% CI)" = ci,
    "p-value" = pvalue,
    "Mean value among alive" = total_alive,
    "Mean value among deceased" = total_deceased
  ) %>%
  gt::gt() %>%
  gt::fmt_number(columns = starts_with("mean"), decimals = 0) %>%
  gt::tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_body(columns = everything(), rows = p.value > 0.05 |
                             is.na(p.value))
  ) %>%
  gt::cols_hide(p.value) %>%
  tab_header(title = "100x 10-fold Cross-Validated Single Variable Concordance") %>%
  tab_footnote(footnote = "p-value for one sided t-test comparing variable to variable with higest mean concordance",
               locations = cells_column_labels(columns = "p-value")) %>%
  cols_align(columns = everything(), align = "left")
100x 10-fold Cross-Validated Single Variable Concordance
Variable Mean (95% CI) Mean value among alive Mean value among deceased p-value1
total_scrfsteps 0.732 (0.728, 0.736) 10,591 6,502 ---
total_adeptsteps 0.724 (0.720, 0.728) 2,391 1,380 0.002
total_oaksteps 0.723 (0.720, 0.727) 10,845 7,067 <0.001
total_vsrevsteps 0.719 (0.715, 0.723) 8,021 4,878 <0.001
total_vssteps 0.716 (0.712, 0.720) 8,885 5,885 <0.001
total_actisteps 0.711 (0.707, 0.714) 11,790 8,740 <0.001
total_scsslsteps 0.703 (0.699, 0.707) 8,786 5,784 <0.001
total_AC 0.689 (0.685, 0.692) 2,454,243 1,900,197 <0.001
total_PAXMTSM 0.683 (0.680, 0.687) 13,002 10,438 <0.001
bin_mobilityproblem 0.675 (0.672, 0.678) NA NA <0.001
age_in_years_at_screening 0.672 (0.668, 0.675) NA NA <0.001
general_health_condition 0.664 (0.661, 0.668) NA NA <0.001
total_log10PAXMTSM 0.650 (0.646, 0.654) 987 884 <0.001
total_log10AC 0.633 (0.629, 0.637) 2,939 2,708 <0.001
bin_diabetes 0.591 (0.588, 0.594) NA NA <0.001
cat_smoke 0.587 (0.584, 0.591) NA NA <0.001
cat_education 0.574 (0.571, 0.577) NA NA <0.001
chd 0.551 (0.549, 0.553) NA NA <0.001
cat_alcohol 0.550 (0.546, 0.554) NA NA <0.001
chf 0.549 (0.547, 0.551) NA NA <0.001
gender 0.546 (0.542, 0.549) NA NA <0.001
heartattack 0.546 (0.544, 0.547) NA NA <0.001
cancer 0.538 (0.536, 0.541) NA NA <0.001
stroke 0.532 (0.530, 0.533) NA NA <0.001
race_hispanic_origin 0.526 (0.525, 0.528) NA NA <0.001
cat_bmi 0.519 (0.515, 0.522) NA NA <0.001
1 p-value for one sided t-test comparing variable to variable with higest mean concordance
Code
sens_male_single = readRDS(here::here("results", "metrics_wtd_100_singlevar_male_sens.rds"))  %>% 
  filter(!grepl("peak", variable))
sens_female_single = readRDS(here::here("results", "metrics_wtd_100_singlevar_female_sens.rds")) %>% 
  filter(!grepl("peak", variable))

df_mortality_sens =
  df_all %>%
  filter(num_valid_days > 0) %>%
  filter(age_in_years_at_screening >= 50 &
           age_in_years_at_screening < 80)  %>% 
  filter(if_all(.cols = c(age_in_years_at_screening, gender,
                          race_hispanic_origin, cat_education,
                          cat_bmi, chd, chf, heartattack, stroke, cancer,
                          bin_diabetes, cat_alcohol, cat_smoke, bin_mobilityproblem,
                          general_health_condition, mortstat, permth_exm, total_PAXMTSM),
                ~!is.na(.x))) %>%
  mutate(event_time = permth_exm / 12) %>% 
  ungroup() %>%
  mutate(across(c(contains("total"), contains("peak")), ~DescTools::Winsorize(.x, quantile(.x, probs = c(0, 0.99))))) 

data_summary = sens_male_single %>%
  group_by(variable) %>%
  mutate(ind = row_number()) %>%
  summarize(across(concordance, list(
    mean = ~ mean(.x), se = ~ sd(.x) / sqrt(n())
  ))) %>%
  mutate(variable_fac = factor(
    variable
    # labels = vlabs
  ))

df_means =
  df_mortality_sens %>%
  filter(gender == "Male") %>%
  group_by(mortstat) %>%
  summarize(across(c(contains("total"), contains("peak")), ~ mean(.x))) %>%
  pivot_longer(cols = -mortstat) %>%
  pivot_wider(names_from = mortstat,
              values_from = value,
              names_prefix = 'died_') %>%
  rename(variable = name)

# get the best variable based on concodance
best_var = data_summary %>%
  arrange(desc(concordance_mean)) %>%
  slice(1) %>%
  pull(variable)

best_var_vec = sens_male_single %>%
  filter(variable == best_var) %>%
  pull(concordance)

t_tests = sens_male_single %>%
  group_by(variable) %>%
  filter(variable != best_var) %>%
  nest() %>%
  mutate(t_test = map(
    data,
    ~ t.test(
      .x$concordance,
      best_var_vec,
      var.eq = FALSE,
      paired = FALSE,
      alternative = "less"
    )
  ),
  res = map(t_test, tidy)) %>%
  unnest(res) %>%
  ungroup() %>%
  select(variable, p.value)

# join t tests with summary data and make table
data_summary %>%
  left_join(t_tests) %>%
  left_join(df_means) %>%
  arrange(desc(concordance_mean)) %>%
  mutate(
    variable_fac = forcats::fct_reorder(variable_fac, concordance_mean),
    lb = concordance_mean - 1.96 * concordance_se,
    ub = concordance_mean + 1.96 * concordance_se,
    ci = paste0(
      sprintf("%0.3f", round(concordance_mean, 3)),
      " (",
      sprintf("%0.3f", round(lb, 3)),
      ", ",
      sprintf("%0.3f", round(ub, 3)),
      ")"
    )
  ) %>%
  select(variable_fac,
         ci,
         p.value,
         total_alive = died_0,
         total_deceased = died_1) %>%
  mutate(pvalue = style_pvalue(p.value, digits = 3)) %>%
  mutate(pvalue = ifelse(is.na(pvalue), "---", pvalue),
         across(starts_with("mean"), ~ if_else(is.na(.x), "---", as.character(round(
           .x, 0
         ))))) %>%
  rename(
    Variable = variable_fac,
    "Mean (95% CI)" = ci,
    "p-value" = pvalue,
    "Mean value among alive" = total_alive,
    "Mean value among deceased" = total_deceased
  ) %>%
  gt::gt() %>%
  gt::fmt_number(columns = starts_with("mean"), decimals = 0) %>%
  gt::tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_body(columns = everything(), rows = p.value > 0.05 |
                             is.na(p.value))
  ) %>%
  gt::cols_hide(p.value) %>%
  tab_header(title = "100x 10-fold Cross-Validated Single Variable Concordance", subtitle = "Male") %>%
  tab_footnote(footnote = "p-value for one sided t-test comparing variable to variable with higest mean concordance",
               locations = cells_column_labels(columns = "p-value")) %>%
  cols_align(columns = everything(), align = "left")




data_summary = sens_female_single %>%
  group_by(variable) %>%
  summarize(across(concordance, list(
    mean = ~ mean(.x), se = ~ sd(.x) / sqrt(n())
  ))) %>%
  mutate(variable_fac = factor(
    variable
    # labels = vlabs
    ))

df_means =
  df_mortality_sens %>%
  filter(gender == "Female") %>%
  group_by(mortstat) %>%
  summarize(across(c(contains("total"), contains("peak")), ~ mean(.x))) %>%
  pivot_longer(cols = -mortstat) %>%
  pivot_wider(names_from = mortstat,
              values_from = value,
              names_prefix = 'died_') %>%
  rename(variable = name)

# get the best variable based on concodance
best_var = data_summary %>%
  arrange(desc(concordance_mean)) %>%
  slice(1) %>%
  pull(variable)

best_var_vec = sens_female_single %>%
  filter(variable == best_var) %>%
  pull(concordance)

t_tests = sens_female_single%>%
  group_by(variable) %>%
  filter(variable != best_var) %>%
  nest() %>%
  mutate(t_test = map(
    data,
    ~ t.test(
      .x$concordance,
      best_var_vec,
      var.eq = FALSE,
      paired = FALSE,
      alternative = "less"
    )
  ),
  res = map(t_test, tidy)) %>%
  unnest(res) %>%
  ungroup() %>%
  select(variable, p.value)

# join t tests with summary data and make table
data_summary %>%
  left_join(t_tests) %>%
  left_join(df_means) %>%
  arrange(desc(concordance_mean)) %>%
  mutate(
    variable_fac = forcats::fct_reorder(variable_fac, concordance_mean),
    lb = concordance_mean - 1.96 * concordance_se,
    ub = concordance_mean + 1.96 * concordance_se,
    ci = paste0(
      sprintf("%0.3f", round(concordance_mean, 3)),
      " (",
      sprintf("%0.3f", round(lb, 3)),
      ", ",
      sprintf("%0.3f", round(ub, 3)),
      ")"
    )
  ) %>%
  select(variable_fac,
         ci,
         p.value,
         total_alive = died_0,
         total_deceased = died_1) %>%
  mutate(pvalue = style_pvalue(p.value, digits = 3)) %>%
  mutate(pvalue = ifelse(is.na(pvalue), "---", pvalue),
         across(starts_with("mean"), ~ if_else(is.na(.x), "---", as.character(round(
           .x, 0
         ))))) %>%
  rename(
    Variable = variable_fac,
    "Mean (95% CI)" = ci,
    "p-value" = pvalue,
    "Mean value among alive" = total_alive,
    "Mean value among deceased" = total_deceased
  ) %>%
  gt::gt() %>%
  gt::fmt_number(columns = starts_with("mean"), decimals = 0) %>%
  gt::tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_body(columns = everything(), rows = p.value > 0.05 |
                             is.na(p.value))
  ) %>%
  gt::cols_hide(p.value) %>%
  tab_header(title = "100x 10-fold Cross-Validated Single Variable Concordance", subtitle = "Female") %>%
  tab_footnote(footnote = "p-value for one sided t-test comparing variable to variable with higest mean concordance",
               locations = cells_column_labels(columns = "p-value")) %>%
  cols_align(columns = everything(), align = "left")

Multivariable

Code
sens_all = readRDS(here::here("results", "metrics_wtd_100_sens.rds"))
wt_small = 
  sens_all %>% 
  filter(model %in% c("Demo only", "scrfsteps+PAXMTSM", "scrfsteps", "PAXMTSM"))

summary = 
  wt_small %>% 
  group_by(model) %>% 
  summarize(across(concordance, list(mean = ~mean(.x),
                                       se = ~sd(.x)/sqrt(n())))) %>% 
  mutate(model_fac = factor(model, labels = c("Demographics only",
                                              "Demographics + MIMS", 
                                              "Demographics + Stepcount RF",
                                              "Demographics + Stepcount RF + MIMS")))

coef_scrf = 
  df_mortality_sens %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ total_scrfsteps + gender + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) %>% 
  tidy() %>% 
  select(term, estimate, std.error, robust.se, p.value) %>% 
  filter(grepl("steps", term)) %>% 
  mutate(hr = exp(estimate * 500), 
         lb = exp((estimate - 1.96 * std.error) * 500),
         ub = exp((estimate + 1.96 * std.error) * 500),
         model = "scrfsteps")
  
coef_mims = 
  df_mortality_sens %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ total_PAXMTSM + total_scrfsteps + gender + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) %>% 
  tidy() %>% 
  select(term, estimate, std.error, robust.se, p.value) %>% 
  filter(grepl("steps", term)) %>% 
  mutate(hr = exp(estimate * 500), 
         lb = exp((estimate - 1.96 * std.error) * 500),
         ub = exp((estimate + 1.96 * std.error) * 500),
         model = "scrfsteps+PAXMTSM")
  
coefs = bind_rows(coef_scrf, coef_mims)
summary %>% 
  left_join(coefs)  %>% 
  mutate(conc_lb = concordance_mean - 1.96 * concordance_se,
         conc_ub = concordance_mean + 1.96 * concordance_se,
         conc_ci = paste0(sprintf("%0.3f",round(concordance_mean, 3)),
                       " (", sprintf("%0.3f",round(conc_lb, 3)), ", ", sprintf("%0.3f",round(conc_ub, 3)), ")"),
         hr_ci = case_when(!is.na(hr) ~ paste0(sprintf("%0.3f",round(hr, 3)),
                       " (", sprintf("%0.3f",round(lb, 3)), ", ", sprintf("%0.3f",round(ub, 3)), ")"),
                       TRUE ~ "---"),
         p.value = format.pval(p.value, digits = 3)) %>% 
  select(model_fac, hr_ci,p.value, conc_ci) %>% 
  rename(Model = model_fac,
         "Steps HR" = hr_ci,
         "Steps p-value" = p.value,
         "Model concordance" = conc_ci) %>% 
  gt::gt() %>%
    # gt::fmt_number(columns = starts_with("mean"), decimals = 0) %>% 
    tab_header(title = "Multivariable Model Comparison") %>%
    cols_align(columns = everything(), align = "left")
Multivariable Model Comparison
Model Steps HR Steps p-value Model concordance
Demographics only --- NA 0.768 (0.765, 0.771)
Demographics + MIMS --- NA 0.773 (0.770, 0.776)
Demographics + Stepcount RF 0.952 (0.937, 0.967) 1.94e-05 0.776 (0.773, 0.779)
Demographics + Stepcount RF + MIMS 0.943 (0.920, 0.966) 0.00218 0.775 (0.771, 0.778)
Code
tb = 
  summary %>% 
  left_join(coefs)  %>% 
  mutate(conc_lb = concordance_mean - 1.96 * concordance_se,
         conc_ub = concordance_mean + 1.96 * concordance_se,
         conc_ci = paste0(sprintf("%0.3f",round(concordance_mean, 3)),
                       " (", sprintf("%0.3f",round(conc_lb, 3)), ", ", sprintf("%0.3f",round(conc_ub, 3)), ")"),
         hr_ci = case_when(!is.na(hr) ~ paste0(sprintf("%0.3f",round(hr, 3)),
                       " (", sprintf("%0.3f",round(lb, 3)), ", ", sprintf("%0.3f",round(ub, 3)), ")"),
                       TRUE ~ "---"),
         p.value = format.pval(p.value, digits = 3)) %>% 
  select(model_fac, hr_ci,p.value, conc_ci) %>% 
  rename(Model = model_fac,
         "Steps HR" = hr_ci,
         "Steps p-value" = p.value,
         "Model concordance" = conc_ci) %>% 
  gt::gt() %>%
    # gt::fmt_number(columns = starts_with("mean"), decimals = 0) %>% 
    tab_header(title = "Multivariable Model Comparison",
               subtitle = "Female") %>%
    cols_align(columns = everything(), align = "left") %>% 
  data.frame() %>% 
  kableExtra::kbl(format = "latex")
Code
sens_male = readRDS(here::here("results", "metrics_wtd_100_male_sens.rds"))  
sens_female = readRDS(here::here("results", "metrics_wtd_100_female_sens.rds"))  

sens_male %>%
  group_by(model) %>%
  summarize(across(concordance, list(
    mean = ~ mean(.x), se = ~ sd(.x) / sqrt(n())
  ))) %>%
  arrange(desc(concordance_mean))

sens_female %>%
  group_by(model) %>%
  summarize(across(concordance, list(
    mean = ~ mean(.x), se = ~ sd(.x) / sqrt(n())
  ))) %>%
  arrange(desc(concordance_mean))