Data Download Vignette

The following code downloads the minute-level step counts and other minute-level data from NHANES 2011-2014 from our PhysioNet repository. It calls the `wget` command-line tool, which must be installed and on your PATH:

# Create the local folder for the minute-level files if it is not there yet.
minute_level_dir = here::here("data", "accelerometry", "minute_level")
if (!dir.exists(minute_level_dir)) {
  dir.create(minute_level_dir, recursive = TRUE)
}

# Download one minute-level NHANES file from PhysioNet unless it is already
# present locally.
#
# Arguments:
#   name: file stem placed between "nhanes_1440_" and ".csv.xz" (e.g. "AC").
#
# Returns (invisibly) the local path of the file. Signals an error if the
# `wget` call fails or the downloaded file cannot be moved into place.
download_file = function(name) {
  out_file =
    here::here("data",
               "accelerometry",
               "minute_level",
               paste0("nhanes_1440_", name, ".csv.xz"))
  if (file.exists(out_file)) {
    return(invisible(out_file))
  }

  # Keep the function usable on its own, even when the target directory has
  # not been created beforehand.
  dir.create(dirname(out_file), recursive = TRUE, showWarnings = FALSE)

  url = paste0(
    "https://physionet.org/files/minute-level-step-count-nhanes/1.0.0/nhanes_1440_",
    name,
    ".csv.xz"
  )

  # `wget -O` creates the output file even when the transfer fails. Download
  # to a temporary name first so that a broken download is never mistaken for
  # a finished one by the `file.exists()` check above on the next run.
  part_file = paste0(out_file, ".part")
  status = system(paste(
    "wget -q --show-progress -O",
    shQuote(part_file),
    shQuote(url)
  ),
  wait = TRUE)

  if (status != 0) {
    unlink(part_file)
    stop("Download of ", url, " failed (wget exit status ", status, ")",
         call. = FALSE)
  }
  if (!file.rename(part_file, out_file)) {
    unlink(part_file)
    stop("Could not move downloaded file to ", out_file, call. = FALSE)
  }

  invisible(out_file)
}

# Stems of the files to fetch; each one is handed to download_file() in turn.
file_names = c(
  "AC", "PAXFLGSM", "PAXPREDM",
  "actisteps", "adeptsteps",
  "log10AC", "log10PAXMTSM",
  "oaksteps", "scrfsteps", "scsslsteps",
  "vsrevsteps", "vssteps"
)
purrr::walk(file_names, download_file)

We can read in one file to get a sense of the structure. There are columns for ID (SEQN), day (PAXDAYM), weekday (PAXDAYWM), and each minute of the day (min_1 to min_1440). A missing value for a given minute indicates one of the following:

  1. data were missing for that minute (i.e., wear hadn’t started yet),
  2. NHANES gave a wear flag for that minute, or
  3. the NHANES prediction for that minute was nonwear.
# Read the "scsslsteps" file from the local minute-level folder.
scssl_path = here::here(
  "data", "accelerometry", "minute_level",
  "nhanes_1440_scsslsteps.csv.xz"
)
sc_steps = read_csv(scssl_path)

# Preview the first three columns plus a handful of the per-minute columns.
preview_minutes = paste0("min_", c(1, 100, 200, 300, 400, 600, 800, 1000))
sc_steps %>%
  select(SEQN, PAXDAYM, PAXDAYWM, all_of(preview_minutes)) %>%
  glimpse()
Rows: 130,186
Columns: 11
$ SEQN     <dbl> 73557, 73557, 73557, 73557, 73557, 73557, 73557, 73557, 73557…
$ PAXDAYM  <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3…
$ PAXDAYWM <dbl> 3, 4, 5, 6, 7, 1, 2, 3, 4, 2, 3, 4, 5, 6, 7, 1, 2, 3, 7, 1, 2…
$ min_1    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, 0, 0, 0, 0, 0, 0, …
$ min_100  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, 0, 0, 0, 0, 0, 0, …
$ min_200  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, 0, 0, 0, 0, 37, 0,…
$ min_300  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, 0, 0, 0, 0, 0, 0, …
$ min_400  <dbl> NA, NA, NA, NA, 63, NA, NA, NA, NA, NA, 0, 0, 0, 0, 0, 0, 0, …
$ min_600  <dbl> NA, 0, NA, NA, 0, 46, 0, NA, NA, NA, 0, 0, 0, 16, 0, 0, 0, NA…
$ min_800  <dbl> NA, 0, NA, NA, 0, 58, 0, 59, 0, NA, 0, 0, 0, 54, 79, 0, 0, NA…
$ min_1000 <dbl> 0, 0, NA, NA, NA, 0, 41, NA, NA, NA, 0, 59, 0, 0, 0, 0, 0, NA…