Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 106 additions & 15 deletions R/data.R
Original file line number Diff line number Diff line change
@@ -1,32 +1,43 @@
#' World Health Organization TB data
#'
#' @description
#' A subset of data from the World Health Organization Global Tuberculosis
#' Report, and accompanying global populations.
#' Report, and accompanying global populations. `who` uses the original
#' codes from the World Health Organization. The column names for columns
#' 5 through 60 are made by combining `new_` with:
#'
#' @format `who`: a data frame with 7,240 rows and the columns:
#' * the method of diagnosis (`rel` = relapse, `sn` = negative pulmonary
#' smear, `sp` = positive pulmonary smear, `ep` = extrapulmonary),
#' * gender (`f` = female, `m` = male), and
#' * age group (`014` = 0-14 yrs of age, `1524` = 15-24, `2534` = 25-34,
#' `3544` = 35-44 years of age, `4554` = 45-54, `5564` = 55-64,
#' `65` = 65 years or older).
#'
#' `who2` is a lightly modified version that makes teaching the basics
#' easier by tweaking the variables to be slightly more consistent and
#' dropping `iso2` and `iso3`. `newrel` is replaced by `new_rel`, and a
#' `_` is added after the gender.
#'
#' @format ## `who`
#' A data frame with 7,240 rows and 60 columns:
#' \describe{
#' \item{country}{Country name}
#' \item{iso2, iso3}{2 & 3 letter ISO country codes}
#' \item{year}{Year}
#' \item{new_sp_m014 - newrel_f65}{Counts of new TB cases recorded by group.
#' Column names encode three variables that describe the group (see details).}
#' Column names encode three variables that describe the group.}
#' }
#' @details The data uses the original codes given by the World Health
#' Organization. The column names for columns five through 60 are made by
#' combining `new_` to a code for method of diagnosis (`rel` =
#' relapse, `sn` = negative pulmonary smear, `sp` = positive
#' pulmonary smear, `ep` = extrapulmonary) to a code for gender
#' (`f` = female, `m` = male) to a code for age group (`014` =
#' 0-14 yrs of age, `1524` = 15-24 years of age, `2534` = 25 to
#' 34 years of age, `3544` = 35 to 44 years of age, `4554` = 45 to
#' 54 years of age, `5564` = 55 to 64 years of age, `65` = 65 years
#' of age or older).
#'
#' @source <https://www.who.int/teams/global-tuberculosis-programme/data>
"who"

#' @rdname who
#' @format `population`: a data frame with 4,060 rows and three columns:
#' @format ## `who2`
#' A data frame with 7,240 rows and 58 columns.
"who2"

#' @rdname who
#' @format ## `population`
#' A data frame with 4,060 rows and three columns:
#' \describe{
#' \item{country}{Country name}
#' \item{year}{Year}
Expand Down Expand Up @@ -173,3 +184,83 @@
#' The "Whitburn" project, <https://waxy.org/2008/05/the_whitburn_project/>,
#' (downloaded April 2008)
"billboard"


#' Household data
#'
#' This dataset is based on an example in
#' `vignette("datatable-reshape", package = "data.table")`
#'
#' @format A data frame with 5 rows and 5 columns:
#' \describe{
#'   \item{family}{Family identifier}
#'   \item{dob_child1}{Date of birth of first child}
#'   \item{dob_child2}{Date of birth of second child}
#'   \item{name_child1}{Name of first child}
#'   \item{name_child2}{Name of second child}
#' }
"household"

#' Data from the Centers for Medicare & Medicaid Services
#'
#' @description
#' Two datasets from public data provided by the Centers for Medicare &
#' Medicaid Services, <https://data.cms.gov>.
#'
#' * `cms_patient_experience` contains some lightly cleaned data from
#'   "Doctors and Clinicians Quality Payment Program PY 2020 Group Public
#'   Reporting: Patient Experience",
#'   <https://data.cms.gov/provider-data/dataset/8c70-d353>.
#'
#' * `cms_patient_care` contains some lightly cleaned data from
#'   "Hospice - Provider Data", which provides a list of hospice agencies
#'   along with some data on quality of patient care,
#'   <https://data.cms.gov/provider-data/dataset/252m-zfp9>.
#'
#' @examples
#' cms_patient_experience %>%
#'   dplyr::distinct(measure_cd, measure_title)
#'
#' cms_patient_experience %>%
#'   pivot_wider(
#'     id_cols = starts_with("org"),
#'     names_from = measure_cd,
#'     values_from = prf_rate
#'   )
#'
#' cms_patient_care %>%
#'   pivot_wider(
#'     names_from = type,
#'     values_from = score
#'   )
#'
#' cms_patient_care %>%
#'   pivot_wider(
#'     names_from = measure_abbr,
#'     values_from = score
#'   )
#'
#' cms_patient_care %>%
#'   pivot_wider(
#'     names_from = c(measure_abbr, type),
#'     values_from = score
#'   )
#' @format `cms_patient_experience` is a data frame with 500 observations and
#'   five variables:
#' \describe{
#'   \item{org_pac_id,org_nm}{Organisation ID and name}
#'   \item{measure_cd,measure_title}{Measure code and title}
#'   \item{prf_rate}{Measure performance rate}
#' }
"cms_patient_experience"

#' @format `cms_patient_care` is a data frame with 252 observations and
#'   five variables:
#' \describe{
#'   \item{ccn,facility_name}{Facility ID and name}
#'   \item{measure_abbr}{Abbreviated measurement title, suitable for use as a
#'     variable name}
#'   \item{score}{Measure score; `NA` where the source reports
#'     "Not Available"}
#'   \item{type}{Whether score refers to the rating out of 100 ("observed"), or
#'     the maximum possible value of the raw score ("denominator")}
#' }
#' @rdname cms_patient_experience
"cms_patient_care"
2 changes: 2 additions & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,10 @@ reference:
- title: Data
contents:
- billboard
- cms_patient_experience
- construction
- fish_encounters
- household
- relig_income
- smiths
- table1
Expand Down
61 changes: 61 additions & 0 deletions data-raw/cms.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
library(tidyverse)

# Source dataset: "Doctors and Clinicians Quality Payment Program PY 2020
# Group Public Reporting: Patient Experience"
# https://data.cms.gov/provider-data/dataset/8c70-d353

# JSON query endpoint for the dataset above
url <- "https://data.cms.gov/provider-data/api/1/datastore/query/8c70-d353/0?offset=0&count=true&results=true&schema=true&keys=true&format=json&rowIds=false"
json <- jsonlite::read_json(url)

# Each element of `json$results` becomes one row. Keep the organisation and
# measure identifiers plus the performance rate (converted to numeric), then
# sort by organisation and by measure code, using a numeric-aware collator so
# digits inside the codes are ranked as numbers.
cms_patient_experience <- map_df(json$results, as_tibble) |>
  select(org_pac_id, org_nm, measure_cd, measure_title, prf_rate) |>
  mutate(prf_rate = as.numeric(prf_rate)) |>
  arrange(
    org_pac_id,
    stringi::stri_rank(measure_cd, list(numeric = TRUE))
  )

# Keep a plain-text copy next to this script and refresh the packaged dataset
write_csv(cms_patient_experience, "data-raw/cms_patient_experience.csv")
usethis::use_data(cms_patient_experience, overwrite = TRUE)


# -------------------------------------------------------------------------

# Hospice - Provider Data
# A list of hospice agencies with data on the quality of patient care measures.
# https://data.cms.gov/provider-data/dataset/252m-zfp9

# Recommended by
# https://twitter.com/hunter_boost/status/1500212341463339008

# The query asks for the first 500 records only (`limit=500`).
url <- "https://data.cms.gov/provider-data/api/1/datastore/query/252m-zfp9/0?limit=500&offset=0&count=true&results=true&schema=true&keys=true&format=json&rowIds=false"
json <- jsonlite::read_json(url)

# Lookup table from the full measure name to a short abbreviation that is
# easier to use as a column name after pivoting.
abbr <- tribble(
  ~measure_name                                                 , ~measure_abbr,
  "Hospice and Palliative Care Treatment Preferences"           , "treat_pref",
  "Beliefs & Values Addressed (if desired by the patient)"      , "beliefs_addressed",
  "Hospice and Palliative Care Pain Screening"                  , "pain_screening",
  "Hospice and Palliative Care Pain Assessment"                 , "pain_assessment",
  "Hospice and Palliative Care Dyspnea Screening"               , "dyspnea_screening",
  "Hospice and Palliative Care Dyspnea Treatment"               , "dyspnea_treatment",
  "Patient Treated with an Opioid Who Are Given a Bowel Regimen", "opioid_bowel",
  "Hospice and Palliative Care Composite Process Measure"       , "composite_process",
  "Hospice Visits When Death Is Imminent, Measure 1"            , "visits_imminent",
)

cms_patient_care <- json$results |>
  map_df(as_tibble) |>
  select(
    ccn = cms_certification_number_ccn,
    facility_name,
    measure_name,
    measure_code,
    score
  ) |>
  # Blank measure names are treated as missing and filled in from the next
  # non-missing row below.
  mutate(measure_name = na_if(measure_name, "")) |>
  fill(measure_name, .direction = "up") |>
  # Keep only the rows whose measure code starts with "H"
  filter(str_detect(measure_code, "^H")) |>
  # "Not Available" scores become NA so the column can be numeric
  mutate(score = as.numeric(na_if(score, "Not Available"))) |>
  # What remains of the measure code after dropping its `H_ddd_dd_` prefix,
  # lower-cased, is the score type; the code itself is then dropped.
  mutate(
    type = str_to_lower(str_remove(measure_code, "H_\\d{3}_\\d{2}_")),
    measure_code = NULL
  ) |>
  left_join(abbr, by = "measure_name") |>
  select(ccn, facility_name, measure_abbr, score, type) |>
  arrange(ccn, measure_abbr, type)

# Keep a plain-text copy next to this script and refresh the packaged dataset
write_csv(cms_patient_care, "data-raw/cms_patient_care.csv")
usethis::use_data(cms_patient_care, overwrite = TRUE)
Loading