Generate report from data cleaning operations
Usage
print_report(
  data,
  what = NULL,
  print = FALSE,
  report_title = "{cleanepi} data cleaning report",
  output_file_name = NULL,
  format = "html"
)
Arguments
- data
 A <data.frame> or <linelist> object returned from clean_data or from the main functions of each data cleaning module.
- what
 A <character> with the name of the specific data cleaning report to be displayed. The possible values are:
 incorrect_date_sequence: To display rows with the incorrect date sequences
 colnames: To display the column names before and after cleaning
 converted_into_numeric: To display the names of the columns that have been converted into numeric
 date_standardization: To display rows in the cleaned data with date values that are outside of the specified time frame, and rows with date values that comply with multiple formats
 misspelled_values: To display the detected misspelled values
 removed_duplicates: To display the duplicated rows that have been removed
 found_duplicates: To display the duplicated rows
 constant_data: To display the constant data i.e. constant columns, empty rows and columns
 missing_values_replaced_at: To display the names of the columns where the missing value strings have been replaced with NA
 incorrect_subject_id: To display the missing, duplicated and invalid subject IDs
 scanning_result: To display the output of the scan_data() function
- print
 A <logical> that specifies whether to open the report in your browser in the form of an HTML file or not. Default is FALSE.
- report_title
 A <character> with the title that will appear on the report.
- output_file_name
 A <character> used to specify the name of the report file, excluding any file extension. If no file name is supplied, one will be automatically generated with the format cleanepi_report_YYMMDD_HHMMSS.
- format
 A <character> with the file format of the report. Currently only "html" is supported.
Examples
if (FALSE) { # \dontrun{
  # Locate the test data and the test dictionary in the package's
  # "extdata" folder, then read both into memory.
  data_path <- system.file("extdata", "test_df.RDS", package = "cleanepi")
  dictionary_path <- system.file(
    "extdata", "test_dictionary.RDS",
    package = "cleanepi"
  )
  raw_data <- readRDS(data_path)
  dictionary <- readRDS(dictionary_path)

  # Scan the raw data first; the result is attached to the report below.
  scan_result <- scan_data(raw_data)

  # Time frame passed to standardize_dates().
  date_window <- as.Date(c("1973-05-29", "2023-05-29"))

  # Run the cleaning operations one after the other in a single pipeline.
  cleaned_data <- raw_data %>%
    standardize_column_names(
      keep = NULL,
      rename = c("DOB" = "dateOfBirth")
    ) %>%
    replace_missing_values(target_columns = NULL, na_strings = "-99") %>%
    remove_constants(cutoff = 1.0) %>%
    remove_duplicates(target_columns = NULL) %>%
    standardize_dates(
      target_columns = NULL,
      error_tolerance = 0.4,
      format = NULL,
      timeframe = date_window
    ) %>%
    check_subject_ids(
      target_columns = "study_id",
      prefix = "PS",
      suffix = "P2",
      range = c(1L, 100L),
      nchar = 7L
    ) %>%
    convert_to_numeric(target_columns = "sex", lang = "en") %>%
    clean_using_dictionary(dictionary = dictionary)

  # Store the scan output in the report under the "scanning_result" key.
  cleaned_data <- add_to_report(
    x = cleaned_data,
    key = "scanning_result",
    value = scan_result
  )

  # Generate the HTML report without opening it (print = FALSE); with no
  # output file name supplied, the report is saved in the R temporary
  # directory.
  print_report(
    data = cleaned_data,
    print = FALSE,
    report_title = "{cleanepi} data cleaning report",
    output_file_name = NULL,
    format = "html"
  )
} # }