### feat: added function combine_stats

useful to calculate aggregated statistics
 #' @title Combine aggregated statistics.
#' @description This function provides the functionality to combine multiple
#'   statistics to a single statistical overview. This is e.g. useful if you
#'   are only allowed to export statistical characteristics from a site but
#'   not the data itself. So in this case you have e.g. mean, median and N
#'   from each site but want to say something about the mean, median and N
#'   over all sites like you had the data of all sites in one big pool and
#'   would do the statistics there.
#'
#'   The combination works as follows: `Min`/`Max` are the minimum/maximum of
#'   the per-site values, `N` is the sum of the per-site `N`, `Mean` is the
#'   `N`-weighted mean of the per-site means, the quantiles (`Q10`, `Q25`,
#'   `Median`, `Q75`, `Q90`) are `N`-weighted quantiles of the per-site
#'   quantiles, and `SD` is the square root of the `N`-weighted mean of the
#'   per-site variances.
#' @param summaries (data.table) Data table containing all stats you want to
#'   combine as rows. This data.table must contain the columns
#'   Min, Q10, Q25, Median, Mean, SD, Q75, Q90, Max, N.
#'   Each row in this data table represents a site as of the example described
#'   above.
#' @param demo (boolean, default = FALSE) Do you want to see how the function
#'   works? Then call combine_stats(summaries = NULL, demo = TRUE). In demo
#'   mode `summaries` is ignored and replaced by randomly generated data.
#' @return A one-row data.table containing the calculated, aggregated
#'   statistics of the input. A statistic whose input column consists only of
#'   `NA` is returned as (numeric) `NA`.
#' @examples
#' summaries <- data.table::data.table(
#'   Min = c(1, 2), Q10 = c(2, 3), Q25 = c(3, 4), Median = c(5, 6),
#'   Mean = c(5.2, 6.1), SD = c(1.5, 2), Q75 = c(7, 8), Q90 = c(8, 9),
#'   Max = c(10, 12), N = c(40, 60)
#' )
#' combine_stats(summaries)
#'
#' @export
#'
combine_stats <- function(summaries, demo = FALSE) {
  ## This is only for demonstration or validation:
  if (demo) {
    rows <- 4L

    ## Sample some numbers for each demo site. The draws happen in the order
    ## sample size, mean, sd, values.
    samples <- lapply(seq_len(rows), function(i) {
      n_values <- sample(1:100, size = 1)
      mu <- sample(0:100, size = 1)
      sigma <- sample(1:10, size = 1)
      round(x = stats::rnorm(n = n_values, mean = mu, sd = sigma), digits = 0)
    })

    ## Also keep the pooled raw numbers for later comparison of the
    ## calculated values with the real values:
    control <- unlist(samples)

    ## One summary row per demo site, bound together in a single step instead
    ## of growing the table inside a loop:
    summaries <- data.table::rbindlist(
      lapply(samples, function(y) {
        data.table::data.table(
          "Min" = min(y),
          "Q10" = stats::quantile(y, 0.1),
          "Q25" = stats::quantile(y, 0.25),
          "Median" = stats::median(y),
          "Mean" = mean(y),
          "SD" = stats::sd(y),
          "Q75" = stats::quantile(y, 0.75),
          "Q90" = stats::quantile(y, 0.9),
          "Max" = max(y),
          "N" = as.numeric(length(y))
        )
      }),
      use.names = TRUE
    )

    cat("\n\nYou used the function in demo-mode.\nThis is the randomly chosen demo-data:\n")
    print(summaries)
  }

  ## The per-site sample sizes are the weights for all weighted statistics.
  site_n <- summaries[["N"]]

  ## Smallest/largest value over all sites; NA if nothing is available.
  combine_extreme <- function(column, fun) {
    values <- summaries[[column]]
    if (all(is.na(values))) {
      NA_real_
    } else {
      fun(values, na.rm = TRUE)
    }
  }

  ## N-weighted quantile of the per-site quantiles; NA if nothing is available.
  combine_quantile <- function(column, prob) {
    values <- summaries[[column]]
    if (all(is.na(values))) {
      NA_real_
    } else {
      as.numeric(
        Hmisc::wtd.quantile(
          x = as.numeric(values),
          weights = site_n,
          probs = prob
        )
      )
    }
  }

  ## N-weighted mean of the per-site means; NA if nothing is available.
  combine_mean <- function() {
    values <- summaries[["Mean"]]
    if (all(is.na(values))) {
      NA_real_
    } else {
      as.numeric(Hmisc::wtd.mean(x = values, weights = site_n))
    }
  }

  ## Square root of the N-weighted mean of the per-site variances.
  ## NOTE(review): this only pools the within-site variances; the spread
  ## between the site means does not enter the formula, so the result can be
  ## smaller than the SD of the pooled raw data. Confirm whether that is
  ## intended before relying on it.
  combine_sd <- function() {
    values <- summaries[["SD"]]
    if (all(is.na(values))) {
      NA_real_
    } else {
      sqrt(Hmisc::wtd.mean(x = values^2, weights = site_n))
    }
  }

  res <- data.table::data.table(
    "Min" = combine_extreme("Min", min),
    "Q10" = combine_quantile("Q10", 0.1),
    "Q25" = combine_quantile("Q25", 0.25),
    "Median" = combine_quantile("Median", 0.5),
    "Mean" = combine_mean(),
    "SD" = combine_sd(),
    "Q75" = combine_quantile("Q75", 0.75),
    "Q90" = combine_quantile("Q90", 0.9),
    "Max" = combine_extreme("Max", max),
    "N" = sum(site_n)
  )

  if (demo) {
    ## Real combined stats:
    cat("\n\nThis is the 'real' statistics over the whole data-pool:\n")
    print(psych::describe(control, quant = c(.1, .25, .5, .75, .9)))
    ## Calculated summary statistics:
    cat(
      "\n\nAnd this is the calculated summary (which is the return value of this function):\n"
    )
    ## `res` is not printed here on purpose: it is the return value and is
    ## shown by auto-printing when the function is called interactively.
  }

  res
}
 ... ... @@ -37,7 +37,7 @@ my_desc$set_authors(c( my_desc$del("Maintainer") # Set the version my_desc$set_version("0.0.6.9006") my_desc$set_version("0.0.6.9007") # The title of your package my_desc$set(Title = "Utilities for 'DIZ' R Package Development") ... ... @@ -105,6 +105,10 @@ usethis::use_package("data.table", type = "Imports") # usethis::use_package("magrittr", type = "Imports") # usethis::use_package("polynom", type = "Imports") usethis::use_package("DBI", type = "Imports", min_version = "1.1.0") ## For combine_stats: usethis::use_package("Hmisc", type = "Imports") ## For combine_stats: usethis::use_package("psych", type = "Imports") usethis::use_package("RJDBC", type = "Imports") ## For xml_to_json: usethis::use_package("RJSONIO", type = "Imports") ... ...
