### feat: added function combine_stats

useful to calculate aggregated statistics
 #' @title Combine aggregated statistics.
#' @description This function provides the functionality to combine multiple
#'   statistics to a single statistical overview. This is e.g. useful if you
#'   are only allowed to export statistical characteristics from a site but
#'   not the data itself. So in this case you have e.g. mean, median and N
#'   from each site but want to say something about the mean, median and N
#'   over all sites as if you had the data of all sites in one big pool and
#'   computed the statistics there.
#' @details Min, Max and N are combined exactly. Mean is the N-weighted mean
#'   of the site means. SD is reconstructed from the per-site N, Mean and SD
#'   by adding the within-site and the between-site sums of squares, so it
#'   matches the standard deviation of the pooled data; it is NA if fewer
#'   than two pooled observations have usable N, Mean and SD values.
#'   The quantiles (Q10, Q25, Median, Q75, Q90) cannot be recovered from
#'   summaries alone and are therefore only approximated by the N-weighted
#'   quantile of the corresponding site quantiles.
#'   A statistic whose input column contains only missing values is returned
#'   as a numeric NA.
#' @param summaries (data.table) Data table containing all stats you want to
#'   combine as rows. This data.table must contain the columns
#'   Min, Q10, Q25, Median, Mean, SD, Q75, Q90, Max, N.
#'   Each row in this data table represents one site, as in the example
#'   given in the description.
#' @param demo (boolean, default = FALSE) Do you want to see how the function
#'   works? Then call combine_stats(summaries = NULL, demo = TRUE).
#' @return A one-row data.table containing the calculated, aggregated
#'   statistics of the input.
#'
#' @export
#'
combine_stats <- function(summaries, demo = FALSE) {
  ## Raw demo observations; only filled (and only used) in demo mode:
  control <- NULL

  ## This is only for demonstration or validation:
  if (demo) {
    rows <- 4

    ## Sample one vector of raw observations per simulated site:
    samples <- lapply(seq_len(rows), function(i) {
      round(
        x = stats::rnorm(
          n = sample(1:100, size = 1),
          mean = sample(0:100, size = 1),
          sd = sample(1:10, size = 1)
        ),
        digits = 0
      )
    })

    ## Keep the pooled raw numbers for the later comparison of the
    ## calculated values with the real values:
    control <- unlist(samples)

    ## One summary row per site, bound together in a single step instead of
    ## growing the table inside a loop:
    summaries <- data.table::rbindlist(
      lapply(samples, function(y) {
        data.table::data.table(
          "Min" = min(y),
          "Q10" = stats::quantile(y, 0.1),
          "Q25" = stats::quantile(y, 0.25),
          "Median" = stats::median(y),
          "Mean" = mean(y),
          "SD" = stats::sd(y),
          "Q75" = stats::quantile(y, 0.75),
          "Q90" = stats::quantile(y, 0.9),
          "Max" = max(y),
          "N" = length(y)
        )
      }),
      use.names = TRUE
    )

    cat("\n\nYou used the function in demo-mode.\nThis is the randomly chosen demo-data:\n")
    print(summaries)
  }

  ## Smallest/largest value over all sites; numeric NA if nothing is known.
  extreme <- function(column, fun) {
    values <- summaries[[column]]
    if (all(is.na(values))) {
      return(NA_real_)
    }
    fun(values, na.rm = TRUE)
  }

  ## N-weighted quantile of the per-site quantiles (an approximation).
  weighted_quantile <- function(column, prob) {
    values <- summaries[[column]]
    if (all(is.na(values))) {
      return(NA_real_)
    }
    as.numeric(
      Hmisc::wtd.quantile(
        x = as.numeric(values),
        weights = summaries[["N"]],
        probs = prob
      )
    )
  }

  ## N-weighted mean of the per-site means.
  weighted_mean <- function(column) {
    values <- summaries[[column]]
    if (all(is.na(values))) {
      return(NA_real_)
    }
    as.numeric(
      Hmisc::wtd.mean(x = values, weights = summaries[["N"]])
    )
  }

  ## Standard deviation of the pooled data, rebuilt from per-site N, Mean, SD.
  pooled_sd <- function(n, m, s) {
    if (is.null(n) || is.null(m) || is.null(s)) {
      return(NA_real_)
    }
    n <- as.numeric(n)
    m <- as.numeric(m)
    s <- as.numeric(s)

    ## sd() of a single observation is NA, but such a site simply has no
    ## within-site spread; it still contributes to the between-site part.
    s[is.na(s) & !is.na(n) & n == 1] <- 0

    usable <- !is.na(n) & !is.na(m) & !is.na(s)
    n <- n[usable]
    m <- m[usable]
    s <- s[usable]

    total <- sum(n)
    if (total < 2) {
      return(NA_real_)
    }

    grand_mean <- sum(n * m) / total
    ## Averaging the site variances alone would ignore that the sites are
    ## centred on different means, so the between-site part is added:
    within <- sum((n - 1) * s^2)
    between <- sum(n * (m - grand_mean)^2)
    sqrt((within + between) / (total - 1))
  }

  res <- data.table::data.table(
    "Min" = extreme("Min", min),
    "Q10" = weighted_quantile("Q10", 0.1),
    "Q25" = weighted_quantile("Q25", 0.25),
    "Median" = weighted_quantile("Median", 0.5),
    "Mean" = weighted_mean("Mean"),
    "SD" = pooled_sd(
      n = summaries[["N"]],
      m = summaries[["Mean"]],
      s = summaries[["SD"]]
    ),
    "Q75" = weighted_quantile("Q75", 0.75),
    "Q90" = weighted_quantile("Q90", 0.9),
    "Max" = extreme("Max", max),
    "N" = sum(summaries[["N"]])
  )

  if (demo) {
    ## Real combined stats:
    cat("\n\nThis is the 'real' statistics over the whole data-pool:\n")
    print(psych::describe(control, quant = c(.1, .25, .5, .75, .9)))

    ## Calculated summary statistics. The result is not printed explicitly:
    ## it is returned visibly, so an unassigned call prints it once.
    cat(
      "\n\nAnd this is the calculated summary (which is the return value of this function):\n"
    )
  }

  return(res)
}
 ... ... @@ -37,7 +37,7 @@ my_desc$set_authors(c( my_desc$del("Maintainer") # Set the version my_desc$set_version("0.0.6.9006") my_desc$set_version("0.0.6.9007") # The title of your package my_desc\$set(Title = "Utilities for 'DIZ' R Package Development") ... ... @@ -105,6 +105,10 @@ usethis::use_package("data.table", type = "Imports") # usethis::use_package("magrittr", type = "Imports") # usethis::use_package("polynom", type = "Imports") usethis::use_package("DBI", type = "Imports", min_version = "1.1.0") ## For combine_stats: usethis::use_package("Hmisc", type = "Imports") ## For combine_stats: usethis::use_package("psych", type = "Imports") usethis::use_package("RJDBC", type = "Imports") ## For xml_to_json: usethis::use_package("RJSONIO", type = "Imports") ... ...
 % Generated by roxygen2: do not edit by hand % Please edit documentation in R/combine_stats.R \name{combine_stats} \alias{combine_stats} \title{Combine aggregated statistics.} \usage{ combine_stats(summaries, demo = FALSE) } \arguments{ \item{summaries}{(data.table) Data table containing all stats you want to combine as rows. This data.table must contain the columns Min, Q10, Q25, Median, Mean, SD, Q75, Q90, Max, N. Each row in this data table represents a site as of the example described above.} \item{demo}{(boolean, default = FALSE) Do you want to see how the function works? Then call combine_stats(summaries = NULL, demo = TRUE).} } \value{ A one-row data.table containing the calculated, aggregated statistics of the input. } \description{ This function provides the functionality to combine multiple statistics to a single statistical overview. This is e.g. useful if you are only allowed to export statistical characteristics from a site but not the data itself. So in this case you have e.g. mean, median and N from each site but want to say something about the mean, median and N over all sites as if you had the data of all sites in one big pool and would do the statistics there. }