Commit 7ab9d585 authored by Jonathan Mang's avatar Jonathan Mang 💡
Browse files

feat: added function `combine_stats`

useful to calculate aggregated statistics
parent 74aacad3
Pipeline #22802 failed with stages
in 10 minutes and 59 seconds
Package: DIZutils
Title: Utilities for 'DIZ' R Package Development
Version: 0.0.6.9006
Date: 2021-01-19
Version: 0.0.6.9007
Date: 2021-01-25
Authors@R:
c(person(given = "Jonathan M.",
family = "Mang",
......@@ -30,6 +30,8 @@ Imports:
config,
data.table,
DBI (>= 1.1.0),
Hmisc,
psych,
RJDBC,
RJSONIO,
RPostgres,
......
......@@ -4,6 +4,7 @@ export("%notin%")
export(clean_path_name)
export(cleanup_old_logfile)
export(close_all_connections)
export(combine_stats)
export(db_connection)
export(feedback)
export(firstup)
......
#' @title Combine aggregated statistics.
#' @description This function provides the functionality to combine multiple
#'   statistics to a single statistical overview. This is e.g. useful if you
#'   are only allowed to export statistical characteristics from a site but
#'   not the data itself. So in this case you have e.g. mean, median and N
#'   from each site but want to say something about the mean, median and N
#'   over all sites like you had the data of all sites in one big pool and
#'   would do the statistics there.
#' @details `Min`, `Max`, `N`, `Mean` and `SD` of the pooled data can be
#'   reconstructed exactly from the per-site values. `Mean` is the N-weighted
#'   mean of the site means. `SD` is derived from the within-site and the
#'   between-site sums of squares, so differing site means are taken into
#'   account. If the information needed for this is not available (no row
#'   with usable `N`, `Mean` and `SD`), the N-weighted mean of the site
#'   variances is used as a fallback. The quantiles (`Q10`, `Q25`, `Median`,
#'   `Q75`, `Q90`) cannot be reconstructed without the raw data; they are
#'   approximated by the N-weighted quantile of the per-site quantiles.
#'   A statistic for which no site provides a value is returned as `NA`.
#' @param summaries (data.table) Data table containing all stats you want to
#'   combine as rows. This data.table must contain the columns
#'   `Min`, `Q10`, `Q25`, `Median`, `Mean`, `SD`, `Q75`, `Q90`, `Max`, `N`.
#'   Each row in this data table represents a site as of the example described
#'   above.
#' @param demo (boolean, default = FALSE) Do you want to see how the function
#'   works? Then call `combine_stats(summaries = NULL, demo = TRUE)`.
#'   In demo mode `summaries` is ignored and replaced by random data.
#' @return A one-row data.table containing the calculated, aggregated
#'   statistics of the input.
#' @examples
#' site_stats <- data.table::data.table(
#'   Min = c(1, 10), Q10 = c(1.2, 11), Q25 = c(1.5, 12.5),
#'   Median = c(2, 15), Mean = c(2, 15), SD = c(1, 7.07),
#'   Q75 = c(2.5, 17.5), Q90 = c(2.8, 19), Max = c(3, 20), N = c(3, 2)
#' )
#' combine_stats(summaries = site_stats)
#'
#' @export
#'
combine_stats <- function(summaries, demo = FALSE) {
  ## Raw demo values; only filled (and only used) in demo mode.
  control <- NULL

  ## This is only for demonstration or validation:
  if (demo) {
    n_sites <- 4L
    ## Sample the raw values of every demo site:
    samples <- lapply(seq_len(n_sites), function(i) {
      round(
        x = stats::rnorm(
          n = sample(1:100, size = 1),
          mean = sample(0:100, size = 1),
          sd = sample(1:10, size = 1)
        ),
        digits = 0
      )
    })
    ## Keep the raw values for the later comparison of the calculated
    ## values with the real values:
    control <- unlist(samples)
    ## One row of summary statistics per site:
    summaries <- data.table::rbindlist(
      lapply(samples, function(y) {
        data.table::data.table(
          "Min" = min(y),
          "Q10" = stats::quantile(y, probs = 0.1, names = FALSE),
          "Q25" = stats::quantile(y, probs = 0.25, names = FALSE),
          "Median" = stats::median(y),
          "Mean" = mean(y),
          "SD" = stats::sd(y),
          "Q75" = stats::quantile(y, probs = 0.75, names = FALSE),
          "Q90" = stats::quantile(y, probs = 0.9, names = FALSE),
          "Max" = max(y),
          "N" = as.numeric(length(y))
        )
      }),
      use.names = TRUE
    )
    cat("\n\nYou used the function in demo-mode.\nThis is the randomly chosen demo-data:\n")
    print(summaries)
  }

  ## Number of observations per site; used as weights below.
  site_n <- summaries[["N"]]

  ## Smallest / largest value over all sites (`fun` is `min` or `max`).
  combine_extreme <- function(column, fun) {
    values <- summaries[[column]]
    if (all(is.na(values))) {
      return(NA_real_)
    }
    fun(values, na.rm = TRUE)
  }

  ## Approximation: N-weighted quantile of the per-site quantiles.
  combine_quantile <- function(column, prob) {
    values <- summaries[[column]]
    if (all(is.na(values))) {
      return(NA_real_)
    }
    as.numeric(
      Hmisc::wtd.quantile(
        x = as.numeric(values),
        weights = site_n,
        probs = prob
      )
    )
  }

  ## Exact: N-weighted mean of the per-site means.
  combine_mean <- function() {
    values <- summaries[["Mean"]]
    if (all(is.na(values))) {
      return(NA_real_)
    }
    as.numeric(Hmisc::wtd.mean(x = values, weights = site_n))
  }

  ## Standard deviation of the pooled data, reconstructed from the within-site
  ## and the between-site sums of squares.
  combine_sd <- function() {
    sds <- as.numeric(summaries[["SD"]])
    if (all(is.na(sds))) {
      return(NA_real_)
    }
    means <- as.numeric(summaries[["Mean"]])
    n <- as.numeric(site_n)

    usable <- rep(FALSE, length(sds))
    within_ss <- rep(NA_real_, length(sds))
    if (length(means) == length(sds) && length(n) == length(sds)) {
      within_ss <- (n - 1) * sds^2
      ## A site with a single observation has no within-site spread (its SD
      ## is usually reported as NA), but it still shifts the pooled mean.
      within_ss[!is.na(n) & n == 1] <- 0
      usable <- !is.na(within_ss) & !is.na(means) & !is.na(n) & n > 0
    }

    if (!any(usable)) {
      ## Means or Ns are not available: fall back to the N-weighted mean of
      ## the site variances (ignores differences between the site means).
      return(sqrt(Hmisc::wtd.mean(x = sds^2, weights = site_n)))
    }

    n_total <- sum(n[usable])
    if (n_total < 2) {
      return(NA_real_)
    }
    grand_mean <- sum(n[usable] * means[usable]) / n_total
    between_ss <- sum(n[usable] * (means[usable] - grand_mean)^2)
    sqrt((sum(within_ss[usable]) + between_ss) / (n_total - 1))
  }

  res <- data.table::data.table(
    "Min" = combine_extreme("Min", min),
    "Q10" = combine_quantile("Q10", 0.1),
    "Q25" = combine_quantile("Q25", 0.25),
    "Median" = combine_quantile("Median", 0.5),
    "Mean" = combine_mean(),
    "SD" = combine_sd(),
    "Q75" = combine_quantile("Q75", 0.75),
    "Q90" = combine_quantile("Q90", 0.9),
    "Max" = combine_extreme("Max", max),
    "N" = sum(site_n)
  )

  if (demo) {
    ## Real combined stats:
    cat("\n\nThis is the 'real' statistics over the whole data-pool:\n")
    print(psych::describe(control, quant = c(.1, .25, .5, .75, .9)))
    ## Calculated summary statistics. The result itself is not printed here:
    ## it is shown by R's auto-printing of the return value.
    cat(
      "\n\nAnd this is the calculated summary (which is the return value of this function):\n"
    )
  }
  res
}
......@@ -37,7 +37,7 @@ my_desc$set_authors(c(
my_desc$del("Maintainer")
# Set the version
my_desc$set_version("0.0.6.9006")
my_desc$set_version("0.0.6.9007")
# The title of your package
my_desc$set(Title = "Utilities for 'DIZ' R Package Development")
......@@ -105,6 +105,10 @@ usethis::use_package("data.table", type = "Imports")
# usethis::use_package("magrittr", type = "Imports")
# usethis::use_package("polynom", type = "Imports")
usethis::use_package("DBI", type = "Imports", min_version = "1.1.0")
## For `combine_stats`:
usethis::use_package("Hmisc", type = "Imports")
## For `combine_stats`:
usethis::use_package("psych", type = "Imports")
usethis::use_package("RJDBC", type = "Imports")
## For xml_to_json:
usethis::use_package("RJSONIO", type = "Imports")
......
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/combine_stats.R
\name{combine_stats}
\alias{combine_stats}
\title{Combine aggregated statistics.}
\usage{
combine_stats(summaries, demo = FALSE)
}
\arguments{
\item{summaries}{(data.table) Data table containing all stats you want to
combine as rows. This data.table must contain the columns
`Min`, `Q10`, `Q25`, `Median`, `Mean`, `SD`, `Q75`, `Q90`, `Max`, `N`.
Each row in this data table represents a site as of the example described
above.}
\item{demo}{(boolean, default = FALSE) Do you want to see how the function
works? Then call `combine_stats(summaries = NULL, demo = TRUE)`.}
}
\value{
A one-row data.table containing the calculated, aggregated
statistics of the input.
}
\description{
This function provides the functionality to combine multiple
statistics to a single statistical overview. This is e.g. useful if you
are only allowed to export statistical characteristics from a site but
not the data itself. So in this case you have e.g. mean, median and N
from each site but want to say something about the mean, median and N
over all sites like you had the data of all sites in one big pool and
would do the statistics there.
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment