Commit 0ee0f803 authored by Helene Koester
Browse files

two tables, first try

parent 5f997e53
Showing with 56 additions and 33 deletions
+56 -33
......@@ -23,9 +23,11 @@
#' @param results A list object. The list should contain the results
#' 'rv$results_descriptive'.
#'
#' @return A data.table with the difference in total, distinct, valid and ,
#' missing values of source and target database. Result is represented as a
#' string containing the absolute difference as well as the percentage
#' @return A list with two data.tables with the differences in total, distinct,
#' valid and missing values of source and target database.
#' In table one, called text, the results are represented as a string
#' containing the absolute difference as well as the percentage. Table two,
#' called percent, contains the percentage as a numeric value.
#'
#' @examples
#' \donttest{# runtime ~ 5 sec.
......@@ -133,14 +135,26 @@ difference_checks <- function(results) {
# get names
obj_names <- names(results)
# initialize output table
out <- data.table::data.table(
# initialize output tables. We need two tables: one for the textual
# representation of the result (for a nice display) and one with the
# numeric percentage value
out <- list()
out$text <- data.table::data.table(
"Variable" = character(0),
"Difference in Totals" = character(0),
"Difference in Distincts" = character(0),
"Difference in Valids" = character(0),
"Difference in Missings" = character(0)
)
out$percent <- data.table::data.table(
"Variable" = character(0),
"Difference in Totals" = numeric(0),
"Difference in Distincts" = numeric(0),
"Difference in Valids" = numeric(0),
"Difference in Missings" = numeric(0)
)
for (i in obj_names) {
......@@ -174,14 +188,25 @@ difference_checks <- function(results) {
)
}
out <- rbind(
out,
out$text <- rbind(
out$text,
data.table::data.table(
"Variable" = i,
"Difference in Totals" = check_total$text,
"Difference in Distincts" = check_distinct$text,
"Difference in Valids" = check_valids$text,
"Difference in Missings" = check_missings$text
)
)
out$percent <- rbind(
out$percent,
data.table::data.table(
"Variable" = i,
"Difference in Totals" = check_total,
"Difference in Distincts" = check_distinct,
"Difference in Valids" = check_valids,
"Difference in Missings" = check_missings
"Difference in Totals" = check_total$percent,
"Difference in Distincts" = check_distinct$percent,
"Difference in Valids" = check_valids$percent,
"Difference in Missings" = check_missings$percent
)
)
}
......@@ -191,17 +216,19 @@ difference_checks <- function(results) {
calculate_difference <- function(count_source, count_target) {
result <- NULL
result <- list()
absolut_diff <- count_target - count_source
if (absolut_diff == 0) {
result <- "no diff."
result$text <- "no diff."
result$percent <- 0
} else {
percent_diff <- (absolut_diff / count_source) * 100
result <- paste(absolut_diff, " (", signif(percent_diff, 2), "%)")
result$percent <- percent_diff
result$text <- paste(absolut_diff, " (", signif(percent_diff, 2), "%)")
}
return(result)
......
......@@ -116,7 +116,7 @@ Calculates the discrepancies between source (`r rv$source$system_name`) and targ
```{r results='asis'}
if (!is.null(rv$checks$differences)) {
render_difference_check(rv$checks$differences)
# render_difference_check(rv$checks$differences$text)
} else {
cat(no_data_to_display_msg)
}
......
......@@ -11,9 +11,11 @@ difference_checks(results)
'rv$results_descriptive'.}
}
\value{
A data.table with the difference in total, distinct, valid and ,
missing values of source and target database. Result is represented as a
string containing the absolute difference as well as the percentage
A list with two data.tables with the differences in total, distinct,
valid and missing values of source and target database.
In table one, called text, the results are represented as a string
containing the absolute difference as well as the percentage. Table two,
called percent, contains the percentage as a numeric value.
}
\description{
Internal function to calculate differences
......
......@@ -11,9 +11,8 @@
[1] "Constraining values/rules:"
$`Age in years`$kable
min max unit
<int> <int> <char>
1: 0 110 a
min max unit
1: 0 110 a
$`Age in years`$conformance_results
NULL
......@@ -27,9 +26,8 @@
[1] "Constraining values/rules:"
$`Amount of credit`$kable
min max unit
<int> <char> <char>
1: 0 Inf money
min max unit
1: 0 Inf money
$`Amount of credit`$conformance_results
NULL
......@@ -44,7 +42,6 @@
$Birthdate$kable
min max format
<char> <char> <char>
1: 1950-01-01 1989-12-31 %d.%m.%Y
$Birthdate$conformance_results
......@@ -73,9 +70,8 @@
[1] "Constraining values/rules:"
$`Current bank balance`$kable
min max unit
<char> <char> <char>
1: -Inf Inf money
min max unit
1: -Inf Inf money
$`Current bank balance`$conformance_results
[1] "Extrem values are not conform with constraints."
......@@ -90,7 +86,6 @@
$`Date of contact`$kable
min max format
<char> <char> <char>
1: 2012-01-01 2015-12-31 %d.%m.%Y
$`Date of contact`$conformance_results
......@@ -108,9 +103,8 @@
[1] "Constraining values/rules:"
$Income$kable
min max unit
<int> <char> <char>
1: 0 Inf money
min max unit
1: 0 Inf money
$Income$conformance_results
NULL
......
......@@ -122,7 +122,7 @@ test_that("correct functioning of etl_checks and difference_checks", {
expect_length(rv$checks$etl, n = 4)
expect_type(rv$checks$differences, "list")
expect_length(rv$checks$differences, n = 5)
expect_length(rv$checks$differences, n = 2)
do.call(
file.remove,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment