Commit d4578a44 authored by Petar Horki's avatar Petar Horki

Split functions.R into multiple files.

parent 74fb38b1
This diff is collapsed.
# omopRds
# DataPackage.R
#==============================================================================
#' Factory.DataPackage
#'
#' @description
#' Construct and return a function for mapping a data package.
#'
#' @details
#' Construct and return a function for mapping a data package composed of the\cr
#' following data groups:\cr
#' Condition(subset)\cr
#' Measurement(subset)\cr
#' Following (sub)methods are employed:\cr
#' `_MapClass.Get`\cr
#'
#' Assumptions:\cr
#' the global dictionary exists, and contains metadata for the data package.
#' the SQL template file is located in the 'inst \ sql \ postgres' folder\cr
#' the 'dataGroupDomainIDIn' parameter takes on the following values(s)\cr
#' 'Condition', 'Measurement'\cr
#' the following methods are defined\cr:
#' RenderCohortCondition, RenderMeasurementCohort\cr
#' @param dataGroupDomainIDIn Set of dataGroupDomainIDs to map.
#'
#' @export
Factory.DataPackage <- function(dataGroupDomainIDIn) {
# Aggregate concept IDS by the domain IDs.
myAggConceptIDsByDomainID <- AggConceptIDsByDomainID(dataGroupDomainIDIn)
myDataGroupFun = function(connectionIn, cohortDefinitionIDIn) {
# generate functions for creating the dataPackages
# FIXME use `_DF_DATA_GROUP_DOMAIN_NAME` in place of 'dataGroupDomainID'
pmap(list(dataGroupDomainID = myAggConceptIDsByDomainID$dataGroupDomainID,
aggConceptIDs = myAggConceptIDsByDomainID$aggConceptIDs),
function(dataGroupDomainID, aggConceptIDs) {
DataGroupClosure(dataGroupDomainID, aggConceptIDs)
}
) %>%
map( ~ .x(connectionIn, cohortDefinitionIDIn)) %>% # execute the above functions
reduce(left_join, by = c("SUBJECT_ID", "VISIT_OCCURRENCE_ID")) # join the dataPackages
}
return(myDataGroupFun)
}
#==============================================================================
#' DataGroupClosure
#'
#' @description
#' Create ...
#'
#' @details
#' Create ...
#'
#' @param dataGroupDomainIDIn
#' @param aggConceptIDIn
#'
#' @export
DataGroupClosure <- function(dataGroupDomainIDIn, aggConceptIDIn) {
myDataGroupFun <- function(connectionIn, cohortDefinitionIDIn) {
`_MapClass.Get`(connectionIn,
do.call(paste0("RenderCohort", dataGroupDomainIDIn),
list(cohortDefinitionIDIn, aggConceptIDIn)))
}
return(myDataGroupFun)
}
#==============================================================================
#' AggConceptIDsByDomainID
#'
#' @description
#' Aggregate concept IDS by the domain IDs.\cr
#'
#' @details
#' Assumption: the global dictionary has a `_DF_DOMAIN_NAME` column, which\cr
#' contains entries of the following form:\cr
#' 'DomainID:ConceptID'(e.g. 'Condition:255573', 'Measurement:3027018')\cr
#' The AggConceptIDsByDomainID does the following:\cr
#' separate the DomainIDs from the ConceptIDs\cr
#' select the desired domain(s)\cr
#' aggregate the ConceptIDs by the DomainIDs\cr
#' Output is a data frame with the following columns:\cr
#' `_DF_DATA_GROUP_DOMAIN_NAME` ... character constant\cr
#' 'aggConceptIDs' ... character constant\cr
#'
#' @param dataGroupDomainIDIn dataGroupDomainID(s) of interest, defaults to all\cr
#' distinct dataGroupDomainIDs in the dictionary.\cr
#'
#' @export
AggConceptIDsByDomainID <- function(dataGroupDomainIDIn = GetDataGroupDomainIDs()) {
myDF <- Get() %>%
separate_(col = as.name(`_DF_DOMAIN_NAME`),
into = c(`_DF_DATA_GROUP_DOMAIN_NAME`, 'conceptID'),
sep = `_DF_SEP`, remove = TRUE) %>%
filter_(.dots = paste0(`_DF_DATA_GROUP_DOMAIN_NAME`, " %in% ", "c(",
paste(paste0("'", dataGroupDomainIDIn, "'"), collapse = ', '), ")")) %>%
select_(.dots = paste0('c(', `_DF_DATA_GROUP_DOMAIN_NAME`, ', ', 'conceptID', ')')) %>%
distinct() %>%
group_by_(.dots = `_DF_DATA_GROUP_DOMAIN_NAME`) %>%
mutate(aggConceptIDs = paste0(conceptID, collapse = ", ")) %>%
ungroup() %>%
select_(.dots = paste0('c(', `_DF_DATA_GROUP_DOMAIN_NAME`, ', ', 'aggConceptIDs', ')')) %>%
distinct()
}
#==============================================================================
#' GetDataGroupDomainIDs
#'
#' @description
#' Returns distinct dataGroupDomainIDs from the global dictionary.\cr
#'
#' @details
#' Assumption: the global dictionary has a `_DF_DOMAIN_NAME` column, which\cr
#' contains entries of the following form:\cr
#' 'DomainID:ConceptID'(e.g. 'Condition:255573', 'Measurement:3027018')\cr
#' The GetDataGroupDomainIDs function returns the distinct DomainIDs.\cr
#'
#' @export
GetDataGroupDomainIDs <- function() {
myDF <- Get() %>%
separate_(col = as.name(`_DF_DOMAIN_NAME`),
into = c(`_DF_DATA_GROUP_DOMAIN_NAME`, 'conceptID'),
sep = `_DF_SEP`, remove = TRUE) %>%
select_(.dots = `_DF_DATA_GROUP_DOMAIN_NAME`) %>%
distinct()
myRetVal <- myDF[[`_DF_DATA_GROUP_DOMAIN_NAME`]]
}
#==============================================================================
#' RenderCohortCondition
#'
#' @description
#' A wrapper for the SqlRender::render method and a Condition related query\cr
#' for a specific cohort.
#'
#' @details
#' Default values for the parameters are defined in the following constants:\cr
#' `_MAP_DATA_GROUP_CONDITION_SQL_TEMPLATE`\cr
#' `_MAP_DATA_GROUP_DB_SCHEMA`\cr
#' `_MAP_DATA_GROUP_RESULTS_SCHEMA`\cr
#' Assumptions:
#' the SQL template files are located in the following folder:\cr
#' inst \ sql \ postgres' folder\cr
#' the cohort with the given id is defined
#'
#' @param cohortDefinitionIdIn The cohort id.
#' @param conceptIDIn The conceptID set.
#' @param sqlTemplateIn The name of the file with the SQL template.
#' @param cdmSchemaIn The name of the CDM schema.
#' @param resultsSchemaIn The name of the results schema.
#'
#' @export
RenderCohortCondition <-
function(cohortDefinitionIdIn,
conceptIDIn,
sqlTemplateIn = `_MAP_DATA_GROUP_CONDITION_SQL_TEMPLATE`,
cdmSchemaIn = `_MAP_DATA_GROUP_DB_SCHEMA`,
resultsSchemaIn = `_MAP_DATA_GROUP_RESULTS_SCHEMA`) {
renderedSql <- SqlRender::render(
sqlTemplateIn,
cdmSchema = cdmSchemaIn,
resultsSchema = resultsSchemaIn,
cohortDefinitionId = cohortDefinitionIdIn,
value = conceptIDIn
)
}
#==============================================================================
#' RenderCohortMeasurement
#'
#' @description
#' A wrapper for the SqlRender::render method and a Measurement related query\cr
#' for a specific cohort.
#'
#' @details
#' Default values for the parameters are defined in the following constants:\cr
#' `_MAP_DATA_GROUP_MEASUREMENT_SQL_TEMPLATE`\cr
#' `_MAP_DATA_GROUP_DB_SCHEMA`\cr
#' `_MAP_DATA_GROUP_RESULTS_SCHEMA`\cr
#' Assumptions:
#' the SQL template files are located in the following folder:\cr
#' inst \ sql \ postgres' folder\cr
#' the cohort with the given id is defined
#'
#' @param cohortDefinitionIdIn The cohort id.
#' @param conceptIDIn The conceptID set.
#' @param sqlTemplateIn The name of the file with the SQL template.
#' @param cdmSchemaIn The name of the CDM schema.
#' @param resultsSchemaIn The name of the results schema.
#'
#' @export
RenderCohortMeasurement <-
function(cohortDefinitionIdIn,
conceptIDIn,
sqlTemplateIn = `_MAP_DATA_GROUP_MEASUREMENT_SQL_TEMPLATE`,
cdmSchemaIn = `_MAP_DATA_GROUP_DB_SCHEMA`,
resultsSchemaIn = `_MAP_DATA_GROUP_RESULTS_SCHEMA`) {
renderedSql <- SqlRender::render(
sqlTemplateIn,
cdmSchema = cdmSchemaIn,
resultsSchema = resultsSchemaIn,
cohortDefinitionId = cohortDefinitionIdIn,
value = conceptIDIn
)
}
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
# omopRds
# .R
# OpalWrappers.R
#==============================================================================
#' OpenOpalConnection
#'
......
# omopRds
# Utils.R
#==============================================================================
.onAttach <- function(libname, pkgname) {
packageStartupMessage("Welcome to omopRds package")
# create variables for use as constants
Constants()
}
#==============================================================================
#' Constants
#'
#' @description
#' Create global variables for use as constants.
#'
#' @details
#' Create the following global variables for use as constants variable:\cr
#' `_MAP_ICD_SQL_TEMPLATE`
#' `_MAP_LOINC_SQL_TEMPLATE`
#' `_MAP_CLASS_DB_SCHEMA`
#'
#' @export
Constants <- function() {
`_MAP_ICD_SQL_TEMPLATE` <<-
readr::read_file(paste0(
system.file(package = "omopRds"),
'/sql/postgres/selectWithJoin2.sql'
))
`_MAP_LOINC_SQL_TEMPLATE` <<-
readr::read_file(paste0(
system.file(package = "omopRds"),
'/sql/postgres/selectWithAttrOpValue2.sql'
))
`_MAP_GET_CHILDREN_SQL_TEMPLATE` <<-
readr::read_file(paste0(
system.file(package = "omopRds"),
'/sql/postgres/selectWithJoin2.sql'
))
`_MAP_SOURCE_CODE_TO_CONCEPT_ID_SQL_TEMPLATE` <<-
readr::read_file(paste0(
system.file(package = "omopRds"),
'/sql/postgres/selectWithAttrOpValue1.sql'
))
`_SIMPLE_SELECT_WHERE_TEMPLATE` <<- readr::read_file(paste0(
system.file(package = "omopRds"),
'/sql/postgres/selectWithAttrOpValue1.sql'
))
`_MAP_DATA_GROUP_CONDITION_SQL_TEMPLATE` <<-
readr::read_file(paste0(
system.file(package = "omopRds"),
'/sql/postgres/selectCondition.sql'
))
`_MAP_DATA_GROUP_MEASUREMENT_SQL_TEMPLATE` <<-
readr::read_file(paste0(
system.file(package = "omopRds"),
'/sql/postgres/selectMeasurement.sql'
))
`_MAP_DATA_GROUP_DB_SCHEMA` <<- 'synpuf_cdm'
`_MAP_DATA_GROUP_RESULTS_SCHEMA` <<- 'synpuf_results'
# use for DATABASE: FHIR_OMOP
`_MAP_CLASS_DB_SCHEMA` <<- 'vocabulary'
# use for DATABASE: OMOP_V5
#`_MAP_CLASS_DB_SCHEMA` <<- 'SYNPUF_CDM'
`_OMOP_V5_DB_SCHEMA` <<- 'ohdsi'
# Names of the of the columns for the input tibble in the `_MapClass.DecorateGeneric` function.
# Also used in the Factory.Map function as one of the `_MapClass.ParseGeneric` parameters.
#TODO: only one MAP_CLASS_DOMAIN needed
`_MAP_CLASS_DOMAIN_ID` <<- 'DOMAIN'
`_MAP_CLASS_CONCEPT_ID` <<- 'CONCEPT_ID'
`_MAP_CLASS_CONCEPT_NAME` <<- 'CONCEPT_NAME'
`_MAP_CLASS_CONCEPT_CODE` <<- 'CONCEPT_CODE'
`_MAP_CLASS_VOCABULARY_ID` <<- 'VOCABULARY_ID'
`_MAP_CLASS_DOMAIN` <<- 'DOMAIN'
`_MAP_CLASS_SOURCE_VOCABULARY_ID_AND_CODE` <<- 'source vocabulary id and code'
# Names of the columns for the output tibble in the following functions:
# `_MapClass.DecorateGeneric`
# DecorateICD
`_MAP_CLASS_GENERIC_NAME` <<- 'NAME'
`_MAP_CLASS_GENERIC_VALUE` <<- 'VALUE'
`_MAP_CLASS_OPAL_ENTITY_TYPE` <<- 'entityType'
`_MAP_CLASS_OPAL_IS_REPEATABLE` <<- 'isRepeatable'
`_MAP_CLASS_OPAL_VALUE_TYPE` <<- 'valueType'
# String values employed in the DecorateICD function, as well as in unit tests
`_MAP_CLASS_DECORATE_ICD_NAME` <<- "label"
# TODO: Not sure if needed for LOINC codes
# String values emplyed in the DecorateLoinc function, as well as in unit test
`_MAP_CLASS_DECORATE_LOINC_NAME` <<- "unit"
`_MAP_CLASS_DECORATE_ICD_HD_VALUE` <<- "Primary diagnosis."
`_MAP_CLASS_DECORATE_ICD_ND_VALUE` <<- "Secondary diagnosis."
`_DF_DOMAIN_NAME` <<- "domain"
`_DF_DATA_GROUP_DOMAIN_NAME` <<- "dataGroupDomainID"
`_DF_SEP` <<- ":"
`_CONSTANTS_INITIALIZED` <<- 1
`_LOINC_MAPPING` <<- try(readr::read_csv(paste0(
system.file(package = "omopRds"), '/csv/loinc_mapping.csv'), col_names = c("ID", "LOINC", "UKF_UNITS", "Meta_Units_UCUM"),
col_types = cols("c","c","c","c"), skip=1), silent=TRUE)
`_GET_CONCEPT_SETS_SQL_TEMPLATE_` <<- readr::read_file(paste0(
system.file(package = "omopRds"),
'/sql/postgres/selectWithJoinSubqueries.sql'
))
`_GET_CONCEPT_SETS_DATABASE_SCHEMA_` <<- "OHDSI"
`_GET_CONCEPT_SETS_COLUMN_NAMES` <<- c('CREATED_BY', 'MODIFIED_BY', 'CREATED_DATE', 'MODIFIED_DATE', 'ID', 'NAME')
`_GET_COHORT_DEFINITION_FROM_DATABASE_SQL_TEMPLATE_` <<-readr::read_file(paste0(
system.file(package = "omopRds"),
'/sql/postgres/selectWithAttrOpValue1.sql'
))
`_GET_COHORT_DEFINITION_FROM_DATABASE_SCHEMA_` <<- 'OHDSI'
# json files for conceptSet/cohort testing via WebAPI
`_CONCEPT_SET_TEST_FILE_NAME_` <<- 'test_concept_set.json'
`_CONCEPT_SET_TEST_NAME_` <<- "`_SAMPLE_CONCEPT_SET_`"
`_COHORT_TEST_FILE_NAME_` <<- 'test_cohort_definition.json'
`_COHORT_TEST_NAME_` <<- "`_SAMPLE_COHORT_`"
}
#==============================================================================
#' requireNamespaceWrapper
#'
#' @description
#' A wrapper for the requireNamespace method.
#'
#' @details
#' Checks whether a package is installed. If not: stops execution of the \cr
#' current expression and prints an error message.
#'
#' @param packageName The name of the package to be checked.
#' @export
requireNamespaceWrapper <- function(packageName) {
# check if the package is available
if (!requireNamespace(packageName, quietly = TRUE)) {
stop(
paste0(
"Package \"",
packageName,
"\" needed for this function to work. Please install it."
),
call. = FALSE
)
retVal = FALSE # should not evaluate, but just in case
} else {
retVal = TRUE
}
return(retVal)
}
#==============================================================================
#' OpalValueType
#'
#' @description
#' Map names of selected R basic data types to the corresponding Opal types.
#'
#' @details
#' The data types are mapped as follows:\cr
#' 'character' -> 'text'\cr
#' 'integer' -> 'integer'\cr
#' 'double' -> 'decimal'\cr
#' 'logical' -> 'boolean'\cr
#' Assumptions:\cr
#' the input is a name of a valid R basic data type.\cr
#' Constraint:\cr
#' the R 'complex' data type is not supported.\cr
#' for unsupported inputs NA is returned.\cr
#'
#' @param dataTypeIn The data type to be mapped.
#'
#' @export
OpalValueType <- function(dataTypeIn) {
switch(dataTypeIn,
'character'={
retVal <- 'text'
},
'integer'={
retVal <- 'integer'
},
'double'={
retVal <- 'decimal'
},
'logical'={
retVal <- 'boolean'
},
{
retVal <- NA
}
)
}
#==============================================================================
#' SchemaToJSON
#'
#' @description
#' Serialize input tibble to JSON
#'
#' @details
#' Serializes an input tibble to JSON, supporting the use of Opal web service.
#'
#' @param schemaIn Input schema.
#' @param outermostSquareBracketsIn Determines whether to keep \cr
#' or to remove the outermost square brackets (default value is true).
#'
#' @export
SchemaToJSON <-
function(schemaIn, outermostSquareBracketsIn = TRUE) {
# check if the package is available
if (!requireNamespace("jsonlite", quietly = TRUE)) {
stop("Package \"jsonlite\" needed for this function to work. Please install it.",
call. = FALSE)
}
schemaJSON <- schemaIn %>% toJSON(pretty = TRUE, flatten = TRUE)
# Workarounds for incomplete unboxing
# ["..."] -> "..."
schemaJSON <-
gsub("\\\"\\]", "\\\"", gsub("\\[\\\"", "\\\"", schemaJSON))
# [false] -> false
schemaJSON <-
gsub("false\\]", "false", gsub("\\[false", "false", schemaJSON))
# [true] -> true
schemaJSON <-
gsub("true\\]", "true", gsub("\\[true", "true", schemaJSON))
# [1] -> 1, [2] -> 2, etc.
schemaJSON <- gsub("\\[(\\d+)\\]", "\\1", schemaJSON)
# The following expression removes JSON output caused by empty tibbles, e.g.:
# Input:
# ...
# "valueType": "integer",
# "categories": [
# []
# ]
# ...
# Output:
# ...
# "valueType": "integer"
# ...
schemaJSON <-
gsub(",\\s*\\\"\\w*\\\":\\s*\\[\\s*\\[\\s*\\]\\s*\\]",
"",
schemaJSON)
# [[ -> [
schemaJSON <- gsub("\\[\\s*\\[", "\\[", schemaJSON)
# ]] -> ]
schemaJSON <- gsub("\\]\\s*\\]", "\\]", schemaJSON)
# remove the outermost square brackets
if (FALSE == outermostSquareBracketsIn)
schemaJSON <- substr(schemaJSON, 2, nchar(schemaJSON) - 1)
return(schemaJSON)
}
#==============================================================================
#' Table
#'
#' @description
#' Table schema definition, supporting the use of Opal web service.
#'
#' @details
#' Example(following subsequent JSON serialization):\cr
#' '[\cr
#' {"datasourceName" : "TestProject",\cr
#' "entityType" : "Participant",\cr
#' "link" : "/datasource/TestProject/table/TestTable",\cr
#' "name" : "TestTable"}\cr
#' ]'
#'
#' @param datasourceNameIn The name of the datasource.
#' @param entityTypeIn The entity type.
#' @param linkIn The link for the table.
#' @param nameIn The name of the table.
#'
#' @export
Table <- function(datasourceNameIn,
entityTypeIn,
linkIn,
nameIn) {
table <- tibble(
datasourceName = datasourceNameIn,
entityType = entityTypeIn,
link = linkIn,
name = nameIn
)
return(table)
}
# omopRds
# Variable.R
#==============================================================================
#' GetVariables
#'
#' @description
#' Create variables using VariableClosure and global dictionary entries.
#'
#' @details
#' Create variables associated with the given domain(s) by using the \cr
#' VariableClosure and the global dictionary entries.
#'
#' @param variableDomainIn The variable domain(s).
#'
#' @export
GetVariables <- function(variableDomainIn) {
# check if the package is available
if (!requireNamespace("purrr", quietly = TRUE)) {
stop("Package \"purrr\" needed for this function to work. Please install it.",
call. = FALSE)
}
myVariables <-
variableDomainIn %>%
map(VariableClosure) %>% # generate functions for creating the variables
map( ~ .x()) %>% # generate variables by executing the above functions
reduce(bind_rows) # bind the variables
return(myVariables)
}
#==============================================================================
#' Variable
#'
#' @description
#' Create a Variable structure, supporting the creation of Opal variables
#'
#' @details
#' Create a Variable with fields from given parameters.
#'
#' @param nameIn The variable name.
#' @param attributesIn The variable attributes (Attribute type).
#' @param entityTypeIn The variable entity type.
#' @param isRepeatableIn Specifies whether the variable is \cr
#' repeatable.
#' @param valueTypeIn The variable value type.
#' @param categoriesIn The variable categories (Category type).
#' @param indexIn The variable index.
#' @param mimeTypeIn The variable mime type.
#' @param referencedEntityTypeIn Specifies the referenced etity type.
#' @param unitIn The variable unit.
#'
#' @export
Variable <-
function(nameIn,
attributesIn,
entityTypeIn,
isRepeatableIn,
valueTypeIn,
categoriesIn = Category(),
indexIn = 1,
mimeTypeIn = "",
occurrenceGroupIn = "",
referencedEntityTypeIn = "",
unitIn = "") {
# check if the package is available
if (!requireNamespace("tidyr", quietly = TRUE)) {
stop("Package \"tidyr\" needed for this function to work. Please install it.",
call. = FALSE)
}
myVariable <- tibble(
name = nameIn,
attributes = list(attributesIn),
entityType = entityTypeIn,
index = indexIn,
isRepeatable = isRepeatableIn,
mimeType = mimeTypeIn,
occurrenceGroup = occurrenceGroupIn,
referencedEntityType = referencedEntityTypeIn,
unit = unitIn,
valueType = valueTypeIn,
categories = list(categoriesIn)
)
return(myVariable)
}
#==============================================================================
#' VariableClosure
#'
#' @description
#' Maps predefined domains to variables.
#'
#' @details
#' Map predefined domains to variables using a multi-dimensional lookup system and a consistent interface.
#'
#' @param domainIn Set of domains to map.
#'
#' @export
VariableClosure <- function(domainIn) {
function() {
myVariable <- Variable(
nameIn = domainIn,
attributesIn = Attribute.FromDict(Get(domainIn, keyIn = 'Attribute')),
entityTypeIn = as.character(unlist(
Get(domainIn, keyIn = 'entityType')$value
)),
isRepeatableIn = as.logical(unlist(
Get(domainIn, keyIn = 'isRepeatable')$value
)),
valueTypeIn = as.character(unlist(
Get(domainIn, keyIn = 'valueType')$value
)),
categoriesIn = Category.FromDict(Get(domainIn, keyIn = 'Category'))
)
return(myVariable)
}
}
#==============================================================================
#' UpdateVariableIndex
#'
#' @description
#' Update variable indices
#'
#' @details
#' Update variable indices in the given set of variables.\cr
#' Assumption: variablesIn has an index field.
#'
#' @param variablesIn The variable set.
#'
#' @export
UpdateVariableIndex <- function(variablesIn) {
# check if the package is available
if (!requireNamespace("dplyr", quietly = TRUE)) {
stop("Package \"dplyr\" needed for this function to work. Please install it.",
call. = FALSE)
}
myVariables <- variablesIn %>%
mutate(index = row_number(index))
return(myVariables)
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment