Load All Study Data from a metawoRld Project — load

Scans the project's data/ directory, reads the metadata.yml and data.csv files from each valid study subdirectory, and compiles the information into R objects.

Usage

load_metawoRld(path = ".", verbose = TRUE)

Arguments

path: Character string. The path to the root directory of the metawoRld project. Defaults to the current working directory (".").
verbose: Logical. Print informative messages about studies being loaded or skipped? Defaults to TRUE.

Value

A list with two components:

studies_data: A single data frame combining the rows of all data.csv files, with an added study_id column. The function attempts to join relevant information, such as 'unit' from measurement_methods and 'group_name' from outcome_groups, based on the links defined in each study's metadata.yml. Columns that cannot be joined, or that are missing in specific studies, contain NA.
studies_metadata: A list where names are the study_ids and values are the full parsed metadata lists read from each study's metadata.yml file.

Returns NULL invisibly if the data directory is missing or no valid studies are found. Issues warnings for studies skipped due to missing files or errors.

Examples

if (FALSE) { # \dontrun{
# ---- Setup: a throwaway project that will hold two studies ----
proj_path <- file.path(tempdir(), "load_study_test")
create_metawoRld(
  path = proj_path,
  project_name = "Load Study Test",
  project_description = "Testing load_metawoRld()"
)

# ---- Study 1: a single measurement method and a single data row ----
meta1 <- list(
  study_id = "S1",
  title = "Study One",
  authors = list("A"),
  year = 2021,
  journal = "J1",
  study_design = "Cohort",
  country = "X",
  sample_type = "Serum",
  outcome_groups = list(
    g1 = list(name = "Case", def = "..."),
    g2 = list(name = "Ctrl", def = "...")
  ),
  measurement_methods = list(
    m1 = list(unit = "pg/mL", analysis_type = "E")
  ),
  datafindr_assessment = list(relevance_score = 1)
)
# method_ref_id and group_label reuse the keys defined in meta1
# ("m1" under measurement_methods, "g1" under outcome_groups).
data1 <- data.frame(
  measurement_id = "m1a",
  method_ref_id = "m1",
  cytokine_name = "CK1",
  group_label = "g1",
  gestational_age_timing = "T1",
  n = 10,
  statistic_type = "mean_sd",
  value1 = 5,
  value2 = 1
)
add_study_data(proj_path, "S1", meta1, data1)

# ---- Study 2: two measurement methods and two data rows ----
meta2 <- list(
  study_id = "S2",
  title = "Study Two",
  authors = list("B"),
  year = 2022,
  journal = "J2",
  study_design = "Case-Ctrl",
  country = "Y",
  sample_type = "Plasma",
  outcome_groups = list(
    grpA = list(name = "High", def = "..."),
    grpB = list(name = "Low", def = "...")
  ),
  measurement_methods = list(
    assayX = list(unit = "ng/L", analysis_type = "Luminex", target = "CK1"),
    assayY = list(unit = "pg/mL", analysis_type = "ELISA", target = "CK2")
  ),
  datafindr_assessment = list(relevance_score = 0.8)
)
data2 <- data.frame(
  measurement_id = c("m2a", "m2b"),
  method_ref_id = c("assayX", "assayY"),
  cytokine_name = c("CK1", "CK2"),
  group_label = c("grpA", "grpA"),
  gestational_age_timing = c("T3", "T3"),
  n = c(25, 25),
  statistic_type = c("median_iqr", "mean_sem"),
  value1 = c(500, 12.3),
  # value2 is a character column here: the first entry is a range, so the
  # second entry is written as a string as well.
  value2 = c("400-650", "2.1")
)
add_study_data(proj_path, "S2", meta2, data2)

# ---- A folder with no study files (should be skipped) ----
empty_dir <- file.path(proj_path, "data", "EmptyFolder")
fs::dir_create(empty_dir)

# ---- Load everything back ----
loaded_data <- load_metawoRld(proj_path)

# ---- Inspect the combined data frame ----
print("--- Combined Data Frame ---")
print(loaded_data$studies_data)

# ---- Inspect the per-study metadata list ----
print("--- List of Metadata ---")
print(names(loaded_data$studies_metadata))
print(loaded_data$studies_metadata$S1$title)

# ---- Clean up the temporary project ----
unlink(proj_path, recursive = TRUE)
} # }