The readTCGA function reads the following unzipped files:

  • clinical data - Merge_Clinical.Level_1
  • rnaseq data (genes' expressions) - rnaseqv2__illuminahiseq_rnaseqv2
  • genes' mutations data - Mutation_Packager_Calls.Level
  • Reverse phase protein array data (RPPA) - protein_normalization__data.Level_3
  • Merge transcriptome agilent data (mRNA) - Merge_transcriptome__agilentg4502a_07_3__unc_edu__Level_3__unc_lowess_normalization_gene_level__data.Level_3
  • miRNASeq data - Merge_mirnaseq__illuminaga_mirnaseq__bcgsc_ca__Level_3__miR_gene_expression__data.Level_3 or Merge_mirnaseq__illuminahiseq_mirnaseq__bcgsc_ca__Level_3__miR_gene_expression__data.Level_3
  • methylation data - Merge_methylation__humanmethylation27
  • isoforms data - Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_isoforms_normalized__data.Level_3
  • CNV data - segmented_scna_minus_germline_cnv_hg19

from the TCGA project. These files can be easily downloaded with the downloadTCGA function. See examples.

readTCGA(path, dataType, ...)

Arguments

path
See details and examples.
dataType
One of 'clinical', 'rnaseq', 'mutations', 'RPPA', 'mRNA', 'miRNASeq', 'methylation', 'isoforms', 'CNV', depending on which type of data the user wants to read into a tidy format.
...
Further arguments passed to as.data.frame.

Value

A data.frame containing the data of the requested dataType.

Details

All cohort names can be checked using: sub( x = names( infoTCGA() ), '-counts', '').

Parameter path specification:

  • If dataType = 'clinical' a path to a cancerType.clin.merged.txt file.
  • If dataType = 'mutations' a path to the unzipped folder Mutation_Packager_Calls.Level containing .maf files.
  • If dataType = 'rnaseq' a path to the unzipped file rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes_normalized__data.Level.
  • If dataType = 'RPPA' a path to the unzipped file in folder protein_normalization__data.Level_3.
  • If dataType = 'mRNA' a path to the unzipped file cancerType.transcriptome__agilentg4502a_07_3__unc_edu__Level_3__unc_lowess_normalization_gene_level__data.data.txt.
  • If dataType = 'miRNASeq' a path to unzipped files cancerType.mirnaseq__illuminahiseq_mirnaseq__bcgsc_ca__Level_3__miR_gene_expression__data.data.txt or cancerType.mirnaseq__illuminaga_mirnaseq__bcgsc_ca__Level_3__miR_gene_expression__data.data.txt
  • If dataType = 'methylation' a path to unzipped files cancerType.methylation__humanmethylation27__jhu_usc_edu__Level_3__within_bioassay_data_set_function__data.data.txt.
  • If dataType = 'isoforms' a path to unzipped files cancerType.rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_isoforms_normalized__data.data.txt.
  • If dataType = 'CNV' a path to unzipped files cancerType.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg18__seg.Level_3.txt.

Issues

If you have any problems or issues, or think that something is missing or unclear, please post an issue at https://github.com/RTCGA/RTCGA/issues.

See also

RTCGA website http://rtcga.github.io/RTCGA/articles/Data_Download.html.

Other RTCGA: RTCGA-package, boxplotTCGA, checkTCGA, convertTCGA, createTCGA, datasetsTCGA, downloadTCGA, expressionsTCGA, heatmapTCGA, infoTCGA, installTCGA, kmTCGA, mutationsTCGA, pcaTCGA, survivalTCGA, theme_RTCGA

Examples

## Not run: ------------------------------------ # # # ############## # ##### clinical # ############## # # dir.create('data') # # # downloading clinical data # # dataset = "clinical" is default parameter so we may omit it # downloadTCGA(cancerTypes = c('BRCA', 'OV'), # destDir = 'data' ) # # shorten paths so that they are shorter than 256 signs - windows issue # list.files("data", full.names = TRUE) %>% # file.rename(to = substr(., start = 1, stop = 50)) # # # reading datasets # sapply(c('BRCA', 'OV'), function(element){ # path <- list.files('data', recursive = TRUE, # full.names = TRUE, # pattern = "clin.merged.txt") # assign(value = readTCGA( path, 'clinical' ), # x = paste0(element, '.clin.data'), # envir = .GlobalEnv)}) # # ############ # ##### rnaseq # ############ # # dir.create('data2') # # # downloading rnaseq data # downloadTCGA(cancerTypes = 'BRCA', # dataSet = 'Level_3__RSEM_genes_normalized', # destDir = 'data2') # # # shorten paths so that they are shorter than 256 signs - windows issue # list.files("data2", full.names = TRUE) %>% # file.rename(to = substr(., start = 1, stop = 50)) # # path_rnaseq <- list.files('data2', recursive = TRUE, # full.names = TRUE, # pattern = 'illuminahiseq') # readTCGA(path = path_rnaseq, dataType = 'rnaseq') -> rnaseq_data # # # ############### # ##### mutations # ############### # # # Example directory in which untarred data will be stored # dir.create('data3') # # # downloadTCGA(cancerTypes = 'OV', # dataSet = 'Mutation_Packager_Calls.Level', # destDir = 'data3') # # # reading data # list.files('data3', recursive = TRUE) -> directory # # readTCGA(directory, 'mutations') -> mut_file # # ################# # ##### methylation # ################# # # # Example directory in which untarred data will be stored # dir.create('data4') # # # Download KIRP methylation data and store it in data4 folder # cancerType = "KIRP" # downloadTCGA(cancerTypes = cancerType, # dataSet = "Merge_methylation__humanmethylation27", # destDir = "data4") # 
# # Shorten path of subdirectory with KIRP methylation data # list.files(path = "data4", full.names = TRUE) %>% # file.rename(to = file.path("data4", paste0(cancerType, ".methylation"))) # # # Remove manifest.txt file # list.files(path = "data4", full.names = TRUE, # recursive = TRUE, pattern = "MANIFEST") %>% # file.remove() # # # Read KIRP methylation data # path <- list.files(path = "data4", full.names = TRUE, recursive = TRUE) # KIRP.methylation <- readTCGA(path, dataType = "methylation") # # # ########## # ##### RPPA # ########## # # # Directory in which untarred data will be stored # dir.create('data5') # # # Download BRCA RPPA data and store it in data5 folder # cancerType = "BRCA" # downloadTCGA(cancerTypes = cancerType, # dataSet = "protein_normalization__data.Level_3", # destDir = "data5") # # # Shorten path of subdirectory with BRCA RPPA data # list.files(path = "data5", full.names = TRUE) %>% # file.rename(from = ., to = file.path("data5", paste0(cancerType, ".RPPA"))) # # # Remove manifest.txt file # list.files(path = "data5", full.names = TRUE, # recursive = TRUE, pattern = "MANIFEST") %>% # file.remove() # # # Read BRCA RPPA data # path <- list.files(path = "data5", full.names = TRUE, recursive = TRUE) # BRCA.RPPA <- readTCGA(path, dataType = "RPPA") # # # ########## # ##### mRNA # ########## # # # Directory in which untarred data will be stored # dir.create('data6') # # # Download UCEC mRNA data and store it in data6 folder # cancerType = "UCEC" # downloadTCGA(cancerTypes = cancerType, # dataSet = "agilentg4502a_07_3__unc_edu__Level_3", # destDir = "data6") # # # Shorten path of subdirectory with UCEC mRNA data # list.files(path = "data6", full.names = TRUE) %>% # file.rename(from = ., to = file.path("data6",paste0(cancerType, ".mRNA"))) # # # Remove manifest.txt file # list.files(path = "data6", full.names = TRUE, # recursive = TRUE, pattern = "MANIFEST") %>% # file.remove() # # # Read UCEC mRNA data # path <- list.files(path = "data6", full.names 
= TRUE, recursive = TRUE) # UCEC.mRNA <- readTCGA(path, dataType = "mRNA") # # ############## # ##### miRNASeq # ############## # # # Directory in which untarred data will be stored # dir.create('data7') # # # Download BRCA miRNASeq data and store it in data7 folder # # Remember that miRNASeq data are produced by two machines: # # Illumina Genome Analyzer and Illumina HiSeq 2000 machines # cancerType <- "BRCA" # downloadTCGA(cancerTypes = cancerType, # dataSet = paste0("Merge_mirnaseq__illuminaga_mirnaseq__bcgsc", # "_ca__Level_3__miR_gene_expression__data.Level_3"), # destDir = "data7") # # downloadTCGA(cancerTypes = cancerType, # dataSet = paste0("Merge_mirnaseq__illuminahiseq_mirnaseq__", # "bcgsc_ca__Level_3__miR_gene_expression__data.Level_3"), # destDir = "data7") # # # Shorten path of subdirectory with BRCA miRNASeq data # list.files(path = "data7", full.names = TRUE) %>% # sapply(function(path){ # if (grepl(pattern = "illuminaga", path)){ # file.rename(from = grep(pattern = "illuminaga", path, value = TRUE), # to = file.path("data7",paste0(cancerType, ".miRNASeq.illuminaga"))) # } else if (grepl(pattern = "illuminahiseq", path)){ # file.rename(from = grep(pattern = "illuminahiseq", path, value = TRUE), # to = file.path("data7",paste0(cancerType, ".miRNASeq.illuminahiseq"))) # } # }) # # # Remove manifest.txt file # list.files(path = "data7", full.names = TRUE, # recursive = TRUE, pattern = "MANIFEST") %>% # file.remove() # # # Read BRCA miRNASeq data # path <- list.files(path = "data7", full.names = TRUE, recursive = TRUE) # path_illuminaga <- grep("illuminaga", path, fixed = TRUE, value = TRUE) # path_illuminahiseq <- grep("illuminahiseq", path, fixed = TRUE, value = TRUE) # # BRCA.miRNASeq.illuminaga <- readTCGA(path_illuminaga, dataType = "miRNASeq") # BRCA.miRNASeq.illuminahiseq <- readTCGA(path_illuminahiseq, dataType = "miRNASeq") # # BRCA.miRNASeq.illuminaga <- cbind(machine = "Illumina Genome Analyzer", # BRCA.miRNASeq.illuminaga) # 
BRCA.miRNASeq.illuminahiseq <- cbind(machine = "Illumina HiSeq 2000", # BRCA.miRNASeq.illuminahiseq) # # BRCA.miRNASeq <- rbind(BRCA.miRNASeq.illuminaga, BRCA.miRNASeq.illuminahiseq) # # ############## # ##### isoforms # ############## # # # Directory in which untarred data will be stored # dir.create('data8') # # # Download ACC isoforms data and store it in data8 folder # cancerType = "ACC" # downloadTCGA(cancerTypes = cancerType, # dataSet = paste0("Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc", # "_edu__Level_3__RSEM_isoforms_normalized__data.Level_3"), # destDir = "data8") # # # Shorten path of subdirectory with ACC isoforms data # list.files(path = "data8", full.names = TRUE) %>% # file.rename(from = ., to = file.path("data8",paste0(cancerType, ".isoforms"))) # # # Remove manifest.txt file # list.files(path = "data8", full.names = TRUE, # recursive = TRUE, pattern = "MANIFEST") %>% # file.remove() # # # Read ACC isoforms data # path <- list.files(path = "data8", full.names = TRUE, recursive = TRUE) # ACC.isoforms <- readTCGA(path, dataType = "isoforms") # ## ---------------------------------------------