readTCGA
function allows reading unzipped files:
Merge_Clinical.Level_1
rnaseqv2__illuminahiseq_rnaseqv2
Mutation_Packager_Calls.Level
protein_normalization__data.Level_3
Merge_transcriptome__agilentg4502a_07_3__unc_edu__Level_3__unc_lowess_normalization_gene_level__data.Level_3
Merge_mirnaseq__illuminaga_mirnaseq__bcgsc_ca__Level_3__miR_gene_expression__data.Level_3
or
"Merge_mirnaseq__illuminahiseq_mirnaseq__bcgsc_ca__Level_3__miR_gene_expression__data.Level_3"
Merge_methylation__humanmethylation27
Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_isoforms_normalized__data.Level_3
segmented_scna_minus_germline_cnv_hg19
from the TCGA project. Those files can be easily downloaded with the downloadTCGA function. See examples.
readTCGA(path, dataType, ...)
'clinical', 'rnaseq', 'mutations', 'RPPA', 'mRNA', 'miRNASeq', 'methylation', 'isoforms', 'CNV'
depending on which type of data the user is trying to read in the tidy format. The output is a data.frame
with dataType
data.
All cohort names can be checked using: sub( x = names( infoTCGA() ), '-counts', '')
.
Parameter path
specification:
dataType = 'clinical'
a path to a cancerType.clin.merged.txt
file.
dataType = 'mutations'
a path to the unzipped folder Mutation_Packager_Calls.Level
containing .maf
files.
dataType = 'rnaseq'
a path to the unzipped file rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes_normalized__data.Level
.
dataType = 'RPPA'
a path to the unzipped file in folder protein_normalization__data.Level_3
.
dataType = 'mRNA'
a path to the unzipped file cancerType.transcriptome__agilentg4502a_07_3__unc_edu__Level_3__unc_lowess_normalization_gene_level__data.data.txt
.
dataType = 'miRNASeq'
a path to unzipped files cancerType.mirnaseq__illuminahiseq_mirnaseq__bcgsc_ca__Level_3__miR_gene_expression__data.data.txt
or cancerType.mirnaseq__illuminaga_mirnaseq__bcgsc_ca__Level_3__miR_gene_expression__data.data.txt
dataType = 'methylation'
a path to unzipped files cancerType.methylation__humanmethylation27__jhu_usc_edu__Level_3__within_bioassay_data_set_function__data.data.txt
.
dataType = 'isoforms'
a path to unzipped files cancerType.rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_isoforms_normalized__data.data.txt
.
dataType = 'CNV'
a path to unzipped files cancerType.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg18__seg.Level_3.txt
.
If you have any problems or issues, or think that something is missing or unclear, please post an issue on https://github.com/RTCGA/RTCGA/issues.
RTCGA website http://rtcga.github.io/RTCGA/articles/Data_Download.html.
Other RTCGA: RTCGA-package
,
boxplotTCGA
, checkTCGA
,
convertTCGA
, createTCGA
,
datasetsTCGA
, downloadTCGA
,
expressionsTCGA
, heatmapTCGA
,
infoTCGA
, installTCGA
,
kmTCGA
, mutationsTCGA
,
pcaTCGA
, survivalTCGA
,
theme_RTCGA
## Not run: ------------------------------------ # # # ############## # ##### clinical # ############## # # dir.create('data') # # # downloading clinical data # # dataset = "clinical" is default parameter so we may omit it # downloadTCGA(cancerTypes = c('BRCA', 'OV'), # destDir = 'data' ) # # shorten paths so that they are shorter than 256 signs - windows issue # list.files("data", full.names = TRUE) %>% # file.rename(to = substr(., start = 1, stop = 50)) # # # reading datasets # sapply(c('BRCA', 'OV'), function(element){ # path <- list.files('data', recursive = TRUE, # full.names = TRUE, # pattern = "clin.merged.txt") # assign(value = readTCGA( path, 'clinical' ), # x = paste0(element, '.clin.data'), # envir = .GlobalEnv)}) # # ############ # ##### rnaseq # ############ # # dir.create('data2') # # # downloading rnaseq data # downloadTCGA(cancerTypes = 'BRCA', # dataSet = 'Level_3__RSEM_genes_normalized', # destDir = 'data2') # # # shorten paths so that they are shorter than 256 signs - windows issue # list.files("data2", full.names = TRUE) %>% # file.rename(to = substr(., start = 1, stop = 50)) # # path_rnaseq <- list.files('data2', recursive = TRUE, # full.names = TRUE, # pattern = 'illuminahiseq') # readTCGA(path = path_rnaseq, dataType = 'rnaseq') -> rnaseq_data # # # ############### # ##### mutations # ############### # # # Example directory in which untarred data will be stored # dir.create('data3') # # # downloadTCGA(cancerTypes = 'OV', # dataSet = 'Mutation_Packager_Calls.Level', # destDir = 'data3') # # # reading data # list.files('data3', recursive = TRUE) -> directory # # readTCGA(directory, 'mutations') -> mut_file # # ################# # ##### methylation # ################# # # # Example directory in which untarred data will be stored # dir.create('data4') # # # Download KIRP methylation data and store it in data4 folder # cancerType = "KIRP" # downloadTCGA(cancerTypes = cancerType, # dataSet = "Merge_methylation__humanmethylation27", # destDir = "data4") # 
# # Shorten path of subdirectory with KIRP methylation data # list.files(path = "data4", full.names = TRUE) %>% # file.rename(to = file.path("data4", paste0(cancerType, ".methylation"))) # # # Remove manifest.txt file # list.files(path = "data4", full.names = TRUE, # recursive = TRUE, pattern = "MANIFEST") %>% # file.remove() # # # Read KIRP methylation data # path <- list.files(path = "data4", full.names = TRUE, recursive = TRUE) # KIRP.methylation <- readTCGA(path, dataType = "methylation") # # # ########## # ##### RPPA # ########## # # # Directory in which untarred data will be stored # dir.create('data5') # # # Download BRCA RPPA data and store it in data5 folder # cancerType = "BRCA" # downloadTCGA(cancerTypes = cancerType, # dataSet = "protein_normalization__data.Level_3", # destDir = "data5") # # # Shorten path of subdirectory with BRCA RPPA data # list.files(path = "data5", full.names = TRUE) %>% # file.rename(from = ., to = file.path("data5", paste0(cancerType, ".RPPA"))) # # # Remove manifest.txt file # list.files(path = "data5", full.names = TRUE, # recursive = TRUE, pattern = "MANIFEST") %>% # file.remove() # # # Read BRCA RPPA data # path <- list.files(path = "data5", full.names = TRUE, recursive = TRUE) # BRCA.RPPA <- readTCGA(path, dataType = "RPPA") # # # ########## # ##### mRNA # ########## # # # Directory in which untarred data will be stored # dir.create('data6') # # # Download UCEC mRNA data and store it in data6 folder # cancerType = "UCEC" # downloadTCGA(cancerTypes = cancerType, # dataSet = "agilentg4502a_07_3__unc_edu__Level_3", # destDir = "data6") # # # Shorten path of subdirectory with UCEC mRNA data # list.files(path = "data6", full.names = TRUE) %>% # file.rename(from = ., to = file.path("data6",paste0(cancerType, ".mRNA"))) # # # Remove manifest.txt file # list.files(path = "data6", full.names = TRUE, # recursive = TRUE, pattern = "MANIFEST") %>% # file.remove() # # # Read UCEC mRNA data # path <- list.files(path = "data6", full.names 
= TRUE, recursive = TRUE) # UCEC.mRNA <- readTCGA(path, dataType = "mRNA") # # ############## # ##### miRNASeq # ############## # # # Directory in which untarred data will be stored # dir.create('data7') # # # Download BRCA miRNASeq data and store it in data7 folder # # Remember that miRNASeq data are produced by two machines: # # Illumina Genome Analyzer and Illumina HiSeq 2000 machines # cancerType <- "BRCA" # downloadTCGA(cancerTypes = cancerType, # dataSet = paste0("Merge_mirnaseq__illuminaga_mirnaseq__bcgsc", # "_ca__Level_3__miR_gene_expression__data.Level_3"), # destDir = "data7") # # downloadTCGA(cancerTypes = cancerType, # dataSet = paste0("Merge_mirnaseq__illuminahiseq_mirnaseq__", # "bcgsc_ca__Level_3__miR_gene_expression__data.Level_3"), # destDir = "data7") # # # Shorten path of subdirectory with BRCA miRNASeq data # list.files(path = "data7", full.names = TRUE) %>% # sapply(function(path){ # if (grepl(pattern = "illuminaga", path)){ # file.rename(from = grep(pattern = "illuminaga", path, value = TRUE), # to = file.path("data7",paste0(cancerType, ".miRNASeq.illuminaga"))) # } else if (grepl(pattern = "illuminahiseq", path)){ # file.rename(from = grep(pattern = "illuminahiseq", path, value = TRUE), # to = file.path("data7",paste0(cancerType, ".miRNASeq.illuminahiseq"))) # } # }) # # # Remove manifest.txt file # list.files(path = "data7", full.names = TRUE, # recursive = TRUE, pattern = "MANIFEST") %>% # file.remove() # # # Read BRCA miRNASeq data # path <- list.files(path = "data7", full.names = TRUE, recursive = TRUE) # path_illuminaga <- grep("illuminaga", path, fixed = TRUE, value = TRUE) # path_illuminahiseq <- grep("illuminahiseq", path, fixed = TRUE, value = TRUE) # # BRCA.miRNASeq.illuminaga <- readTCGA(path_illuminaga, dataType = "miRNASeq") # BRCA.miRNASeq.illuminahiseq <- readTCGA(path_illuminahiseq, dataType = "miRNASeq") # # BRCA.miRNASeq.illuminaga <- cbind(machine = "Illumina Genome Analyzer", # BRCA.miRNASeq.illuminaga) # 
BRCA.miRNASeq.illuminahiseq <- cbind(machine = "Illumina HiSeq 2000", # BRCA.miRNASeq.illuminahiseq) # # BRCA.miRNASeq <- rbind(BRCA.miRNASeq.illuminaga, BRCA.miRNASeq.illuminahiseq) # # ############## # ##### isoforms # ############## # # # Directory in which untarred data will be stored # dir.create('data8') # # # Download ACC isoforms data and store it in data8 folder # cancerType = "ACC" # downloadTCGA(cancerTypes = cancerType, # dataSet = paste0("Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc", # "_edu__Level_3__RSEM_isoforms_normalized__data.Level_3"), # destDir = "data8") # # # Shorten path of subdirectory with ACC isoforms data # list.files(path = "data8", full.names = TRUE) %>% # file.rename(from = ., to = file.path("data8",paste0(cancerType, ".isoforms"))) # # # Remove manifest.txt file # list.files(path = "data8", full.names = TRUE, # recursive = TRUE, pattern = "MANIFEST") %>% # file.remove() # # # Read ACC isoforms data # path <- list.files(path = "data8", full.names = TRUE, recursive = TRUE) # ACC.isoforms <- readTCGA(path, dataType = "isoforms") # ## ---------------------------------------------