# Date: Thu Dec 12 13:41:41 2019
# --------------
# Author: Yann Dorant
# Date:
# Modification: 04 feb 2020
# --------------
#
# Purpose: read a per-sample read-depth table ("gdepth") for duplicated SNPs,
# reduce it to one row per locus, normalize the depths with edgeR
# (calcNormFactors + cpm) and write the normalized matrix to a tab-separated
# text file.

# Libraries
library(dplyr)
library(magrittr)
library(tibble)
library(edgeR)

# Set working dir -------------------------
# NOTE(review): PATH is not defined anywhere in this script; it has to exist in
# the calling environment (or be replaced by a real directory) before running.
setwd(PATH)

################################################################################
###################### Define global functions ###############################
################################################################################

# source my R toolbox functions (check PATH) -------------------------
# load specific useful functions from source
`%ni%` <- Negate(`%in%`) # reverse of %in%

#---------- Add new project functions -------------------------------

################################################################################
########################### Main script ######################################
################################################################################

############# load gdepth file from vcf data (duplicated SNPs)
# The pipeline below uses the CHROM and POS columns; every remaining column
# ends up in the count matrix passed to edgeR.
gdepth_all <- read.table("batch_2_duplicated_SNPs.gdepth", header = TRUE)
dim(gdepth_all)

# clean gdepth datafile
gdepth_all_transformed <- gdepth_all %>%
  dplyr::select(-POS) %>% # remove POS column
  # keep only unique loci (first row found for each CHROM value)
  dplyr::distinct(CHROM, .keep_all = TRUE) %>%
  # move locus info from the CHROM column to the row names
  tibble::column_to_rownames(var = "CHROM")

# correct sample names
# NOTE(review): presumably this undoes the "-" -> "." substitution applied by
# read.table()'s column-name checking -- confirm against the input header.
# The pattern replaces every ".16" occurrence in a name, wherever it appears.
colnames(gdepth_all_transformed) <- gsub(
  "\\.16", "-16", colnames(gdepth_all_transformed)
)

# check data
dim(gdepth_all_transformed)

# Preview at most the first 10 rows/columns. The bounds are clamped so that the
# check does not fail on tables with fewer than 10 loci or samples.
preview_rows <- seq_len(min(10L, nrow(gdepth_all_transformed)))
preview_cols <- seq_len(min(10L, ncol(gdepth_all_transformed)))
gdepth_all_transformed[preview_rows, preview_cols, drop = FALSE] # check matrix

# |---------|
# | Step 2  | ================> perform normalization using EdgeR package
# |---------|
####################

# create list to store info (loci in rows, samples in columns)
DGE_list <- DGEList(counts = as.matrix(gdepth_all_transformed))

# compute per-sample normalization factors with edgeR's default settings
gdepth_norm_Fact <- calcNormFactors(DGE_list)

# counts per million on the normalized library sizes, not log-transformed
gdepth_normalized <- cpm(
  gdepth_norm_Fact,
  normalized.lib.sizes = TRUE,
  log = FALSE
)
gdepth_normalized[preview_rows, preview_cols, drop = FALSE] # check normalized data

# write normalized matrix of read depth CNVs
# With row.names = TRUE and col.names = TRUE the header line has no entry for
# the row-name column, so it holds one field fewer than the data lines.
write.table(
  gdepth_normalized,
  "gdepth_normalized_CNVs.temp.txt",
  col.names = TRUE,
  row.names = TRUE,
  quote = FALSE,
  sep = "\t"
)