# Isolate putative Chrm1 from Automated dataset
# input:  automated single bivalent observations
# output: csv file of putative Chrm1 for manual curation
#
# Overview:
#   1. Load the bivalent-level table and drop images that should not have been
#      run through the algorithm ("p_rev") and duplicate-labelled rows ("DUP").
#   2. Annotate rows with the helpers sourced from CommonFunc.R.
#   3. Build a per-image (cell) summary table keyed on fileName.
#   4. Filter bivalents by SC length and centromere position.
#   5. Filter cells by box number, keep the cells that still contain at least
#      one bivalent passing step 4, and write them out for manual curation.
#
# All row filters go through which() so that an NA in the tested column drops
# the row instead of producing an all-NA phantom row (the behaviour of
# df[logical_with_NA, ]).

# NOTE(review): setwd() in a script makes every later relative path depend on
# the caller's starting directory; kept because the output location relies on it.
setwd("./WildMiceMadison/")

# ---- Thresholds --------------------------------------------------------------
# Values are the ones hard-coded by the original author.
merged_sc_length_cutoff <- 200 # at or above this, observations are treated as merged chrms
chrm1_sc_length_max <- 170     # empirical max SC length for Chrm1
chrm1_sc_length_min <- 90      # empirical min SC length for Chrm1
chrm1_cent_pos_max <- .27      # original note: +2SE centromere, 0.2762215
chrm1_cent_pos_min <- .15      # original note: "+2SE centromere, 0.1664199"
                               # (presumably -2SE; TODO confirm)
max_box_id_cutoff <- 30        # cells whose highest boxNumber is >= this are dropped

# ---- Load --------------------------------------------------------------------
# read csv for big BivData file
# NOTE(review): "~./Pero_BivData.csv" is an unusual path ("~." is not the home
# directory shorthand "~/"); left unchanged, confirm it resolves as intended.
BivData <- read.csv("~./Pero_BivData.csv", header = TRUE)

# Presumably provides add_mouse(), add_trapping_location(), add_status() and
# add_species() used below; verify.
# NOTE(review): "/CommonFunc.R" points at the filesystem root; confirm.
source("/CommonFunc.R")

####################
# Cleaning BivData #
####################

# clean data of images which shouldn't have run through algorithm
BivData2 <- BivData[!grepl("p_rev", BivData$fileName), ] # 16492 (author's row count)
# remove DUP labeled rows
BivData2 <- BivData2[!grepl("DUP", BivData2$fileName), ]

BivData2 <- add_mouse(BivData2)
BivData2 <- add_trapping_location(BivData2)
BivData2 <- add_status(BivData2)
BivData2 <- add_species(BivData2)

# make a unique row name (Obj.ID) of the form fileName_boxNumber
BivData2$Obj.ID <- paste(BivData2$fileName, BivData2$boxNumber, sep = "_")

# remove very long SC length observations (all merged chrms)
BivData2 <- BivData2[
  which(BivData2$chromosomeLength < merged_sc_length_cutoff),
] # 16492 (author's row count)

# ---- Per-cell summary table --------------------------------------------------
# One row per image (fileName): the length of the longest Chrm (maxChrm) in the
# image, its box number and its centromere position, plus box/crossover totals.
# The table has to be built before the bivalent-level filters below so the
# per-image maxima are computed over all remaining bivalents of the image.
# plyr is namespace-qualified so the call works without library(plyr) and is
# not affected by another package masking summarise().
putativeChrm1_cell_table <- plyr::ddply(
  BivData2, "fileName", plyr::summarise,
  boxs.IDd = max(boxNumber),
  nboxes_passed = length(boxNumber),
  totalCO = sum(numberCrossOvers),
  maxChrm_length = max(chromosomeLength),
  maxChrm_boxNumber = boxNumber[which.max(chromosomeLength)],
  # centromere position of the longest Chrm; original note: remove ones with
  # bad centromere (.3)
  maxChrm_centPER = centromere_PER_Position[which.max(chromosomeLength)]
)

# ---- 1. Filters on the single bivalent observations ---------------------------
# Filter by SC length (max and min from empirical measures) and by centromere
# position. Author's row counts after each successive condition were
# 16447 (< 170), 2151 (> 90), 1739 (< .27), 828 (> .15).
chrm1_rows <- which(
  BivData2$chromosomeLength < chrm1_sc_length_max &
    BivData2$chromosomeLength > chrm1_sc_length_min &
    BivData2$centromere_PER_Position < chrm1_cent_pos_max &
    BivData2$centromere_PER_Position > chrm1_cent_pos_min
)
putativeChrm1_BivData <- BivData2[chrm1_rows, ]

# ---- 2. Filters based on cell level statistic ---------------------------------
# Remove observations that come from images with many objects; these are likely
# noisy and contain bad quality bivalents.
# NOTE(review): the test uses boxs.IDd (the highest boxNumber in the image), not
# nboxes_passed, and it also drops images where that value is exactly 30.
putativeChrm1_cell_table <- putativeChrm1_cell_table[
  which(putativeChrm1_cell_table$boxs.IDd < max_box_id_cutoff),
] # 926 to 440 (author's row counts)

# Cross reference the cell table with the chromosome level dataframe: keep only
# cells that contain at least one bivalent that passed the filters in step 1.
Chrm1_list_for_curation <- putativeChrm1_cell_table[
  putativeChrm1_cell_table$fileName %in% putativeChrm1_BivData$fileName,
] # 343 (author's row count)

# write out list of putative Chrm1 observations for manual verification
write.csv(Chrm1_list_for_curation, file = "putative_Chrm1_list.csv")