Window Masker

From genomewiki
Revision as of 17:45, 23 April 2013 by Hiram (talk | contribs) (initial contents)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation | Jump to search

Construct counts file

# Build the WindowMasker counts file for ricCom1.
# The unmasked 2bit is converted to FASTA inside a scratch directory and
# windowMasker is run there with -mk_counts; only windowmasker.counts is
# copied back to the work directory before the scratch directory is removed.

# mkdir -p: this is the first step on the page, so the work directory may
# not exist yet; -p also makes re-running the step harmless.
mkdir -p /data/genomes/ricCom1/bed/windowMasker
cd /data/genomes/ricCom1/bed/windowMasker
set fa = ricCom1.fa
set tmpDir = `mktemp -d -p /scratch/tmp doWindowMasker.XXXXXX`
# mktemp -d creates the directory owner-only; open it up to 775.
chmod 775 $tmpDir
set inputTwoBit = /data/genomes/ricCom1/ricCom1.unmasked.2bit
pushd $tmpDir
twoBitToFa $inputTwoBit $fa
windowMasker -mk_counts true -input $fa -output windowmasker.counts
popd
# popd returned to the work directory, so "." is the windowMasker directory.
cp $tmpDir/windowmasker.counts .
rm -rf $tmpDir

Window Masker with sdust option

# Run WindowMasker with -sdust true, using the existing windowmasker.counts
# as the -ustat input, and convert the resulting interval list to BED.
# The work happens in a scratch directory; only windowmasker.sdust.bed is
# copied back before the scratch directory is removed.

# mkdir -p: the work directory normally already exists at this point (it
# has to hold windowmasker.counts), and plain mkdir would report an error.
mkdir -p /data/genomes/ricCom1/bed/windowMasker
cd /data/genomes/ricCom1/bed/windowMasker
set fa = ricCom1.fa
set tmpDir = `mktemp -d -p /scratch/tmp doWindowMasker.XXXXXX`
chmod 775 $tmpDir
set inputTwoBit = /data/genomes/ricCom1/ricCom1.unmasked.2bit
cp windowmasker.counts $tmpDir
pushd $tmpDir
twoBitToFa $inputTwoBit $fa
windowMasker -ustat windowmasker.counts -sdust true -input $fa -output windowmasker.intervals
# Interval list -> BED:
#   a ">lcl|NAME" header line is deleted and NAME is remembered in $chr;
#   a "START - END" line becomes "NAME<TAB>START<TAB>END+1".
# The trailing backslashes are required by csh to continue a single-quoted
# string across lines.
perl -wpe 'if (s/^>lcl\|(.*)\n$//) { $chr = $1; } \
   if (/^(\d+) - (\d+)/) { \
   $s=$1; $e=$2+1; s/(\d+) - (\d+)/$chr\t$s\t$e/; \
   }' windowmasker.intervals > windowmasker.sdust.bed
popd
cp $tmpDir/windowmasker.sdust.bed .
rm -rf $tmpDir

Construct masked 2bit file

# Combine the unmasked 2bit with the sdust BED intervals to produce
# ricCom1.wmsk.sdust.2bit in the windowMasker work directory.
cd /data/genomes/ricCom1/bed/windowMasker
twoBitMask \
    /data/genomes/ricCom1/ricCom1.unmasked.2bit \
    windowmasker.sdust.bed \
    ricCom1.wmsk.sdust.2bit

Measure masking result:

# Stream the masked 2bit through faSize and keep its report (stdout and
# stderr together) in faSize.ricCom1.wmsk.sdust.txt.
# ">&" is used instead of "> file 2>&1": csh/tcsh rejects the latter with
# "Ambiguous output redirect.", while ">& file" is accepted by csh and bash.
twoBitToFa ricCom1.wmsk.sdust.2bit stdout | faSize stdin >& faSize.ricCom1.wmsk.sdust.txt

Results:

350621860 bases (13662715 N's 336959145 real 185528058 upper 151431087 lower) in 25763 sequences in 1 files
Total size: mean 13609.5 sd 106411.1 min 202 (EQ999533) max 4693355 (EQ973772) median 1094
%43.19 masked total, %44.94 masked real