BedProject

From genomewiki
Jump to navigationJump to search
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.
#!/usr/bin/gawk -f
# max 1/06

BEGIN { 
        if (ARGC==1 || ARGC>4) {
          print;
          print "Will change the seqname-fields of a bed file and"
          print "and will add a given offset to both start- and end-pos"
          print "fields of bed. Used to project results from a program"
          print "that returned coordinates from 0...x to a given chromosome"
          print "If you specify a fasta-file as parameter, we will "
          print "read seqname/offset from the seqname-line of the fasta-file"
          print "It has to be in UCSC-style format, eg:"
          print "  >seqname range=chr4:1012-1050"
          print "In this case seqname would be 'chr4' and offset '1012'. "
          print "If you add -r at the end, coordinates will be SUBSTRACTED"
          print "offset instead of added (reversed)"
          print ""
          print "SYNTAX:"
          print " bedproject <seqname> <offset>" 
          print " bedproject <seqname> <offset> -r" 
          print " bedproject <fasta-file>"
          print " bedproject <fasta-file> -r"
          print "EXAMPLE:"
          print " cat bla.bed | bedproject chr4 1204"
          print " cat bla.bed | bedproject bla.fa"
          exit 1
        }
        
        OFS="\t"
        if (ARGV[ARGC-1]=="-r") {
            rev=1;
            ARGC-=1;
        }

        if (ARGC==3) {
         seqname = ARGV[1]
         offset = ARGV[2]
         revOffset = ARGV[2]
        }

        if (ARGC==2) {
         FS = " "
         getline < ARGV[1]
         split($2, range, "=")
         split(range[2], coords, ":")
         seqname = coords[1]
         split(coords[2], positions, "-")
         offset = positions[1]
         revOffset = positions[2]
        }

         ARGV[1]="-"
         ARGC=2
      }

/track/ { print; next;}
/browser/ {print; next;}

/.*[a-z]+.*/    { 
         if ($2 < 0) {
            print "bedproject Error: negative position found!"  > "/dev/stderr"
            exit 1
          }
        if (rev==1) {
            if ($6=="+") {
                $6="-"
            }
            else if ($6=="-") {
                $6="+"
            }
            if (revOffset-$3 < 0) {
                dropped+=1;
                next;
                }
            print seqname, revOffset-$3, revOffset-$2, $4, $5, $6, $7, $8, $9, $10,$11,$12,$13,$14;
        }
        else { 
        if (offset+$2 < 0) {
            print "warning: dropping negative startpos feature" > "/dev/stderr";
            next
        }
        print seqname, offset+$2, offset+$3, $4, $5, $6, $7, $8, $9, $10, $11, $12,$13, $14;
    }
        }

END { if (dropped!=0) { 
                print "warning: dropped "dropped" negative startpos features" > /dev/stderr;
            }
        }