BedTotalSize

From genomewiki
Revision as of 23:32, 14 August 2006 by Hiram (talk | contribs)
Jump to navigation | Jump to search
#!/usr/bin/env python


from sys import *
import sys
from re import *

def total_size(lines):
    """Return the summed length of all BED features found in *lines*.

    *lines* is any iterable of text lines (an open file, sys.stdin, a list).
    Each feature line is split on whitespace; the second and third fields are
    read as the integer start and stop of the feature.

    BED coordinates are 0-relative with an exclusive end, so the length of a
    feature is simply ``stop - start`` -- no +1 is added.

    Blank lines, ``#`` comment lines and ``track`` / ``browser`` header lines
    are skipped because they do not describe features.

    Raises IndexError if a feature line has fewer than three fields and
    ValueError if its start or stop field is not an integer.
    """
    total = 0
    for line in lines:
        fields = line.split()
        # Skip blank lines and the non-feature lines a BED file may carry.
        if not fields:
            continue
        if fields[0] in ("track", "browser") or fields[0].startswith("#"):
            continue
        start = int(fields[1])
        stop = int(fields[2])
        total += stop - start
    return total


def main():
    """Print the usage text, or the total feature length read from stdin."""
    # Exactly one command-line argument (whatever it is) requests the usage
    # text; with any other argument count the features are read from stdin.
    if len(sys.argv) == 2:
        print(" Will read bed-style features from stdin")
        print(" Will add all features-lengths together")
        print("")
        print(" SYNTAX: ")
        print(" totalSize ")
        return

    print("Total length of all features: " + str(total_size(sys.stdin)))


if __name__ == "__main__":
    main()

#  You could also do this in awk with the single line statement:
#
#  awk '{sum += $3-$2}END{printf "total size: %d\n",sum}' file.bed
#
#  Note that, like the awk version, the script does not add 1 to the
#  stop-start calculation.  This relates to the subtle nature of the
#  "0-relative" vs. "1-relative" coordinate systems.  When in 0-relative
#  you don't need the + or - 1's anywhere.