genreads.py 1.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
  1. #!/usr/bin/env python
  2. # encoding: utf-8
  3. """
  4. Created by Cole Trapnell and Michael Schatz.
  5. """
  6. import sys
  7. import getopt
  8. import random
  9. help_message = """
  10. genreads outputs a multi-FASTA file containing a random sampling of
  11. read-sized subsequences of the provided reference sequence.
  12. Usage:
  13. genreads [options] <reference file> <length of reads> <# of reads>
  14. Options:
  15. -s <integer value> --seed= the seed for the random number
  16. generator
  17. """
  18. class Usage(Exception):
  19. def __init__(self, msg):
  20. self.msg = msg
  21. def main(argv=None):
  22. if argv is None:
  23. argv = sys.argv
  24. try:
  25. try:
  26. opts, args = getopt.getopt(argv[1:], "hs:v", ["help", "seed="])
  27. except getopt.error, msg:
  28. raise Usage(msg)
  29. seed_val = 0
  30. # option processing
  31. for option, value in opts:
  32. if option == "-v":
  33. verbose = True
  34. if option in ("-h", "--help"):
  35. raise Usage(help_message)
  36. if option in ("-s", "--seed"):
  37. seed_val = long(value)
  38. random.seed(seed_val)
  39. if len(args) != 3:
  40. raise Usage(help_message)
  41. fasta_input = args[0]
  42. read_length = int(args[1])
  43. num_reads = int(args[2])
  44. f = open(fasta_input, "r")
  45. lines = f.readlines()
  46. if lines[0].find(">") == -1:
  47. raise Usage("File is not FASTA format")
  48. seq = "".join([line.strip() for line in lines[1:]])
  49. L = len(seq)
  50. rid = 0
  51. for i in range(0, num_reads):
  52. start = random.randint(0, L - read_length)
  53. end = start + read_length
  54. rid += 1
  55. print ">rid" + str(rid) + " " + str(start+1) + "-" + str(end+1)
  56. print seq[start:end]
  57. except Usage, err:
  58. print >> sys.stderr, sys.argv[0].split("/")[-1] + ": " + str(err.msg)
  59. return 2
  60. if __name__ == "__main__":
  61. sys.exit(main())