FASTA and FASTQ reader

Finite state machines for fun & profit. A Python snippet that extracts the titles and sequences from a FASTQ file and discards the rest (the separator and quality lines that follow each sequence).

# Minimal FASTQ reader: walks the file record by record, keeping the title
# (header without the leading "@") and the sequence line, and skipping the
# two lines that follow the sequence.
title = ""
buffer = ""

# Text mode ("r") so that every line is a str and can be compared with "" and
# "@". With "rb", Python 3 yields bytes: the empty-line test never matches and
# startswith("@") raises TypeError.
# The with-statement closes the file even if processing fails part-way.
with open("humanrna.fastq", "r") as reader:
    while True:
        line = reader.readline()
        if not line:
            # readline() returns an empty string only at end of file
            break
        line = line.strip()

        if line == "":
            continue

        if line.startswith("@"):
            #this is a title -- starts a new fastq block
            title = line[1:]
            #read seq (exactly one line is taken as the sequence)
            buffer = reader.readline().strip()
            #dump quality control: skip the two lines after the sequence.
            #Consuming them here also keeps a quality line that happens to
            #start with "@" from being mistaken for a title.
            reader.readline()
            reader.readline()

            #some use of title and buffer...
            # ...

A snippet for reading FASTA files.

def useBuffer(title, buffer):
    """Placeholder hook called once per FASTA record.

    title  -- header line of the record without the leading ">"
    buffer -- the record's sequence lines, stripped and concatenated
    """
    #use the buffer somehow...
    #...
    pass


# Minimal FASTA reader: a header line (">...") starts a new record; every
# following non-empty line is appended to that record's sequence.
title = ""
buffer = ""
# Sequence lines are collected in a list and joined once per record, which
# avoids repeatedly re-copying a growing string for long sequences.
chunks = []

# Text mode ("r") so that lines are str and can be compared with "" and ">".
# With "rb", Python 3 yields bytes and startswith(">") raises TypeError.
# The with-statement closes the file even if useBuffer() raises.
with open("humanrna.fasta", "r") as reader:
    for raw_line in reader:
        line = raw_line.strip()

        if line == "":
            continue

        if line.startswith(">"):
            #this is a title -- starts a new fasta block
            if title:
                #flush the previous record; testing title (not buffer) matches
                #the end-of-file check below, so a record with an empty
                #sequence is reported wherever it appears in the file
                buffer = "".join(chunks)
                useBuffer(title, buffer)

            chunks = []
            buffer = ""
            title = line[1:]
            continue

        if title:
            #sequence lines seen before the first title are ignored
            chunks.append(line)

#This is the end
if title:
    #use the last title and buffer...
    buffer = "".join(chunks)
    useBuffer(title, buffer)

 

 

Leave a Reply

Your email address will not be published. Required fields are marked *


Please simplify: \(\frac{\;\;\;\;12\;\;\;\;}{\frac{20}{5}}=\)