[BioPython] RecordFile.py
Chunlei Wu
chunlei.wu at uth.tmc.edu
Fri Apr 23 01:03:33 EDT 2004
Hi, group,
I just tried "RecordFile.py", but it failed for both fasta
file and genbank file I tested.
>>> rec_h=RecordFile.RecordFile(open(r"gb_test.txt" ),'LOCUS','\\')
or
>>> rec_h=RecordFile.RecordFile(open(r"gb_test.txt" ),'>','')
both returned the same error:
>>> rec_h.read()
Traceback (most recent call last):
File "<interactive input>", line 1, in ?
File "C:\Python23\Lib\site-packages\Bio\RecordFile.py", line 83, in read
text = self._in_record_state( args, keywds )
File "C:\Python23\Lib\site-packages\Bio\RecordFile.py", line 120, in
_in_record_state
requested_text = text
UnboundLocalError: local variable 'text' referenced before assignment
I checked the code, but it is not obvious to me how to fix it.
Actually, I wrote a simple script a while ago using Bio.File's UndoHandle for
the same purpose. It looks much simpler and is maybe not as powerful as
RecordFile.py, but it does work for me. I am posting it here in the hope that
it is worth sharing with you.
Best,
Chunlei Wu
-------------- next part --------------
#Chunlei Wu 07/30/2003
'''
FlatRecHandle is a class simulating a file handle for Flatfile format record file,
using record as a reading unit instead of line.
FlatRecHandle.readrecord() returns one record per call.
'''
from Bio import File,Fasta
class FlatRecHandle:
    '''A file-handle-like reader for flat-file record formats.

    The reading unit is a whole record instead of a single line.

    start_marker is the prefix of the first line of each record:
        ">" for Fasta format records,
        "LOCUS" for GenBank format records, etc.
        If None, a record starts at the first line read; in that case no
        "next record" detection is possible, so without a stop_marker the
        whole remainder of the file is returned as one record.
    stop_marker is the prefix of the last line of each record. If None, the
    record runs until the next start_marker line or the end of the file.
        None for Fasta format records,
        "//" for GenBank format records, etc.

    readrecord() returns '' when the end of the file has been reached.'''

    def __init__(self,handle,start_marker=None,stop_marker=None):
        '''Wrap handle in a Bio.File.UndoHandle and remember both markers.'''
        # UndoHandle provides peekline(), which readrecord() needs to look
        # one line ahead without consuming it.
        self._handle = File.UndoHandle(handle)
        self.start_marker = start_marker
        self.stop_marker = stop_marker

    def _starts_record(self, line):
        '''Return True if line begins with the configured start_marker.'''
        marker = self.start_marker
        return marker is not None and line.startswith(marker)

    def readrecord(self):
        '''Return the next record as one string, just like readline().

        Lines before the first start_marker line are skipped.
        Returns '' if the end of the file is reached before any record
        line was read. If a stop_marker is configured and the file ends in
        the middle of a record, a warning is printed and the partial record
        is returned.'''
        # With no start marker every line belongs to a record from the start;
        # the original code crashed here on len(None).
        in_record = self.start_marker is None
        pieces = []
        while True:
            line = self._handle.readline()
            if line == '':  # reached EOF
                # Only warn when part of a record was actually collected.
                if self.stop_marker is not None and pieces:
                    print('Warning: This record may be incomplete. No stop marker("%s") found,but reach EOF!' % self.stop_marker)
                break
            if not in_record:
                if not self._starts_record(line):
                    continue  # still in leading text before the first record
                in_record = True
            pieces.append(line)
            if self.stop_marker is None:
                # No explicit terminator: the record ends right before the
                # next start line, or at EOF.
                upcoming = self._handle.peekline()
                if upcoming == '' or self._starts_record(upcoming):
                    break
            elif line.startswith(self.stop_marker):
                break
        return ''.join(pieces)

    def rewind(self):
        '''Rewind the handle to the beginning (seek(0) on the wrapped handle).'''
        return self._handle.seek(0)

    def tell(self):
        '''Return tell() of the wrapped handle.'''
        return self._handle.tell()

    def close(self):
        '''Close the wrapped handle.'''
        return self._handle.close()

    def closed(self):
        '''Return the closed state of the wrapped handle.

        File objects expose "closed" as a plain attribute, so calling it (as
        the previous code did) raised TypeError. A callable "closed" is
        still honoured for handles that implement it as a method.'''
        state = self._handle.closed
        if callable(state):
            return state()
        return state

    def readrecords(self):
        '''Return a list of all remaining records, just like readlines().'''
        rec_list = []
        while True:
            rec = self.readrecord()
            if rec == '':
                break
            rec_list.append(rec)
        return rec_list
def fasta_handle(in_f_handle):
    '''Wrap an open Fasta format file handle in a FlatRecHandle.

    Records are recognised by lines starting with ">"; no stop marker
    is passed.'''
    fasta_start = ">"
    return FlatRecHandle(in_f_handle, fasta_start)
def fasta_iterator(fastafile_handle):
    '''Build a Bio.Fasta iterator over an open Fasta format file handle.

    A Fasta.RecordParser instance is handed to Fasta.Iterator together
    with the handle.'''
    return Fasta.Iterator(fastafile_handle, Fasta.RecordParser())
def gb_handle(in_f_handle):
    '''Wrap an open GenBank format file handle in a FlatRecHandle.

    Records start at a "LOCUS" line and end at a "//" line.'''
    record_start, record_stop = "LOCUS", "//"
    return FlatRecHandle(in_f_handle, record_start, record_stop)
More information about the BioPython
mailing list