If I understand the discussion of the file format correctly, I think I
might have a solution for you. If not, it is probably close.

Here's a sample data file I was working with to test (notice no escaped
quotes in text fields -- not supported):

"Text column one", 2, 3, "Text column four"
-1, "two", "three", 4.0

I've attached a code file which parses it, creating strings (without the
external quotes) and evaluating the numerical values to create the right
kind of numbers. The resulting object is a subclass of UserList.

The patterns used to parse the file are:

TEXT_FIELD_PATTERN = r'"(?P<text>[^"]*)"'
NUMBER_FIELD_PATTERN = r'(?P<number>([-+.]|\d)+)'
FIELD_PATTERN = r'(' + TEXT_FIELD_PATTERN + r'|' + NUMBER_FIELD_PATTERN + r'),?\s*'

Doug

#!/usr/bin/env python
#
# Time-stamp: <99/11/04 11:03:44 hellmann>
#
"""
  Permission to use, copy, modify, and distribute this software and its
  documentation for any purpose and without fee is hereby granted,
  provided that the above copyright notice appear in all copies and that
  both that copyright notice and this permission notice appear in
  supporting documentation, and that the name of Doug Hellmann not be
  used in advertising or publicity pertaining to distribution of the
  software without specific, written prior permission.

  DOUG HELLMANN DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
  INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
  EVENT SHALL DOUG HELLMANN BE LIABLE FOR ANY SPECIAL, INDIRECT OR
  CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
  USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
  OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
  PERFORMANCE OF THIS SOFTWARE.

  DESCRIPTION:

    Parse a comma delimited text file and turn it into a list of tuples.

  CHANGE HISTORY:

    $Log: cdf.py,v $
    Revision 1.1 1999/11/04 16:02:00 hellmann
    Class for parsing simple comma delimited files. Does not handle the
    case where a text field contains an embedded quote.
""" # # Creation Information # __rcs_module_name__ = '$RCSfile: cdf.py,v $' __creator__ = ' <hellmann@gnncast.net>' __project__ = 'Python Pybox' __created__ = 'Thu, 04-Nov-1999 10:44:46 EST' # # Current Information # __author__ = '$Author: hellmann $' __version__ = '$Revision: 1.1 $' __date__ = '$Date: 1999/11/04 16:02:00 $' __locker__ = '$Locker: $' # # Import system modules # import UserList import re import string # # Import Local modules # # # Module # class CDF(UserList.UserList): TEXT_FIELD_PATTERN = r'"(?P<text>[^"]*)"' NUMBER_FIELD_PATTERN = r'(?P<number>([-+.]|\d)+)' FIELD_PATTERN = r'(' + TEXT_FIELD_PATTERN + r'|' + NUMBER_FIELD_PATTERN + r'),?\s*' def __init__(self, input): UserList.UserList.__init__(self) reObj = re.compile(self.FIELD_PATTERN) if type(input) == type(''): input = open(input, 'rt') for line in input.readlines(): matchObj = reObj.search(line) rec = () while matchObj: #print matchObj.groupdict() groupdict = matchObj.groupdict() if groupdict['text']: rec = rec + (groupdict['text'],) else: numberText = string.strip(groupdict['number']) if numberText: rec = rec + (eval(groupdict['number']),) matchObj = reObj.search(line, matchObj.end()) self.append(rec) return if __name__ == '__main__': print CDF('cdf.txt')