[Zpt] CVS: Packages/TAL - nsgmllib.py:1.9
guido@digicool.com
guido@digicool.com
Thu, 15 Mar 2001 17:53:45 -0500 (EST)
Update of /cvs-repository/Packages/TAL
In directory korak:/tmp/cvs-serv12940
Modified Files:
nsgmllib.py
Log Message:
Keep track of current line number and offset. Call self.getpos() to
retrieve.
--- Updated File nsgmllib.py in package Packages/TAL --
--- nsgmllib.py 2001/03/15 19:00:41 1.8
+++ nsgmllib.py 2001/03/15 22:53:45 1.9
@@ -63,6 +63,8 @@
self.lasttag = '???'
self.nomoretags = 0
self.literal = 0
+ self.lineno = 1
+ self.offset = 0
# For derived classes only -- enter literal mode (CDATA) till EOF
def setnomoretags(self):
@@ -84,6 +86,27 @@
def close(self):
self.goahead(1)
# Internal -- update line number and offset.  This should be
# called for each piece of data exactly once, in order -- in other
# words the concatenation of all the input strings to this
# function should be exactly the entire input.
def updatepos(self, i, j):
    """Advance the tracked position past rawdata[i:j] and return j.

    self.lineno is incremented by the number of newlines found in the
    slice.  self.offset becomes the number of characters that follow
    the last newline in the slice, or grows by j - i when the slice
    contains no newline.  An empty or inverted range (i >= j) leaves
    the tracked position untouched.
    """
    if i >= j:
        return j
    rawdata = self.rawdata
    # String methods are used instead of the string-module functions
    # (string.count / string.rindex), which are deprecated and absent
    # from Python 3; the results are identical.
    nlines = rawdata.count("\n", i, j)
    if nlines:
        self.lineno = self.lineno + nlines
        # At least one newline was counted in [i, j), so rindex
        # cannot raise ValueError here.
        pos = rawdata.rindex("\n", i, j)
        self.offset = j - (pos + 1)
    else:
        self.offset = self.offset + (j - i)
    return j
+
# Interface -- report the position tracked so far.
def getpos(self):
    """Return the current (lineno, offset) pair held on the instance."""
    position = (self.lineno, self.offset)
    return position
+
# Internal -- handle data as far as reasonable. May leave state
# and data to be processed by a subsequent call. If 'end' is
# true, force handling all data as if followed by EOF marker.
@@ -94,70 +117,75 @@
while i < n:
if self.nomoretags:
self.handle_data(rawdata[i:n])
- i = n
+ i = self.updatepos(i, n)
break
match = interesting.search(rawdata, i)
if match: j = match.start(0)
else: j = n
if i < j: self.handle_data(rawdata[i:j])
- i = j
+ i = self.updatepos(i, j)
if i == n: break
if rawdata[i] == '<':
if starttagopen.match(rawdata, i):
if self.literal:
self.handle_data(rawdata[i])
- i = i+1
+ i = self.updatepos(i, i+1)
continue
k = self.parse_starttag(i)
if k < 0: break
- i = k
+ i = self.updatepos(i, k)
continue
if endtagopen.match(rawdata, i):
k = self.parse_endtag(i)
if k < 0: break
- i = k
+ i = self.updatepos(i, k)
self.literal = 0
continue
if commentopen.match(rawdata, i):
if self.literal:
self.handle_data(rawdata[i])
- i = i+1
+ i = self.updatepos(i, i+1)
continue
k = self.parse_comment(i)
if k < 0: break
- i = i+k
+ i = self.updatepos(i, i+k)
continue
if piopen.match(rawdata, i):
if self.literal:
self.handle_data(rawdata[i])
- i = i+1
+ i = self.updatepos(i, i+1)
continue
k = self.parse_pi(i)
if k < 0: break
- i = i+k
+ i = self.updatepos(i, i+k)
continue
match = special.match(rawdata, i)
if match:
if self.literal:
self.handle_data(rawdata[i])
- i = i+1
+ i = self.updatepos(i, i+1)
continue
- i = match.end(0)
+ k = match.end(0)
+ i = self.updatepos(i, k)
continue
elif rawdata[i] == '&':
match = charref.match(rawdata, i)
if match:
name = match.group(1)
self.handle_charref(name)
- i = match.end(0)
- if rawdata[i-1] != ';': i = i-1
+ k = match.end(0)
+ if rawdata[i-1] != ';':
+ k = k-1
+ i = self.updatepos(i, k)
continue
match = entityref.match(rawdata, i)
if match:
name = match.group(1)
self.handle_entityref(name)
- i = match.end(0)
- if rawdata[i-1] != ';': i = i-1
+ k = match.end(0)
+ if rawdata[i-1] != ';':
+ k = k-1
+ i = self.updatepos(i, k)
continue
else:
raise RuntimeError, 'neither < nor & ??'
@@ -166,17 +194,17 @@
match = incomplete.match(rawdata, i)
if not match:
self.handle_data(rawdata[i])
- i = i+1
+ i = self.updatepos(i, i+1)
continue
j = match.end(0)
if j == n:
break # Really incomplete
self.handle_data(rawdata[i:j])
- i = j
+ i = self.updatepos(self, i, j)
# end while
if end and i < n:
self.handle_data(rawdata[i:n])
- i = n
+ i = self.updatepos(i, n)
self.rawdata = rawdata[i:]
# XXX if end: check for empty stack