[Zope-Checkins] CVS: Zope2 - Image.py:1.128.6.1
Martijn Pieters
mj@digicool.com
Wed, 18 Apr 2001 19:08:45 -0400 (EDT)
Update of /cvs-repository/Zope2/lib/python/OFS
In directory korak:/tmp/cvs-serv16463
Modified Files:
Tag: mj-http_range_support-branch
Image.py
Log Message:
Support for HTTP Range and If-Range requests.
This support allows browsers and other HTTP clients to retrieve only parts
of a File or Image, making restarts of broken downloads and advanced
partial caching possible.
--- Updated File Image.py in package Zope2 --
--- Image.py 2001/04/12 15:55:43 1.128
+++ Image.py 2001/04/18 23:08:44 1.128.6.1
@@ -86,7 +86,7 @@
__version__='$Revision$'[11:-2]
-import Globals, string, struct, content_types
+import Globals, string, struct, content_types, re, sys
from OFS.content_types import guess_content_type
from Globals import DTMLFile, MessageDialog
from PropertyManager import PropertyManager
@@ -100,10 +100,119 @@
from Acquisition import Implicit
from DateTime import DateTime
from Cache import Cacheable
+from mimetools import choose_boundary
StringType=type('')
+WHITESPACE = re.compile('\s*', re.MULTILINE)
+# RFC 2616 (HTTP 1.1) Range header parsing
+# Convert a range header to a list of slice indexes, returned as (start, end)
+# tuples. If no end was given, end is None. Note that the RFC specifies the end
+# offset to be inclusive, we return python convention indexes, where the end is
+# exclusive. Syntactically incorrect headers are to be ignored, so if we
+# encounter one we return None.
def parseRange(header):
    """Parse an RFC 2616 (HTTP 1.1) Range header into slice indexes.

    Returns a list of (start, end) tuples that follow Python slice
    conventions (the end is exclusive, whereas the RFC's last-byte-pos is
    inclusive), or None when the header is syntactically incorrect and
    must therefore be ignored.

    The three range forms map as follows:

      'first-last'  ->  (first, last + 1)
      'first-'      ->  (first, None)
      '-length'     ->  (-length, None)   (suffix form, relative start)

    A '-0' suffix is returned as (sys.maxint, None), a start no resource
    can satisfy, so the caller answers with an Unsatisfiable response
    when no other range can be satisfied either.
    """
    # First, clean out *all* whitespace. This is slightly more tolerant
    # than the spec asks for, but it makes the parsing below much easier.
    header = ''.join(header.split())

    # A range header can only specify a byte range.
    try:
        spec, sets = header.split('=')
    except ValueError:
        return None
    if spec != 'bytes':
        return None

    ranges = []
    # The sets are delimited by commas.
    for rangespec in sets.split(','):
        # Skip empty values; things like ',,' are allowed in the spec.
        if not rangespec:
            continue

        try:
            first, last = rangespec.split('-')
        except ValueError:
            return None

        # Catch empty sets (a bare '-').
        if not first and not last:
            return None

        # Positions must consist of digits only. int() on its own would
        # also accept a sign, letting invalid input such as '+5-10' pass.
        if first and not first.isdigit():
            return None
        if last and not last.isdigit():
            return None

        try:
            if first:
                start = int(first)
                if last:
                    # Make the end of the range exclusive.
                    end = int(last) + 1
                else:
                    end = None
            else:
                # No start means the suffix format was used, which means
                # the value is actually a negative (relative) start.
                start, end = -int(last), None
                if not start:
                    start = sys.maxint
        except ValueError:
            return None

        # The last byte may not lie before the first one.
        if end is not None and end <= start:
            return None

        ranges.append((start, end))

    # We need at least one set.
    if not ranges:
        return None
    return ranges
+
+# Optimize Range sets, given those sets and the length of the resource
+# Optimisation is done by first expanding relative start values and open ends,
+# then sorting and combining overlapping or adjacent ranges. We also remove
+# unsatisfiable ranges (where the start lies beyond the size of the resource).
def optimizeRanges(ranges, size):
    """Normalize and merge range sets for a resource of the given size.

    ranges is a list of (start, end) tuples as produced by parseRange:
    start may be negative (relative to the end of the resource) and end
    may be None (open ended). size is the length of the resource.

    Returns a sorted list of non-overlapping (start, end) tuples with
    absolute, end-exclusive offsets that all lie within the resource.
    Overlapping or adjacent ranges are combined, and unsatisfiable ranges
    (where the start lies at or beyond the size) are dropped; if nothing
    is left, an empty list is returned.
    """
    expanded = []
    for start, end in ranges:
        if start < 0:
            # Relative start. A suffix longer than the resource selects
            # the whole resource, so never go below offset 0.
            start = max(size + start, 0)
        # An open end, or an end beyond the resource, stops at the last
        # byte of the resource.
        if end is None or end > size:
            end = size
        # Only use satisfiable ranges.
        if start < size:
            expanded.append((start, end))

    if not expanded:
        return []

    expanded.sort()
    optimized = []
    start, end = expanded[0]
    for nextstart, nextend in expanded[1:]:
        if nextstart <= end:
            # The next range overlaps or is adjacent; extend the current
            # range unless the next one falls entirely within it.
            if nextend > end:
                end = nextend
        else:
            optimized.append((start, end))
            start, end = nextstart, nextend

    # Add the remaining optimized range.
    optimized.append((start, end))
    return optimized
+
manage_addFileForm=DTMLFile('dtml/imageAdd', globals(),Kind='File',kind='file')
def manage_addFile(self,id,file='',title='',precondition='', content_type='',
REQUEST=None):
@@ -212,6 +321,8 @@
# with common servers such as Apache (which can usually
# understand the screwy date string as a lucky side effect
# of the way they parse it).
+ # This happens to be what RFC2616 tells us to do in the face of an
+ # invalid date.
try: mod_since=long(DateTime(header).timeTime())
except: mod_since=None
if mod_since is not None:
@@ -225,6 +336,7 @@
RESPONSE.setHeader('Last-Modified', rfc1123_date(self._p_mtime))
RESPONSE.setHeader('Content-Type', self.content_type)
RESPONSE.setHeader('Content-Length', self.size)
+ RESPONSE.setHeader('Accept-Ranges', 'bytes')
RESPONSE.setStatus(304)
return ''
@@ -237,9 +349,173 @@
c(REQUEST['PARENTS'][1],REQUEST)
else:
c()
+
+ # HTTP Range header handling
+ range = REQUEST.get_header('Range', None)
+ if_range = REQUEST.get_header('If-Range', None)
+ if range is not None:
+ ranges = parseRange(range)
+
+ if if_range is not None:
+ # Only send ranges if the data isn't modified, otherwise send
+ # the whole object.
+ date = string.split(if_range, ';')[0]
+ try: mod_since=long(DateTime(date).timeTime())
+ except: mod_since=None
+ if mod_since is not None:
+ if self._p_mtime:
+ last_mod = long(self._p_mtime)
+ else:
+ last_mod = long(0)
+ if last_mod > mod_since:
+ # Modified, so send a normal response. We delete the
+ # ranges, which causes us to skip to the 200 response.
+ ranges = None
+
+ if ranges:
+ # Search for satisfiable ranges.
+ satisfiable = 0
+ for start, end in ranges:
+ if start < self.size:
+ satisfiable = 1
+ break
+
+ if not satisfiable:
+ RESPONSE.setHeader('Content-Range',
+ 'bytes */%d' % self.size)
+ RESPONSE.setHeader('Last-Modified',
+ rfc1123_date(self._p_mtime))
+ RESPONSE.setHeader('Content-Type', self.content_type)
+ RESPONSE.setHeader('Content-Length', self.size)
+ RESPONSE.setStatus(416)
+ return ''
+
+ # Can we optimize?
+ if len(ranges) > 1:
+ ranges = optimizeRanges(ranges, self.size)
+
+ if len(ranges) == 1:
+ # Easy case, set extra header and return partial set.
+ start, end = ranges[0]
+ if start < 0:
+ start = self.size + start
+ end = end or self.size
+
+ size = end - start
+
+ RESPONSE.setHeader('Last-Modified',
+ rfc1123_date(self._p_mtime))
+ RESPONSE.setHeader('Content-Type', self.content_type)
+ RESPONSE.setHeader('Content-Length', size)
+ RESPONSE.setHeader('Content-Range',
+ 'bytes %d-%d/%d' % (start, end - 1, self.size))
+ RESPONSE.setStatus(206) # Partial content
+
+ data = self.data
+ if type(data) is StringType:
+ return data[start:end]
+
+ # Linked Pdata objects. Urgh.
+ pos = 0
+ while data is not None:
+ l = len(data.data)
+ pos = pos + l
+ if pos > start:
+ # We are within the range
+ lstart = l - (pos - start)
+
+ if lstart < 0: lstart = 0
+
+ # find the endpoint
+ if end <= pos:
+ lend = l - (pos - end)
+
+ # Send and end transmission
+ RESPONSE.write(data[lstart:lend])
+ break
+
+ # Not yet at the end, transmit what we have.
+ RESPONSE.write(data[lstart:])
+
+ data = data.next
+
+ return ''
+
+ else:
+ # Multiple ranges remain after optimization, so send them as a
+ # multipart/byteranges response.
+ # When we get here, ranges have been optimized, so they are
+ # in order, non-overlapping, and start and end values are
+ # positive integers.
+ boundary = choose_boundary()
+
+ # Calculate the content length
+ size = (8 + len(boundary) + # End marker length
+ len(ranges) * ( # Constant lenght per set
+ 49 + len(boundary) + len(self.content_type) +
+ len('%d' % self.size)))
+ for start, end in ranges:
+ # Variable length per set
+ size = (size + len('%d%d' % (start, end - 1)) +
+ end - start)
+
+
+ RESPONSE.setHeader('Content-Length', size)
+ RESPONSE.setHeader('Last-Modified',
+ rfc1123_date(self._p_mtime))
+ RESPONSE.setHeader('Content-Type',
+ 'multipart/byteranges; boundary=%s' % boundary)
+ RESPONSE.setStatus(206) # Partial content
+
+ pos = 0
+ data = self.data
+
+ for start, end in ranges:
+ RESPONSE.write('\r\n--%s\r\n' % boundary)
+ RESPONSE.write('Content-Type: %s\r\n' %
+ self.content_type)
+ RESPONSE.write(
+ 'Content-Range: bytes %d-%d/%d\r\n\r\n' % (
+ start, end - 1, self.size))
+
+ if type(data) is StringType:
+ RESPONSE.write(data[start:end])
+
+ else:
+ # Yippee. Linked Pdata objects.
+ while data is not None:
+ l = len(data.data)
+ pos = pos + l
+ if pos > start:
+ # We are within the range
+ lstart = l - (pos - start)
+
+ if lstart < 0: lstart = 0
+
+ # find the endpoint
+ if end <= pos:
+ lend = l - (pos - end)
+
+ # Send and loop to next range
+ RESPONSE.write(data[lstart:lend])
+ # Back up the position marker, it will
+ # be incremented again for the next
+ # part.
+ pos = pos - l
+ break
+
+ # Not yet at the end, transmit what we have.
+ RESPONSE.write(data[lstart:])
+
+ data = data.next
+
+ RESPONSE.write('\r\n--%s--\r\n' % boundary)
+ return ''
+
RESPONSE.setHeader('Last-Modified', rfc1123_date(self._p_mtime))
RESPONSE.setHeader('Content-Type', self.content_type)
RESPONSE.setHeader('Content-Length', self.size)
+ RESPONSE.setHeader('Accept-Ranges', 'bytes')
# Don't cache the data itself, but provide an opportunity
# for a cache manager to set response headers.