[Zope-Checkins] CVS: Zope2 - Image.py:1.128.6.1

Wed, 18 Apr 2001 19:08:45 -0400 (EDT)

Update of /cvs-repository/Zope2/lib/python/OFS
In directory korak:/tmp/cvs-serv16463

Modified Files:
      Tag: mj-http_range_support-branch
	Image.py 
Log Message:
Support for HTTP Range and If-Range requests.

This support allows browsers and other HTTP clients to retrieve only parts
of a File or Image, making restarts of broken downloads and advanced
partial caching possible.

--- Updated File Image.py in package Zope2 --
--- Image.py	2001/04/12 15:55:43	1.128
+++ Image.py	2001/04/18 23:08:44	1.128.6.1
@@ -86,7 +86,7 @@
 
 __version__='$Revision$'[11:-2]
 
-import Globals, string, struct, content_types
+import Globals, string, struct, content_types, re, sys
 from OFS.content_types import guess_content_type
 from Globals import DTMLFile, MessageDialog
 from PropertyManager import PropertyManager
@@ -100,10 +100,119 @@
 from Acquisition import Implicit
 from DateTime import DateTime
 from Cache import Cacheable
+from mimetools import choose_boundary
 
 
 StringType=type('')
+WHITESPACE = re.compile('\s*', re.MULTILINE)
 
+# RFC 2616 (HTTP 1.1) Range header parsing
+# Convert a range header to a list of slice indexes, returned as (start, end)
+# tuples. If no end was given, end is None. Note that the RFC specifies the end
+# offset to be inclusive; we return Python-convention indexes, where the end is
+# exclusive. Syntactically incorrect headers are to be ignored, so if we
+# encounter one we return None.
+def parseRange(header):
+    ranges = []
+    add = ranges.append
+
+    # First, clean out *all* whitespace. This is slightly more tolerant
+    # than the spec asks for, but hey, it makes this function much easier.
+    header = WHITESPACE.sub('', header)
+
+    # A range header only can specify a byte range
+    try: spec, sets = string.split(header, '=')
+    except ValueError: return None
+    if spec != 'bytes':
+        return None
+
+    # The sets are delimited by commas.
+    sets = string.split(sets, ',')
+    # Filter out empty values, things like ',,' are allowed in the spec
+    sets = filter(None, sets)
+    # We need at least one set
+    if not sets:
+        return None
+
+    for set in sets:
+        try: start, end = string.split(set, '-')
+        except ValueError: return None
+
+        # Catch empty sets
+        if not start and not end:
+            return None
+
+        # Convert to integers or None (which will raise errors if
+        # non-integers were used (which is what we want)).
+        try:
+            if start == '': start = None
+            else: start = int(start)
+            if end == '': end = None
+            else: end = int(end)
+        except ValueError:
+            return None
+
+        # Special case: No start means the suffix format was used, which
+        # means the end value is actually a negative start value.
+        # Convert this by making it absolute.
+        # A -0 range is converted to sys.maxint, which will result in an
+        # Unsatisfiable response if no other ranges can be satisfied either.
+        if start is None:
+            start, end = -end, None
+            if not start:
+                start = sys.maxint
+        elif end is not None:
+            end = end + 1 # Make the end of the range exclusive
+
+        if end is not None and end <= start:
+            return None
+
+        # And store
+        add((start, end))
+
+    return ranges
+
+# Optimize Range sets, given those sets and the length of the resource
+# Optimisation is done by first expanding relative start values and open ends,
+# then sorting and combining overlapping or adjacent ranges. We also remove
+# unsatisfiable ranges (where the start lies beyond the size of the resource).
+def optimizeRanges(ranges, size):
+    expanded = []
+    add = expanded.append
+    for start, end in ranges:
+        if start < 0:
+            start = size + start
+        end = end or size
+        # Only use satisfiable ranges
+        if start < size:
+            add((start, end))
+
+    ranges = expanded
+    ranges.sort()
+    ranges.reverse()
+    optimized = []
+    add = optimized.append
+    start, end = ranges.pop()
+    
+    while ranges:
+        nextstart, nextend = ranges.pop()
+        # If the next range overlaps or is adjacent
+        if nextstart <= end:
+            # If it falls within the current range, discard
+            if nextend <= end:
+                continue
+            
+            # Overlap, adjust end
+            end = nextend
+        else:
+            add((start, end))
+            start, end = nextstart, nextend
+
+    # Add the remaining optimized range
+    add((start, end))
+    
+    return optimized
+
 manage_addFileForm=DTMLFile('dtml/imageAdd', globals(),Kind='File',kind='file')
 def manage_addFile(self,id,file='',title='',precondition='', content_type='',
                    REQUEST=None):
@@ -212,6 +321,8 @@
             # with common servers such as Apache (which can usually
             # understand the screwy date string as a lucky side effect
             # of the way they parse it).
+            # This happens to be what RFC2616 tells us to do in the face of an
+            # invalid date.
             try:    mod_since=long(DateTime(header).timeTime())
             except: mod_since=None
             if mod_since is not None:
@@ -225,6 +336,7 @@
                     RESPONSE.setHeader('Last-Modified', rfc1123_date(self._p_mtime))
                     RESPONSE.setHeader('Content-Type', self.content_type)
                     RESPONSE.setHeader('Content-Length', self.size)
+                    RESPONSE.setHeader('Accept-Ranges', 'bytes')
                     RESPONSE.setStatus(304)
                     return ''
 
@@ -237,9 +349,173 @@
                 c(REQUEST['PARENTS'][1],REQUEST)
             else:
                 c()
+
+        # HTTP Range header handling
+        range = REQUEST.get_header('Range', None)
+        if_range = REQUEST.get_header('If-Range', None)
+        if range is not None:
+            ranges = parseRange(range)
+
+            if if_range is not None:
+                # Only send ranges if the data isn't modified, otherwise send
+                # the whole object.
+                date = string.split(if_range, ';')[0]
+                try: mod_since=long(DateTime(date).timeTime())
+                except: mod_since=None
+                if mod_since is not None:
+                    if self._p_mtime:
+                        last_mod = long(self._p_mtime)
+                    else:
+                        last_mod = long(0)
+                    if last_mod > mod_since:
+                        # Modified, so send a normal response. We delete the
+                        # ranges, which causes us to skip to the 200 response.
+                        ranges = None
+
+            if ranges:
+                # Search for satisfiable ranges.
+                satisfiable = 0
+                for start, end in ranges:
+                    if start < self.size:
+                        satisfiable = 1
+                        break
+
+                if not satisfiable:
+                    RESPONSE.setHeader('Content-Range', 
+                        'bytes */%d' % self.size)
+                    RESPONSE.setHeader('Last-Modified',
+                        rfc1123_date(self._p_mtime))
+                    RESPONSE.setHeader('Content-Type', self.content_type)
+                    RESPONSE.setHeader('Content-Length', self.size)
+                    RESPONSE.setStatus(416)
+                    return ''
+
+                # Can we optimize?
+                if len(ranges) > 1:
+                    ranges = optimizeRanges(ranges, self.size)
+                                
+                if len(ranges) == 1:
+                    # Easy case, set extra header and return partial set.
+                    start, end = ranges[0]
+                    if start < 0:
+                        start = self.size + start
+                    end = end or self.size
+
+                    size = end - start
+                    
+                    RESPONSE.setHeader('Last-Modified',
+                        rfc1123_date(self._p_mtime))
+                    RESPONSE.setHeader('Content-Type', self.content_type)
+                    RESPONSE.setHeader('Content-Length', size)
+                    RESPONSE.setHeader('Content-Range', 
+                        'bytes %d-%d/%d' % (start, end - 1, self.size))
+                    RESPONSE.setStatus(206) # Partial content
+
+                    data = self.data
+                    if type(data) is StringType:
+                        return data[start:end]
+
+                    # Linked Pdata objects. Urgh.
+                    pos = 0
+                    while data is not None:
+                        l = len(data.data)
+                        pos = pos + l
+                        if pos > start:
+                            # We are within the range
+                            lstart = l - (pos - start)
+
+                            if lstart < 0: lstart = 0
+                            
+                            # find the endpoint
+                            if end <= pos:
+                                lend = l - (pos - end)
+                                
+                                # Send and end transmission
+                                RESPONSE.write(data[lstart:lend])
+                                break
+
+                            # Not yet at the end, transmit what we have.
+                            RESPONSE.write(data[lstart:])
+
+                        data = data.next
+                    
+                    return ''
+                    
+                else:
+                    # More than one range remains, so send them together as
+                    # a single multipart/byteranges response.
+                    # When we get here, ranges have been optimized, so they are
+                    # in order, non-overlapping, and start and end values are
+                    # positive integers.
+                    boundary = choose_boundary()
+                    
+                    # Calculate the content length
+                    size = (8 + len(boundary) + # End marker length
+                        len(ranges) * (         # Constant length per set
+                            49 + len(boundary) + len(self.content_type) + 
+                            len('%d' % self.size)))
+                    for start, end in ranges:
+                        # Variable length per set
+                        size = (size + len('%d%d' % (start, end - 1)) + 
+                            end - start)
+                            
+                    
+                    RESPONSE.setHeader('Content-Length', size)
+                    RESPONSE.setHeader('Last-Modified',
+                        rfc1123_date(self._p_mtime))
+                    RESPONSE.setHeader('Content-Type',
+                        'multipart/byteranges; boundary=%s' % boundary)
+                    RESPONSE.setStatus(206) # Partial content
+
+                    pos = 0
+                    data = self.data
+
+                    for start, end in ranges:
+                        RESPONSE.write('\r\n--%s\r\n' % boundary)
+                        RESPONSE.write('Content-Type: %s\r\n' %
+                            self.content_type)
+                        RESPONSE.write(
+                            'Content-Range: bytes %d-%d/%d\r\n\r\n' % (
+                                start, end - 1, self.size)) 
+
+                        if type(data) is StringType:
+                            RESPONSE.write(data[start:end])
+
+                        else:
+                            # Yippee. Linked Pdata objects.
+                            while data is not None:
+                                l = len(data.data)
+                                pos = pos + l
+                                if pos > start:
+                                    # We are within the range
+                                    lstart = l - (pos - start)
+
+                                    if lstart < 0: lstart = 0
+                                    
+                                    # find the endpoint
+                                    if end <= pos:
+                                        lend = l - (pos - end)
+                                        
+                                        # Send and loop to next range
+                                        RESPONSE.write(data[lstart:lend])
+                                        # Back up the position marker, it will
+                                        # be incremented again for the next
+                                        # part.
+                                        pos = pos - l
+                                        break
+
+                                    # Not yet at the end, transmit what we have.
+                                    RESPONSE.write(data[lstart:])
+
+                                data = data.next
+
+                    RESPONSE.write('\r\n--%s--\r\n' % boundary)
+                    return ''
+
         RESPONSE.setHeader('Last-Modified', rfc1123_date(self._p_mtime))
         RESPONSE.setHeader('Content-Type', self.content_type)
         RESPONSE.setHeader('Content-Length', self.size)
+        RESPONSE.setHeader('Accept-Ranges', 'bytes')
 
         # Don't cache the data itself, but provide an opportunity
         # for a cache manager to set response headers.