[Zope3-checkins] SVN: Zope3/branches/benji-integrate-new-mechanize/src/ add new version of mechanize
Benji York
cvs-admin at zope.org
Mon Jun 19 11:38:22 EDT 2006
Log message for revision 68755:
add new version of mechanize
Changed:
A Zope3/branches/benji-integrate-new-mechanize/src/mechanize/
A Zope3/branches/benji-integrate-new-mechanize/src/mechanize/__init__.py
A Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_auth.py
A Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_clientcookie.py
A Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_gzip.py
A Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_headersutil.py
A Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_html.py
A Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_lwpcookiejar.py
A Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_mechanize.py
A Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_mozillacookiejar.py
A Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_msiecookiejar.py
A Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_opener.py
A Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_pullparser.py
A Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_request.py
A Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_urllib2.py
A Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_urllib2_support.py
A Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_useragent.py
A Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_util.py
U Zope3/branches/benji-integrate-new-mechanize/src/zope/testbrowser/testing.py
-=-
Added: Zope3/branches/benji-integrate-new-mechanize/src/mechanize/__init__.py
===================================================================
--- Zope3/branches/benji-integrate-new-mechanize/src/mechanize/__init__.py 2006-06-19 15:22:02 UTC (rev 68754)
+++ Zope3/branches/benji-integrate-new-mechanize/src/mechanize/__init__.py 2006-06-19 15:38:18 UTC (rev 68755)
@@ -0,0 +1,39 @@
+from _mechanize import __version__
+
+# high-level stateful browser-style interface
+from _mechanize import \
+ Browser, \
+ BrowserStateError, LinkNotFoundError, FormNotFoundError
+
+# configurable URL-opener interface
+from _useragent import UserAgent
+from _html import \
+ Link, \
+ Factory, DefaultFactory, RobustFactory, \
+ FormsFactory, LinksFactory, TitleFactory, \
+ RobustFormsFactory, RobustLinksFactory, RobustTitleFactory
+
+# urllib2 work-alike interface (part from mechanize, part from urllib2)
+from _urllib2 import *
+
+# misc
+from _util import http2time as str2time
+from _util import response_seek_wrapper, make_response
+from _urllib2_support import HeadParser
+try:
+ from _urllib2_support import XHTMLCompatibleHeadParser
+except ImportError:
+ pass
+#from _gzip import HTTPGzipProcessor # crap ATM
+
+
+# cookies
+from _clientcookie import Cookie, CookiePolicy, DefaultCookiePolicy, \
+ CookieJar, FileCookieJar, LoadError, request_host
+from _lwpcookiejar import LWPCookieJar, lwp_cookie_str
+from _mozillacookiejar import MozillaCookieJar
+from _msiecookiejar import MSIECookieJar
+
+# If you hate the idea of turning bugs into warnings, do:
+# import mechanize; mechanize.USE_BARE_EXCEPT = False
+USE_BARE_EXCEPT = True
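
For reference, a minimal usage sketch of the names re-exported above; this is not part of the checked-in file, and it assumes Browser() can be constructed without arguments (its definition lives in _mechanize.py, whose contents are not shown here):

    import mechanize

    # Turn swallowed mechanize bugs back into exceptions, as the comment
    # at the end of __init__.py suggests.
    mechanize.USE_BARE_EXCEPT = False

    # Cookie classes re-exported from _clientcookie (see that file below).
    policy = mechanize.DefaultCookiePolicy(rfc2965=True)
    jar = mechanize.CookieJar(policy=policy)

    # High-level stateful browser-style interface (assumed no-arg constructor).
    browser = mechanize.Browser()
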
Property changes on: Zope3/branches/benji-integrate-new-mechanize/src/mechanize/__init__.py
___________________________________________________________________
Name: svn:keywords
+ Id
Name: svn:eol-style
+ native
Added: Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_auth.py
===================================================================
--- Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_auth.py 2006-06-19 15:22:02 UTC (rev 68754)
+++ Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_auth.py 2006-06-19 15:38:18 UTC (rev 68755)
@@ -0,0 +1,471 @@
+"""HTTP Authentication and Proxy support.
+
+All but HTTPProxyPasswordMgr come from Python 2.5.
+
+
+Copyright 2006 John J. Lee <jjl at pobox.com>
+
+This code is free software; you can redistribute it and/or modify it under
+the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
+included with the distribution).
+
+"""
+
+import os, random, re, time, base64, urlparse, posixpath, md5, sha
+
+from urllib2 import BaseHandler, HTTPError, parse_keqv_list, parse_http_list
+from urllib import getproxies, unquote, splittype, splituser, splitpasswd
+
+
+def _parse_proxy(proxy):
+ """Return (scheme, user, password, host/port) given a URL or an authority.
+
+ If a URL is supplied, it must have an authority (host:port) component.
+ According to RFC 3986, having an authority component means the URL must
+ have two slashes after the scheme:
+
+ >>> _parse_proxy('file:/ftp.example.com/')
+ Traceback (most recent call last):
+ ValueError: proxy URL with no authority: 'file:/ftp.example.com/'
+
+ The first three items of the returned tuple may be None.
+
+ Examples of authority parsing:
+
+ >>> _parse_proxy('proxy.example.com')
+ (None, None, None, 'proxy.example.com')
+ >>> _parse_proxy('proxy.example.com:3128')
+ (None, None, None, 'proxy.example.com:3128')
+
+ The authority component may optionally include userinfo (assumed to be
+ username:password):
+
+ >>> _parse_proxy('joe:password@proxy.example.com')
+ (None, 'joe', 'password', 'proxy.example.com')
+ >>> _parse_proxy('joe:password@proxy.example.com:3128')
+ (None, 'joe', 'password', 'proxy.example.com:3128')
+
+ Same examples, but with URLs instead:
+
+ >>> _parse_proxy('http://proxy.example.com/')
+ ('http', None, None, 'proxy.example.com')
+ >>> _parse_proxy('http://proxy.example.com:3128/')
+ ('http', None, None, 'proxy.example.com:3128')
+ >>> _parse_proxy('http://joe:password@proxy.example.com/')
+ ('http', 'joe', 'password', 'proxy.example.com')
+ >>> _parse_proxy('http://joe:password@proxy.example.com:3128')
+ ('http', 'joe', 'password', 'proxy.example.com:3128')
+
+ Everything after the authority is ignored:
+
+ >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
+ ('ftp', 'joe', 'password', 'proxy.example.com')
+
+ Test for no trailing '/' case:
+
+ >>> _parse_proxy('http://joe:password@proxy.example.com')
+ ('http', 'joe', 'password', 'proxy.example.com')
+
+ """
+ scheme, r_scheme = splittype(proxy)
+ if not r_scheme.startswith("/"):
+ # authority
+ scheme = None
+ authority = proxy
+ else:
+ # URL
+ if not r_scheme.startswith("//"):
+ raise ValueError("proxy URL with no authority: %r" % proxy)
+ # We have an authority, so for RFC 3986-compliant URLs (by ss 3.2.
+ # and 3.3.), path is empty or starts with '/'
+ end = r_scheme.find("/", 2)
+ if end == -1:
+ end = None
+ authority = r_scheme[2:end]
+ userinfo, hostport = splituser(authority)
+ if userinfo is not None:
+ user, password = splitpasswd(userinfo)
+ else:
+ user = password = None
+ return scheme, user, password, hostport
+
+class ProxyHandler(BaseHandler):
+ # Proxies must be in front
+ handler_order = 100
+
+ def __init__(self, proxies=None):
+ if proxies is None:
+ proxies = getproxies()
+ assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
+ self.proxies = proxies
+ for type, url in proxies.items():
+ setattr(self, '%s_open' % type,
+ lambda r, proxy=url, type=type, meth=self.proxy_open: \
+ meth(r, proxy, type))
+
+ def proxy_open(self, req, proxy, type):
+ orig_type = req.get_type()
+ proxy_type, user, password, hostport = _parse_proxy(proxy)
+ if proxy_type is None:
+ proxy_type = orig_type
+ if user and password:
+ user_pass = '%s:%s' % (unquote(user), unquote(password))
+ creds = base64.encodestring(user_pass).strip()
+ req.add_header('Proxy-authorization', 'Basic ' + creds)
+ hostport = unquote(hostport)
+ req.set_proxy(hostport, proxy_type)
+ if orig_type == proxy_type:
+ # let other handlers take care of it
+ return None
+ else:
+ # need to start over, because the other handlers don't
+ # grok the proxy's URL type
+ # e.g. if we have a constructor arg proxies like so:
+ # {'http': 'ftp://proxy.example.com'}, we may end up turning
+ # a request for http://acme.example.com/a into one for
+ # ftp://proxy.example.com/a
+ return self.parent.open(req)
+
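A standalone sketch of the binding trick used in ProxyHandler.__init__ above: the proxy=url, type=type default arguments freeze the loop variables at each iteration, otherwise every generated <scheme>_open method would see the values from the last iteration (names and URLs here are illustrative only):

    handlers = {}
    for scheme, url in {"http": "http://proxy.example.com:3128",
                        "ftp": "ftp://proxy.example.com:2121"}.items():
        handlers["%s_open" % scheme] = (
            lambda req, proxy=url, type=scheme: (type, proxy, req))

    handlers["http_open"]("some request")
    # -> ('http', 'http://proxy.example.com:3128', 'some request')
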
+class HTTPPasswordMgr:
+
+ def __init__(self):
+ self.passwd = {}
+
+ def add_password(self, realm, uri, user, passwd):
+ # uri could be a single URI or a sequence
+ if isinstance(uri, basestring):
+ uri = [uri]
+ uri = tuple(map(self.reduce_uri, uri))
+ if not realm in self.passwd:
+ self.passwd[realm] = {}
+ self.passwd[realm][uri] = (user, passwd)
+
+ def find_user_password(self, realm, authuri):
+ domains = self.passwd.get(realm, {})
+ authuri = self.reduce_uri(authuri)
+ for uris, authinfo in domains.iteritems():
+ for uri in uris:
+ if self.is_suburi(uri, authuri):
+ return authinfo
+ return None, None
+
+ def reduce_uri(self, uri):
+ """Accept netloc or URI and extract only the netloc and path"""
+ parts = urlparse.urlsplit(uri)
+ if parts[1]:
+ # URI
+ return parts[1], parts[2] or '/'
+ elif parts[0]:
+ # host:port
+ return uri, '/'
+ else:
+ # host
+ return parts[2], '/'
+
+ def is_suburi(self, base, test):
+ """Check if test is below base in a URI tree
+
+ Both args must be URIs in reduced form.
+ """
+ if base == test:
+ return True
+ if base[0] != test[0]:
+ return False
+ common = posixpath.commonprefix((base[1], test[1]))
+ if len(common) == len(base[1]):
+ return True
+ return False
+
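A small usage sketch of HTTPPasswordMgr as defined above (realm and URLs are made up): credentials registered for a URI prefix are found for any URI at or below that path, per reduce_uri and is_suburi.

    pm = HTTPPasswordMgr()
    pm.add_password("Acme Realm", "http://www.example.com/private/",
                    "joe", "secret")
    pm.find_user_password("Acme Realm",
                          "http://www.example.com/private/page.html")
    # -> ('joe', 'secret')
    pm.find_user_password("Acme Realm", "http://www.example.com/other/")
    # -> (None, None)
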
+
+class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
+
+ def find_user_password(self, realm, authuri):
+ user, password = HTTPPasswordMgr.find_user_password(self, realm,
+ authuri)
+ if user is not None:
+ return user, password
+ return HTTPPasswordMgr.find_user_password(self, None, authuri)
+
+
+class AbstractBasicAuthHandler:
+
+ rx = re.compile('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', re.I)
+
+ # XXX there can actually be multiple auth-schemes in a
+ # www-authenticate header. should probably be a lot more careful
+ # in parsing them to extract multiple alternatives
+
+ def __init__(self, password_mgr=None):
+ if password_mgr is None:
+ password_mgr = HTTPPasswordMgr()
+ self.passwd = password_mgr
+ self.add_password = self.passwd.add_password
+
+ def http_error_auth_reqed(self, authreq, host, req, headers):
+ # host may be an authority (without userinfo) or a URL with an
+ # authority
+ # XXX could be multiple headers
+ authreq = headers.get(authreq, None)
+ if authreq:
+ mo = AbstractBasicAuthHandler.rx.search(authreq)
+ if mo:
+ scheme, realm = mo.groups()
+ if scheme.lower() == 'basic':
+ return self.retry_http_basic_auth(host, req, realm)
+
+ def retry_http_basic_auth(self, host, req, realm):
+ user, pw = self.passwd.find_user_password(realm, host)
+ if pw is not None:
+ raw = "%s:%s" % (user, pw)
+ auth = 'Basic %s' % base64.encodestring(raw).strip()
+ if req.headers.get(self.auth_header, None) == auth:
+ return None
+ req.add_header(self.auth_header, auth)
+ return self.parent.open(req)
+ else:
+ return None
+
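retry_http_basic_auth above builds the Authorization value from a plain base64 encoding of "user:password"; the same encoding step in isolation (credentials are made up):

    import base64

    user, pw = "joe", "secret"
    auth = "Basic %s" % base64.encodestring("%s:%s" % (user, pw)).strip()
    # auth == 'Basic am9lOnNlY3JldA=='
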
+
+class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
+
+ auth_header = 'Authorization'
+
+ def http_error_401(self, req, fp, code, msg, headers):
+ url = req.get_full_url()
+ return self.http_error_auth_reqed('www-authenticate',
+ url, req, headers)
+
+
+class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
+
+ auth_header = 'Proxy-authorization'
+
+ def http_error_407(self, req, fp, code, msg, headers):
+ # http_error_auth_reqed requires that there is no userinfo component in
+ # authority. Assume there isn't one, since urllib2 does not (and
+ # should not, RFC 3986 s. 3.2.1) support requests for URLs containing
+ # userinfo.
+ authority = req.get_host()
+ return self.http_error_auth_reqed('proxy-authenticate',
+ authority, req, headers)
+
+
+def randombytes(n):
+ """Return n random bytes."""
+ # Use /dev/urandom if it is available. Fall back to random module
+ # if not. It might be worthwhile to extend this function to use
+ # other platform-specific mechanisms for getting random bytes.
+ if os.path.exists("/dev/urandom"):
+ f = open("/dev/urandom")
+ s = f.read(n)
+ f.close()
+ return s
+ else:
+ L = [chr(random.randrange(0, 256)) for i in range(n)]
+ return "".join(L)
+
+class AbstractDigestAuthHandler:
+ # Digest authentication is specified in RFC 2617.
+
+ # XXX The client does not inspect the Authentication-Info header
+ # in a successful response.
+
+ # XXX It should be possible to test this implementation against
+ # a mock server that just generates a static set of challenges.
+
+ # XXX qop="auth-int" support is shaky
+
+ def __init__(self, passwd=None):
+ if passwd is None:
+ passwd = HTTPPasswordMgr()
+ self.passwd = passwd
+ self.add_password = self.passwd.add_password
+ self.retried = 0
+ self.nonce_count = 0
+
+ def reset_retry_count(self):
+ self.retried = 0
+
+ def http_error_auth_reqed(self, auth_header, host, req, headers):
+ authreq = headers.get(auth_header, None)
+ if self.retried > 5:
+ # Don't fail endlessly - if we failed once, we'll probably
+ # fail a second time. Hm. Unless the Password Manager is
+ # prompting for the information. Crap. This isn't great
+ # but it's better than the current 'repeat until recursion
+ # depth exceeded' approach <wink>
+ raise HTTPError(req.get_full_url(), 401, "digest auth failed",
+ headers, None)
+ else:
+ self.retried += 1
+ if authreq:
+ scheme = authreq.split()[0]
+ if scheme.lower() == 'digest':
+ return self.retry_http_digest_auth(req, authreq)
+
+ def retry_http_digest_auth(self, req, auth):
+ token, challenge = auth.split(' ', 1)
+ chal = parse_keqv_list(parse_http_list(challenge))
+ auth = self.get_authorization(req, chal)
+ if auth:
+ auth_val = 'Digest %s' % auth
+ if req.headers.get(self.auth_header, None) == auth_val:
+ return None
+ req.add_unredirected_header(self.auth_header, auth_val)
+ resp = self.parent.open(req)
+ return resp
+
+ def get_cnonce(self, nonce):
+ # The cnonce-value is an opaque
+ # quoted string value provided by the client and used by both client
+ # and server to avoid chosen plaintext attacks, to provide mutual
+ # authentication, and to provide some message integrity protection.
+ # This isn't a fabulous effort, but it's probably Good Enough.
+ dig = sha.new("%s:%s:%s:%s" % (self.nonce_count, nonce, time.ctime(),
+ randombytes(8))).hexdigest()
+ return dig[:16]
+
+ def get_authorization(self, req, chal):
+ try:
+ realm = chal['realm']
+ nonce = chal['nonce']
+ qop = chal.get('qop')
+ algorithm = chal.get('algorithm', 'MD5')
+ # mod_digest doesn't send an opaque, even though it isn't
+ # supposed to be optional
+ opaque = chal.get('opaque', None)
+ except KeyError:
+ return None
+
+ H, KD = self.get_algorithm_impls(algorithm)
+ if H is None:
+ return None
+
+ user, pw = self.passwd.find_user_password(realm, req.get_full_url())
+ if user is None:
+ return None
+
+ # XXX not implemented yet
+ if req.has_data():
+ entdig = self.get_entity_digest(req.get_data(), chal)
+ else:
+ entdig = None
+
+ A1 = "%s:%s:%s" % (user, realm, pw)
+ A2 = "%s:%s" % (req.get_method(),
+ # XXX selector: what about proxies and full urls
+ req.get_selector())
+ if qop == 'auth':
+ self.nonce_count += 1
+ ncvalue = '%08x' % self.nonce_count
+ cnonce = self.get_cnonce(nonce)
+ noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2))
+ respdig = KD(H(A1), noncebit)
+ elif qop is None:
+ respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
+ else:
+ # XXX handle auth-int.
+ pass
+
+ # XXX should the partial digests be encoded too?
+
+ base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
+ 'response="%s"' % (user, realm, nonce, req.get_selector(),
+ respdig)
+ if opaque:
+ base += ', opaque="%s"' % opaque
+ if entdig:
+ base += ', digest="%s"' % entdig
+ base += ', algorithm="%s"' % algorithm
+ if qop:
+ base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
+ return base
+
+ def get_algorithm_impls(self, algorithm):
+ # lambdas assume digest modules are imported at the top level
+ if algorithm == 'MD5':
+ H = lambda x: md5.new(x).hexdigest()
+ elif algorithm == 'SHA':
+ H = lambda x: sha.new(x).hexdigest()
+ # XXX MD5-sess
+ KD = lambda s, d: H("%s:%s" % (s, d))
+ return H, KD
+
+ def get_entity_digest(self, data, chal):
+ # XXX not implemented yet
+ return None
+
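The response digest that get_authorization assembles follows RFC 2617. A worked sketch of the qop="auth" case, using the same H and KD definitions as get_algorithm_impls; the credentials and challenge values are made up:

    import md5

    H = lambda x: md5.new(x).hexdigest()
    KD = lambda s, d: H("%s:%s" % (s, d))

    user, realm, password = "joe", "example realm", "secret"
    method, uri = "GET", "/protected/"
    nonce, ncvalue, cnonce, qop = "dcd98b7102dd2f0e", "00000001", "0a4f113b", "auth"

    A1 = "%s:%s:%s" % (user, realm, password)
    A2 = "%s:%s" % (method, uri)
    respdig = KD(H(A1), "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2)))
    # respdig is the value sent as response="..." in the Authorization header.
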
+
+class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
+ """An authentication protocol defined by RFC 2069 (updated by RFC 2617)
+
+ Digest authentication improves on basic authentication because it
+ does not transmit passwords in the clear.
+ """
+
+ auth_header = 'Authorization'
+
+ def http_error_401(self, req, fp, code, msg, headers):
+ host = urlparse.urlparse(req.get_full_url())[1]
+ retry = self.http_error_auth_reqed('www-authenticate',
+ host, req, headers)
+ self.reset_retry_count()
+ return retry
+
+
+class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
+
+ auth_header = 'Proxy-Authorization'
+
+ def http_error_407(self, req, fp, code, msg, headers):
+ host = req.get_host()
+ retry = self.http_error_auth_reqed('proxy-authenticate',
+ host, req, headers)
+ self.reset_retry_count()
+ return retry
+
+
+
+class HTTPProxyPasswordMgr(HTTPPasswordMgr):
+ # has default realm and host/port
+ def add_password(self, realm, uri, user, passwd):
+ # uri could be a single URI or a sequence
+ if uri is None or isinstance(uri, basestring):
+ uris = [uri]
+ else:
+ uris = uri
+ passwd_by_domain = self.passwd.setdefault(realm, {})
+ for uri in uris:
+ uri = self.reduce_uri(uri)
+ passwd_by_domain[uri] = (user, passwd)
+
+ def find_user_password(self, realm, authuri):
+ perms = [(realm, authuri), (None, authuri)]
+ # bleh, want default realm to take precedence over default
+ # URI/authority, hence this outer loop
+ for default_uri in False, True:
+ for realm, authuri in perms:
+ authinfo_by_domain = self.passwd.get(realm, {})
+ reduced_authuri = self.reduce_uri(authuri)
+ for uri, authinfo in authinfo_by_domain.iteritems():
+ if uri is None and not default_uri:
+ continue
+ if self.is_suburi(uri, reduced_authuri):
+ return authinfo
+ user, password = None, None
+
+ if user is not None:
+ break
+ return user, password
+
+ def reduce_uri(self, uri):
+ if uri is None:
+ return None
+ return HTTPPasswordMgr.reduce_uri(self, uri)
+
+ def is_suburi(self, base, test):
+ if base is None:
+ # default to the proxy's host/port
+ hostport, path = test
+ base = (hostport, "/")
+ return HTTPPasswordMgr.is_suburi(self, base, test)
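
A short sketch of the default-realm/default-URI behaviour that HTTPProxyPasswordMgr adds (the proxy host is made up): passing None for both realm and URI registers credentials that match any realm on any proxy.

    pm = HTTPProxyPasswordMgr()
    pm.add_password(None, None, "joe", "secret")
    pm.find_user_password("Some Realm", "proxy.example.com:3128")
    # -> ('joe', 'secret')
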
Property changes on: Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_auth.py
___________________________________________________________________
Name: svn:keywords
+ Id
Name: svn:eol-style
+ native
Added: Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_clientcookie.py
===================================================================
--- Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_clientcookie.py 2006-06-19 15:22:02 UTC (rev 68754)
+++ Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_clientcookie.py 2006-06-19 15:38:18 UTC (rev 68755)
@@ -0,0 +1,1656 @@
+"""HTTP cookie handling for web clients, plus some other stuff.
+
+This module originally developed from my port of Gisle Aas' Perl module
+HTTP::Cookies, from the libwww-perl library.
+
+Docstrings, comments and debug strings in this code refer to the
+attributes of the HTTP cookie system as cookie-attributes, to distinguish
+them clearly from Python attributes.
+
+                        CookieJar____
+                        /     \      \
+            FileCookieJar      \      \
+             /    |   \         \      \
+ MozillaCookieJar | LWPCookieJar \      \
+                  |               |      \
+                  |   ---MSIEBase |       \
+                  |  /     |      |        \
+                  | /   MSIEDBCookieJar BSDDBCookieJar
+                  |/
+               MSIECookieJar
+
+Comments to John J Lee <jjl at pobox.com>.
+
+
+Copyright 2002-2006 John J Lee <jjl at pobox.com>
+Copyright 1997-1999 Gisle Aas (original libwww-perl code)
+Copyright 2002-2003 Johnny Lee (original MSIE Perl code)
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the BSD or ZPL 2.1 licenses (see the file
+COPYING.txt included with the distribution).
+
+"""
+
+import sys, re, urlparse, string, copy, time, struct, urllib, types, logging
+try:
+ import threading
+ _threading = threading; del threading
+except ImportError:
+ import dummy_threading
+ _threading = dummy_threading; del dummy_threading
+import httplib # only for the default HTTP port
+
+MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar "
+ "instance initialised with one)")
+DEFAULT_HTTP_PORT = str(httplib.HTTP_PORT)
+
+from _headersutil import split_header_words, parse_ns_headers
+from _util import startswith, endswith, isstringlike, getheaders
+
+debug = logging.getLogger("mechanize.cookies").debug
+
+
+def reraise_unmasked_exceptions(unmasked=()):
+ # There are a few catch-all except: statements in this module, for
+ # catching input that's bad in unexpected ways.
+ # This function re-raises some exceptions we don't want to trap.
+ import mechanize, warnings
+ if not mechanize.USE_BARE_EXCEPT:
+ raise
+ unmasked = unmasked + (KeyboardInterrupt, SystemExit, MemoryError)
+ etype = sys.exc_info()[0]
+ if issubclass(etype, unmasked):
+ raise
+ # swallowed an exception
+ import traceback, StringIO
+ f = StringIO.StringIO()
+ traceback.print_exc(None, f)
+ msg = f.getvalue()
+ warnings.warn("mechanize bug!\n%s" % msg, stacklevel=2)
+
+
+IPV4_RE = re.compile(r"\.\d+$")
+def is_HDN(text):
+ """Return True if text is a host domain name."""
+ # XXX
+ # This may well be wrong. Which RFC is HDN defined in, if any (for
+ # the purposes of RFC 2965)?
+ # For the current implementation, what about IPv6? Remember to look
+ # at other uses of IPV4_RE also, if change this.
+ return not (IPV4_RE.search(text) or
+ text == "" or
+ text[0] == "." or text[-1] == ".")
+
+def domain_match(A, B):
+ """Return True if domain A domain-matches domain B, according to RFC 2965.
+
+ A and B may be host domain names or IP addresses.
+
+ RFC 2965, section 1:
+
+ Host names can be specified either as an IP address or a HDN string.
+ Sometimes we compare one host name with another. (Such comparisons SHALL
+ be case-insensitive.) Host A's name domain-matches host B's if
+
+ * their host name strings string-compare equal; or
+
+ * A is a HDN string and has the form NB, where N is a non-empty
+ name string, B has the form .B', and B' is a HDN string. (So,
+ x.y.com domain-matches .Y.com but not Y.com.)
+
+ Note that domain-match is not a commutative operation: a.b.c.com
+ domain-matches .c.com, but not the reverse.
+
+ """
+ # Note that, if A or B are IP addresses, the only relevant part of the
+ # definition of the domain-match algorithm is the direct string-compare.
+ A = string.lower(A)
+ B = string.lower(B)
+ if A == B:
+ return True
+ if not is_HDN(A):
+ return False
+ i = string.rfind(A, B)
+ has_form_nb = not (i == -1 or i == 0)
+ return (
+ has_form_nb and
+ startswith(B, ".") and
+ is_HDN(B[1:])
+ )
+
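A few concrete cases of the domain-match rules quoted above:

    domain_match("x.y.com", ".y.com")    # True:  x.y.com has the form N + ".y.com"
    domain_match("x.y.com", "y.com")     # False: "y.com" does not start with a dot
    domain_match("a.b.c.com", ".c.com")  # True
    domain_match(".c.com", "a.b.c.com")  # False: domain-match is not commutative
    domain_match("192.168.1.2", "192.168.1.2")  # True: IPs only string-compare
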
+def liberal_is_HDN(text):
+ """Return True if text is a sort-of-like a host domain name.
+
+ For accepting/blocking domains.
+
+ """
+ return not IPV4_RE.search(text)
+
+def user_domain_match(A, B):
+ """For blocking/accepting domains.
+
+ A and B may be host domain names or IP addresses.
+
+ """
+ A = string.lower(A)
+ B = string.lower(B)
+ if not (liberal_is_HDN(A) and liberal_is_HDN(B)):
+ if A == B:
+ # equal IP addresses
+ return True
+ return False
+ initial_dot = startswith(B, ".")
+ if initial_dot and endswith(A, B):
+ return True
+ if not initial_dot and A == B:
+ return True
+ return False
+
+cut_port_re = re.compile(r":\d+$")
+def request_host(request):
+ """Return request-host, as defined by RFC 2965.
+
+ Variation from RFC: returned value is lowercased, for convenient
+ comparison.
+
+ """
+ url = request.get_full_url()
+ host = urlparse.urlparse(url)[1]
+ if host == "":
+ host = request.get_header("Host", "")
+
+ # remove port, if present
+ host = cut_port_re.sub("", host, 1)
+ return string.lower(host)
+
+def eff_request_host(request):
+ """Return a tuple (request-host, effective request-host name).
+
+ As defined by RFC 2965, except both are lowercased.
+
+ """
+ erhn = req_host = request_host(request)
+ if string.find(req_host, ".") == -1 and not IPV4_RE.search(req_host):
+ erhn = req_host + ".local"
+ return req_host, erhn
+
+def request_path(request):
+ """request-URI, as defined by RFC 2965."""
+ url = request.get_full_url()
+ #scheme, netloc, path, parameters, query, frag = urlparse.urlparse(url)
+ #req_path = escape_path(string.join(urlparse.urlparse(url)[2:], ""))
+ path, parameters, query, frag = urlparse.urlparse(url)[2:]
+ if parameters:
+ path = "%s;%s" % (path, parameters)
+ path = escape_path(path)
+ req_path = urlparse.urlunparse(("", "", path, "", query, frag))
+ if not startswith(req_path, "/"):
+ # fix bad RFC 2396 absoluteURI
+ req_path = "/"+req_path
+ return req_path
+
+def request_port(request):
+ host = request.get_host()
+ i = string.find(host, ':')
+ if i >= 0:
+ port = host[i+1:]
+ try:
+ int(port)
+ except ValueError:
+ debug("nonnumeric port: '%s'", port)
+ return None
+ else:
+ port = DEFAULT_HTTP_PORT
+ return port
+
+# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
+# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
+HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
+ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")
+def uppercase_escaped_char(match):
+ return "%%%s" % string.upper(match.group(1))
+def escape_path(path):
+ """Escape any invalid characters in HTTP URL, and uppercase all escapes."""
+ # There's no knowing what character encoding was used to create URLs
+ # containing %-escapes, but since we have to pick one to escape invalid
+ # path characters, we pick UTF-8, as recommended in the HTML 4.0
+ # specification:
+ # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
+ # And here, kind of: draft-fielding-uri-rfc2396bis-03
+ # (And in draft IRI specification: draft-duerst-iri-05)
+ # (And here, for new URI schemes: RFC 2718)
+ if isinstance(path, types.UnicodeType):
+ path = path.encode("utf-8")
+ path = urllib.quote(path, HTTP_PATH_SAFE)
+ path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path)
+ return path
+
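A few examples of what escape_path produces:

    escape_path("/foo bar/")   # -> '/foo%20bar/'  (space is not URL-safe)
    escape_path("/%7euser")    # -> '/%7Euser'     (existing escapes are uppercased)
    escape_path(u"/caf\xe9")   # -> '/caf%C3%A9'   (unicode is UTF-8 encoded first)
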
+def reach(h):
+ """Return reach of host h, as defined by RFC 2965, section 1.
+
+ The reach R of a host name H is defined as follows:
+
+ * If
+
+ - H is the host domain name of a host; and,
+
+ - H has the form A.B; and
+
+ - A has no embedded (that is, interior) dots; and
+
+ - B has at least one embedded dot, or B is the string "local".
+ then the reach of H is .B.
+
+ * Otherwise, the reach of H is H.
+
+ >>> reach("www.acme.com")
+ '.acme.com'
+ >>> reach("acme.com")
+ 'acme.com'
+ >>> reach("acme.local")
+ '.local'
+
+ """
+ i = string.find(h, ".")
+ if i >= 0:
+ #a = h[:i] # this line is only here to show what a is
+ b = h[i+1:]
+ i = string.find(b, ".")
+ if is_HDN(h) and (i >= 0 or b == "local"):
+ return "."+b
+ return h
+
+def is_third_party(request):
+ """
+
+ RFC 2965, section 3.3.6:
+
+ An unverifiable transaction is to a third-party host if its request-
+ host U does not domain-match the reach R of the request-host O in the
+ origin transaction.
+
+ """
+ req_host = request_host(request)
+ # the origin request's request-host was stuffed into request by
+ # _urllib2_support.AbstractHTTPHandler
+ return not domain_match(req_host, reach(request.origin_req_host))
+
+
+class Cookie:
+ """HTTP Cookie.
+
+ This class represents both Netscape and RFC 2965 cookies.
+
+ This is deliberately a very simple class. It just holds attributes. It's
+ possible to construct Cookie instances that don't comply with the cookie
+ standards. CookieJar.make_cookies is the factory function for Cookie
+ objects -- it deals with cookie parsing, supplying defaults, and
+ normalising to the representation used in this class. CookiePolicy is
+ responsible for checking them to see whether they should be accepted from
+ and returned to the server.
+
+ version: integer;
+ name: string;
+ value: string (may be None);
+ port: string; None indicates no attribute was supplied (eg. "Port", rather
+ than eg. "Port=80"); otherwise, a port string (eg. "80") or a port list
+ string (eg. "80,8080")
+ port_specified: boolean; true if a value was supplied with the Port
+ cookie-attribute
+ domain: string;
+ domain_specified: boolean; true if Domain was explicitly set
+ domain_initial_dot: boolean; true if Domain as set in HTTP header by server
+ started with a dot (yes, this really is necessary!)
+ path: string;
+ path_specified: boolean; true if Path was explicitly set
+ secure: boolean; true if should only be returned over secure connection
+ expires: integer; seconds since epoch (RFC 2965 cookies should calculate
+ this value from the Max-Age attribute)
+ discard: boolean, true if this is a session cookie; (if no expires value,
+ this should be true)
+ comment: string;
+ comment_url: string;
+ rfc2109: boolean; true if cookie arrived in a Set-Cookie: (not
+ Set-Cookie2:) header, but had a version cookie-attribute of 1
+ rest: mapping of other cookie-attributes
+
+ Note that the port may be present in the headers, but unspecified ("Port"
+ rather than "Port=80", for example); if this is the case, port is None.
+
+ """
+
+ def __init__(self, version, name, value,
+ port, port_specified,
+ domain, domain_specified, domain_initial_dot,
+ path, path_specified,
+ secure,
+ expires,
+ discard,
+ comment,
+ comment_url,
+ rest,
+ rfc2109=False,
+ ):
+
+ if version is not None: version = int(version)
+ if expires is not None: expires = int(expires)
+ if port is None and port_specified is True:
+ raise ValueError("if port is None, port_specified must be false")
+
+ self.version = version
+ self.name = name
+ self.value = value
+ self.port = port
+ self.port_specified = port_specified
+ # normalise case, as per RFC 2965 section 3.3.3
+ self.domain = string.lower(domain)
+ self.domain_specified = domain_specified
+ # Sigh. We need to know whether the domain given in the
+ # cookie-attribute had an initial dot, in order to follow RFC 2965
+ # (as clarified in draft errata). Needed for the returned $Domain
+ # value.
+ self.domain_initial_dot = domain_initial_dot
+ self.path = path
+ self.path_specified = path_specified
+ self.secure = secure
+ self.expires = expires
+ self.discard = discard
+ self.comment = comment
+ self.comment_url = comment_url
+ self.rfc2109 = rfc2109
+
+ self._rest = copy.copy(rest)
+
+ def has_nonstandard_attr(self, name):
+ return self._rest.has_key(name)
+ def get_nonstandard_attr(self, name, default=None):
+ return self._rest.get(name, default)
+ def set_nonstandard_attr(self, name, value):
+ self._rest[name] = value
+ def nonstandard_attr_keys(self):
+ return self._rest.keys()
+
+ def is_expired(self, now=None):
+ if now is None: now = time.time()
+ return (self.expires is not None) and (self.expires <= now)
+
+ def __str__(self):
+ if self.port is None: p = ""
+ else: p = ":"+self.port
+ limit = self.domain + p + self.path
+ if self.value is not None:
+ namevalue = "%s=%s" % (self.name, self.value)
+ else:
+ namevalue = self.name
+ return "<Cookie %s for %s>" % (namevalue, limit)
+
+ def __repr__(self):
+ args = []
+ for name in ["version", "name", "value",
+ "port", "port_specified",
+ "domain", "domain_specified", "domain_initial_dot",
+ "path", "path_specified",
+ "secure", "expires", "discard", "comment", "comment_url",
+ ]:
+ attr = getattr(self, name)
+ args.append("%s=%s" % (name, repr(attr)))
+ args.append("rest=%s" % repr(self._rest))
+ args.append("rfc2109=%s" % repr(self.rfc2109))
+ return "Cookie(%s)" % string.join(args, ", ")
+
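A sketch of building a cookie by hand with the constructor above (all values are made up; normally CookieJar.make_cookies constructs these from Set-Cookie headers):

    c = Cookie(version=0, name="session", value="abc123",
               port=None, port_specified=False,
               domain=".example.com", domain_specified=True,
               domain_initial_dot=True,
               path="/", path_specified=True,
               secure=False, expires=None, discard=True,
               comment=None, comment_url=None,
               rest={"HttpOnly": None})
    str(c)           # -> '<Cookie session=abc123 for .example.com/>'
    c.is_expired()   # -> False (session cookie: no expires value)
    c.has_nonstandard_attr("HttpOnly")  # -> True
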
+
+class CookiePolicy:
+ """Defines which cookies get accepted from and returned to server.
+
+ May also modify cookies.
+
+ The subclass DefaultCookiePolicy defines the standard rules for Netscape
+ and RFC 2965 cookies -- override that if you want a customised policy.
+
+ As well as implementing set_ok and return_ok, implementations of this
+ interface must also supply the following attributes, indicating which
+ protocols should be used, and how. These can be read and set at any time,
+ though whether that makes complete sense from the protocol point of view is
+ doubtful.
+
+ Public attributes:
+
+ netscape: implement netscape protocol
+ rfc2965: implement RFC 2965 protocol
+ rfc2109_as_netscape:
+ WARNING: This argument will change or go away if it is not accepted into
+ the Python standard library in this form!
+ If true, treat RFC 2109 cookies as though they were Netscape cookies. The
+ default is for this attribute to be None, which means treat 2109 cookies
+ as RFC 2965 cookies unless RFC 2965 handling is switched off (which it is,
+ by default), and as Netscape cookies otherwise.
+ hide_cookie2: don't add Cookie2 header to requests (the presence of
+ this header indicates to the server that we understand RFC 2965
+ cookies)
+
+ """
+ def set_ok(self, cookie, request):
+ """Return true if (and only if) cookie should be accepted from server.
+
+ Currently, pre-expired cookies never get this far -- the CookieJar
+ class deletes such cookies itself.
+
+ cookie: mechanize.Cookie object
+ request: object implementing the interface defined by
+ CookieJar.extract_cookies.__doc__
+
+ """
+ raise NotImplementedError()
+
+ def return_ok(self, cookie, request):
+ """Return true if (and only if) cookie should be returned to server.
+
+ cookie: mechanize.Cookie object
+ request: object implementing the interface defined by
+ CookieJar.add_cookie_header.__doc__
+
+ """
+ raise NotImplementedError()
+
+ def domain_return_ok(self, domain, request):
+ """Return false if cookies should not be returned, given cookie domain.
+
+ This is here as an optimization, to remove the need for checking every
+ cookie with a particular domain (which may involve reading many files).
+ The default implementations of domain_return_ok and path_return_ok
+ (return True) leave all the work to return_ok.
+
+ If domain_return_ok returns true for the cookie domain, path_return_ok
+ is called for the cookie path. Otherwise, path_return_ok and return_ok
+ are never called for that cookie domain. If path_return_ok returns
+ true, return_ok is called with the Cookie object itself for a full
+ check. Otherwise, return_ok is never called for that cookie path.
+
+ Note that domain_return_ok is called for every *cookie* domain, not
+ just for the *request* domain. For example, the function might be
+ called with both ".acme.com" and "www.acme.com" if the request domain is
+ "www.acme.com". The same goes for path_return_ok.
+
+ For argument documentation, see the docstring for return_ok.
+
+ """
+ return True
+
+ def path_return_ok(self, path, request):
+ """Return false if cookies should not be returned, given cookie path.
+
+ See the docstring for domain_return_ok.
+
+ """
+ return True
+
+
+class DefaultCookiePolicy(CookiePolicy):
+ """Implements the standard rules for accepting and returning cookies.
+
+ Both RFC 2965 and Netscape cookies are covered. RFC 2965 handling is
+ switched off by default.
+
+ The easiest way to provide your own policy is to override this class and
+ call its methods in your overridden implementations before adding your own
+ additional checks.
+
+ import mechanize
+ class MyCookiePolicy(mechanize.DefaultCookiePolicy):
+ def set_ok(self, cookie, request):
+ if not mechanize.DefaultCookiePolicy.set_ok(
+ self, cookie, request):
+ return False
+ if i_dont_want_to_store_this_cookie():
+ return False
+ return True
+
+ In addition to the features required to implement the CookiePolicy
+ interface, this class allows you to block and allow domains from setting
+ and receiving cookies. There are also some strictness switches that allow
+ you to tighten up the rather loose Netscape protocol rules a little bit (at
+ the cost of blocking some benign cookies).
+
+ A domain blacklist and whitelist is provided (both off by default). Only
+ domains not in the blacklist and present in the whitelist (if the whitelist
+ is active) participate in cookie setting and returning. Use the
+ blocked_domains constructor argument, and blocked_domains and
+ set_blocked_domains methods (and the corresponding argument and methods for
+ allowed_domains). If you set a whitelist, you can turn it off again by
+ setting it to None.
+
+ Domains in block or allow lists that do not start with a dot must
+ string-compare equal. For example, "acme.com" matches a blacklist entry of
+ "acme.com", but "www.acme.com" does not. Domains that do start with a dot
+ are matched by more specific domains too. For example, both "www.acme.com"
+ and "www.munitions.acme.com" match ".acme.com" (but "acme.com" itself does
+ not). IP addresses are an exception, and must match exactly. For example,
+ if blocked_domains contains "192.168.1.2" and ".168.1.2", 192.168.1.2 is
+ blocked, but 193.168.1.2 is not.
+
+ Additional Public Attributes:
+
+ General strictness switches
+
+ strict_domain: don't allow sites to set two-component domains with
+ country-code top-level domains like .co.uk, .gov.uk, .co.nz, etc.
+ This is far from perfect and isn't guaranteed to work!
+
+ RFC 2965 protocol strictness switches
+
+ strict_rfc2965_unverifiable: follow RFC 2965 rules on unverifiable
+ transactions (usually, an unverifiable transaction is one resulting from
+ a redirect or an image hosted on another site); if this is false, cookies
+ are NEVER blocked on the basis of verifiability
+
+ Netscape protocol strictness switches
+
+ strict_ns_unverifiable: apply RFC 2965 rules on unverifiable transactions
+ even to Netscape cookies
+ strict_ns_domain: flags indicating how strict to be with domain-matching
+ rules for Netscape cookies:
+ DomainStrictNoDots: when setting cookies, host prefix must not contain a
+ dot (eg. www.foo.bar.com can't set a cookie for .bar.com, because
+ www.foo contains a dot)
+ DomainStrictNonDomain: cookies that did not explicitly specify a Domain
+ cookie-attribute can only be returned to a domain that string-compares
+ equal to the domain that set the cookie (eg. rockets.acme.com won't
+ be returned cookies from acme.com that had no Domain cookie-attribute)
+ DomainRFC2965Match: when setting cookies, require a full RFC 2965
+ domain-match
+ DomainLiberal and DomainStrict are the most useful combinations of the
+ above flags, for convenience
+ strict_ns_set_initial_dollar: ignore cookies in Set-Cookie: headers that
+ have names starting with '$'
+ strict_ns_set_path: don't allow setting cookies whose path doesn't
+ path-match request URI
+
+ """
+
+ DomainStrictNoDots = 1
+ DomainStrictNonDomain = 2
+ DomainRFC2965Match = 4
+
+ DomainLiberal = 0
+ DomainStrict = DomainStrictNoDots|DomainStrictNonDomain
+
+ def __init__(self,
+ blocked_domains=None, allowed_domains=None,
+ netscape=True, rfc2965=False,
+ # WARNING: this argument will change or go away if it is not
+ # accepted into the Python standard library in this form!
+ # default, ie. treat 2109 as netscape iff not rfc2965
+ rfc2109_as_netscape=None,
+ hide_cookie2=False,
+ strict_domain=False,
+ strict_rfc2965_unverifiable=True,
+ strict_ns_unverifiable=False,
+ strict_ns_domain=DomainLiberal,
+ strict_ns_set_initial_dollar=False,
+ strict_ns_set_path=False,
+ ):
+ """
+ Constructor arguments should be used as keyword arguments only.
+
+ blocked_domains: sequence of domain names that we never accept cookies
+ from, nor return cookies to
+ allowed_domains: if not None, this is a sequence of the only domains
+ for which we accept and return cookies
+
+ For other arguments, see CookiePolicy.__doc__ and
+ DefaultCookiePolicy.__doc__.
+
+ """
+ self.netscape = netscape
+ self.rfc2965 = rfc2965
+ self.rfc2109_as_netscape = rfc2109_as_netscape
+ self.hide_cookie2 = hide_cookie2
+ self.strict_domain = strict_domain
+ self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable
+ self.strict_ns_unverifiable = strict_ns_unverifiable
+ self.strict_ns_domain = strict_ns_domain
+ self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar
+ self.strict_ns_set_path = strict_ns_set_path
+
+ if blocked_domains is not None:
+ self._blocked_domains = tuple(blocked_domains)
+ else:
+ self._blocked_domains = ()
+
+ if allowed_domains is not None:
+ allowed_domains = tuple(allowed_domains)
+ self._allowed_domains = allowed_domains
+
+ def blocked_domains(self):
+ """Return the sequence of blocked domains (as a tuple)."""
+ return self._blocked_domains
+ def set_blocked_domains(self, blocked_domains):
+ """Set the sequence of blocked domains."""
+ self._blocked_domains = tuple(blocked_domains)
+
+ def is_blocked(self, domain):
+ for blocked_domain in self._blocked_domains:
+ if user_domain_match(domain, blocked_domain):
+ return True
+ return False
+
+ def allowed_domains(self):
+ """Return None, or the sequence of allowed domains (as a tuple)."""
+ return self._allowed_domains
+ def set_allowed_domains(self, allowed_domains):
+ """Set the sequence of allowed domains, or None."""
+ if allowed_domains is not None:
+ allowed_domains = tuple(allowed_domains)
+ self._allowed_domains = allowed_domains
+
+ def is_not_allowed(self, domain):
+ if self._allowed_domains is None:
+ return False
+ for allowed_domain in self._allowed_domains:
+ if user_domain_match(domain, allowed_domain):
+ return False
+ return True
+
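A sketch of configuring the policy through the constructor arguments and block-list helpers above (the domains are made up):

    policy = DefaultCookiePolicy(
        blocked_domains=[".ads.example.net"],
        rfc2965=True,
        strict_ns_domain=DefaultCookiePolicy.DomainStrict)
    policy.is_blocked("tracker.ads.example.net")  # -> True
    policy.is_blocked("www.example.com")          # -> False
    policy.set_blocked_domains([])                # clear the block-list again
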
+ def set_ok(self, cookie, request):
+ """
+ If you override set_ok, be sure to call this method. If it returns
+ false, so should your subclass (assuming your subclass wants to be more
+ strict about which cookies to accept).
+
+ """
+ debug(" - checking cookie %s", cookie)
+
+ assert cookie.name is not None
+
+ for n in "version", "verifiability", "name", "path", "domain", "port":
+ fn_name = "set_ok_"+n
+ fn = getattr(self, fn_name)
+ if not fn(cookie, request):
+ return False
+
+ return True
+
+ def set_ok_version(self, cookie, request):
+ if cookie.version is None:
+ # Version is always set to 0 by parse_ns_headers if it's a Netscape
+ # cookie, so this must be an invalid RFC 2965 cookie.
+ debug(" Set-Cookie2 without version attribute (%s)", cookie)
+ return False
+ if cookie.version > 0 and not self.rfc2965:
+ debug(" RFC 2965 cookies are switched off")
+ return False
+ elif cookie.version == 0 and not self.netscape:
+ debug(" Netscape cookies are switched off")
+ return False
+ return True
+
+ def set_ok_verifiability(self, cookie, request):
+ if request.unverifiable and is_third_party(request):
+ if cookie.version > 0 and self.strict_rfc2965_unverifiable:
+ debug(" third-party RFC 2965 cookie during "
+ "unverifiable transaction")
+ return False
+ elif cookie.version == 0 and self.strict_ns_unverifiable:
+ debug(" third-party Netscape cookie during "
+ "unverifiable transaction")
+ return False
+ return True
+
+ def set_ok_name(self, cookie, request):
+ # Try and stop servers setting V0 cookies designed to hack other
+ # servers that know both V0 and V1 protocols.
+ if (cookie.version == 0 and self.strict_ns_set_initial_dollar and
+ startswith(cookie.name, "$")):
+ debug(" illegal name (starts with '$'): '%s'", cookie.name)
+ return False
+ return True
+
+ def set_ok_path(self, cookie, request):
+ if cookie.path_specified:
+ req_path = request_path(request)
+ if ((cookie.version > 0 or
+ (cookie.version == 0 and self.strict_ns_set_path)) and
+ not startswith(req_path, cookie.path)):
+ debug(" path attribute %s is not a prefix of request "
+ "path %s", cookie.path, req_path)
+ return False
+ return True
+
+ def set_ok_countrycode_domain(self, cookie, request):
+ """Return False if explicit cookie domain is not acceptable.
+
+ Called by set_ok_domain, for convenience of overriding by
+ subclasses.
+
+ """
+ if cookie.domain_specified and self.strict_domain:
+ domain = cookie.domain
+ # since domain was specified, we know that:
+ assert domain.startswith(".")
+ if string.count(domain, ".") == 2:
+ # domain like .foo.bar
+ i = string.rfind(domain, ".")
+ tld = domain[i+1:]
+ sld = domain[1:i]
+ if (string.lower(sld) in [
+ "co", "ac",
+ "com", "edu", "org", "net", "gov", "mil", "int",
+ "aero", "biz", "cat", "coop", "info", "jobs", "mobi",
+ "museum", "name", "pro", "travel",
+ ] and
+ len(tld) == 2):
+ # domain like .co.uk
+ return False
+ return True
+
+ def set_ok_domain(self, cookie, request):
+ if self.is_blocked(cookie.domain):
+ debug(" domain %s is in user block-list", cookie.domain)
+ return False
+ if self.is_not_allowed(cookie.domain):
+ debug(" domain %s is not in user allow-list", cookie.domain)
+ return False
+ if not self.set_ok_countrycode_domain(cookie, request):
+ debug(" country-code second level domain %s", cookie.domain)
+ return False
+ if cookie.domain_specified:
+ req_host, erhn = eff_request_host(request)
+ domain = cookie.domain
+ if startswith(domain, "."):
+ undotted_domain = domain[1:]
+ else:
+ undotted_domain = domain
+ embedded_dots = (string.find(undotted_domain, ".") >= 0)
+ if not embedded_dots and domain != ".local":
+ debug(" non-local domain %s contains no embedded dot",
+ domain)
+ return False
+ if cookie.version == 0:
+ if (not endswith(erhn, domain) and
+ (not startswith(erhn, ".") and
+ not endswith("."+erhn, domain))):
+ debug(" effective request-host %s (even with added "
+ "initial dot) does not end with %s",
+ erhn, domain)
+ return False
+ if (cookie.version > 0 or
+ (self.strict_ns_domain & self.DomainRFC2965Match)):
+ if not domain_match(erhn, domain):
+ debug(" effective request-host %s does not domain-match "
+ "%s", erhn, domain)
+ return False
+ if (cookie.version > 0 or
+ (self.strict_ns_domain & self.DomainStrictNoDots)):
+ host_prefix = req_host[:-len(domain)]
+ if (string.find(host_prefix, ".") >= 0 and
+ not IPV4_RE.search(req_host)):
+ debug(" host prefix %s for domain %s contains a dot",
+ host_prefix, domain)
+ return False
+ return True
+
+ def set_ok_port(self, cookie, request):
+ if cookie.port_specified:
+ req_port = request_port(request)
+ if req_port is None:
+ req_port = "80"
+ else:
+ req_port = str(req_port)
+ for p in string.split(cookie.port, ","):
+ try:
+ int(p)
+ except ValueError:
+ debug(" bad port %s (not numeric)", p)
+ return False
+ if p == req_port:
+ break
+ else:
+ debug(" request port (%s) not found in %s",
+ req_port, cookie.port)
+ return False
+ return True
+
+ def return_ok(self, cookie, request):
+ """
+ If you override return_ok, be sure to call this method. If it returns
+ false, so should your subclass (assuming your subclass wants to be more
+ strict about which cookies to return).
+
+ """
+ # Path has already been checked by path_return_ok, and domain blocking
+ # done by domain_return_ok.
+ debug(" - checking cookie %s", cookie)
+
+ for n in "version", "verifiability", "secure", "expires", "port", "domain":
+ fn_name = "return_ok_"+n
+ fn = getattr(self, fn_name)
+ if not fn(cookie, request):
+ return False
+ return True
+
+ def return_ok_version(self, cookie, request):
+ if cookie.version > 0 and not self.rfc2965:
+ debug(" RFC 2965 cookies are switched off")
+ return False
+ elif cookie.version == 0 and not self.netscape:
+ debug(" Netscape cookies are switched off")
+ return False
+ return True
+
+ def return_ok_verifiability(self, cookie, request):
+ if request.unverifiable and is_third_party(request):
+ if cookie.version > 0 and self.strict_rfc2965_unverifiable:
+ debug(" third-party RFC 2965 cookie during unverifiable "
+ "transaction")
+ return False
+ elif cookie.version == 0 and self.strict_ns_unverifiable:
+ debug(" third-party Netscape cookie during unverifiable "
+ "transaction")
+ return False
+ return True
+
+ def return_ok_secure(self, cookie, request):
+ if cookie.secure and request.get_type() != "https":
+ debug(" secure cookie with non-secure request")
+ return False
+ return True
+
+ def return_ok_expires(self, cookie, request):
+ if cookie.is_expired(self._now):
+ debug(" cookie expired")
+ return False
+ return True
+
+ def return_ok_port(self, cookie, request):
+ if cookie.port:
+ req_port = request_port(request)
+ if req_port is None:
+ req_port = "80"
+ for p in string.split(cookie.port, ","):
+ if p == req_port:
+ break
+ else:
+ debug(" request port %s does not match cookie port %s",
+ req_port, cookie.port)
+ return False
+ return True
+
+ def return_ok_domain(self, cookie, request):
+ req_host, erhn = eff_request_host(request)
+ domain = cookie.domain
+
+ # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
+ if (cookie.version == 0 and
+ (self.strict_ns_domain & self.DomainStrictNonDomain) and
+ not cookie.domain_specified and domain != erhn):
+ debug(" cookie with unspecified domain does not string-compare "
+ "equal to request domain")
+ return False
+
+ if cookie.version > 0 and not domain_match(erhn, domain):
+ debug(" effective request-host name %s does not domain-match "
+ "RFC 2965 cookie domain %s", erhn, domain)
+ return False
+ if cookie.version == 0 and not endswith("."+erhn, domain):
+ debug(" request-host %s does not match Netscape cookie domain "
+ "%s", req_host, domain)
+ return False
+ return True
+
+ def domain_return_ok(self, domain, request):
+ # Liberal check of domain. This is here as an optimization to avoid
+ # having to load lots of MSIE cookie files unless necessary.
+
+ # Munge req_host and erhn to always start with a dot, so as to err on
+ # the side of letting cookies through.
+ dotted_req_host, dotted_erhn = eff_request_host(request)
+ if not startswith(dotted_req_host, "."):
+ dotted_req_host = "."+dotted_req_host
+ if not startswith(dotted_erhn, "."):
+ dotted_erhn = "."+dotted_erhn
+ if not (endswith(dotted_req_host, domain) or
+ endswith(dotted_erhn, domain)):
+ #debug(" request domain %s does not match cookie domain %s",
+ # req_host, domain)
+ return False
+
+ if self.is_blocked(domain):
+ debug(" domain %s is in user block-list", domain)
+ return False
+ if self.is_not_allowed(domain):
+ debug(" domain %s is not in user allow-list", domain)
+ return False
+
+ return True
+
+ def path_return_ok(self, path, request):
+ debug("- checking cookie path=%s", path)
+ req_path = request_path(request)
+ if not startswith(req_path, path):
+ debug(" %s does not path-match %s", req_path, path)
+ return False
+ return True
+
+
+def vals_sorted_by_key(adict):
+ keys = adict.keys()
+ keys.sort()
+ return map(adict.get, keys)
+
+class MappingIterator:
+ """Iterates over nested mapping, depth-first, in sorted order by key."""
+ def __init__(self, mapping):
+ self._s = [(vals_sorted_by_key(mapping), 0, None)] # LIFO stack
+
+ def __iter__(self): return self
+
+ def next(self):
+ # this is hairy because of lack of generators
+ while 1:
+ try:
+ vals, i, prev_item = self._s.pop()
+ except IndexError:
+ raise StopIteration()
+ if i < len(vals):
+ item = vals[i]
+ i = i + 1
+ self._s.append((vals, i, prev_item))
+ try:
+ item.items
+ except AttributeError:
+ # non-mapping
+ break
+ else:
+ # mapping
+ self._s.append((vals_sorted_by_key(item), 0, item))
+ continue
+ return item
+
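For example, the iterator visits only the leaf values of a nested mapping, depth-first, with siblings taken in key order:

    list(MappingIterator({"b": 1, "a": {"y": 2, "x": 3}}))
    # -> [3, 2, 1]   (the values for a/x, a/y, then b)
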
+
+# Used as second parameter to dict.get method, to distinguish absent
+# dict key from one with a None value.
+class Absent: pass
+
+class CookieJar:
+ """Collection of HTTP cookies.
+
+ You may not need to know about this class: try mechanize.urlopen().
+
+ The major methods are extract_cookies and add_cookie_header; these are all
+ you are likely to need.
+
+ CookieJar supports the iterator protocol:
+
+ for cookie in cookiejar:
+ # do something with cookie
+
+ Methods:
+
+ add_cookie_header(request)
+ extract_cookies(response, request)
+ make_cookies(response, request)
+ set_cookie_if_ok(cookie, request)
+ set_cookie(cookie)
+ clear_session_cookies()
+ clear_expired_cookies()
+ clear(domain=None, path=None, name=None)
+
+ Public attributes
+
+ policy: CookiePolicy object
+
+ """
+
+ non_word_re = re.compile(r"\W")
+ quote_re = re.compile(r"([\"\\])")
+ strict_domain_re = re.compile(r"\.?[^.]*")
+ domain_re = re.compile(r"[^.]*")
+ dots_re = re.compile(r"^\.+")
+
+ def __init__(self, policy=None):
+ """
+ See CookieJar.__doc__ for argument documentation.
+
+ """
+ if policy is None:
+ policy = DefaultCookiePolicy()
+ self._policy = policy
+
+ self._cookies = {}
+
+ # for __getitem__ iteration in pre-2.2 Pythons
+ self._prev_getitem_index = 0
+
+ def set_policy(self, policy):
+ self._policy = policy
+
+ def _cookies_for_domain(self, domain, request):
+ cookies = []
+ if not self._policy.domain_return_ok(domain, request):
+ return []
+ debug("Checking %s for cookies to return", domain)
+ cookies_by_path = self._cookies[domain]
+ for path in cookies_by_path.keys():
+ if not self._policy.path_return_ok(path, request):
+ continue
+ cookies_by_name = cookies_by_path[path]
+ for cookie in cookies_by_name.values():
+ if not self._policy.return_ok(cookie, request):
+ debug(" not returning cookie")
+ continue
+ debug(" it's a match")
+ cookies.append(cookie)
+ return cookies
+
+ def _cookies_for_request(self, request):
+ """Return a list of cookies to be returned to server."""
+ cookies = []
+ for domain in self._cookies.keys():
+ cookies.extend(self._cookies_for_domain(domain, request))
+ return cookies
+
+ def _cookie_attrs(self, cookies):
+ """Return a list of cookie-attributes to be returned to server.
+
+ like ['foo="bar"; $Path="/"', ...]
+
+ The $Version attribute is also added when appropriate (currently only
+ once per request).
+
+ """
+ # add cookies in order of most specific (ie. longest) path first
+ def decreasing_size(a, b): return cmp(len(b.path), len(a.path))
+ cookies.sort(decreasing_size)
+
+ version_set = False
+
+ attrs = []
+ for cookie in cookies:
+ # set version of Cookie header
+ # XXX
+ # What should it be if multiple matching Set-Cookie headers have
+ # different versions themselves?
+ # Answer: there is no answer; was supposed to be settled by
+ # RFC 2965 errata, but that may never appear...
+ version = cookie.version
+ if not version_set:
+ version_set = True
+ if version > 0:
+ attrs.append("$Version=%s" % version)
+
+ # quote cookie value if necessary
+ # (not for Netscape protocol, which already has any quotes
+ # intact, due to the poorly-specified Netscape Cookie: syntax)
+ if ((cookie.value is not None) and
+ self.non_word_re.search(cookie.value) and version > 0):
+ value = self.quote_re.sub(r"\\\1", cookie.value)
+ else:
+ value = cookie.value
+
+ # add cookie-attributes to be returned in Cookie header
+ if cookie.value is None:
+ attrs.append(cookie.name)
+ else:
+ attrs.append("%s=%s" % (cookie.name, value))
+ if version > 0:
+ if cookie.path_specified:
+ attrs.append('$Path="%s"' % cookie.path)
+ if startswith(cookie.domain, "."):
+ domain = cookie.domain
+ if (not cookie.domain_initial_dot and
+ startswith(domain, ".")):
+ domain = domain[1:]
+ attrs.append('$Domain="%s"' % domain)
+ if cookie.port is not None:
+ p = "$Port"
+ if cookie.port_specified:
+ p = p + ('="%s"' % cookie.port)
+ attrs.append(p)
+
+ return attrs
+
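For the version-0 cookie sketched after the Cookie class above, calling the internal helper directly shows the fragments that add_cookie_header later joins with "; ":

    jar = CookieJar()
    jar._cookie_attrs([c])   # c: the version-0 session cookie from earlier
    # -> ['session=abc123']
    # The same cookie with version=1 would instead yield:
    # ['$Version=1', 'session=abc123', '$Path="/"', '$Domain=".example.com"']
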
+ def add_cookie_header(self, request):
+ """Add correct Cookie: header to request (urllib2.Request object).
+
+ The Cookie2 header is also added unless policy.hide_cookie2 is true.
+
+ The request object (usually a urllib2.Request instance) must support
+ the methods get_full_url, get_host, get_type, has_header, get_header,
+ header_items and add_unredirected_header, as documented by urllib2, and
+ the port attribute (the port number). Actually,
+ RequestUpgradeProcessor will automatically upgrade your Request object
+ to one with has_header, get_header, header_items and
+ add_unredirected_header, if it lacks those methods, for compatibility
+ with pre-2.4 versions of urllib2.
+
+ """
+ debug("add_cookie_header")
+ self._policy._now = self._now = int(time.time())
+
+ req_host, erhn = eff_request_host(request)
+ strict_non_domain = (
+ self._policy.strict_ns_domain & self._policy.DomainStrictNonDomain)
+
+ cookies = self._cookies_for_request(request)
+
+ attrs = self._cookie_attrs(cookies)
+ if attrs:
+ if not request.has_header("Cookie"):
+ request.add_unredirected_header(
+ "Cookie", string.join(attrs, "; "))
+
+ # if necessary, advertise that we know RFC 2965
+ if self._policy.rfc2965 and not self._policy.hide_cookie2:
+ for cookie in cookies:
+ if cookie.version != 1 and not request.has_header("Cookie2"):
+ request.add_unredirected_header("Cookie2", '$Version="1"')
+ break
+
+ self.clear_expired_cookies()
+
+ def _normalized_cookie_tuples(self, attrs_set):
+ """Return list of tuples containing normalised cookie information.
+
+ attrs_set is the list of lists of key,value pairs extracted from
+ the Set-Cookie or Set-Cookie2 headers.
+
+ Tuples are name, value, standard, rest, where name and value are the
+ cookie name and value, standard is a dictionary containing the standard
+ cookie-attributes (discard, secure, version, expires or max-age,
+ domain, path and port) and rest is a dictionary containing the rest of
+ the cookie-attributes.
+
+ """
+ cookie_tuples = []
+
+ boolean_attrs = "discard", "secure"
+ value_attrs = ("version",
+ "expires", "max-age",
+ "domain", "path", "port",
+ "comment", "commenturl")
+
+ for cookie_attrs in attrs_set:
+ name, value = cookie_attrs[0]
+
+ # Build dictionary of standard cookie-attributes (standard) and
+ # dictionary of other cookie-attributes (rest).
+
+ # Note: expiry time is normalised to seconds since epoch. V0
+ # cookies should have the Expires cookie-attribute, and V1 cookies
+ # should have Max-Age, but since V1 includes RFC 2109 cookies (and
+ # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
+ # accept either (but prefer Max-Age).
+ max_age_set = False
+
+ bad_cookie = False
+
+ standard = {}
+ rest = {}
+ for k, v in cookie_attrs[1:]:
+ lc = string.lower(k)
+ # don't lose case distinction for unknown fields
+ if lc in value_attrs or lc in boolean_attrs:
+ k = lc
+ if k in boolean_attrs and v is None:
+ # boolean cookie-attribute is present, but has no value
+ # (like "discard", rather than "port=80")
+ v = True
+ if standard.has_key(k):
+ # only first value is significant
+ continue
+ if k == "domain":
+ if v is None:
+ debug(" missing value for domain attribute")
+ bad_cookie = True
+ break
+ # RFC 2965 section 3.3.3
+ v = string.lower(v)
+ if k == "expires":
+ if max_age_set:
+ # Prefer max-age to expires (like Mozilla)
+ continue
+ if v is None:
+ debug(" missing or invalid value for expires "
+ "attribute: treating as session cookie")
+ continue
+ if k == "max-age":
+ max_age_set = True
+ try:
+ v = int(v)
+ except ValueError:
+ debug(" missing or invalid (non-numeric) value for "
+ "max-age attribute")
+ bad_cookie = True
+ break
+ # convert RFC 2965 Max-Age to seconds since epoch
+ # XXX Strictly you're supposed to follow RFC 2616
+ # age-calculation rules. Remember that zero Max-Age is a
+ # request to discard (old and new) cookie, though.
+ k = "expires"
+ v = self._now + v
+ if (k in value_attrs) or (k in boolean_attrs):
+ if (v is None and
+ k not in ["port", "comment", "commenturl"]):
+ debug(" missing value for %s attribute" % k)
+ bad_cookie = True
+ break
+ standard[k] = v
+ else:
+ rest[k] = v
+
+ if bad_cookie:
+ continue
+
+ cookie_tuples.append((name, value, standard, rest))
+
+ return cookie_tuples
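+
+ # For example, an attrs_set entry such as
+ #   [("spam", "eggs"), ("path", "/"), ("secure", None)]
+ # normalises (roughly) to the tuple
+ #   ("spam", "eggs", {"path": "/", "secure": True}, {})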
+
+ def _cookie_from_cookie_tuple(self, tup, request):
+ # standard is dict of standard cookie-attributes, rest is dict of the
+ # rest of them
+ name, value, standard, rest = tup
+
+ domain = standard.get("domain", Absent)
+ path = standard.get("path", Absent)
+ port = standard.get("port", Absent)
+ expires = standard.get("expires", Absent)
+
+ # set the easy defaults
+ version = standard.get("version", None)
+ if version is not None: version = int(version)
+ secure = standard.get("secure", False)
+ # (discard is also set if expires is Absent)
+ discard = standard.get("discard", False)
+ comment = standard.get("comment", None)
+ comment_url = standard.get("commenturl", None)
+
+ # set default path
+ if path is not Absent and path != "":
+ path_specified = True
+ path = escape_path(path)
+ else:
+ path_specified = False
+ path = request_path(request)
+ i = string.rfind(path, "/")
+ if i != -1:
+ if version == 0:
+ # Netscape spec parts company from reality here
+ path = path[:i]
+ else:
+ path = path[:i+1]
+ if len(path) == 0: path = "/"
+
+ # set default domain
+ domain_specified = domain is not Absent
+ # but first we have to remember whether it starts with a dot
+ domain_initial_dot = False
+ if domain_specified:
+ domain_initial_dot = bool(startswith(domain, "."))
+ if domain is Absent:
+ req_host, erhn = eff_request_host(request)
+ domain = erhn
+ elif not startswith(domain, "."):
+ domain = "."+domain
+
+ # set default port
+ port_specified = False
+ if port is not Absent:
+ if port is None:
+ # Port attr present, but has no value: default to request port.
+ # Cookie should then only be sent back on that port.
+ port = request_port(request)
+ else:
+ port_specified = True
+ port = re.sub(r"\s+", "", port)
+ else:
+ # No port attr present. Cookie can be sent back on any port.
+ port = None
+
+ # set default expires and discard
+ if expires is Absent:
+ expires = None
+ discard = True
+ elif expires <= self._now:
+ # Expiry date in past is request to delete cookie. This can't be
+ # in DefaultCookiePolicy, because can't delete cookies there.
+ try:
+ self.clear(domain, path, name)
+ except KeyError:
+ pass
+ debug("Expiring cookie, domain='%s', path='%s', name='%s'",
+ domain, path, name)
+ return None
+
+ return Cookie(version,
+ name, value,
+ port, port_specified,
+ domain, domain_specified, domain_initial_dot,
+ path, path_specified,
+ secure,
+ expires,
+ discard,
+ comment,
+ comment_url,
+ rest)
+
+ def _cookies_from_attrs_set(self, attrs_set, request):
+ cookie_tuples = self._normalized_cookie_tuples(attrs_set)
+
+ cookies = []
+ for tup in cookie_tuples:
+ cookie = self._cookie_from_cookie_tuple(tup, request)
+ if cookie: cookies.append(cookie)
+ return cookies
+
+ def _process_rfc2109_cookies(self, cookies):
+ if self._policy.rfc2109_as_netscape is None:
+ rfc2109_as_netscape = not self._policy.rfc2965
+ else:
+ rfc2109_as_netscape = self._policy.rfc2109_as_netscape
+ for cookie in cookies:
+ if cookie.version == 1:
+ cookie.rfc2109 = True
+ if rfc2109_as_netscape:
+ # treat 2109 cookies as Netscape cookies rather than
+ # as RFC2965 cookies
+ cookie.version = 0
+
+ def make_cookies(self, response, request):
+ """Return sequence of Cookie objects extracted from response object.
+
+ See extract_cookies.__doc__ for the interfaces required of the
+ response and request arguments.
+
+ """
+ # get cookie-attributes for RFC 2965 and Netscape protocols
+ headers = response.info()
+ rfc2965_hdrs = getheaders(headers, "Set-Cookie2")
+ ns_hdrs = getheaders(headers, "Set-Cookie")
+
+ rfc2965 = self._policy.rfc2965
+ netscape = self._policy.netscape
+
+ if ((not rfc2965_hdrs and not ns_hdrs) or
+ (not ns_hdrs and not rfc2965) or
+ (not rfc2965_hdrs and not netscape) or
+ (not netscape and not rfc2965)):
+ return [] # no relevant cookie headers: quick exit
+
+ try:
+ cookies = self._cookies_from_attrs_set(
+ split_header_words(rfc2965_hdrs), request)
+ except:
+ reraise_unmasked_exceptions()
+ cookies = []
+
+ if ns_hdrs and netscape:
+ try:
+ # RFC 2109 and Netscape cookies
+ ns_cookies = self._cookies_from_attrs_set(
+ parse_ns_headers(ns_hdrs), request)
+ except:
+ reraise_unmasked_exceptions()
+ ns_cookies = []
+ self._process_rfc2109_cookies(ns_cookies)
+
+ # Look for Netscape cookies (from Set-Cookie headers) that match
+ # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
+ # For each match, keep the RFC 2965 cookie and ignore the Netscape
+ # cookie (RFC 2965 section 9.1). Actually, RFC 2109 cookies are
+ # bundled in with the Netscape cookies for this purpose, which is
+ # reasonable behaviour.
+ if rfc2965:
+ lookup = {}
+ for cookie in cookies:
+ lookup[(cookie.domain, cookie.path, cookie.name)] = None
+
+ def no_matching_rfc2965(ns_cookie, lookup=lookup):
+ key = ns_cookie.domain, ns_cookie.path, ns_cookie.name
+ return not lookup.has_key(key)
+ ns_cookies = filter(no_matching_rfc2965, ns_cookies)
+
+ if ns_cookies:
+ cookies.extend(ns_cookies)
+
+ return cookies
+
+ def set_cookie_if_ok(self, cookie, request):
+ """Set a cookie if policy says it's OK to do so.
+
+ cookie: mechanize.Cookie instance
+ request: see extract_cookies.__doc__ for the required interface
+
+ """
+ self._policy._now = self._now = int(time.time())
+
+ if self._policy.set_ok(cookie, request):
+ self.set_cookie(cookie)
+
+ def set_cookie(self, cookie):
+ """Set a cookie, without checking whether or not it should be set.
+
+ cookie: mechanize.Cookie instance
+ """
+ c = self._cookies
+ if not c.has_key(cookie.domain): c[cookie.domain] = {}
+ c2 = c[cookie.domain]
+ if not c2.has_key(cookie.path): c2[cookie.path] = {}
+ c3 = c2[cookie.path]
+ c3[cookie.name] = cookie
+
+ def extract_cookies(self, response, request):
+ """Extract cookies from response, where allowable given the request.
+
+ Look for allowable Set-Cookie: and Set-Cookie2: headers in the response
+ object passed as argument. Any of these headers that are found are
+ used to update the state of the object (subject to the policy.set_ok
+ method's approval).
+
+ The response object (usually the result of a call to
+ mechanize.urlopen, or similar) should support an info method, which
+ returns a mimetools.Message object (in fact, the 'mimetools.Message
+ object' may be any object that provides a getallmatchingheaders
+ method).
+
+ The request object (usually a urllib2.Request instance) must support
+ the methods get_full_url and get_host, as documented by urllib2, and
+ the port attribute (the port number). The request is used to set
+ default values for cookie-attributes as well as for checking that the
+ cookie is OK to be set.
+
+ """
+ debug("extract_cookies: %s", response.info())
+ self._policy._now = self._now = int(time.time())
+
+ for cookie in self.make_cookies(response, request):
+ if self._policy.set_ok(cookie, request):
+ debug(" setting cookie: %s", cookie)
+ self.set_cookie(cookie)
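+
+ # Illustrative sketch (URL made up; urllib2 is used directly here just
+ # for illustration):
+ #
+ #   jar = CookieJar()
+ #   request = urllib2.Request("http://www.example.com/")
+ #   response = urllib2.urlopen(request)
+ #   jar.extract_cookies(response, request)
+ #   len(jar)  # number of cookies the server was allowed to set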
+
+ def clear(self, domain=None, path=None, name=None):
+ """Clear some cookies.
+
+ Invoking this method without arguments will clear all cookies. If
+ given a single argument, only cookies belonging to that domain will be
+ removed. If given two arguments, cookies belonging to the specified
+ path within that domain are removed. If given three arguments, then
+ the cookie with the specified name, path and domain is removed.
+
+ Raises KeyError if no matching cookie exists.
+
+ """
+ if name is not None:
+ if (domain is None) or (path is None):
+ raise ValueError(
+ "domain and path must be given to remove a cookie by name")
+ del self._cookies[domain][path][name]
+ elif path is not None:
+ if domain is None:
+ raise ValueError(
+ "domain must be given to remove cookies by path")
+ del self._cookies[domain][path]
+ elif domain is not None:
+ del self._cookies[domain]
+ else:
+ self._cookies = {}
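+
+ # For example (domain, path and name values are made up):
+ #
+ #   jar.clear("www.example.com", "/account", "sessionid")  # one cookie
+ #   jar.clear("www.example.com")                            # one domain
+ #   jar.clear()                                             # everything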
+
+ def clear_session_cookies(self):
+ """Discard all session cookies.
+
+ Discards all cookies held by object which had either no Max-Age or
+ Expires cookie-attribute or an explicit Discard cookie-attribute, or
+ which otherwise have ended up with a true discard attribute. For
+ interactive browsers, the end of a session usually corresponds to
+ closing the browser window.
+
+ Note that the save method won't save session cookies anyway, unless you
+ ask otherwise by passing a true ignore_discard argument.
+
+ """
+ for cookie in self:
+ if cookie.discard:
+ self.clear(cookie.domain, cookie.path, cookie.name)
+
+ def clear_expired_cookies(self):
+ """Discard all expired cookies.
+
+ You probably don't need to call this method: expired cookies are never
+ sent back to the server (provided you're using DefaultCookiePolicy),
+ this method is called by CookieJar itself every so often, and the save
+ method won't save expired cookies anyway (unless you ask otherwise by
+ passing a true ignore_expires argument).
+
+ """
+ now = time.time()
+ for cookie in self:
+ if cookie.is_expired(now):
+ self.clear(cookie.domain, cookie.path, cookie.name)
+
+ def __getitem__(self, i):
+ if i == 0:
+ self._getitem_iterator = self.__iter__()
+ elif self._prev_getitem_index != i-1: raise IndexError(
+ "CookieJar.__getitem__ only supports sequential iteration")
+ self._prev_getitem_index = i
+ try:
+ return self._getitem_iterator.next()
+ except StopIteration:
+ raise IndexError()
+
+ def __iter__(self):
+ return MappingIterator(self._cookies)
+
+ def __len__(self):
+ """Return number of contained cookies."""
+ i = 0
+ for cookie in self: i = i + 1
+ return i
+
+ def __repr__(self):
+ r = []
+ for cookie in self: r.append(repr(cookie))
+ return "<%s[%s]>" % (self.__class__, string.join(r, ", "))
+
+ def __str__(self):
+ r = []
+ for cookie in self: r.append(str(cookie))
+ return "<%s[%s]>" % (self.__class__, string.join(r, ", "))
+
+
+class LoadError(Exception): pass
+
+class FileCookieJar(CookieJar):
+ """CookieJar that can be loaded from and saved to a file.
+
+ Additional methods
+
+ save(filename=None, ignore_discard=False, ignore_expires=False)
+ load(filename=None, ignore_discard=False, ignore_expires=False)
+ revert(filename=None, ignore_discard=False, ignore_expires=False)
+
+ Additional public attributes
+
+ filename: filename for loading and saving cookies
+
+ Additional public readable attributes
+
+ delayload: request that cookies are lazily loaded from disk; this is only
+ a hint since this only affects performance, not behaviour (unless the
+ cookies on disk are changing); a CookieJar object may ignore it (in fact,
+ only MSIECookieJar lazily loads cookies at the moment)
+
+ """
+
+ def __init__(self, filename=None, delayload=False, policy=None):
+ """
+ See FileCookieJar.__doc__ for argument documentation.
+
+ Cookies are NOT loaded from the named file until either the load or
+ revert method is called.
+
+ """
+ CookieJar.__init__(self, policy)
+ if filename is not None and not isstringlike(filename):
+ raise ValueError("filename must be string-like")
+ self.filename = filename
+ self.delayload = bool(delayload)
+
+ def save(self, filename=None, ignore_discard=False, ignore_expires=False):
+ """Save cookies to a file.
+
+ filename: name of file in which to save cookies
+ ignore_discard: save even cookies set to be discarded
+ ignore_expires: save even cookies that have expired
+
+ The file is overwritten if it already exists, thus wiping all its
+ cookies. Saved cookies can be restored later using the load or revert
+ methods. If filename is not specified, self.filename is used; if
+ self.filename is None, ValueError is raised.
+
+ """
+ raise NotImplementedError()
+
+ def load(self, filename=None, ignore_discard=False, ignore_expires=False):
+ """Load cookies from a file.
+
+ Old cookies are kept unless overwritten by newly loaded ones.
+
+ Arguments are as for .save().
+
+ If filename is not specified, self.filename is used; if self.filename
+ is None, ValueError is raised. The named file must be in the format
+ understood by the class, or LoadError will be raised. This format will
+ be identical to that written by the save method, unless the load format
+ is not sufficiently well understood (as is the case for MSIECookieJar).
+
+ """
+ if filename is None:
+ if self.filename is not None: filename = self.filename
+ else: raise ValueError(MISSING_FILENAME_TEXT)
+
+ f = open(filename)
+ try:
+ self._really_load(f, filename, ignore_discard, ignore_expires)
+ finally:
+ f.close()
+
+ def revert(self, filename=None,
+ ignore_discard=False, ignore_expires=False):
+ """Clear all cookies and reload cookies from a saved file.
+
+ Raises LoadError (or IOError) if reversion is not successful; the
+ object's state will not be altered if this happens.
+
+ """
+ if filename is None:
+ if self.filename is not None: filename = self.filename
+ else: raise ValueError(MISSING_FILENAME_TEXT)
+
+ old_state = copy.deepcopy(self._cookies)
+ self._cookies = {}
+ try:
+ self.load(filename, ignore_discard, ignore_expires)
+ except (LoadError, IOError):
+ self._cookies = old_state
+ raise
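+
+ # Illustrative sketch using a concrete subclass (the filename is made
+ # up; LWPCookieJar, defined in _lwpcookiejar, is one such subclass):
+ #
+ #   jar = LWPCookieJar("cookies.lwp")
+ #   # ... fetch some URLs with an opener that uses this jar ...
+ #   jar.save(ignore_discard=True)   # also keep session cookies
+ #   jar.load(ignore_discard=True)   # re-read them later
+ #   jar.revert()                    # drop in-memory state, reload file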
Property changes on: Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_clientcookie.py
___________________________________________________________________
Name: svn:keywords
+ Id
Name: svn:eol-style
+ native
Added: Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_gzip.py
===================================================================
--- Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_gzip.py 2006-06-19 15:22:02 UTC (rev 68754)
+++ Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_gzip.py 2006-06-19 15:38:18 UTC (rev 68755)
@@ -0,0 +1,103 @@
+import urllib2
+from cStringIO import StringIO
+import _util
+
+# GzipConsumer was taken from Fredrik Lundh's effbot.org-0.1-20041009 library
+class GzipConsumer:
+
+ def __init__(self, consumer):
+ self.__consumer = consumer
+ self.__decoder = None
+ self.__data = ""
+
+ def __getattr__(self, key):
+ return getattr(self.__consumer, key)
+
+ def feed(self, data):
+ if self.__decoder is None:
+ # check if we have a full gzip header
+ data = self.__data + data
+ try:
+ i = 10
+ flag = ord(data[3])
+ if flag & 4: # extra
+ x = ord(data[i]) + 256*ord(data[i+1])
+ i = i + 2 + x
+ if flag & 8: # filename
+ while ord(data[i]):
+ i = i + 1
+ i = i + 1
+ if flag & 16: # comment
+ while ord(data[i]):
+ i = i + 1
+ i = i + 1
+ if flag & 2: # crc
+ i = i + 2
+ if len(data) < i:
+ raise IndexError("not enough data")
+ if data[:3] != "\x1f\x8b\x08":
+ raise IOError("invalid gzip data")
+ data = data[i:]
+ except IndexError:
+ self.__data = data
+ return # need more data
+ import zlib
+ self.__data = ""
+ self.__decoder = zlib.decompressobj(-zlib.MAX_WBITS)
+ data = self.__decoder.decompress(data)
+ if data:
+ self.__consumer.feed(data)
+
+ def close(self):
+ if self.__decoder:
+ data = self.__decoder.flush()
+ if data:
+ self.__consumer.feed(data)
+ self.__consumer.close()
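+
+ # Illustrative sketch: anything with feed() and close() methods can act
+ # as the downstream consumer (the Collector class and the gzipped data
+ # below are made up):
+ #
+ #   class Collector:
+ #       def __init__(self): self.chunks = []
+ #       def feed(self, data): self.chunks.append(data)
+ #       def close(self): pass
+ #
+ #   collector = Collector()
+ #   gzc = GzipConsumer(collector)
+ #   gzc.feed(gzipped_bytes)   # may be fed in arbitrary-sized pieces
+ #   gzc.close()
+ #   decompressed = "".join(collector.chunks)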
+
+
+# --------------------------------------------------------------------
+
+# the rest of this module is John Lee's stupid code, not
+# Fredrik's nice code :-)
+
+class stupid_gzip_consumer:
+ def __init__(self): self.data = []
+ def feed(self, data): self.data.append(data)
+
+class stupid_gzip_wrapper(_util.closeable_response):
+ def __init__(self, response):
+ self._response = response
+
+ c = stupid_gzip_consumer()
+ gzc = GzipConsumer(c)
+ gzc.feed(response.read())
+ self.__data = StringIO("".join(c.data))
+
+ def read(self, size=-1):
+ return self.__data.read(size)
+ def readline(self, size=-1):
+ return self.__data.readline(size)
+ def readlines(self, sizehint=-1):
+ return self.__data.readlines(sizehint)
+
+ def __getattr__(self, name):
+ # delegate unknown methods/attributes
+ return getattr(self._response, name)
+
+class HTTPGzipProcessor(urllib2.BaseHandler):
+ handler_order = 200 # response processing before HTTPEquivProcessor
+
+ def http_request(self, request):
+ request.add_header("Accept-Encoding", "gzip")
+ return request
+
+ def http_response(self, request, response):
+ # post-process response
+ enc_hdrs = response.info().getheaders("Content-encoding")
+ for enc_hdr in enc_hdrs:
+ if ("gzip" in enc_hdr) or ("compress" in enc_hdr):
+ return stupid_gzip_wrapper(response)
+ return response
+
+ https_response = http_response
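+
+ # Illustrative sketch of wiring the processor into an opener (URL made
+ # up); responses that advertise a gzip/compress Content-encoding are
+ # then returned wrapped, so .read() yields decompressed data:
+ #
+ #   opener = urllib2.build_opener(HTTPGzipProcessor)
+ #   data = opener.open("http://www.example.com/").read()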
Property changes on: Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_gzip.py
___________________________________________________________________
Name: svn:keywords
+ Id
Name: svn:eol-style
+ native
Added: Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_headersutil.py
===================================================================
--- Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_headersutil.py 2006-06-19 15:22:02 UTC (rev 68754)
+++ Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_headersutil.py 2006-06-19 15:38:18 UTC (rev 68755)
@@ -0,0 +1,225 @@
+"""Utility functions for HTTP header value parsing and construction.
+
+Copyright 1997-1998, Gisle Aas
+Copyright 2002-2006, John J. Lee
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the BSD or ZPL 2.1 licenses (see the file
+COPYING.txt included with the distribution).
+
+"""
+
+import os, re, string, urlparse
+from types import StringType
+from types import UnicodeType
+STRING_TYPES = StringType, UnicodeType
+
+from _util import startswith, endswith, http2time
+
+def is_html(ct_headers, url, allow_xhtml=False):
+ """
+ ct_headers: Sequence of Content-Type headers
+ url: Response URL
+
+ """
+ if not ct_headers:
+ # guess
+ ext = os.path.splitext(urlparse.urlparse(url)[2])[1]
+ html_exts = [".htm", ".html"]
+ if allow_xhtml:
+ html_exts += [".xhtml"]
+ return ext in html_exts
+ # use first header
+ ct = split_header_words(ct_headers)[0][0][0]
+ html_types = ["text/html"]
+ if allow_xhtml:
+ html_types += [
+ "text/xhtml", "text/xml",
+ "application/xml", "application/xhtml+xml",
+ ]
+ return ct in html_types
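+
+# A few illustrative calls (URLs are made up):
+#
+#   is_html(["text/html; charset=iso-8859-1"], "http://example.com/a")
+#       -> True
+#   is_html([], "http://example.com/page.html")  # guessed from extension
+#       -> True
+#   is_html(["image/png"], "http://example.com/logo.png")
+#       -> False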
+
+def unmatched(match):
+ """Return unmatched part of re.Match object."""
+ start, end = match.span(0)
+ return match.string[:start]+match.string[end:]
+
+token_re = re.compile(r"^\s*([^=\s;,]+)")
+quoted_value_re = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
+value_re = re.compile(r"^\s*=\s*([^\s;,]*)")
+escape_re = re.compile(r"\\(.)")
+def split_header_words(header_values):
+ r"""Parse header values into a list of lists containing key,value pairs.
+
+ The function knows how to deal with ",", ";" and "=" as well as quoted
+ values after "=". A list of space separated tokens are parsed as if they
+ were separated by ";".
+
+ If the header_values passed as argument contains multiple values, then they
+ are treated as if they were a single value separated by comma ",".
+
+ This means that this function is useful for parsing header fields that
+ follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
+ the requirement for tokens).
+
+ headers = #header
+ header = (token | parameter) *( [";"] (token | parameter))
+
+ token = 1*<any CHAR except CTLs or separators>
+ separators = "(" | ")" | "<" | ">" | "@"
+ | "," | ";" | ":" | "\" | <">
+ | "/" | "[" | "]" | "?" | "="
+ | "{" | "}" | SP | HT
+
+ quoted-string = ( <"> *(qdtext | quoted-pair ) <"> )
+ qdtext = <any TEXT except <">>
+ quoted-pair = "\" CHAR
+
+ parameter = attribute "=" value
+ attribute = token
+ value = token | quoted-string
+
+ Each header is represented by a list of key/value pairs. The value for a
+ simple token (not part of a parameter) is None. Syntactically incorrect
+ headers will not necessarily be parsed as you would want.
+
+ This is easier to describe with some examples:
+
+ >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
+ [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
+ >>> split_header_words(['text/html; charset="iso-8859-1"'])
+ [[('text/html', None), ('charset', 'iso-8859-1')]]
+ >>> split_header_words([r'Basic realm="\"foo\bar\""'])
+ [[('Basic', None), ('realm', '"foobar"')]]
+
+ """
+ assert type(header_values) not in STRING_TYPES
+ result = []
+ for text in header_values:
+ orig_text = text
+ pairs = []
+ while text:
+ m = token_re.search(text)
+ if m:
+ text = unmatched(m)
+ name = m.group(1)
+ m = quoted_value_re.search(text)
+ if m: # quoted value
+ text = unmatched(m)
+ value = m.group(1)
+ value = escape_re.sub(r"\1", value)
+ else:
+ m = value_re.search(text)
+ if m: # unquoted value
+ text = unmatched(m)
+ value = m.group(1)
+ value = string.rstrip(value)
+ else:
+ # no value, a lone token
+ value = None
+ pairs.append((name, value))
+ elif startswith(string.lstrip(text), ","):
+ # concatenated headers, as per RFC 2616 section 4.2
+ text = string.lstrip(text)[1:]
+ if pairs: result.append(pairs)
+ pairs = []
+ else:
+ # skip junk
+ non_junk, nr_junk_chars = re.subn("^[=\s;]*", "", text)
+ assert nr_junk_chars > 0, (
+ "split_header_words bug: '%s', '%s', %s" %
+ (orig_text, text, pairs))
+ text = non_junk
+ if pairs: result.append(pairs)
+ return result
+
+join_escape_re = re.compile(r"([\"\\])")
+def join_header_words(lists):
+ """Do the inverse of the conversion done by split_header_words.
+
+ Takes a list of lists of (key, value) pairs and produces a single header
+ value. Attribute values are quoted if needed.
+
+ >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
+ 'text/plain; charset="iso-8859/1"'
+ >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
+ 'text/plain, charset="iso-8859/1"'
+
+ """
+ headers = []
+ for pairs in lists:
+ attr = []
+ for k, v in pairs:
+ if v is not None:
+ if not re.search(r"^\w+$", v):
+ v = join_escape_re.sub(r"\\\1", v) # escape " and \
+ v = '"%s"' % v
+ if k is None: # Netscape cookies may have no name
+ k = v
+ else:
+ k = "%s=%s" % (k, v)
+ attr.append(k)
+ if attr: headers.append(string.join(attr, "; "))
+ return string.join(headers, ", ")
+
+def parse_ns_headers(ns_headers):
+ """Ad-hoc parser for Netscape protocol cookie-attributes.
+
+ The old Netscape cookie format for Set-Cookie can for instance contain
+ an unquoted "," in the expires field, so we have to use this ad-hoc
+ parser instead of split_header_words.
+
+ XXX This may not make the best possible effort to parse all the crap
+ that Netscape Cookie headers contain. Ronald Tschalar's HTTPClient
+ parser is probably better, so we could do worse than following that if
+ this ever gives any trouble.
+
+ Currently, this is also used for parsing RFC 2109 cookies.
+
+ """
+ known_attrs = ("expires", "domain", "path", "secure",
+ # RFC 2109 attrs (may turn up in Netscape cookies, too)
+ "port", "max-age")
+
+ result = []
+ for ns_header in ns_headers:
+ pairs = []
+ version_set = False
+ params = re.split(r";\s*", ns_header)
+ for ii in range(len(params)):
+ param = params[ii]
+ param = string.rstrip(param)
+ if param == "": continue
+ if "=" not in param:
+ k, v = param, None
+ else:
+ k, v = re.split(r"\s*=\s*", param, 1)
+ k = string.lstrip(k)
+ if ii != 0:
+ lc = string.lower(k)
+ if lc in known_attrs:
+ k = lc
+ if k == "version":
+ # This is an RFC 2109 cookie.
+ version_set = True
+ if k == "expires":
+ # convert expires date to seconds since epoch
+ if startswith(v, '"'): v = v[1:]
+ if endswith(v, '"'): v = v[:-1]
+ v = http2time(v) # None if invalid
+ pairs.append((k, v))
+
+ if pairs:
+ if not version_set:
+ pairs.append(("version", "0"))
+ result.append(pairs)
+
+ return result
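+
+# For example, a Netscape-style header value like
+#   parse_ns_headers(["FOO=bar; path=/; secure"])
+# should yield (roughly)
+#   [[("FOO", "bar"), ("path", "/"), ("secure", None), ("version", "0")]]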
+
+
+def _test():
+ import doctest, _headersutil
+ return doctest.testmod(_headersutil)
+
+if __name__ == "__main__":
+ _test()
Property changes on: Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_headersutil.py
___________________________________________________________________
Name: svn:keywords
+ Id
Name: svn:eol-style
+ native
Added: Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_html.py
===================================================================
--- Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_html.py 2006-06-19 15:22:02 UTC (rev 68754)
+++ Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_html.py 2006-06-19 15:38:18 UTC (rev 68755)
@@ -0,0 +1,575 @@
+"""HTML handling.
+
+Copyright 2003-2006 John J. Lee <jjl at pobox.com>
+
+This code is free software; you can redistribute it and/or modify it under
+the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
+included with the distribution).
+
+"""
+
+import re, copy, urllib, htmlentitydefs
+from urlparse import urljoin
+
+import _request
+from _headersutil import split_header_words, is_html as _is_html
+
+## # XXXX miserable hack
+## def urljoin(base, url):
+## if url.startswith("?"):
+## return base+url
+## else:
+## return urlparse.urljoin(base, url)
+
+## def chr_range(a, b):
+## return "".join(map(chr, range(ord(a), ord(b)+1)))
+
+## RESERVED_URL_CHARS = ("ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+## "abcdefghijklmnopqrstuvwxyz"
+## "-_.~")
+## UNRESERVED_URL_CHARS = "!*'();:@&=+$,/?%#[]"
+# we want (RESERVED_URL_CHARS+UNRESERVED_URL_CHARS), minus those
+# 'safe'-by-default characters that urllib.urlquote never quotes
+URLQUOTE_SAFE_URL_CHARS = "!*'();:@&=+$,/?%#[]~"
+
+DEFAULT_ENCODING = "latin-1"
+
+class CachingGeneratorFunction(object):
+ """Caching wrapper around a no-arguments iterable."""
+ def __init__(self, iterable):
+ self._iterable = iterable
+ self._cache = []
+ def __call__(self):
+ cache = self._cache
+ for item in cache:
+ yield item
+ for item in self._iterable:
+ cache.append(item)
+ yield item
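+
+ # Illustrative sketch: the wrapped iterable is consumed at most once;
+ # later calls replay the cached items.
+ #
+ #   cached = CachingGeneratorFunction(iter([1, 2, 3]))
+ #   list(cached())  # -> [1, 2, 3] (consumes the iterator)
+ #   list(cached())  # -> [1, 2, 3] (served from the cache)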
+
+def encoding_finder(default_encoding):
+ def encoding(response):
+ # HTTPEquivProcessor may be in use, so both HTTP and HTTP-EQUIV
+ # headers may be in the response. HTTP-EQUIV headers come last,
+ # so try in order from first to last.
+ for ct in response.info().getheaders("content-type"):
+ for k, v in split_header_words([ct])[0]:
+ if k == "charset":
+ return v
+ return default_encoding
+ return encoding
+
+def make_is_html(allow_xhtml):
+ def is_html(response, encoding):
+ ct_hdrs = response.info().getheaders("content-type")
+ url = response.geturl()
+ # XXX encoding
+ return _is_html(ct_hdrs, url, allow_xhtml)
+ return is_html
+
+# idea for this argument-processing trick is from Peter Otten
+class Args:
+ def __init__(self, args_map):
+ self.dictionary = dict(args_map)
+ def __getattr__(self, key):
+ try:
+ return self.dictionary[key]
+ except KeyError:
+ return getattr(self.__class__, key)
+
+def form_parser_args(
+ select_default=False,
+ form_parser_class=None,
+ request_class=None,
+ backwards_compat=False,
+ ):
+ return Args(locals())
+
+
+class Link:
+ def __init__(self, base_url, url, text, tag, attrs):
+ assert None not in [url, tag, attrs]
+ self.base_url = base_url
+ self.absolute_url = urljoin(base_url, url)
+ self.url, self.text, self.tag, self.attrs = url, text, tag, attrs
+ def __cmp__(self, other):
+ try:
+ for name in "url", "text", "tag", "attrs":
+ if getattr(self, name) != getattr(other, name):
+ return -1
+ except AttributeError:
+ return -1
+ return 0
+ def __repr__(self):
+ return "Link(base_url=%r, url=%r, text=%r, tag=%r, attrs=%r)" % (
+ self.base_url, self.url, self.text, self.tag, self.attrs)
+
+
+def clean_url(url, encoding):
+ # percent-encode illegal URL characters
+ # Trying to come up with test cases for this gave me a headache, revisit
+ # when we switch to unicode.
+ # Somebody else's comments (lost the attribution):
+## - IE will return you the url in the encoding you send it
+## - Mozilla/Firefox will send you latin-1 if there's no non latin-1
+## characters in your link. It will send you utf-8 however if there are...
+ if type(url) == type(""):
+ url = url.decode(encoding, "replace")
+ url = url.strip()
+ return urllib.quote(url.encode(encoding), URLQUOTE_SAFE_URL_CHARS)
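+
+# For example:
+#
+#   clean_url("http://example.com/a b", "latin-1")
+#       -> 'http://example.com/a%20b'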
+
+class LinksFactory:
+
+ def __init__(self,
+ link_parser_class=None,
+ link_class=Link,
+ urltags=None,
+ ):
+ import _pullparser
+ if link_parser_class is None:
+ link_parser_class = _pullparser.TolerantPullParser
+ self.link_parser_class = link_parser_class
+ self.link_class = link_class
+ if urltags is None:
+ urltags = {
+ "a": "href",
+ "area": "href",
+ "frame": "src",
+ "iframe": "src",
+ }
+ self.urltags = urltags
+ self._response = None
+ self._encoding = None
+
+ def set_response(self, response, base_url, encoding):
+ self._response = response
+ self._encoding = encoding
+ self._base_url = base_url
+
+ def links(self):
+ """Return an iterator that provides links of the document."""
+ response = self._response
+ encoding = self._encoding
+ base_url = self._base_url
+ p = self.link_parser_class(response, encoding=encoding)
+
+ for token in p.tags(*(self.urltags.keys()+["base"])):
+ if token.data == "base":
+ base_url = dict(token.attrs).get("href")
+ continue
+ if token.type == "endtag":
+ continue
+ attrs = dict(token.attrs)
+ tag = token.data
+ name = attrs.get("name")
+ text = None
+ # XXX use attr_encoding for ref'd doc if that doc does not provide
+ # one by other means
+ #attr_encoding = attrs.get("charset")
+ url = attrs.get(self.urltags[tag]) # XXX is "" a valid URL?
+ if not url:
+ # Probably an <A NAME="blah"> link or <AREA NOHREF...>.
+ # For our purposes a link is something with a URL, so ignore
+ # this.
+ continue
+
+ url = clean_url(url, encoding)
+ if tag == "a":
+ if token.type != "startendtag":
+ # hmm, this'd break if end tag is missing
+ text = p.get_compressed_text(("endtag", tag))
+ # but this doesn't work for eg. <a href="blah"><b>Andy</b></a>
+ #text = p.get_compressed_text()
+
+ yield Link(base_url, url, text, tag, token.attrs)
+
+class FormsFactory:
+
+ """Makes a sequence of objects satisfying ClientForm.HTMLForm interface.
+
+ For constructor argument docs, see ClientForm.ParseResponse
+ argument docs.
+
+ """
+
+ def __init__(self,
+ select_default=False,
+ form_parser_class=None,
+ request_class=None,
+ backwards_compat=False,
+ ):
+ import ClientForm
+ self.select_default = select_default
+ if form_parser_class is None:
+ form_parser_class = ClientForm.FormParser
+ self.form_parser_class = form_parser_class
+ if request_class is None:
+ request_class = _request.Request
+ self.request_class = request_class
+ self.backwards_compat = backwards_compat
+ self._response = None
+ self.encoding = None
+
+ def set_response(self, response, encoding):
+ self._response = response
+ self.encoding = encoding
+
+ def forms(self):
+ import ClientForm
+ encoding = self.encoding
+ return ClientForm.ParseResponse(
+ self._response,
+ select_default=self.select_default,
+ form_parser_class=self.form_parser_class,
+ request_class=self.request_class,
+ backwards_compat=self.backwards_compat,
+ encoding=encoding,
+ )
+
+class TitleFactory:
+ def __init__(self):
+ self._response = self._encoding = None
+
+ def set_response(self, response, encoding):
+ self._response = response
+ self._encoding = encoding
+
+ def title(self):
+ import _pullparser
+ p = _pullparser.TolerantPullParser(
+ self._response, encoding=self._encoding)
+ try:
+ p.get_tag("title")
+ except _pullparser.NoMoreTokensError:
+ return None
+ else:
+ return p.get_text()
+
+
+def unescape(data, entities, encoding):
+ if data is None or "&" not in data:
+ return data
+
+ def replace_entities(match):
+ ent = match.group()
+ if ent[1] == "#":
+ return unescape_charref(ent[2:-1], encoding)
+
+ repl = entities.get(ent[1:-1])
+ if repl is not None:
+ repl = unichr(repl)
+ if type(repl) != type(""):
+ try:
+ repl = repl.encode(encoding)
+ except UnicodeError:
+ repl = ent
+ else:
+ repl = ent
+ return repl
+
+ return re.sub(r"&#?[A-Za-z0-9]+?;", replace_entities, data)
+
+def unescape_charref(data, encoding):
+ name, base = data, 10
+ if name.startswith("x"):
+ name, base= name[1:], 16
+ uc = unichr(int(name, base))
+ if encoding is None:
+ return uc
+ else:
+ try:
+ repl = uc.encode(encoding)
+ except UnicodeError:
+ repl = "&#%s;" % data
+ return repl
+
+
+try:
+ import BeautifulSoup
+except ImportError:
+ pass
+else:
+ import sgmllib
+ # monkeypatch to fix http://www.python.org/sf/803422 :-(
+ sgmllib.charref = re.compile("&#(x?[0-9a-fA-F]+)[^0-9a-fA-F]")
+ class MechanizeBs(BeautifulSoup.BeautifulSoup):
+ _entitydefs = htmlentitydefs.name2codepoint
+ # don't want the magic Microsoft-char workaround
+ PARSER_MASSAGE = [(re.compile('(<[^<>]*)/>'),
+ lambda(x):x.group(1) + ' />'),
+ (re.compile('<!\s+([^<>]*)>'),
+ lambda(x):'<!' + x.group(1) + '>')
+ ]
+
+ def __init__(self, encoding, text=None, avoidParserProblems=True,
+ initialTextIsEverything=True):
+ self._encoding = encoding
+ BeautifulSoup.BeautifulSoup.__init__(
+ self, text, avoidParserProblems, initialTextIsEverything)
+
+ def handle_charref(self, ref):
+ t = unescape("&#%s;"%ref, self._entitydefs, self._encoding)
+ self.handle_data(t)
+ def handle_entityref(self, ref):
+ t = unescape("&%s;"%ref, self._entitydefs, self._encoding)
+ self.handle_data(t)
+ def unescape_attrs(self, attrs):
+ escaped_attrs = []
+ for key, val in attrs:
+ val = unescape(val, self._entitydefs, self._encoding)
+ escaped_attrs.append((key, val))
+ return escaped_attrs
+
+class RobustLinksFactory:
+
+ compress_re = re.compile(r"\s+")
+
+ def __init__(self,
+ link_parser_class=None,
+ link_class=Link,
+ urltags=None,
+ ):
+ import BeautifulSoup
+ if link_parser_class is None:
+ link_parser_class = MechanizeBs
+ self.link_parser_class = link_parser_class
+ self.link_class = link_class
+ if urltags is None:
+ urltags = {
+ "a": "href",
+ "area": "href",
+ "frame": "src",
+ "iframe": "src",
+ }
+ self.urltags = urltags
+ self._bs = None
+ self._encoding = None
+ self._base_url = None
+
+ def set_soup(self, soup, base_url, encoding):
+ self._bs = soup
+ self._base_url = base_url
+ self._encoding = encoding
+
+ def links(self):
+ import BeautifulSoup
+ bs = self._bs
+ base_url = self._base_url
+ encoding = self._encoding
+ gen = bs.recursiveChildGenerator()
+ for ch in bs.recursiveChildGenerator():
+ if (isinstance(ch, BeautifulSoup.Tag) and
+ ch.name in self.urltags.keys()+["base"]):
+ link = ch
+ attrs = bs.unescape_attrs(link.attrs)
+ attrs_dict = dict(attrs)
+ if link.name == "base":
+ base_url = attrs_dict.get("href")
+ continue
+ url_attr = self.urltags[link.name]
+ url = attrs_dict.get(url_attr)
+ if not url:
+ continue
+ url = clean_url(url, encoding)
+ text = link.firstText(lambda t: True)
+ if text is BeautifulSoup.Null:
+ # follow _pullparser's weird behaviour rigidly
+ if link.name == "a":
+ text = ""
+ else:
+ text = None
+ else:
+ text = self.compress_re.sub(" ", text.strip())
+ yield Link(base_url, url, text, link.name, attrs)
+
+
+class RobustFormsFactory(FormsFactory):
+ def __init__(self, *args, **kwds):
+ import ClientForm
+ args = form_parser_args(*args, **kwds)
+ if args.form_parser_class is None:
+ args.form_parser_class = ClientForm.RobustFormParser
+ FormsFactory.__init__(self, **args.dictionary)
+
+ def set_response(self, response, encoding):
+ self._response = response
+ self.encoding = encoding
+
+
+class RobustTitleFactory:
+ def __init__(self):
+ self._bs = self._encoding = None
+
+ def set_soup(self, soup, encoding):
+ self._bs = soup
+ self._encoding = encoding
+
+ def title(self):
+ import BeautifulSoup
+ title = self._bs.first("title")
+ if title == BeautifulSoup.Null:
+ return None
+ else:
+ return title.firstText(lambda t: True)
+
+
+class Factory:
+ """Factory for forms, links, etc.
+
+ This interface may expand in future.
+
+ Public methods:
+
+ set_request_class(request_class)
+ set_response(response)
+ forms()
+ links()
+
+ Public attributes:
+
+ encoding: string specifying the encoding of response if it contains a text
+ document (this value is left unspecified for documents that do not have
+ an encoding, e.g. an image file)
+ is_html: true if response contains an HTML document (XHTML may be
+ regarded as HTML too)
+ title: page title, or None if no title or not HTML
+
+ """
+
+ def __init__(self, forms_factory, links_factory, title_factory,
+ get_encoding=encoding_finder(DEFAULT_ENCODING),
+ is_html_p=make_is_html(allow_xhtml=False),
+ ):
+ """
+
+ Pass keyword arguments only.
+
+ get_encoding: callable taking a response and returning the character
+ encoding to use; the default falls back to DEFAULT_ENCODING (currently
+ latin-1) if the encoding cannot be determined (or guessed) from the
+ response. You should turn on HTTP-EQUIV handling if you want the best
+ chance of getting this right without resorting to that default. The
+ default value of DEFAULT_ENCODING may change in future.
+
+ """
+ self._forms_factory = forms_factory
+ self._links_factory = links_factory
+ self._title_factory = title_factory
+ self._get_encoding = get_encoding
+ self._is_html_p = is_html_p
+
+ self.set_response(None)
+
+ def set_request_class(self, request_class):
+ """Set urllib2.Request class.
+
+ ClientForm.HTMLForm instances returned by .forms() will return
+ instances of this class when .click()ed.
+
+ """
+ self._forms_factory.request_class = request_class
+
+ def set_response(self, response):
+ """Set response.
+
+ The response must implement the same interface as objects returned by
+ urllib2.urlopen().
+
+ """
+ self._response = response
+# self._forms_genf = self._links_genf = None
+ self._forms = self._links_genf = None
+ self._get_title = None
+ for name in ["encoding", "is_html", "title"]:
+ try:
+ delattr(self, name)
+ except AttributeError:
+ pass
+
+ def __getattr__(self, name):
+ if name not in ["encoding", "is_html", "title"]:
+ return getattr(self.__class__, name)
+
+ try:
+ if name == "encoding":
+ self.encoding = self._get_encoding(self._response)
+ return self.encoding
+ elif name == "is_html":
+ self.is_html = self._is_html_p(self._response, self.encoding)
+ return self.is_html
+ elif name == "title":
+ if self.is_html:
+ self.title = self._title_factory.title()
+ else:
+ self.title = None
+ return self.title
+ finally:
+ self._response.seek(0)
+
+ def forms(self):
+ """Return iterable over ClientForm.HTMLForm-like objects."""
+# import pdb;pdb.set_trace()
+ if self._forms is None:
+## self._forms_genf = CachingGeneratorFunction(
+## self._forms_factory.forms())
+# forms = self._forms_factory.forms()
+# self._forms_genf = CachingGeneratorFunction(forms)
+ self._forms = self._forms_factory.forms()
+# if len(self._forms) > 1: import pdb;pdb.set_trace()
+ return self._forms
+
+ def links(self):
+ """Return iterable over mechanize.Link-like objects."""
+ if self._links_genf is None:
+ self._links_genf = CachingGeneratorFunction(
+ self._links_factory.links())
+ return self._links_genf()
+
+class DefaultFactory(Factory):
+ """Based on sgmllib."""
+ def __init__(self, i_want_broken_xhtml_support=False):
+ Factory.__init__(
+ self,
+ forms_factory=FormsFactory(),
+ links_factory=LinksFactory(),
+ title_factory=TitleFactory(),
+ is_html_p=make_is_html(allow_xhtml=i_want_broken_xhtml_support),
+ )
+
+ def set_response(self, response):
+ Factory.set_response(self, response)
+ if response is not None:
+ self._forms_factory.set_response(
+ copy.copy(response), self.encoding)
+ self._links_factory.set_response(
+ copy.copy(response), self._response.geturl(), self.encoding)
+ self._title_factory.set_response(
+ copy.copy(response), self.encoding)
+
+class RobustFactory(Factory):
+ """Based on BeautifulSoup, hopefully a bit more robust to bad HTML than is
+ DefaultFactory.
+
+ """
+ def __init__(self, i_want_broken_xhtml_support=False,
+ soup_class=None):
+ Factory.__init__(
+ self,
+ forms_factory=RobustFormsFactory(),
+ links_factory=RobustLinksFactory(),
+ title_factory=RobustTitleFactory(),
+ is_html_p=make_is_html(allow_xhtml=i_want_broken_xhtml_support),
+ )
+ if soup_class is None:
+ soup_class = MechanizeBs
+ self._soup_class = soup_class
+
+ def set_response(self, response):
+ import BeautifulSoup
+ Factory.set_response(self, response)
+ if response is not None:
+ data = response.read()
+ soup = self._soup_class(self.encoding, data)
+ self._forms_factory.set_response(response, self.encoding)
+ self._links_factory.set_soup(
+ soup, response.geturl(), self.encoding)
+ self._title_factory.set_soup(soup, self.encoding)
Property changes on: Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_html.py
___________________________________________________________________
Name: svn:keywords
+ Id
Name: svn:eol-style
+ native
Added: Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_lwpcookiejar.py
===================================================================
--- Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_lwpcookiejar.py 2006-06-19 15:22:02 UTC (rev 68754)
+++ Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_lwpcookiejar.py 2006-06-19 15:38:18 UTC (rev 68755)
@@ -0,0 +1,185 @@
+"""Load / save to libwww-perl (LWP) format files.
+
+Actually, the format is slightly extended from that used by LWP's
+(libwww-perl's) HTTP::Cookies, to avoid losing some RFC 2965 information
+not recorded by LWP.
+
+It uses the version string "2.0", though really there isn't an LWP Cookies
+2.0 format. This indicates that there is extra information in here
+(domain_dot and port_spec) while still being compatible with libwww-perl,
+I hope.
+
+Copyright 2002-2006 John J Lee <jjl at pobox.com>
+Copyright 1997-1999 Gisle Aas (original libwww-perl code)
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the BSD or ZPL 2.1 licenses (see the file
+COPYING.txt included with the distribution).
+
+"""
+
+import time, re, string, logging
+
+from _clientcookie import reraise_unmasked_exceptions, FileCookieJar, Cookie, \
+ MISSING_FILENAME_TEXT, LoadError
+from _headersutil import join_header_words, split_header_words
+from _util import startswith, iso2time, time2isoz
+
+debug = logging.getLogger("mechanize").debug
+
+
+def lwp_cookie_str(cookie):
+ """Return string representation of Cookie in an the LWP cookie file format.
+
+ Actually, the format is extended a bit -- see module docstring.
+
+ """
+ h = [(cookie.name, cookie.value),
+ ("path", cookie.path),
+ ("domain", cookie.domain)]
+ if cookie.port is not None: h.append(("port", cookie.port))
+ if cookie.path_specified: h.append(("path_spec", None))
+ if cookie.port_specified: h.append(("port_spec", None))
+ if cookie.domain_initial_dot: h.append(("domain_dot", None))
+ if cookie.secure: h.append(("secure", None))
+ if cookie.expires: h.append(("expires",
+ time2isoz(float(cookie.expires))))
+ if cookie.discard: h.append(("discard", None))
+ if cookie.comment: h.append(("comment", cookie.comment))
+ if cookie.comment_url: h.append(("commenturl", cookie.comment_url))
+ if cookie.rfc2109: h.append(("rfc2109", None))
+
+ keys = cookie.nonstandard_attr_keys()
+ keys.sort()
+ for k in keys:
+ h.append((k, str(cookie.get_nonstandard_attr(k))))
+
+ h.append(("version", str(cookie.version)))
+
+ return join_header_words([h])
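+
+# The resulting "Set-Cookie3" value looks roughly like this (cookie name,
+# value and dates are made up):
+#
+#   sessionid=abc123; path="/"; domain="example.com"; path_spec;
+#   expires="2006-12-31 23:59:59Z"; version=0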
+
+class LWPCookieJar(FileCookieJar):
+ """
+ The LWPCookieJar saves a sequence of"Set-Cookie3" lines.
+ "Set-Cookie3" is the format used by the libwww-perl libary, not known
+ to be compatible with any browser, but which is easy to read and
+ doesn't lose information about RFC 2965 cookies.
+
+ Additional methods
+
+ as_lwp_str(ignore_discard=True, ignore_expires=True)
+
+ """
+
+ magic_re = r"^\#LWP-Cookies-(\d+\.\d+)"
+
+ def as_lwp_str(self, ignore_discard=True, ignore_expires=True):
+ """Return cookies as a string of "\n"-separated "Set-Cookie3" headers.
+
+ ignore_discard and ignore_expires: see docstring for FileCookieJar.save
+
+ """
+ now = time.time()
+ r = []
+ for cookie in self:
+ if not ignore_discard and cookie.discard:
+ debug(" Not saving %s: marked for discard", cookie.name)
+ continue
+ if not ignore_expires and cookie.is_expired(now):
+ debug(" Not saving %s: expired", cookie.name)
+ continue
+ r.append("Set-Cookie3: %s" % lwp_cookie_str(cookie))
+ return string.join(r+[""], "\n")
+
+ def save(self, filename=None, ignore_discard=False, ignore_expires=False):
+ if filename is None:
+ if self.filename is not None: filename = self.filename
+ else: raise ValueError(MISSING_FILENAME_TEXT)
+
+ f = open(filename, "w")
+ try:
+ debug("Saving LWP cookies file")
+ # There really isn't an LWP Cookies 2.0 format, but this indicates
+ # that there is extra information in here (domain_dot and
+ # port_spec) while still being compatible with libwww-perl, I hope.
+ f.write("#LWP-Cookies-2.0\n")
+ f.write(self.as_lwp_str(ignore_discard, ignore_expires))
+ finally:
+ f.close()
+
+ def _really_load(self, f, filename, ignore_discard, ignore_expires):
+ magic = f.readline()
+ if not re.search(self.magic_re, magic):
+ msg = "%s does not seem to contain cookies" % filename
+ raise LoadError(msg)
+
+ now = time.time()
+
+ header = "Set-Cookie3:"
+ boolean_attrs = ("port_spec", "path_spec", "domain_dot",
+ "secure", "discard", "rfc2109")
+ value_attrs = ("version",
+ "port", "path", "domain",
+ "expires",
+ "comment", "commenturl")
+
+ try:
+ while 1:
+ line = f.readline()
+ if line == "": break
+ if not startswith(line, header):
+ continue
+ line = string.strip(line[len(header):])
+
+ for data in split_header_words([line]):
+ name, value = data[0]
+ standard = {}
+ rest = {}
+ for k in boolean_attrs:
+ standard[k] = False
+ for k, v in data[1:]:
+ if k is not None:
+ lc = string.lower(k)
+ else:
+ lc = None
+ # don't lose case distinction for unknown fields
+ if (lc in value_attrs) or (lc in boolean_attrs):
+ k = lc
+ if k in boolean_attrs:
+ if v is None: v = True
+ standard[k] = v
+ elif k in value_attrs:
+ standard[k] = v
+ else:
+ rest[k] = v
+
+ h = standard.get
+ expires = h("expires")
+ discard = h("discard")
+ if expires is not None:
+ expires = iso2time(expires)
+ if expires is None:
+ discard = True
+ domain = h("domain")
+ domain_specified = startswith(domain, ".")
+ c = Cookie(h("version"), name, value,
+ h("port"), h("port_spec"),
+ domain, domain_specified, h("domain_dot"),
+ h("path"), h("path_spec"),
+ h("secure"),
+ expires,
+ discard,
+ h("comment"),
+ h("commenturl"),
+ rest,
+ h("rfc2109"),
+ )
+ if not ignore_discard and c.discard:
+ continue
+ if not ignore_expires and c.is_expired(now):
+ continue
+ self.set_cookie(c)
+ except:
+ reraise_unmasked_exceptions((IOError,))
+ raise LoadError("invalid Set-Cookie3 format file %s" % filename)
+
Property changes on: Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_lwpcookiejar.py
___________________________________________________________________
Name: svn:keywords
+ Id
Name: svn:eol-style
+ native
Added: Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_mechanize.py
===================================================================
--- Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_mechanize.py 2006-06-19 15:22:02 UTC (rev 68754)
+++ Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_mechanize.py 2006-06-19 15:38:18 UTC (rev 68755)
@@ -0,0 +1,539 @@
+"""Stateful programmatic WWW navigation, after Perl's WWW::Mechanize.
+
+Copyright 2003-2006 John J. Lee <jjl at pobox.com>
+Copyright 2003 Andy Lester (original Perl code)
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
+included with the distribution).
+
+"""
+
+import urllib2, urlparse, sys, copy, re
+
+from _useragent import UserAgent
+from _html import DefaultFactory
+from _util import response_seek_wrapper, closeable_response
+import _request
+
+__version__ = (0, 1, 2, "b", None) # 0.1.2b
+
+class BrowserStateError(Exception): pass
+class LinkNotFoundError(Exception): pass
+class FormNotFoundError(Exception): pass
+
+
+class History:
+ """
+
+ Though this will become public, the implied interface is not yet stable.
+
+ """
+ def __init__(self):
+ self._history = [] # LIFO
+ def add(self, request, response):
+ self._history.append((request, response))
+ def back(self, n, _response):
+ response = _response # XXX move Browser._response into this class?
+ while n > 0 or response is None:
+ try:
+ request, response = self._history.pop()
+ except IndexError:
+ raise BrowserStateError("already at start of history")
+ n -= 1
+ return request, response
+ def clear(self):
+ del self._history[:]
+ def close(self):
+ for request, response in self._history:
+ response.close()
+ del self._history[:]
+
+# Horrible, but needed, at least until we fork urllib2. Even then, we may
+# want to preserve urllib2 compatibility.
+def upgrade_response(response):
+ # a urllib2 handler constructed the response, i.e. the response is an
+ # urllib.addinfourl, instead of a _util.closeable_response as returned
+ # by e.g. mechanize.HTTPHandler
+ try:
+ code = response.code
+ except AttributeError:
+ code = None
+ try:
+ msg = response.msg
+ except AttributeError:
+ msg = None
+
+ # may have already-.read() data from .seek() cache
+ data = None
+ get_data = getattr(response, "get_data", None)
+ if get_data:
+ data = get_data()
+
+ response = closeable_response(
+ response.fp, response.info(), response.geturl(), code, msg)
+ response = response_seek_wrapper(response)
+ if data:
+ response.set_data(data)
+ return response
+class ResponseUpgradeProcessor(urllib2.BaseHandler):
+ # upgrade responses to be .close()able without becoming unusable
+ handler_order = 0 # before anything else
+ def any_response(self, request, response):
+ if not hasattr(response, 'closeable_response'):
+ response = upgrade_response(response)
+ return response
+
+
+class Browser(UserAgent):
+ """Browser-like class with support for history, forms and links.
+
+ BrowserStateError is raised whenever the browser is in the wrong state to
+ complete the requested operation - e.g., when .back() is called when the
+ browser history is empty, or when .follow_link() is called when the current
+ response does not contain HTML data.
+
+ Public attributes:
+
+ request: current request (mechanize.Request or urllib2.Request)
+ form: currently selected form (see .select_form())
+
+ """
+
+ handler_classes = UserAgent.handler_classes.copy()
+ handler_classes["_response_upgrade"] = ResponseUpgradeProcessor
+ default_others = copy.copy(UserAgent.default_others)
+ default_others.append("_response_upgrade")
+
+ def __init__(self,
+ factory=None,
+ history=None,
+ request_class=None,
+ ):
+ """
+
+ Only named arguments should be passed to this constructor.
+
+ factory: object implementing the mechanize.Factory interface.
+ history: object implementing the mechanize.History interface. Note this
+ interface is still experimental and may change in future.
+ request_class: Request class to use. Defaults to mechanize.Request
+ for Pythons older than 2.4, and to urllib2.Request otherwise.
+
+ The Factory and History objects passed in are 'owned' by the Browser,
+ so they should not be shared across Browsers. In particular,
+ factory.set_response() should not be called except by the owning
+ Browser itself.
+
+ Note that the supplied factory's request_class is overridden by this
+ constructor, to ensure only one Request class is used.
+
+ """
+ if history is None:
+ history = History()
+ self._history = history
+ self.request = self._response = None
+ self.form = None
+
+ if request_class is None:
+ if not hasattr(urllib2.Request, "add_unredirected_header"):
+ request_class = _request.Request
+ else:
+ request_class = urllib2.Request # Python >= 2.4
+
+ if factory is None:
+ factory = DefaultFactory()
+ factory.set_request_class(request_class)
+ self._factory = factory
+ self.request_class = request_class
+
+ UserAgent.__init__(self) # do this last to avoid __getattr__ problems
+
+ def close(self):
+ if self._response is not None:
+ self._response.close()
+ UserAgent.close(self)
+ if self._history is not None:
+ self._history.close()
+ self._history = None
+ self.request = self._response = None
+
+ def open(self, url, data=None):
+ if self._response is not None:
+ self._response.close()
+ return self._mech_open(url, data)
+
+ def _mech_open(self, url, data=None, update_history=True):
+ try:
+ url.get_full_url
+ except AttributeError:
+ # string URL -- convert to absolute URL if required
+ scheme, netloc = urlparse.urlparse(url)[:2]
+ if not scheme:
+ # relative URL
+ assert not netloc, "malformed URL"
+ if self._response is None:
+ raise BrowserStateError(
+ "can't fetch relative URL: not viewing any document")
+ url = urlparse.urljoin(self._response.geturl(), url)
+
+ if self.request is not None and update_history:
+ self._history.add(self.request, self._response)
+ self._response = None
+ # we want self.request to be assigned even if UserAgent.open fails
+ self.request = self._request(url, data)
+ self._previous_scheme = self.request.get_type()
+
+ success = True
+ try:
+ response = UserAgent.open(self, self.request, data)
+ except urllib2.HTTPError, error:
+ success = False
+ response = error
+## except (IOError, socket.error, OSError), error:
+## # Yes, urllib2 really does raise all these :-((
+## # See test_urllib2.py for examples of socket.gaierror and OSError,
+## # plus note that FTPHandler raises IOError.
+## # XXX I don't seem to have an example of exactly socket.error being
+## # raised, only socket.gaierror...
+## # I don't want to start fixing these here, though, since this is a
+## # subclass of OpenerDirector, and it would break old code. Even in
+## # Python core, a fix would need some backwards-compat. hack to be
+## # acceptable.
+## raise
+ self.set_response(response)
+ if not success:
+ raise error
+ return copy.copy(self._response)
+
+ def __str__(self):
+ text = []
+ text.append("<%s " % self.__class__.__name__)
+ if self._response:
+ text.append("visiting %s" % self._response.geturl())
+ else:
+ text.append("(not visiting a URL)")
+ if self.form:
+ text.append("\n selected form:\n %s\n" % str(self.form))
+ text.append(">")
+ return "".join(text)
+
+ def response(self):
+ """Return a copy of the current response.
+
+ The returned object has the same interface as the object returned by
+ .open() (or urllib2.urlopen()).
+
+ """
+ return copy.copy(self._response)
+
+ def set_response(self, response):
+ """Replace current response with (a copy of) response."""
+ # sanity check, necessary but far from sufficient
+ if not (hasattr(response, "info") and hasattr(response, "geturl") and
+ hasattr(response, "read")):
+ raise ValueError("not a response object")
+
+ self.form = None
+
+ if not hasattr(response, "seek"):
+ response = response_seek_wrapper(response)
+ if not hasattr(response, "closeable_response"):
+ response = upgrade_response(response)
+ else:
+ response = copy.copy(response)
+
+ self._response = response
+ self._factory.set_response(self._response)
+
+ def geturl(self):
+ """Get URL of current document."""
+ if self._response is None:
+ raise BrowserStateError("not viewing any document")
+ return self._response.geturl()
+
+ def reload(self):
+ """Reload current document, and return response object."""
+ if self.request is None:
+ raise BrowserStateError("no URL has yet been .open()ed")
+ if self._response is not None:
+ self._response.close()
+ return self._mech_open(self.request, update_history=False)
+
+ def back(self, n=1):
+ """Go back n steps in history, and return response object.
+
+ n: go back this number of steps (default 1 step)
+
+ """
+ if self._response is not None:
+ self._response.close()
+ self.request, response = self._history.back(n, self._response)
+ self.set_response(response)
+ return response
+
+ def clear_history(self):
+ self._history.clear()
+
+ def links(self, **kwds):
+ """Return iterable over links (mechanize.Link objects)."""
+ if not self.viewing_html():
+ raise BrowserStateError("not viewing HTML")
+ links = self._factory.links()
+ if kwds:
+ return self._filter_links(links, **kwds)
+ else:
+ return links
+
+ def forms(self):
+ """Return iterable over forms.
+
+ The returned form objects implement the ClientForm.HTMLForm interface.
+
+ """
+ if not self.viewing_html():
+ raise BrowserStateError("not viewing HTML")
+ return self._factory.forms()
+
+ def viewing_html(self):
+ """Return whether the current response contains HTML data."""
+ if self._response is None:
+ raise BrowserStateError("not viewing any document")
+ return self._factory.is_html
+
+ def encoding(self):
+ """"""
+ if self._response is None:
+ raise BrowserStateError("not viewing any document")
+ return self._factory.encoding
+
+ def title(self):
+ """Return title, or None if there is no title element in the document.
+
+ Tags are stripped or textified as described in docs for
+ PullParser.get_text() method of pullparser module.
+
+ """
+ if not self.viewing_html():
+ raise BrowserStateError("not viewing HTML")
+ return self._factory.title
+
+ def select_form(self, name=None, predicate=None, nr=None):
+ """Select an HTML form for input.
+
+ This is a bit like giving a form the "input focus" in a browser.
+
+ If a form is selected, the Browser object supports the HTMLForm
+ interface, so you can call methods like .set_value(), .set(), and
+ .click().
+
+ At least one of the name, predicate and nr arguments must be supplied.
+ If no matching form is found, mechanize.FormNotFoundError is raised.
+
+ If name is specified, then the form must have the indicated name.
+
+ If predicate is specified, then the form must match that function. The
+ predicate function is passed the HTMLForm as its single argument, and
+ should return a boolean value indicating whether the form matched.
+
+ nr, if supplied, is the sequence number of the form (where 0 is the
+ first). Note that form 0 is the first form matching all the other
+ arguments (if supplied); it is not necessarily the first form in the
+ document.
+
+ """
+ if not self.viewing_html():
+ raise BrowserStateError("not viewing HTML")
+ if (name is None) and (predicate is None) and (nr is None):
+ raise ValueError(
+ "at least one argument must be supplied to specify form")
+
+ orig_nr = nr
+ for form in self.forms():
+ if name is not None and name != form.name:
+ continue
+ if predicate is not None and not predicate(form):
+ continue
+ if nr:
+ nr -= 1
+ continue
+ self.form = form
+ break # success
+ else:
+ # failure
+ description = []
+ if name is not None: description.append("name '%s'" % name)
+ if predicate is not None:
+ description.append("predicate %s" % predicate)
+ if orig_nr is not None: description.append("nr %d" % orig_nr)
+ description = ", ".join(description)
+ raise FormNotFoundError("no form matching "+description)
+
+ def _add_referer_header(self, request, origin_request=True):
+ if self.request is None:
+ return request
+ scheme = request.get_type()
+ original_scheme = self.request.get_type()
+ if scheme not in ["http", "https"]:
+ return request
+ if not origin_request and not self.request.has_header("Referer"):
+ return request
+
+ if (self._handle_referer and
+ original_scheme in ["http", "https"] and
+ not (original_scheme == "https" and scheme != "https")):
+ # strip URL fragment (RFC 2616 14.36)
+ parts = urlparse.urlparse(self.request.get_full_url())
+ parts = parts[:-1]+("",)
+ referer = urlparse.urlunparse(parts)
+ request.add_unredirected_header("Referer", referer)
+ return request
+
+ def click(self, *args, **kwds):
+ """See ClientForm.HTMLForm.click for documentation."""
+ if not self.viewing_html():
+ raise BrowserStateError("not viewing HTML")
+ request = self.form.click(*args, **kwds)
+ return self._add_referer_header(request)
+
+ def submit(self, *args, **kwds):
+ """Submit current form.
+
+ Arguments are as for ClientForm.HTMLForm.click().
+
+ Return value is same as for Browser.open().
+
+ """
+ return self.open(self.click(*args, **kwds))
+
+ def click_link(self, link=None, **kwds):
+ """Find a link and return a Request object for it.
+
+ Arguments are as for .find_link(), except that a link may be supplied
+ as the first argument.
+
+ """
+ if not self.viewing_html():
+ raise BrowserStateError("not viewing HTML")
+ if not link:
+ link = self.find_link(**kwds)
+ else:
+ if kwds:
+ raise ValueError(
+ "either pass a Link, or keyword arguments, not both")
+ request = self.request_class(link.absolute_url)
+ return self._add_referer_header(request)
+
+ def follow_link(self, link=None, **kwds):
+ """Find a link and .open() it.
+
+ Arguments are as for .click_link().
+
+ Return value is same as for Browser.open().
+
+ """
+ return self.open(self.click_link(link, **kwds))
+
+ def find_link(self, **kwds):
+ """Find a link in current page.
+
+ Links are returned as mechanize.Link objects.
+
+ # Return third link that .search()-matches the regexp "python"
+ # (by ".search()-matches", I mean that the regular expression method
+ # .search() is used, rather than .match()).
+ find_link(text_regex=re.compile("python"), nr=2)
+
+ # Return first http link in the current page that points to somewhere
+ # on python.org whose link text (after tags have been removed) is
+ # exactly "monty python".
+ find_link(text="monty python",
+ url_regex=re.compile("http.*python.org"))
+
+ # Return first link with exactly three HTML attributes.
+ find_link(predicate=lambda link: len(link.attrs) == 3)
+
+ Links include anchors (<a>), image maps (<area>), and frames (<frame>,
+ <iframe>).
+
+ All arguments must be passed by keyword, not position. Zero or more
+ arguments may be supplied. In order to find a link, all arguments
+ supplied must match.
+
+ If a matching link is not found, mechanize.LinkNotFoundError is raised.
+
+ text: link text between link tags: eg. <a href="blah">this bit</a> (as
+ returned by pullparser.get_compressed_text(), ie. without tags but
+ with opening tags "textified" as per the pullparser docs) must compare
+ equal to this argument, if supplied
+ text_regex: link text between tag (as defined above) must match the
+ regular expression object or regular expression string passed as this
+ argument, if supplied
+ name, name_regex: as for text and text_regex, but matched against the
+ name HTML attribute of the link tag
+ url, url_regex: as for text and text_regex, but matched against the
+ URL of the link tag (note this matches against Link.url, which is a
+ relative or absolute URL according to how it was written in the HTML)
+ tag: element name of opening tag, eg. "a"
+ predicate: a function taking a Link object as its single argument and
+ returning a boolean result, indicating whether the link matches
+ nr: matches the nth link that matches all other criteria (default 0)
+
+ """
+ try:
+ return self._filter_links(self._factory.links(), **kwds).next()
+ except StopIteration:
+ raise LinkNotFoundError()
+
+ def __getattr__(self, name):
+ # pass through ClientForm / DOMForm methods and attributes
+ form = self.__dict__.get("form")
+ if form is None:
+ raise AttributeError(
+ "%s instance has no attribute %s (perhaps you forgot to "
+ ".select_form()?)" % (self.__class__, name))
+ return getattr(form, name)
+
+#---------------------------------------------------
+# Private methods.
+
+ def _filter_links(self, links,
+ text=None, text_regex=None,
+ name=None, name_regex=None,
+ url=None, url_regex=None,
+ tag=None,
+ predicate=None,
+ nr=0
+ ):
+ if not self.viewing_html():
+ raise BrowserStateError("not viewing HTML")
+
+ found_links = []
+ orig_nr = nr
+
+ for link in links:
+ if url is not None and url != link.url:
+ continue
+ if url_regex is not None and not re.search(url_regex, link.url):
+ continue
+ if (text is not None and
+ (link.text is None or text != link.text)):
+ continue
+ if (text_regex is not None and
+ (link.text is None or not re.search(text_regex, link.text))):
+ continue
+ if name is not None and name != dict(link.attrs).get("name"):
+ continue
+ if name_regex is not None:
+ link_name = dict(link.attrs).get("name")
+ if link_name is None or not re.search(name_regex, link_name):
+ continue
+ if tag is not None and tag != link.tag:
+ continue
+ if predicate is not None and not predicate(link):
+ continue
+ if nr:
+ nr -= 1
+ continue
+ yield link
+ nr = orig_nr
Property changes on: Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_mechanize.py
___________________________________________________________________
Name: svn:keywords
+ Id
Name: svn:eol-style
+ native
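
A minimal, hedged sketch of how the Browser class added in _mechanize.py above
is typically driven; the URL, form index and control name are hypothetical
placeholders, and the package-level mechanize.Browser import is assumed:

import re
import mechanize

br = mechanize.Browser()
br.open("http://example.com/login")             # hypothetical URL
br.select_form(nr=0)                            # give the first form "input focus"
br.set_value("alice", name="user")              # HTMLForm method reached via __getattr__
response = br.submit()                          # equivalent to br.open(br.click())
link = br.find_link(text_regex=re.compile("log ?out"))   # hypothetical link text
response = br.follow_link(link)
print br.title(), br.geturl()
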
Added: Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_mozillacookiejar.py
===================================================================
--- Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_mozillacookiejar.py 2006-06-19 15:22:02 UTC (rev 68754)
+++ Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_mozillacookiejar.py 2006-06-19 15:38:18 UTC (rev 68755)
@@ -0,0 +1,160 @@
+"""Mozilla / Netscape cookie loading / saving.
+
+Copyright 2002-2006 John J Lee <jjl at pobox.com>
+Copyright 1997-1999 Gisle Aas (original libwww-perl code)
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the BSD or ZPL 2.1 licenses (see the file
+COPYING.txt included with the distribution).
+
+"""
+
+import re, string, time, logging
+
+from _clientcookie import reraise_unmasked_exceptions, FileCookieJar, Cookie, \
+ MISSING_FILENAME_TEXT, LoadError
+from _util import startswith, endswith
+debug = logging.getLogger("ClientCookie").debug
+
+
+class MozillaCookieJar(FileCookieJar):
+ """
+
+ WARNING: you may want to back up your browser's cookies file if you use
+ this class to save cookies. I *think* it works, but there have been
+ bugs in the past!
+
+ This class differs from CookieJar only in the format it uses to save and
+ load cookies to and from a file. This class uses the Mozilla/Netscape
+ `cookies.txt' format. lynx uses this file format, too.
+
+ Don't expect cookies saved while the browser is running to be noticed by
+ the browser (in fact, Mozilla on unix will overwrite your saved cookies if
+ you change them on disk while it's running; on Windows, you probably can't
+ save at all while the browser is running).
+
+ Note that the Mozilla/Netscape format will downgrade RFC2965 cookies to
+ Netscape cookies on saving.
+
+ In particular, the cookie version and port number information is lost,
+ together with information about whether or not Path, Port and Discard were
+ specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the
+ domain as set in the HTTP header started with a dot (yes, I'm aware some
+ domains in Netscape files start with a dot and some don't -- trust me, you
+ really don't want to know any more about this).
+
+ Note that though Mozilla and Netscape use the same format, they use
+ slightly different headers. The class saves cookies using the Netscape
+ header by default (Mozilla can cope with that).
+
+ """
+ magic_re = "#( Netscape)? HTTP Cookie File"
+ header = """\
+ # Netscape HTTP Cookie File
+ # http://www.netscape.com/newsref/std/cookie_spec.html
+ # This is a generated file! Do not edit.
+
+"""
+
+ def _really_load(self, f, filename, ignore_discard, ignore_expires):
+ now = time.time()
+
+ magic = f.readline()
+ if not re.search(self.magic_re, magic):
+ f.close()
+ raise LoadError(
+ "%s does not look like a Netscape format cookies file" %
+ filename)
+
+ try:
+ while 1:
+ line = f.readline()
+ if line == "": break
+
+ # last field may be absent, so keep any trailing tab
+ if endswith(line, "\n"): line = line[:-1]
+
+ # skip comments and blank lines XXX what is $ for?
+ if (startswith(string.strip(line), "#") or
+ startswith(string.strip(line), "$") or
+ string.strip(line) == ""):
+ continue
+
+ domain, domain_specified, path, secure, expires, name, value = \
+ string.split(line, "\t")
+ secure = (secure == "TRUE")
+ domain_specified = (domain_specified == "TRUE")
+ if name == "":
+ name = value
+ value = None
+
+ initial_dot = startswith(domain, ".")
+ assert domain_specified == initial_dot
+
+ discard = False
+ if expires == "":
+ expires = None
+ discard = True
+
+ # assume path_specified is false
+ c = Cookie(0, name, value,
+ None, False,
+ domain, domain_specified, initial_dot,
+ path, False,
+ secure,
+ expires,
+ discard,
+ None,
+ None,
+ {})
+ if not ignore_discard and c.discard:
+ continue
+ if not ignore_expires and c.is_expired(now):
+ continue
+ self.set_cookie(c)
+
+ except:
+ reraise_unmasked_exceptions((IOError,))
+ raise LoadError("invalid Netscape format file %s: %s" %
+ (filename, line))
+
+ def save(self, filename=None, ignore_discard=False, ignore_expires=False):
+ if filename is None:
+ if self.filename is not None: filename = self.filename
+ else: raise ValueError(MISSING_FILENAME_TEXT)
+
+ f = open(filename, "w")
+ try:
+ debug("Saving Netscape cookies.txt file")
+ f.write(self.header)
+ now = time.time()
+ for cookie in self:
+ if not ignore_discard and cookie.discard:
+ debug(" Not saving %s: marked for discard", cookie.name)
+ continue
+ if not ignore_expires and cookie.is_expired(now):
+ debug(" Not saving %s: expired", cookie.name)
+ continue
+ if cookie.secure: secure = "TRUE"
+ else: secure = "FALSE"
+ if startswith(cookie.domain, "."): initial_dot = "TRUE"
+ else: initial_dot = "FALSE"
+ if cookie.expires is not None:
+ expires = str(cookie.expires)
+ else:
+ expires = ""
+ if cookie.value is None:
+ # cookies.txt regards 'Set-Cookie: foo' as a cookie
+ # with no name, whereas cookielib regards it as a
+ # cookie with no value.
+ name = ""
+ value = cookie.name
+ else:
+ name = cookie.name
+ value = cookie.value
+ f.write(
+ string.join([cookie.domain, initial_dot, cookie.path,
+ secure, expires, name, value], "\t")+
+ "\n")
+ finally:
+ f.close()
Property changes on: Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_mozillacookiejar.py
___________________________________________________________________
Name: svn:keywords
+ Id
Name: svn:eol-style
+ native
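
A short, hedged sketch of round-tripping a Netscape/Mozilla cookies.txt file
with the MozillaCookieJar class above; the file names are placeholders and the
package-level import is assumed:

from mechanize import MozillaCookieJar

cj = MozillaCookieJar("cookies.txt")                  # hypothetical input file
cj.load(ignore_discard=True, ignore_expires=True)     # keep session/expired cookies too
for cookie in cj:
    print cookie.domain, cookie.name, cookie.value
# saving downgrades RFC 2965 cookies to Netscape format (see the class docstring)
cj.save("cookies-copy.txt", ignore_discard=True, ignore_expires=True)
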
Added: Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_msiecookiejar.py
===================================================================
--- Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_msiecookiejar.py 2006-06-19 15:22:02 UTC (rev 68754)
+++ Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_msiecookiejar.py 2006-06-19 15:38:18 UTC (rev 68755)
@@ -0,0 +1,388 @@
+"""Microsoft Internet Explorer cookie loading on Windows.
+
+Copyright 2002-2003 Johnny Lee <typo_pl at hotmail.com> (MSIE Perl code)
+Copyright 2002-2006 John J Lee <jjl at pobox.com> (The Python port)
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the BSD or ZPL 2.1 licenses (see the file
+COPYING.txt included with the distribution).
+
+"""
+
+# XXX names and comments are not great here
+
+import os, re, string, time, struct, logging
+if os.name == "nt":
+ import _winreg
+
+from _clientcookie import FileCookieJar, CookieJar, Cookie, \
+ MISSING_FILENAME_TEXT, LoadError
+from _util import startswith
+
+debug = logging.getLogger("mechanize").debug
+
+
+def regload(path, leaf):
+ key = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, path, 0,
+ _winreg.KEY_ALL_ACCESS)
+ try:
+ value = _winreg.QueryValueEx(key, leaf)[0]
+ except WindowsError:
+ value = None
+ return value
+
+WIN32_EPOCH = 0x019db1ded53e8000L # 1970 Jan 01 00:00:00 in Win32 FILETIME
+
+def epoch_time_offset_from_win32_filetime(filetime):
+ """Convert from win32 filetime to seconds-since-epoch value.
+
+ MSIE stores create and expire times as Win32 FILETIME, which is 64
+ bits of 100 nanosecond intervals since Jan 01 1601.
+
+ mechanize expects time in 32-bit value expressed in seconds since the
+ epoch (Jan 01 1970).
+
+ """
+ if filetime < WIN32_EPOCH:
+ raise ValueError("filetime (%d) is before epoch (%d)" %
+ (filetime, WIN32_EPOCH))
+
+ return divmod((filetime - WIN32_EPOCH), 10000000L)[0]
+
+def binary_to_char(c): return "%02X" % ord(c)
+def binary_to_str(d): return string.join(map(binary_to_char, list(d)), "")
+
+class MSIEBase:
+ magic_re = re.compile(r"Client UrlCache MMF Ver \d\.\d.*")
+ padding = "\x0d\xf0\xad\x0b"
+
+ msie_domain_re = re.compile(r"^([^/]+)(/.*)$")
+ cookie_re = re.compile("Cookie\:.+\@([\x21-\xFF]+).*?"
+ "(.+\@[\x21-\xFF]+\.txt)")
+
+ # path under HKEY_CURRENT_USER from which to get location of index.dat
+ reg_path = r"software\microsoft\windows" \
+ r"\currentversion\explorer\shell folders"
+ reg_key = "Cookies"
+
+ def __init__(self):
+ self._delayload_domains = {}
+
+ def _delayload_domain(self, domain):
+ # if necessary, lazily load cookies for this domain
+ delayload_info = self._delayload_domains.get(domain)
+ if delayload_info is not None:
+ cookie_file, ignore_discard, ignore_expires = delayload_info
+ try:
+ self.load_cookie_data(cookie_file,
+ ignore_discard, ignore_expires)
+ except (LoadError, IOError):
+ debug("error reading cookie file, skipping: %s", cookie_file)
+ else:
+ del self._delayload_domains[domain]
+
+ def _load_cookies_from_file(self, filename):
+ debug("Loading MSIE cookies file: %s", filename)
+ cookies = []
+
+ cookies_fh = open(filename)
+
+ try:
+ while 1:
+ key = cookies_fh.readline()
+ if key == "": break
+
+ rl = cookies_fh.readline
+ def getlong(rl=rl): return long(rl().rstrip())
+ def getstr(rl=rl): return rl().rstrip()
+
+ key = key.rstrip()
+ value = getstr()
+ domain_path = getstr()
+ flags = getlong() # 0x2000 bit is for secure I think
+ lo_expire = getlong()
+ hi_expire = getlong()
+ lo_create = getlong()
+ hi_create = getlong()
+ sep = getstr()
+
+ if "" in (key, value, domain_path, flags, hi_expire, lo_expire,
+ hi_create, lo_create, sep) or (sep != "*"):
+ break
+
+ m = self.msie_domain_re.search(domain_path)
+ if m:
+ domain = m.group(1)
+ path = m.group(2)
+
+ cookies.append({"KEY": key, "VALUE": value, "DOMAIN": domain,
+ "PATH": path, "FLAGS": flags, "HIXP": hi_expire,
+ "LOXP": lo_expire, "HICREATE": hi_create,
+ "LOCREATE": lo_create})
+ finally:
+ cookies_fh.close()
+
+ return cookies
+
+ def load_cookie_data(self, filename,
+ ignore_discard=False, ignore_expires=False):
+ """Load cookies from file containing actual cookie data.
+
+ Old cookies are kept unless overwritten by newly loaded ones.
+
+ You should not call this method if the delayload attribute is set.
+
+ I think each of these files contains all cookies for one user, domain,
+ and path.
+
+ filename: file containing cookies -- usually found in a file like
+ C:\WINNT\Profiles\joe\Cookies\joe at blah[1].txt
+
+ """
+ now = int(time.time())
+
+ cookie_data = self._load_cookies_from_file(filename)
+
+ for cookie in cookie_data:
+ flags = cookie["FLAGS"]
+ secure = ((flags & 0x2000) != 0)
+ filetime = (cookie["HIXP"] << 32) + cookie["LOXP"]
+ expires = epoch_time_offset_from_win32_filetime(filetime)
+ if expires < now:
+ discard = True
+ else:
+ discard = False
+ domain = cookie["DOMAIN"]
+ initial_dot = startswith(domain, ".")
+ if initial_dot:
+ domain_specified = True
+ else:
+ # MSIE 5 does not record whether the domain cookie-attribute
+ # was specified.
+ # Assuming it wasn't is conservative, because with strict
+ # domain matching this will match less frequently; with regular
+ # Netscape tail-matching, this will match at exactly the same
+ # times that domain_specified = True would. It also means we
+ # don't have to prepend a dot to achieve consistency with our
+ # own & Mozilla's domain-munging scheme.
+ domain_specified = False
+
+ # assume path_specified is false
+ # XXX is there other stuff in here? -- eg. comment, commentURL?
+ c = Cookie(0,
+ cookie["KEY"], cookie["VALUE"],
+ None, False,
+ domain, domain_specified, initial_dot,
+ cookie["PATH"], False,
+ secure,
+ expires,
+ discard,
+ None,
+ None,
+ {"flags": flags})
+ if not ignore_discard and c.discard:
+ continue
+ if not ignore_expires and c.is_expired(now):
+ continue
+ CookieJar.set_cookie(self, c)
+
+ def load_from_registry(self, ignore_discard=False, ignore_expires=False,
+ username=None):
+ """
+ username: only required on win9x
+
+ """
+ cookies_dir = regload(self.reg_path, self.reg_key)
+ filename = os.path.normpath(os.path.join(cookies_dir, "INDEX.DAT"))
+ self.load(filename, ignore_discard, ignore_expires, username)
+
+ def _really_load(self, index, filename, ignore_discard, ignore_expires,
+ username):
+ now = int(time.time())
+
+ if username is None:
+ username = string.lower(os.environ['USERNAME'])
+
+ cookie_dir = os.path.dirname(filename)
+
+ data = index.read(256)
+ if len(data) != 256:
+ raise LoadError("%s file is too short" % filename)
+
+ # Cookies' index.dat file starts with 32 bytes of signature
+ # followed by an offset to the first record, stored as a little-
+ # endian DWORD.
+ sig, size, data = data[:32], data[32:36], data[36:]
+ size = struct.unpack("<L", size)[0]
+
+ # check that sig is valid
+ if not self.magic_re.match(sig) or size != 0x4000:
+ raise LoadError("%s ['%s' %s] does not seem to contain cookies" %
+ (str(filename), sig, size))
+
+ # skip to start of first record
+ index.seek(size, 0)
+
+ sector = 128 # size of sector in bytes
+
+ while 1:
+ data = ""
+
+ # Cookies are usually in two contiguous sectors, so read in two
+ # sectors and adjust if not a Cookie.
+ to_read = 2 * sector
+ d = index.read(to_read)
+ if len(d) != to_read:
+ break
+ data = data + d
+
+ # Each record starts with a 4-byte signature and a count
+ # (little-endian DWORD) of sectors for the record.
+ sig, size, data = data[:4], data[4:8], data[8:]
+ size = struct.unpack("<L", size)[0]
+
+ to_read = (size - 2) * sector
+
+## from urllib import quote
+## print "data", quote(data)
+## print "sig", quote(sig)
+## print "size in sectors", size
+## print "size in bytes", size*sector
+## print "size in units of 16 bytes", (size*sector) / 16
+## print "size to read in bytes", to_read
+## print
+
+ if sig != "URL ":
+ assert sig in ("HASH", "LEAK",
+ self.padding, "\x00\x00\x00\x00"), \
+ "unrecognized MSIE index.dat record: %s" % \
+ binary_to_str(sig)
+ if sig == "\x00\x00\x00\x00":
+ # assume we've got all the cookies, and stop
+ break
+ if sig == self.padding:
+ continue
+ # skip the rest of this record
+ assert to_read >= 0
+ if size != 2:
+ assert to_read != 0
+ index.seek(to_read, 1)
+ continue
+
+ # read in rest of record if necessary
+ if size > 2:
+ more_data = index.read(to_read)
+ if len(more_data) != to_read: break
+ data = data + more_data
+
+ cookie_re = ("Cookie\:%s\@([\x21-\xFF]+).*?" % username +
+ "(%s\@[\x21-\xFF]+\.txt)" % username)
+ m = re.search(cookie_re, data, re.I)
+ if m:
+ cookie_file = os.path.join(cookie_dir, m.group(2))
+ if not self.delayload:
+ try:
+ self.load_cookie_data(cookie_file,
+ ignore_discard, ignore_expires)
+ except (LoadError, IOError):
+ debug("error reading cookie file, skipping: %s",
+ cookie_file)
+ else:
+ domain = m.group(1)
+ i = domain.find("/")
+ if i != -1:
+ domain = domain[:i]
+
+ self._delayload_domains[domain] = (
+ cookie_file, ignore_discard, ignore_expires)
+
+
+class MSIECookieJar(MSIEBase, FileCookieJar):
+ """FileCookieJar that reads from the Windows MSIE cookies database.
+
+ MSIECookieJar can read the cookie files of Microsoft Internet Explorer
+ (MSIE) for Windows version 5 on Windows NT and version 6 on Windows XP and
+ Windows 98. Other configurations may also work, but are untested. Saving
+ cookies in MSIE format is NOT supported. If you save cookies, they'll be
+ in the usual Set-Cookie3 format, which you can read back in using an
+ instance of the plain old CookieJar class. Don't save using the same
+ filename that you loaded cookies from, because you may succeed in
+ clobbering your MSIE cookies index file!
+
+ You should be able to have mechanize share Internet Explorer's cookies
+ like this (note you need to supply a username to load_from_registry if
+ you're on Windows 9x or Windows ME):
+
+ cj = MSIECookieJar(delayload=1)
+ # find cookies index file in registry and load cookies from it
+ cj.load_from_registry()
+ opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cj))
+ response = opener.open("http://example.com/")
+
+ Iterating over a delayloaded MSIECookieJar instance will not cause any
+ cookies to be read from disk. To force reading of all cookies from disk,
+ call read_all_cookies. Note that the following methods iterate over self:
+ clear_temporary_cookies, clear_expired_cookies, __len__, __repr__, __str__
+ and as_string.
+
+ Additional methods:
+
+ load_from_registry(ignore_discard=False, ignore_expires=False,
+ username=None)
+ load_cookie_data(filename, ignore_discard=False, ignore_expires=False)
+ read_all_cookies()
+
+ """
+ def __init__(self, filename=None, delayload=False, policy=None):
+ MSIEBase.__init__(self)
+ FileCookieJar.__init__(self, filename, delayload, policy)
+
+ def set_cookie(self, cookie):
+ if self.delayload:
+ self._delayload_domain(cookie.domain)
+ CookieJar.set_cookie(self, cookie)
+
+ def _cookies_for_request(self, request):
+ """Return a list of cookies to be returned to server."""
+ domains = self._cookies.copy()
+ domains.update(self._delayload_domains)
+ domains = domains.keys()
+
+ cookies = []
+ for domain in domains:
+ cookies.extend(self._cookies_for_domain(domain, request))
+ return cookies
+
+ def _cookies_for_domain(self, domain, request):
+ if not self._policy.domain_return_ok(domain, request):
+ return []
+ debug("Checking %s for cookies to return", domain)
+ if self.delayload:
+ self._delayload_domain(domain)
+ return CookieJar._cookies_for_domain(self, domain, request)
+
+ def read_all_cookies(self):
+ """Eagerly read in all cookies."""
+ if self.delayload:
+ for domain in self._delayload_domains.keys():
+ self._delayload_domain(domain)
+
+ def load(self, filename, ignore_discard=False, ignore_expires=False,
+ username=None):
+ """Load cookies from an MSIE 'index.dat' cookies index file.
+
+ filename: full path to cookie index file
+ username: only required on win9x
+
+ """
+ if filename is None:
+ if self.filename is not None: filename = self.filename
+ else: raise ValueError(MISSING_FILENAME_TEXT)
+
+ index = open(filename, "rb")
+
+ try:
+ self._really_load(index, filename, ignore_discard, ignore_expires,
+ username)
+ finally:
+ index.close()
Property changes on: Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_msiecookiejar.py
___________________________________________________________________
Name: svn:keywords
+ Id
Name: svn:eol-style
+ native
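
To make the FILETIME arithmetic in epoch_time_offset_from_win32_filetime above
concrete, here is a small self-contained worked example (an illustration, not
part of the commit): FILETIME counts 100-nanosecond ticks since 1601-01-01, so
subtracting the 1970 offset and dividing by 10**7 gives seconds since the Unix
epoch.

WIN32_EPOCH = 0x019db1ded53e8000L      # 1970-01-01 00:00:00 expressed as a FILETIME

def filetime_to_unix(filetime):
    # 10**7 hundred-nanosecond ticks per second
    return (filetime - WIN32_EPOCH) // 10 ** 7

assert filetime_to_unix(WIN32_EPOCH) == 0
# one day after the Unix epoch, expressed as a FILETIME
assert filetime_to_unix(WIN32_EPOCH + 86400 * 10 ** 7) == 86400
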
Added: Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_opener.py
===================================================================
--- Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_opener.py 2006-06-19 15:22:02 UTC (rev 68754)
+++ Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_opener.py 2006-06-19 15:38:18 UTC (rev 68755)
@@ -0,0 +1,274 @@
+"""Integration with Python standard library module urllib2: OpenerDirector
+class.
+
+Copyright 2004-2006 John J Lee <jjl at pobox.com>
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the BSD or ZPL 2.1 licenses (see the file
+COPYING.txt included with the distribution).
+
+"""
+
+import os, tempfile, urllib2, string, bisect, urlparse
+from urllib import url2pathname
+
+from _util import startswith, isstringlike
+from _request import Request
+
+try:
+ set
+except NameError:
+ import sets
+ set = sets.Set
+
+def methnames(obj):
+ """Return method names of class instance.
+
+ dir(obj) doesn't work across Python versions; this does.
+
+ """
+ return methnames_of_instance_as_dict(obj).keys()
+
+def methnames_of_instance_as_dict(inst):
+ names = {}
+ names.update(methnames_of_class_as_dict(inst.__class__))
+ for methname in dir(inst):
+ try:
+ candidate = getattr(inst, methname)
+ except AttributeError:
+ continue
+ if callable(candidate):
+ names[methname] = None
+ return names
+
+def methnames_of_class_as_dict(klass):
+ names = {}
+ for methname in dir(klass):
+ try:
+ candidate = getattr(klass, methname)
+ except AttributeError:
+ continue
+ if callable(candidate):
+ names[methname] = None
+ for baseclass in klass.__bases__:
+ names.update(methnames_of_class_as_dict(baseclass))
+ return names
+
+
+class OpenerDirector(urllib2.OpenerDirector):
+ def __init__(self):
+ urllib2.OpenerDirector.__init__(self)
+ # really none of these are (sanely) public -- the lack of initial
+ # underscore on some is just due to following urllib2
+ self.process_response = {}
+ self.process_request = {}
+ self._any_request = {}
+ self._any_response = {}
+ self._handler_index_valid = True
+
+ def add_handler(self, handler):
+ if handler in self.handlers:
+ return
+ # XXX why does self.handlers need to be sorted?
+ bisect.insort(self.handlers, handler)
+ handler.add_parent(self)
+ self._handler_index_valid = False
+
+ def _maybe_reindex_handlers(self):
+ if self._handler_index_valid:
+ return
+
+ handle_error = {}
+ handle_open = {}
+ process_request = {}
+ process_response = {}
+ any_request = set()
+ any_response = set()
+ unwanted = []
+
+ for handler in self.handlers:
+ added = False
+ for meth in methnames(handler):
+ if meth in ["redirect_request", "do_open", "proxy_open"]:
+ # oops, coincidental match
+ continue
+
+ if meth == "any_request":
+ any_request.add(handler)
+ added = True
+ continue
+ elif meth == "any_response":
+ any_response.add(handler)
+ added = True
+ continue
+
+ ii = meth.find("_")
+ scheme = meth[:ii]
+ condition = meth[ii+1:]
+
+ if startswith(condition, "error"):
+ jj = string.find(meth[ii+1:], "_") + ii + 1
+ kind = meth[jj+1:]
+ try:
+ kind = int(kind)
+ except ValueError:
+ pass
+ lookup = handle_error.setdefault(scheme, {})
+ elif condition == "open":
+ kind = scheme
+ lookup = handle_open
+ elif condition == "request":
+ kind = scheme
+ lookup = process_request
+ elif condition == "response":
+ kind = scheme
+ lookup = process_response
+ else:
+ continue
+
+ lookup.setdefault(kind, set()).add(handler)
+ added = True
+
+ if not added:
+ unwanted.append(handler)
+
+ for handler in unwanted:
+ self.handlers.remove(handler)
+
+ # sort indexed methods
+ # XXX could be cleaned up
+ for lookup in [process_request, process_response]:
+ for scheme, handlers in lookup.iteritems():
+ lookup[scheme] = handlers
+ for scheme, lookup in handle_error.iteritems():
+ for code, handlers in lookup.iteritems():
+ handlers = list(handlers)
+ handlers.sort()
+ lookup[code] = handlers
+ for scheme, handlers in handle_open.iteritems():
+ handlers = list(handlers)
+ handlers.sort()
+ handle_open[scheme] = handlers
+
+ # cache the indexes
+ self.handle_error = handle_error
+ self.handle_open = handle_open
+ self.process_request = process_request
+ self.process_response = process_response
+ self._any_request = any_request
+ self._any_response = any_response
+
+ def _request(self, url_or_req, data):
+ if isstringlike(url_or_req):
+ req = Request(url_or_req, data)
+ else:
+ # already a urllib2.Request or mechanize.Request instance
+ req = url_or_req
+ if data is not None:
+ req.add_data(data)
+ return req
+
+ def open(self, fullurl, data=None):
+ req = self._request(fullurl, data)
+ req_scheme = req.get_type()
+
+ self._maybe_reindex_handlers()
+
+ # pre-process request
+ # XXX should we allow a Processor to change the URL scheme
+ # of the request?
+ request_processors = set(self.process_request.get(req_scheme, []))
+ request_processors.update(self._any_request)
+ request_processors = list(request_processors)
+ request_processors.sort()
+ for processor in request_processors:
+ for meth_name in ["any_request", req_scheme+"_request"]:
+ meth = getattr(processor, meth_name, None)
+ if meth:
+ req = meth(req)
+
+ # In Python >= 2.4, .open() supports processors already, so we must
+ # call ._open() instead.
+ urlopen = getattr(urllib2.OpenerDirector, "_open",
+ urllib2.OpenerDirector.open)
+# import pdb;pdb.set_trace()
+ response = urlopen(self, req, data)
+
+ # post-process response
+ response_processors = set(self.process_response.get(req_scheme, []))
+ response_processors.update(self._any_response)
+ response_processors = list(response_processors)
+ response_processors.sort()
+ for processor in response_processors:
+ for meth_name in ["any_response", req_scheme+"_response"]:
+ meth = getattr(processor, meth_name, None)
+ if meth:
+ response = meth(req, response)
+
+ return response
+
+ def error(self, proto, *args):
+ if proto in ['http', 'https']:
+ # XXX http[s] protocols are special-cased
+ dict = self.handle_error['http'] # https is no different from http here
+ proto = args[2] # YUCK!
+ meth_name = 'http_error_%s' % proto
+ http_err = 1
+ orig_args = args
+ else:
+ dict = self.handle_error
+ meth_name = proto + '_error'
+ http_err = 0
+ args = (dict, proto, meth_name) + args
+ result = apply(self._call_chain, args)
+ if result:
+ return result
+
+ if http_err:
+ args = (dict, 'default', 'http_error_default') + orig_args
+ return apply(self._call_chain, args)
+
+ def retrieve(self, fullurl, filename=None, reporthook=None, data=None):
+ """Returns (filename, headers).
+
+ For remote objects, the default filename will refer to a temporary
+ file.
+
+ """
+ req = self._request(fullurl, data)
+ type_ = req.get_type()
+ fp = self.open(req)
+ headers = fp.info()
+ if filename is None and type_ == 'file':
+ return url2pathname(req.get_selector()), headers
+ if filename:
+ tfp = open(filename, 'wb')
+ else:
+ path = urlparse.urlparse(fullurl)[2]
+ suffix = os.path.splitext(path)[1]
+ tfp = tempfile.TemporaryFile("wb", suffix=suffix)
+ result = filename, headers
+ bs = 1024*8
+ size = -1
+ read = 0
+ blocknum = 1
+ if reporthook:
+ if headers.has_key("content-length"):
+ size = int(headers["Content-Length"])
+ reporthook(0, bs, size)
+ while 1:
+ block = fp.read(bs)
+ read += len(block)
+ if reporthook:
+ reporthook(blocknum, bs, size)
+ blocknum = blocknum + 1
+ if not block:
+ break
+ tfp.write(block)
+ fp.close()
+ tfp.close()
+ del fp
+ del tfp
+ if size>=0 and read<size:
+ raise IOError("incomplete retrieval error",
+ "got only %d bytes out of %d" % (read,size))
+ return result
Property changes on: Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_opener.py
___________________________________________________________________
Name: svn:keywords
+ Id
Name: svn:eol-style
+ native
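
The OpenerDirector subclass above indexes handlers purely by method name
(<scheme>_open, <scheme>_request, <scheme>_response, <scheme>_error_<code>,
plus the any_request/any_response hooks).  A rough sketch of a processor that
relies on that convention follows; the handler_order value is an illustrative
assumption and the package-level mechanize.OpenerDirector export is assumed:

import urllib2
import mechanize

class LoggingProcessor(urllib2.BaseHandler):
    # picked up via the any_request/any_response hooks during reindexing
    handler_order = 900               # run late, after most other processors

    def any_request(self, request):
        print "request: ", request.get_full_url()
        return request

    def any_response(self, request, response):
        print "response:", response.geturl()
        return response

opener = mechanize.OpenerDirector()
opener.add_handler(urllib2.HTTPHandler())   # supplies http_open
opener.add_handler(LoggingProcessor())
# opener.open("http://example.com/") would now log the transaction
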
Added: Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_pullparser.py
===================================================================
--- Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_pullparser.py 2006-06-19 15:22:02 UTC (rev 68754)
+++ Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_pullparser.py 2006-06-19 15:38:18 UTC (rev 68755)
@@ -0,0 +1,334 @@
+"""A simple "pull API" for HTML parsing, after Perl's HTML::TokeParser.
+
+Examples
+
+This program extracts all links from a document. It will print one
+line for each link, containing the URL and the textual description
+between the <A>...</A> tags:
+
+import pullparser, sys
+f = file(sys.argv[1])
+p = pullparser.PullParser(f)
+for token in p.tags("a"):
+ if token.type == "endtag": continue
+ url = dict(token.attrs).get("href", "-")
+ text = p.get_compressed_text(endat=("endtag", "a"))
+ print "%s\t%s" % (url, text)
+
+This program extracts the <TITLE> from the document:
+
+import pullparser, sys
+f = file(sys.argv[1])
+p = pullparser.PullParser(f)
+if p.get_tag("title"):
+ title = p.get_compressed_text()
+ print "Title: %s" % title
+
+
+Copyright 2003-2006 John J. Lee <jjl at pobox.com>
+Copyright 1998-2001 Gisle Aas (original libwww-perl code)
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the BSD or ZPL 2.1 licenses.
+
+"""
+
+import re, htmlentitydefs
+import sgmllib, HTMLParser
+
+from _html import unescape, unescape_charref
+
+
+class NoMoreTokensError(Exception): pass
+
+class Token:
+ """Represents an HTML tag, declaration, processing instruction etc.
+
+ Behaves both as a tuple-like object (ie. iterable) and as an object with
+ .type, .data and .attrs attributes.
+
+ >>> t = Token("starttag", "a", [("href", "http://www.python.org/")])
+ >>> t == ("starttag", "a", [("href", "http://www.python.org/")])
+ True
+ >>> (t.type, t.data) == ("starttag", "a")
+ True
+ >>> t.attrs == [("href", "http://www.python.org/")]
+ True
+
+ Public attributes
+
+ type: one of "starttag", "endtag", "startendtag", "charref", "entityref",
+ "data", "comment", "decl", "pi", after the corresponding methods of
+ HTMLParser.HTMLParser
+ data: For a tag, the tag name; otherwise, the relevant data carried by the
+ tag, as a string
+ attrs: list of (name, value) pairs representing HTML attributes
+ (or None if token does not represent an opening tag)
+
+ """
+ def __init__(self, type, data, attrs=None):
+ self.type = type
+ self.data = data
+ self.attrs = attrs
+ def __iter__(self):
+ return iter((self.type, self.data, self.attrs))
+ def __eq__(self, other):
+ type, data, attrs = other
+ if (self.type == type and
+ self.data == data and
+ self.attrs == attrs):
+ return True
+ else:
+ return False
+ def __ne__(self, other): return not self.__eq__(other)
+ def __repr__(self):
+ args = ", ".join(map(repr, [self.type, self.data, self.attrs]))
+ return self.__class__.__name__+"(%s)" % args
+
+def iter_until_exception(fn, exception, *args, **kwds):
+ while 1:
+ try:
+ yield fn(*args, **kwds)
+ except exception:
+ raise StopIteration
+
+
+class _AbstractParser:
+ chunk = 1024
+ compress_re = re.compile(r"\s+")
+ def __init__(self, fh, textify={"img": "alt", "applet": "alt"},
+ encoding="ascii", entitydefs=None):
+ """
+ fh: file-like object (only a .read() method is required) from which to
+ read HTML to be parsed
+ textify: mapping used by .get_text() and .get_compressed_text() methods
+ to represent opening tags as text
+ encoding: encoding used to encode numeric character references by
+ .get_text() and .get_compressed_text() ("ascii" by default)
+
+ entitydefs: mapping like {"amp": "&", ...} containing HTML entity
+ definitions (a sensible default is used). This is used to unescape
+ entities in .get_text() (and .get_compressed_text()) and attribute
+ values. If the encoding can not represent the character, the entity
+ reference is left unescaped. Note that entity references (both
+ numeric - e.g. &#123; or &#xabc; - and non-numeric - e.g. &amp;) are
+ unescaped in attribute values and the return value of .get_text(), but
+ not in data outside of tags. Instead, entity references outside of
+ tags are represented as tokens. This is a bit odd, it's true :-/
+
+ If the element name of an opening tag matches a key in the textify
+ mapping then that tag is converted to text. The corresponding value is
+ used to specify which tag attribute to obtain the text from. textify
+ maps from element names to either:
+
+ - an HTML attribute name, in which case the HTML attribute value is
+ used as its text value along with the element name in square
+ brackets (eg."alt text goes here[IMG]", or, if the alt attribute
+ were missing, just "[IMG]")
+ - a callable object (eg. a function) which takes a Token and returns
+ the string to be used as its text value
+
+ If textify has no key for an element name, nothing is substituted for
+ the opening tag.
+
+ Public attributes:
+
+ encoding and textify: see above
+
+ """
+ self._fh = fh
+ self._tokenstack = [] # FIFO
+ self.textify = textify
+ self.encoding = encoding
+ if entitydefs is None:
+ entitydefs = htmlentitydefs.name2codepoint
+ self._entitydefs = entitydefs
+
+ def __iter__(self): return self
+
+ def tags(self, *names):
+ return iter_until_exception(self.get_tag, NoMoreTokensError, *names)
+
+ def tokens(self, *tokentypes):
+ return iter_until_exception(self.get_token, NoMoreTokensError, *tokentypes)
+
+ def next(self):
+ try:
+ return self.get_token()
+ except NoMoreTokensError:
+ raise StopIteration()
+
+ def get_token(self, *tokentypes):
+ """Pop the next Token object from the stack of parsed tokens.
+
+ If arguments are given, they are taken to be token types in which the
+ caller is interested: tokens representing other elements will be
+ skipped. Element names must be given in lower case.
+
+ Raises NoMoreTokensError.
+
+ """
+ while 1:
+ while self._tokenstack:
+ token = self._tokenstack.pop(0)
+ if tokentypes:
+ if token.type in tokentypes:
+ return token
+ else:
+ return token
+ data = self._fh.read(self.chunk)
+ if not data:
+ raise NoMoreTokensError()
+ self.feed(data)
+
+ def unget_token(self, token):
+ """Push a Token back onto the stack."""
+ self._tokenstack.insert(0, token)
+
+ def get_tag(self, *names):
+ """Return the next Token that represents an opening or closing tag.
+
+ If arguments are given, they are taken to be element names in which the
+ caller is interested: tags representing other elements will be skipped.
+ Element names must be given in lower case.
+
+ Raises NoMoreTokensError.
+
+ """
+ while 1:
+ tok = self.get_token()
+ if tok.type not in ["starttag", "endtag", "startendtag"]:
+ continue
+ if names:
+ if tok.data in names:
+ return tok
+ else:
+ return tok
+
+ def get_text(self, endat=None):
+ """Get some text.
+
+ endat: stop reading text at this tag (the tag is included in the
+ returned text); endtag is a tuple (type, name) where type is
+ "starttag", "endtag" or "startendtag", and name is the element name of
+ the tag (element names must be given in lower case)
+
+ If endat is not given, .get_text() will stop at the next opening or
+ closing tag, or when there are no more tokens (no exception is raised).
+ Note that .get_text() includes the text representation (if any) of the
+ opening tag, but pushes the opening tag back onto the stack. As a
+ result, if you want to call .get_text() again, you need to call
+ .get_tag() first (unless you want an empty string returned when you
+ next call .get_text()).
+
+ Entity references are translated using the value of the entitydefs
+ constructor argument (a mapping from names to characters like that
+ provided by the standard module htmlentitydefs). Named entity
+ references that are not in this mapping are left unchanged.
+
+ The textify attribute is used to translate opening tags into text: see
+ the class docstring.
+
+ """
+ text = []
+ tok = None
+ while 1:
+ try:
+ tok = self.get_token()
+ except NoMoreTokensError:
+ # unget last token (not the one we just failed to get)
+ if tok: self.unget_token(tok)
+ break
+ if tok.type == "data":
+ text.append(tok.data)
+ elif tok.type == "entityref":
+ t = unescape("&%s;"%tok.data, self._entitydefs, self.encoding)
+ text.append(t)
+ elif tok.type == "charref":
+ t = unescape_charref(tok.data, self.encoding)
+ text.append(t)
+ elif tok.type in ["starttag", "endtag", "startendtag"]:
+ tag_name = tok.data
+ if tok.type in ["starttag", "startendtag"]:
+ alt = self.textify.get(tag_name)
+ if alt is not None:
+ if callable(alt):
+ text.append(alt(tok))
+ elif tok.attrs is not None:
+ for k, v in tok.attrs:
+ if k == alt:
+ text.append(v)
+ text.append("[%s]" % tag_name.upper())
+ if endat is None or endat == (tok.type, tag_name):
+ self.unget_token(tok)
+ break
+ return "".join(text)
+
+ def get_compressed_text(self, *args, **kwds):
+ """
+ As .get_text(), but collapses each group of contiguous whitespace to a
+ single space character, and removes all initial and trailing
+ whitespace.
+
+ """
+ text = self.get_text(*args, **kwds)
+ text = text.strip()
+ return self.compress_re.sub(" ", text)
+
+ def handle_startendtag(self, tag, attrs):
+ self._tokenstack.append(Token("startendtag", tag, attrs))
+ def handle_starttag(self, tag, attrs):
+ self._tokenstack.append(Token("starttag", tag, attrs))
+ def handle_endtag(self, tag):
+ self._tokenstack.append(Token("endtag", tag))
+ def handle_charref(self, name):
+ self._tokenstack.append(Token("charref", name))
+ def handle_entityref(self, name):
+ self._tokenstack.append(Token("entityref", name))
+ def handle_data(self, data):
+ self._tokenstack.append(Token("data", data))
+ def handle_comment(self, data):
+ self._tokenstack.append(Token("comment", data))
+ def handle_decl(self, decl):
+ self._tokenstack.append(Token("decl", decl))
+ def unknown_decl(self, data):
+ # XXX should this call self.error instead?
+ #self.error("unknown declaration: " + `data`)
+ self._tokenstack.append(Token("decl", data))
+ def handle_pi(self, data):
+ self._tokenstack.append(Token("pi", data))
+
+ def unescape_attr(self, name):
+ return unescape(name, self._entitydefs, self.encoding)
+ def unescape_attrs(self, attrs):
+ escaped_attrs = []
+ for key, val in attrs:
+ escaped_attrs.append((key, self.unescape_attr(val)))
+ return escaped_attrs
+
+class PullParser(_AbstractParser, HTMLParser.HTMLParser):
+ def __init__(self, *args, **kwds):
+ HTMLParser.HTMLParser.__init__(self)
+ _AbstractParser.__init__(self, *args, **kwds)
+ def unescape(self, name):
+ # Use the entitydefs passed into constructor, not
+ # HTMLParser.HTMLParser's entitydefs.
+ return self.unescape_attr(name)
+
+class TolerantPullParser(_AbstractParser, sgmllib.SGMLParser):
+ def __init__(self, *args, **kwds):
+ sgmllib.SGMLParser.__init__(self)
+ _AbstractParser.__init__(self, *args, **kwds)
+ def unknown_starttag(self, tag, attrs):
+ attrs = self.unescape_attrs(attrs)
+ self._tokenstack.append(Token("starttag", tag, attrs))
+ def unknown_endtag(self, tag):
+ self._tokenstack.append(Token("endtag", tag))
+
+
+def _test():
+ import doctest, _pullparser
+ return doctest.testmod(_pullparser)
+
+if __name__ == "__main__":
+ _test()
Property changes on: Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_pullparser.py
___________________________________________________________________
Name: svn:keywords
+ Id
Name: svn:eol-style
+ native
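
A small, hedged illustration of the textify behaviour documented in
_AbstractParser.__init__ above, fed from an in-memory string rather than a
file (the HTML snippet is made up):

from StringIO import StringIO
from mechanize._pullparser import PullParser

html = '<html><body><a href="/x">see <img alt="the chart" src="c.png"></a></body></html>'
p = PullParser(StringIO(html))
p.get_tag("a")                           # skip ahead to the opening <a>
# the <img> start tag is textified via its alt attribute
print p.get_compressed_text(endat=("endtag", "a"))   # prints: see the chart[IMG]
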
Added: Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_request.py
===================================================================
--- Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_request.py 2006-06-19 15:22:02 UTC (rev 68754)
+++ Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_request.py 2006-06-19 15:38:18 UTC (rev 68755)
@@ -0,0 +1,68 @@
+"""Integration with Python standard library module urllib2: Request class.
+
+Copyright 2004-2006 John J Lee <jjl at pobox.com>
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the BSD or ZPL 2.1 licenses (see the file
+COPYING.txt included with the distribution).
+
+"""
+
+import urllib2, string
+
+from _clientcookie import request_host
+
+
+class Request(urllib2.Request):
+ def __init__(self, url, data=None, headers={},
+ origin_req_host=None, unverifiable=False):
+ urllib2.Request.__init__(self, url, data, headers)
+ self.unredirected_hdrs = {}
+
+ # All the terminology below comes from RFC 2965.
+ self.unverifiable = unverifiable
+ # Set request-host of origin transaction.
+ # The origin request-host is needed in order to decide whether
+ # unverifiable sub-requests (automatic redirects, images embedded
+ # in HTML, etc.) are to third-party hosts. If they are, the
+ # resulting transactions might need to be conducted with cookies
+ # turned off.
+ if origin_req_host is None:
+ origin_req_host = request_host(self)
+ self.origin_req_host = origin_req_host
+
+ def get_origin_req_host(self):
+ return self.origin_req_host
+
+ def is_unverifiable(self):
+ return self.unverifiable
+
+ def add_unredirected_header(self, key, val):
+ """Add a header that will not be added to a redirected request."""
+ self.unredirected_hdrs[string.capitalize(key)] = val
+
+ def has_header(self, header_name):
+ """True iff request has named header (regular or unredirected)."""
+ if (self.headers.has_key(header_name) or
+ self.unredirected_hdrs.has_key(header_name)):
+ return True
+ return False
+
+ def get_header(self, header_name, default=None):
+ return self.headers.get(
+ header_name,
+ self.unredirected_hdrs.get(header_name, default))
+
+ def header_items(self):
+ hdrs = self.unredirected_hdrs.copy()
+ hdrs.update(self.headers)
+ return hdrs.items()
+
+ def __str__(self):
+ return "<Request for %s>" % self.get_full_url()
+
+ def get_method(self):
+ if self.has_data():
+ return "POST"
+ else:
+ return "GET"
Property changes on: Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_request.py
___________________________________________________________________
Name: svn:keywords
+ Id
Name: svn:eol-style
+ native
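
A brief, hedged sketch of the Request additions above (unredirected headers and
header lookup); the URL, data and header values are placeholders, and the
package-level mechanize.Request export is assumed:

from mechanize import Request

req = Request("http://example.com/search", data="q=mechanize")  # POST, since data is given
req.add_header("Accept", "text/html")
# unredirected headers are not copied onto requests created by a redirect
req.add_unredirected_header("Authorization", "Basic placeholder")
print req.get_method()                  # "POST"
print req.has_header("Authorization")   # True
print sorted(req.header_items())        # merged regular + unredirected headers
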
Added: Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_urllib2.py
===================================================================
--- Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_urllib2.py 2006-06-19 15:22:02 UTC (rev 68754)
+++ Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_urllib2.py 2006-06-19 15:38:18 UTC (rev 68755)
@@ -0,0 +1,53 @@
+# urllib2 work-alike interface
+# ...from urllib2...
+from urllib2 import \
+ URLError, \
+ HTTPError, \
+ GopherError, \
+ HTTPPasswordMgr, \
+ HTTPPasswordMgrWithDefaultRealm, \
+ AbstractBasicAuthHandler, \
+ AbstractDigestAuthHandler
+# ...and from mechanize
+from _opener import OpenerDirector
+from _auth import \
+ HTTPProxyPasswordMgr, \
+ ProxyHandler, \
+ ProxyBasicAuthHandler, \
+ ProxyDigestAuthHandler, \
+ HTTPBasicAuthHandler, \
+ HTTPDigestAuthHandler
+from _urllib2_support import \
+ Request, \
+ build_opener, install_opener, urlopen, \
+ OpenerFactory, urlretrieve, \
+ RobotExclusionError
+
+# handlers...
+# ...from urllib2...
+from urllib2 import \
+ BaseHandler, \
+ HTTPDefaultErrorHandler, \
+ UnknownHandler, \
+ FTPHandler, \
+ CacheFTPHandler, \
+ FileHandler, \
+ GopherHandler
+# ...and from mechanize
+from _urllib2_support import \
+ HTTPHandler, \
+ HTTPRedirectHandler, \
+ HTTPRequestUpgradeProcessor, \
+ HTTPEquivProcessor, \
+ SeekableProcessor, \
+ HTTPCookieProcessor, \
+ HTTPRefererProcessor, \
+ HTTPRefreshProcessor, \
+ HTTPErrorProcessor, \
+ HTTPResponseDebugProcessor, \
+ HTTPRedirectDebugProcessor, \
+ HTTPRobotRulesProcessor
+import httplib
+if hasattr(httplib, 'HTTPS'):
+ from _urllib2_support import HTTPSHandler
+del httplib
Property changes on: Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_urllib2.py
___________________________________________________________________
Name: svn:keywords
+ Id
Name: svn:eol-style
+ native
Added: Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_urllib2_support.py
===================================================================
--- Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_urllib2_support.py 2006-06-19 15:22:02 UTC (rev 68754)
+++ Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_urllib2_support.py 2006-06-19 15:38:18 UTC (rev 68755)
@@ -0,0 +1,718 @@
+"""Integration with Python standard library module urllib2.
+
+Also includes a redirection bugfix, support for parsing HTML HEAD blocks for
+the META HTTP-EQUIV tag contents, and following Refresh header redirects.
+
+Copyright 2002-2006 John J Lee <jjl at pobox.com>
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the BSD or ZPL 2.1 licenses (see the file
+COPYING.txt included with the distribution).
+
+"""
+
+import copy, time, tempfile, htmlentitydefs, re, logging, types, \
+ string, socket, urlparse, urllib2, urllib, httplib, sgmllib
+from urllib2 import URLError, HTTPError, BaseHandler
+from cStringIO import StringIO
+try:
+ import threading as _threading
+except ImportError:
+ import dummy_threading as _threading
+
+import _opener
+from _request import Request
+from _util import isstringlike, startswith, \
+ getheaders, closeable_response, response_seek_wrapper
+from _html import unescape, unescape_charref
+from _headersutil import is_html
+from _clientcookie import CookieJar, request_host
+
+debug = logging.getLogger("mechanize.cookies").debug
+
+
+CHUNK = 1024 # size of chunks fed to HTML HEAD parser, in bytes
+DEFAULT_ENCODING = 'latin-1'
+
+
+# This fixes a bug in urllib2 as of Python 2.1.3 and 2.2.2
+# (http://www.python.org/sf/549151)
+# 2.2.3 is broken here (my fault!), 2.3 is fixed.
+class HTTPRedirectHandler(BaseHandler):
+ # maximum number of redirections to any single URL
+ # this is needed because of the state that cookies introduce
+ max_repeats = 4
+ # maximum total number of redirections (regardless of URL) before
+ # assuming we're in a loop
+ max_redirections = 10
+
+ # Implementation notes:
+
+ # To avoid the server sending us into an infinite loop, the request
+ # object needs to track what URLs we have already seen. Do this by
+ # adding a handler-specific attribute to the Request object. The value
+ # of the dict is used to count the number of times the same URL has
+ # been visited. This is needed because visiting the same URL twice
+ # does not necessarily imply a loop, thanks to state introduced by
+ # cookies.
+
+ # Always unhandled redirection codes:
+ # 300 Multiple Choices: should not handle this here.
+ # 304 Not Modified: no need to handle here: only of interest to caches
+ # that do conditional GETs
+ # 305 Use Proxy: probably not worth dealing with here
+ # 306 Unused: what was this for in the previous versions of protocol??
+
+ def redirect_request(self, newurl, req, fp, code, msg, headers):
+ """Return a Request or None in response to a redirect.
+
+ This is called by the http_error_30x methods when a redirection
+ response is received. If a redirection should take place, return a
+ new Request to allow http_error_30x to perform the redirect;
+ otherwise, return None to indicate that an HTTPError should be
+ raised.
+
+ """
+ if code in (301, 302, 303, "refresh") or \
+ (code == 307 and not req.has_data()):
+ # Strictly (according to RFC 2616), 301 or 302 in response to
+ # a POST MUST NOT cause a redirection without confirmation
+ # from the user (of urllib2, in this case). In practice,
+ # essentially all clients do redirect in this case, so we do
+ # the same.
+ return Request(newurl,
+ headers=req.headers,
+ origin_req_host=req.get_origin_req_host(),
+ unverifiable=True)
+ else:
+ raise HTTPError(req.get_full_url(), code, msg, headers, fp)
+
+ def http_error_302(self, req, fp, code, msg, headers):
+ # Some servers (incorrectly) return multiple Location headers
+ # (so probably same goes for URI). Use first header.
+ if headers.has_key('location'):
+ newurl = getheaders(headers, 'location')[0]
+ elif headers.has_key('uri'):
+ newurl = getheaders(headers, 'uri')[0]
+ else:
+ return
+ newurl = urlparse.urljoin(req.get_full_url(), newurl)
+
+ # XXX Probably want to forget about the state of the current
+ # request, although that might interact poorly with other
+ # handlers that also use handler-specific request attributes
+ new = self.redirect_request(newurl, req, fp, code, msg, headers)
+ if new is None:
+ return
+
+ # loop detection
+ # .redirect_dict has a key url if url was previously visited.
+ if hasattr(req, 'redirect_dict'):
+ visited = new.redirect_dict = req.redirect_dict
+ if (visited.get(newurl, 0) >= self.max_repeats or
+ len(visited) >= self.max_redirections):
+ raise HTTPError(req.get_full_url(), code,
+ self.inf_msg + msg, headers, fp)
+ else:
+ visited = new.redirect_dict = req.redirect_dict = {}
+ visited[newurl] = visited.get(newurl, 0) + 1
+
+ # Don't close the fp until we are sure that we won't use it
+ # with HTTPError.
+ fp.read()
+ fp.close()
+
+ return self.parent.open(new)
+
+ http_error_301 = http_error_303 = http_error_307 = http_error_302
+ http_error_refresh = http_error_302
+
+ inf_msg = "The HTTP server returned a redirect error that would " \
+ "lead to an infinite loop.\n" \
+ "The last 30x error message was:\n"
+
+
+class HTTPRequestUpgradeProcessor(BaseHandler):
+ # upgrade urllib2.Request to this module's Request
+ # yuck!
+ handler_order = 0 # before anything else
+
+ def http_request(self, request):
+ if not hasattr(request, "add_unredirected_header"):
+ newrequest = Request(request._Request__original, request.data,
+ request.headers)
+ try: newrequest.origin_req_host = request.origin_req_host
+ except AttributeError: pass
+ try: newrequest.unverifiable = request.unverifiable
+ except AttributeError: pass
+ request = newrequest
+ return request
+
+ https_request = http_request
+
+# XXX would self.reset() work, instead of raising this exception?
+class EndOfHeadError(Exception): pass
+class AbstractHeadParser:
+ # only these elements are allowed in or before HEAD of document
+ head_elems = ("html", "head",
+ "title", "base",
+ "script", "style", "meta", "link", "object")
+ _entitydefs = htmlentitydefs.name2codepoint
+ _encoding = DEFAULT_ENCODING
+
+ def __init__(self):
+ self.http_equiv = []
+
+ def start_meta(self, attrs):
+ http_equiv = content = None
+ for key, value in attrs:
+ if key == "http-equiv":
+ http_equiv = self.unescape_attr_if_required(value)
+ elif key == "content":
+ content = self.unescape_attr_if_required(value)
+ if http_equiv is not None:
+ self.http_equiv.append((http_equiv, content))
+
+ def end_head(self):
+ raise EndOfHeadError()
+
+ def handle_entityref(self, name):
+ #debug("%s", name)
+ self.handle_data(unescape(
+ '&%s;' % name, self._entitydefs, self._encoding))
+
+ def handle_charref(self, name):
+ #debug("%s", name)
+ self.handle_data(unescape_charref(name, self._encoding))
+
+ def unescape_attr(self, name):
+ #debug("%s", name)
+ return unescape(name, self._entitydefs, self._encoding)
+
+ def unescape_attrs(self, attrs):
+ #debug("%s", attrs)
+ escaped_attrs = {}
+ for key, val in attrs.items():
+ escaped_attrs[key] = self.unescape_attr(val)
+ return escaped_attrs
+
+ def unknown_entityref(self, ref):
+ self.handle_data("&%s;" % ref)
+
+ def unknown_charref(self, ref):
+ self.handle_data("&#%s;" % ref)
+
+
+try:
+ import HTMLParser
+except ImportError:
+ pass
+else:
+ class XHTMLCompatibleHeadParser(AbstractHeadParser,
+ HTMLParser.HTMLParser):
+ def __init__(self):
+ HTMLParser.HTMLParser.__init__(self)
+ AbstractHeadParser.__init__(self)
+
+ def handle_starttag(self, tag, attrs):
+ if tag not in self.head_elems:
+ raise EndOfHeadError()
+ try:
+ method = getattr(self, 'start_' + tag)
+ except AttributeError:
+ try:
+ method = getattr(self, 'do_' + tag)
+ except AttributeError:
+ pass # unknown tag
+ else:
+ method(attrs)
+ else:
+ method(attrs)
+
+ def handle_endtag(self, tag):
+ if tag not in self.head_elems:
+ raise EndOfHeadError()
+ try:
+ method = getattr(self, 'end_' + tag)
+ except AttributeError:
+ pass # unknown tag
+ else:
+ method()
+
+ def unescape(self, name):
+ # Use the entitydefs passed into constructor, not
+ # HTMLParser.HTMLParser's entitydefs.
+ return self.unescape_attr(name)
+
+ def unescape_attr_if_required(self, name):
+ return name # HTMLParser.HTMLParser already did it
+
+class HeadParser(AbstractHeadParser, sgmllib.SGMLParser):
+
+ def _not_called(self):
+ assert False
+
+ def __init__(self):
+ sgmllib.SGMLParser.__init__(self)
+ AbstractHeadParser.__init__(self)
+
+ def handle_starttag(self, tag, method, attrs):
+ if tag not in self.head_elems:
+ raise EndOfHeadError()
+ if tag == "meta":
+ method(attrs)
+
+ def unknown_starttag(self, tag, attrs):
+ self.handle_starttag(tag, self._not_called, attrs)
+
+ def handle_endtag(self, tag, method):
+ if tag in self.head_elems:
+ method()
+ else:
+ raise EndOfHeadError()
+
+ def unescape_attr_if_required(self, name):
+ return self.unescape_attr(name)
+
+def parse_head(fileobj, parser):
+ """Return a list of key, value pairs."""
+ while 1:
+ data = fileobj.read(CHUNK)
+ try:
+ parser.feed(data)
+ except EndOfHeadError:
+ break
+ if len(data) != CHUNK:
+ # this should only happen if there is no HTML body, or if
+ # CHUNK is big
+ break
+ return parser.http_equiv
+
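A minimal sketch of the head-parsing machinery above, assuming it runs at
module level so that CHUNK, HeadParser and parse_head are all in scope:

    # Extract http-equiv pseudo-headers from a small HTML document; a
    # StringIO stands in for the response object.
    from StringIO import StringIO

    html = StringIO('<html><head><meta http-equiv="Refresh" '
                    'content="5; url=http://example.com/next">'
                    '</head><body>hello</body></html>')
    print parse_head(html, HeadParser())
    # -> [('Refresh', '5; url=http://example.com/next')]
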
+class HTTPEquivProcessor(BaseHandler):
+ """Append META HTTP-EQUIV headers to regular HTTP headers."""
+
+ handler_order = 300 # before handlers that look at HTTP headers
+
+ def __init__(self, head_parser_class=HeadParser,
+ i_want_broken_xhtml_support=False,
+ ):
+ self.head_parser_class = head_parser_class
+ self._allow_xhtml = i_want_broken_xhtml_support
+
+ def http_response(self, request, response):
+ if not hasattr(response, "seek"):
+ response = response_seek_wrapper(response)
+ headers = response.info()
+ url = response.geturl()
+ ct_hdrs = getheaders(response.info(), "content-type")
+ if is_html(ct_hdrs, url, self._allow_xhtml):
+ try:
+ try:
+ html_headers = parse_head(response, self.head_parser_class())
+ finally:
+ response.seek(0)
+ except (HTMLParser.HTMLParseError,
+ sgmllib.SGMLParseError):
+ pass
+ else:
+ for hdr, val in html_headers:
+ # rfc822.Message interprets this as appending, not clobbering
+ headers[hdr] = val
+ return response
+
+ https_response = http_response
+
+class SeekableProcessor(BaseHandler):
+ """Make responses seekable."""
+
+ def any_response(self, request, response):
+ if not hasattr(response, "seek"):
+ return response_seek_wrapper(response)
+ return response
+
+class HTTPCookieProcessor(BaseHandler):
+ """Handle HTTP cookies.
+
+ Public attributes:
+
+ cookiejar: CookieJar instance
+
+ """
+ def __init__(self, cookiejar=None):
+ if cookiejar is None:
+ cookiejar = CookieJar()
+ self.cookiejar = cookiejar
+
+ def http_request(self, request):
+ self.cookiejar.add_cookie_header(request)
+ return request
+
+ def http_response(self, request, response):
+ self.cookiejar.extract_cookies(response, request)
+ return response
+
+ https_request = http_request
+ https_response = http_response
+
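A usage sketch, assuming CookieJar, HTTPCookieProcessor and build_opener are
re-exported by the mechanize package:

    # Share one cookie jar across all requests made through the opener.
    import mechanize

    jar = mechanize.CookieJar()
    opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(jar))
    response = opener.open("http://example.com/")
    print "%d cookie(s) captured" % len(jar)
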
+try:
+ import robotparser
+except ImportError:
+ pass
+else:
+ class RobotExclusionError(urllib2.HTTPError):
+ def __init__(self, request, *args):
+ apply(urllib2.HTTPError.__init__, (self,)+args)
+ self.request = request
+
+ class HTTPRobotRulesProcessor(BaseHandler):
+ # before redirections, after everything else
+ handler_order = 800
+
+ try:
+ from httplib import HTTPMessage
+ except:
+ from mimetools import Message
+ http_response_class = Message
+ else:
+ http_response_class = HTTPMessage
+
+ def __init__(self, rfp_class=robotparser.RobotFileParser):
+ self.rfp_class = rfp_class
+ self.rfp = None
+ self._host = None
+
+ def http_request(self, request):
+ host = request.get_host()
+ scheme = request.get_type()
+ if host != self._host:
+ self.rfp = self.rfp_class()
+ self.rfp.set_url(scheme+"://"+host+"/robots.txt")
+ self.rfp.read()
+ self._host = host
+
+ ua = request.get_header("User-agent", "")
+ if self.rfp.can_fetch(ua, request.get_full_url()):
+ return request
+ else:
+ msg = "request disallowed by robots.txt"
+ raise RobotExclusionError(
+ request,
+ request.get_full_url(),
+ 403, msg,
+ self.http_response_class(StringIO()), StringIO(msg))
+
+ https_request = http_request
+
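A sketch of how robots.txt enforcement surfaces to callers (names assumed to
be re-exported by the mechanize package; the URL is hypothetical):

    import mechanize

    opener = mechanize.build_opener(mechanize.HTTPRobotRulesProcessor)
    try:
        opener.open("http://example.com/disallowed-by-robots/")
    except mechanize.RobotExclusionError, exc:
        # RobotExclusionError is an HTTPError carrying the blocked request
        print exc.code, exc.request.get_full_url()
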
+class HTTPRefererProcessor(BaseHandler):
+ """Add Referer header to requests.
+
+ This only makes sense if each HTTPRefererProcessor is used for a
+ single chain of requests (so, for example, if you use one
+ HTTPRefererProcessor to fetch a series of URLs extracted from a single
+ page, this will break).
+
+ There's a proper implementation of this in module mechanize.
+
+ """
+ def __init__(self):
+ self.referer = None
+
+ def http_request(self, request):
+ if ((self.referer is not None) and
+ not request.has_header("Referer")):
+ request.add_unredirected_header("Referer", self.referer)
+ return request
+
+ def http_response(self, request, response):
+ self.referer = response.geturl()
+ return response
+
+ https_request = http_request
+ https_response = http_response
+
+class HTTPResponseDebugProcessor(BaseHandler):
+ handler_order = 900 # before redirections, after everything else
+
+ def http_response(self, request, response):
+ if not hasattr(response, "seek"):
+ response = response_seek_wrapper(response)
+ info = getLogger("mechanize.http_responses").info
+ try:
+ info(response.read())
+ finally:
+ response.seek(0)
+ info("*****************************************************")
+ return response
+
+ https_response = http_response
+
+class HTTPRedirectDebugProcessor(BaseHandler):
+ def http_request(self, request):
+ if hasattr(request, "redirect_dict"):
+ info = getLogger("mechanize.http_redirects").info
+ info("redirecting to %s", request.get_full_url())
+ return request
+
+class HTTPRefreshProcessor(BaseHandler):
+ """Perform HTTP Refresh redirections.
+
+ Note that if a non-200 HTTP code has occurred (for example, a 30x
+ redirect), this processor will do nothing.
+
+ By default, only zero-time Refresh headers are redirected. Use the
+ max_time attribute / constructor argument to allow Refresh with longer
+ pauses. Use the honor_time attribute / constructor argument to control
+ whether the requested pause is honoured (with a time.sleep()) or
+ skipped in favour of immediate redirection.
+
+ Public attributes:
+
+ max_time: see above
+ honor_time: see above
+
+ """
+ handler_order = 1000
+
+ def __init__(self, max_time=0, honor_time=True):
+ self.max_time = max_time
+ self.honor_time = honor_time
+
+ def http_response(self, request, response):
+ code, msg, hdrs = response.code, response.msg, response.info()
+
+ if code == 200 and hdrs.has_key("refresh"):
+ refresh = getheaders(hdrs, "refresh")[0]
+ ii = string.find(refresh, ";")
+ if ii != -1:
+ pause, newurl_spec = float(refresh[:ii]), refresh[ii+1:]
+ jj = string.find(newurl_spec, "=")
+ if jj != -1:
+ key, newurl = newurl_spec[:jj], newurl_spec[jj+1:]
+ if key.strip().lower() != "url":
+ debug("bad Refresh header: %r" % refresh)
+ return response
+ else:
+ pause, newurl = float(refresh), response.geturl()
+ if (self.max_time is None) or (pause <= self.max_time):
+ if pause > 1E-3 and self.honor_time:
+ time.sleep(pause)
+ hdrs["location"] = newurl
+ # hardcoded http is NOT a bug
+ response = self.parent.error(
+ "http", request, response,
+ "refresh", msg, hdrs)
+
+ return response
+
+ https_response = http_response
+
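A configuration sketch (names assumed to be re-exported by the mechanize
package):

    # Follow Refresh headers that request a pause of up to ten seconds,
    # but skip the pause itself rather than sleeping.
    import mechanize

    opener = mechanize.build_opener(
        mechanize.HTTPRefreshProcessor(max_time=10, honor_time=False))
    response = opener.open("http://example.com/")
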
+class HTTPErrorProcessor(BaseHandler):
+ """Process HTTP error responses.
+
+ The purpose of this handler is to allow other response processors a
+ look-in by removing the call to parent.error() from
+ AbstractHTTPHandler.
+
+ For non-200 error codes, this just passes the job on to the
+ Handler.<proto>_error_<code> methods, via the OpenerDirector.error
+ method. Eventually, urllib2.HTTPDefaultErrorHandler will raise an
+ HTTPError if no other handler handles the error.
+
+ """
+ handler_order = 1000 # after all other processors
+
+ def http_response(self, request, response):
+ code, msg, hdrs = response.code, response.msg, response.info()
+
+ if code != 200:
+ # hardcoded http is NOT a bug
+ response = self.parent.error(
+ "http", request, response, code, msg, hdrs)
+
+ return response
+
+ https_response = http_response
+
+
+class AbstractHTTPHandler(BaseHandler):
+
+ def __init__(self, debuglevel=0):
+ self._debuglevel = debuglevel
+
+ def set_http_debuglevel(self, level):
+ self._debuglevel = level
+
+ def do_request_(self, request):
+ host = request.get_host()
+ if not host:
+ raise URLError('no host given')
+
+ if request.has_data(): # POST
+ data = request.get_data()
+ if not request.has_header('Content-type'):
+ request.add_unredirected_header(
+ 'Content-type',
+ 'application/x-www-form-urlencoded')
+
+ scheme, sel = urllib.splittype(request.get_selector())
+ sel_host, sel_path = urllib.splithost(sel)
+ if not request.has_header('Host'):
+ request.add_unredirected_header('Host', sel_host or host)
+ for name, value in self.parent.addheaders:
+ name = string.capitalize(name)
+ if not request.has_header(name):
+ request.add_unredirected_header(name, value)
+
+ return request
+
+ def do_open(self, http_class, req):
+ """Return an addinfourl object for the request, using http_class.
+
+ http_class must implement the HTTPConnection API from httplib.
+ The addinfourl return value is a file-like object. It also
+ has methods and attributes including:
+ - info(): return a mimetools.Message object for the headers
+ - geturl(): return the original request URL
+ - code: HTTP status code
+ """
+ host = req.get_host()
+ if not host:
+ raise URLError('no host given')
+
+ h = http_class(host) # will parse host:port
+ h.set_debuglevel(self._debuglevel)
+
+ headers = req.headers.copy()
+ headers.update(req.unredirected_hdrs)
+ # We want to make an HTTP/1.1 request, but the addinfourl
+ # class isn't prepared to deal with a persistent connection.
+ # It will try to read all remaining data from the socket,
+ # which will block while the server waits for the next request.
+ # So make sure the connection gets closed after the (only)
+ # request.
+ headers["Connection"] = "close"
+ try:
+ h.request(req.get_method(), req.get_selector(), req.data, headers)
+ r = h.getresponse()
+ except socket.error, err: # XXX what error?
+ raise URLError(err)
+
+ # Pick apart the HTTPResponse object to get the addinfourl
+ # object initialized properly.
+
+ # Wrap the HTTPResponse object in socket's file object adapter
+ # for Windows. That adapter calls recv(), so delegate recv()
+ # to read(). This weird wrapping allows the returned object to
+ # have readline() and readlines() methods.
+
+ # XXX It might be better to extract the read buffering code
+ # out of socket._fileobject() and into a base class.
+
+ r.recv = r.read
+ fp = socket._fileobject(r, 'rb', -1)
+
+ resp = closeable_response(fp, r.msg, req.get_full_url(),
+ r.status, r.reason)
+ return resp
+
+
+class HTTPHandler(AbstractHTTPHandler):
+ def http_open(self, req):
+ return self.do_open(httplib.HTTPConnection, req)
+
+ http_request = AbstractHTTPHandler.do_request_
+
+if hasattr(httplib, 'HTTPS'):
+ class HTTPSHandler(AbstractHTTPHandler):
+ def https_open(self, req):
+ return self.do_open(httplib.HTTPSConnection, req)
+
+ https_request = AbstractHTTPHandler.do_request_
+
+class OpenerFactory:
+ """This class's interface is quite likely to change."""
+
+ default_classes = [
+ # handlers
+ urllib2.ProxyHandler,
+ urllib2.UnknownHandler,
+ HTTPHandler, # from this module (derived from new AbstractHTTPHandler)
+ urllib2.HTTPDefaultErrorHandler,
+ HTTPRedirectHandler, # from this module (bugfixed)
+ urllib2.FTPHandler,
+ urllib2.FileHandler,
+ # processors
+ HTTPRequestUpgradeProcessor,
+ HTTPCookieProcessor,
+ HTTPErrorProcessor
+ ]
+ handlers = []
+ replacement_handlers = []
+
+ def __init__(self, klass=_opener.OpenerDirector):
+ self.klass = klass
+
+ def build_opener(self, *handlers):
+ """Create an opener object from a list of handlers and processors.
+
+ The opener will use several default handlers and processors, including
+ support for HTTP and FTP.
+
+ If any of the handlers passed as arguments are subclasses of the
+ default handlers, the default handlers will not be used.
+
+ """
+ opener = self.klass()
+ default_classes = list(self.default_classes)
+ if hasattr(httplib, 'HTTPS'):
+ default_classes.append(HTTPSHandler)
+ skip = []
+ for klass in default_classes:
+ for check in handlers:
+ if type(check) == types.ClassType:
+ if issubclass(check, klass):
+ skip.append(klass)
+ elif type(check) == types.InstanceType:
+ if isinstance(check, klass):
+ skip.append(klass)
+ for klass in skip:
+ default_classes.remove(klass)
+
+ for klass in default_classes:
+ opener.add_handler(klass())
+ for h in handlers:
+ if type(h) == types.ClassType:
+ h = h()
+ opener.add_handler(h)
+
+ return opener
+
+build_opener = OpenerFactory().build_opener
+
+_opener = None
+urlopen_lock = _threading.Lock()
+def urlopen(url, data=None):
+ global _opener
+ if _opener is None:
+ urlopen_lock.acquire()
+ try:
+ if _opener is None:
+ _opener = build_opener()
+ finally:
+ urlopen_lock.release()
+ return _opener.open(url, data)
+
+def urlretrieve(url, filename=None, reporthook=None, data=None):
+ global _opener
+ if _opener is None:
+ urlopen_lock.acquire()
+ try:
+ if _opener is None:
+ _opener = build_opener()
+ finally:
+ urlopen_lock.release()
+ return _opener.retrieve(url, filename, reporthook, data)
+
+def install_opener(opener):
+ global _opener
+ _opener = opener
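A sketch of the module-level convenience functions above, mirroring the
urllib2 convention (names assumed to be re-exported by the mechanize package):

    import mechanize

    # Install a customised global opener so that urlopen() uses it.
    opener = mechanize.build_opener(mechanize.HTTPEquivProcessor(),
                                    mechanize.HTTPRefreshProcessor())
    mechanize.install_opener(opener)
    data = mechanize.urlopen("http://example.com/").read()
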
Property changes on: Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_urllib2_support.py
___________________________________________________________________
Name: svn:keywords
+ Id
Name: svn:eol-style
+ native
Added: Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_useragent.py
===================================================================
--- Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_useragent.py 2006-06-19 15:22:02 UTC (rev 68754)
+++ Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_useragent.py 2006-06-19 15:38:18 UTC (rev 68755)
@@ -0,0 +1,323 @@
+"""Convenient HTTP UserAgent class.
+
+This is a subclass of urllib2.OpenerDirector.
+
+
+Copyright 2003-2006 John J. Lee <jjl at pobox.com>
+
+This code is free software; you can redistribute it and/or modify it under
+the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
+included with the distribution).
+
+"""
+
+import sys, warnings, urllib2
+
+from _opener import OpenerDirector
+
+import _urllib2
+import _auth
+import _gzip
+
+
+class HTTPRefererProcessor(_urllib2.BaseHandler):
+ def http_request(self, request):
+ # See RFC 2616 14.36. The only times we know the source of the
+ # request URI has a URI associated with it are redirect, and
+ # Browser.click() / Browser.submit() / Browser.follow_link().
+ # Otherwise, it's the user's job to add any Referer header before
+ # .open()ing.
+ if hasattr(request, "redirect_dict"):
+ request = self.parent._add_referer_header(
+ request, origin_request=False)
+ return request
+
+ https_request = http_request
+
+
+class UserAgent(OpenerDirector):
+ """Convenient user-agent class.
+
+ Do not use .add_handler() to add a handler for something already dealt with
+ by this code.
+
+ Public attributes:
+
+ addheaders: list of (name, value) pairs specifying headers to send with
+ every request, unless they are overridden in the Request instance.
+
+ >>> ua = UserAgent()
+ >>> ua.addheaders = [
+ ... ("User-agent", "Mozilla/5.0 (compatible)"),
+ ... ("From", "responsible.person at example.com")]
+
+ """
+
+ handler_classes = {
+ # scheme handlers
+ "http": _urllib2.HTTPHandler,
+ # CacheFTPHandler is buggy, at least in 2.3, so we don't use it
+ "ftp": _urllib2.FTPHandler,
+ "file": _urllib2.FileHandler,
+ "gopher": _urllib2.GopherHandler,
+
+ # other handlers
+ "_unknown": _urllib2.UnknownHandler,
+ # HTTP{S,}Handler depend on HTTPErrorProcessor too
+ "_http_error": _urllib2.HTTPErrorProcessor,
+ "_http_request_upgrade": _urllib2.HTTPRequestUpgradeProcessor,
+ "_http_default_error": _urllib2.HTTPDefaultErrorHandler,
+
+ # feature handlers
+ "_basicauth": _urllib2.HTTPBasicAuthHandler,
+ "_digestauth": _urllib2.HTTPDigestAuthHandler,
+ "_redirect": _urllib2.HTTPRedirectHandler,
+ "_cookies": _urllib2.HTTPCookieProcessor,
+ "_refresh": _urllib2.HTTPRefreshProcessor,
+ "_referer": HTTPRefererProcessor, # from this module, note
+ "_equiv": _urllib2.HTTPEquivProcessor,
+ "_seek": _urllib2.SeekableProcessor,
+ "_proxy": _urllib2.ProxyHandler,
+ "_proxy_basicauth": _urllib2.ProxyBasicAuthHandler,
+ "_proxy_digestauth": _urllib2.ProxyDigestAuthHandler,
+ "_robots": _urllib2.HTTPRobotRulesProcessor,
+ "_gzip": _gzip.HTTPGzipProcessor, # experimental!
+
+ # debug handlers
+ "_debug_redirect": _urllib2.HTTPRedirectDebugProcessor,
+ "_debug_response_body": _urllib2.HTTPResponseDebugProcessor,
+ }
+
+ default_schemes = ["http", "ftp", "file", "gopher"]
+ default_others = ["_unknown", "_http_error", "_http_request_upgrade",
+ "_http_default_error",
+ ]
+ default_features = ["_redirect", "_cookies", "_referer",
+ "_refresh", "_equiv",
+ "_basicauth", "_digestauth",
+ "_proxy", "_proxy_basicauth", "_proxy_digestauth",
+ "_seek", "_robots",
+ ]
+ if hasattr(_urllib2, 'HTTPSHandler'):
+ handler_classes["https"] = _urllib2.HTTPSHandler
+ default_schemes.append("https")
+
+ def __init__(self):
+ OpenerDirector.__init__(self)
+
+ ua_handlers = self._ua_handlers = {}
+ for scheme in (self.default_schemes+
+ self.default_others+
+ self.default_features):
+ klass = self.handler_classes[scheme]
+ ua_handlers[scheme] = klass()
+ for handler in ua_handlers.itervalues():
+ self.add_handler(handler)
+
+ # Yuck.
+ # Ensure correct default constructor args were passed to
+ # HTTPRefererProcessor and HTTPEquivProcessor.
+ if "_refresh" in ua_handlers:
+ self.set_handle_refresh(True)
+ if "_equiv" in ua_handlers:
+ self.set_handle_equiv(True)
+ # Ensure default password managers are installed.
+ pm = ppm = None
+ if "_basicauth" in ua_handlers or "_digestauth" in ua_handlers:
+ pm = _urllib2.HTTPPasswordMgrWithDefaultRealm()
+ if ("_proxy_basicauth" in ua_handlers or
+ "_proxy_digestauth" in ua_handlers):
+ ppm = _auth.HTTPProxyPasswordMgr()
+ self.set_password_manager(pm)
+ self.set_proxy_password_manager(ppm)
+
+ # special case, requires extra support from mechanize.Browser
+ self._handle_referer = True
+
+ def close(self):
+ OpenerDirector.close(self)
+ self._ua_handlers = None
+
+ # XXX
+## def set_timeout(self, timeout):
+## self._timeout = timeout
+## def set_http_connection_cache(self, conn_cache):
+## self._http_conn_cache = conn_cache
+## def set_ftp_connection_cache(self, conn_cache):
+## # XXX ATM, FTP has cache as part of handler; should it be separate?
+## self._ftp_conn_cache = conn_cache
+
+ def set_handled_schemes(self, schemes):
+ """Set sequence of URL scheme (protocol) strings.
+
+ For example: ua.set_handled_schemes(["http", "ftp"])
+
+ If this fails (with ValueError) because you've passed an unknown
+ scheme, the set of handled schemes will not be changed.
+
+ """
+ want = {}
+ for scheme in schemes:
+ if scheme.startswith("_"):
+ raise ValueError("not a scheme '%s'" % scheme)
+ if scheme not in self.handler_classes:
+ raise ValueError("unknown scheme '%s'")
+ want[scheme] = None
+
+ # get rid of scheme handlers we don't want
+ for scheme, oldhandler in self._ua_handlers.items():
+ if scheme.startswith("_"): continue # not a scheme handler
+ if scheme not in want:
+ self._replace_handler(scheme, None)
+ else:
+ del want[scheme] # already got it
+ # add the scheme handlers that are missing
+ for scheme in want.keys():
+ self._set_handler(scheme, True)
+
+ def _add_referer_header(self, request, origin_request=True):
+ raise NotImplementedError(
+ "this class can't do HTTP Referer: use mechanize.Browser instead")
+
+ def set_cookiejar(self, cookiejar):
+ """Set a mechanize.CookieJar, or None."""
+ self._set_handler("_cookies", obj=cookiejar)
+
+ # XXX could use Greg Stein's httpx for some of this instead?
+ # or httplib2??
+ def set_proxies(self, proxies):
+ """Set a dictionary mapping URL scheme to proxy specification, or None.
+
+ e.g. {"http": "joe:password at myproxy.example.com:3128",
+ "ftp": "proxy.example.com"}
+
+ """
+ self._set_handler("_proxy", obj=proxies)
+
+ def add_password(self, url, user, password, realm=None):
+ self._password_manager.add_password(realm, url, user, password)
+ def add_proxy_password(self, user, password, hostport=None, realm=None):
+ self._proxy_password_manager.add_password(
+ realm, hostport, user, password)
+
+ # the following are rarely useful -- use add_password / add_proxy_password
+ # instead
+ def set_password_manager(self, password_manager):
+ """Set a mechanize.HTTPPasswordMgrWithDefaultRealm, or None."""
+ self._password_manager = password_manager
+ self._set_handler("_basicauth", obj=password_manager)
+ self._set_handler("_digestauth", obj=password_manager)
+ def set_proxy_password_manager(self, password_manager):
+ """Set a mechanize.HTTPProxyPasswordMgr, or None."""
+ self._proxy_password_manager = password_manager
+ self._set_handler("_proxy_basicauth", obj=password_manager)
+ self._set_handler("_proxy_digestauth", obj=password_manager)
+
+ # these methods all take a boolean parameter
+ def set_handle_robots(self, handle):
+ """Set whether to observe rules from robots.txt."""
+ self._set_handler("_robots", handle)
+ def set_handle_redirect(self, handle):
+ """Set whether to handle HTTP 30x redirections."""
+ self._set_handler("_redirect", handle)
+ def set_handle_refresh(self, handle, max_time=None, honor_time=True):
+ """Set whether to handle HTTP Refresh headers."""
+ self._set_handler("_refresh", handle, constructor_kwds=
+ {"max_time": max_time, "honor_time": honor_time})
+ def set_handle_equiv(self, handle, head_parser_class=None):
+ """Set whether to treat HTML http-equiv headers like HTTP headers.
+
+ Response objects will be .seek()able if this is set.
+
+ """
+ if head_parser_class is not None:
+ constructor_kwds = {"head_parser_class": head_parser_class}
+ else:
+ constructor_kwds={}
+ self._set_handler("_equiv", handle, constructor_kwds=constructor_kwds)
+ def set_handle_referer(self, handle):
+ """Set whether to add Referer header to each request.
+
+ This base class does not implement this feature (so don't turn this on
+ if you're using this base class directly), but the subclass
+ mechanize.Browser does.
+
+ """
+ self._set_handler("_referer", handle)
+ self._handle_referer = bool(handle)
+ def set_handle_gzip(self, handle):
+ """Handle gzip transfer encoding.
+
+ """
+ if handle:
+ warnings.warn(
+ "gzip transfer encoding is experimental!", stacklevel=2)
+ self._set_handler("_gzip", handle)
+ def set_debug_redirects(self, handle):
+ """Log information about HTTP redirects (including refreshes).
+
+ Logging is performed using module logging. The logger name is
+ "mechanize.http_redirects". To actually print some debug output,
+ eg:
+
+ import sys, logging
+ logger = logging.getLogger("mechanize.http_redirects")
+ logger.addHandler(logging.StreamHandler(sys.stdout))
+ logger.setLevel(logging.INFO)
+
+ Other logger names relevant to this module:
+
+ "mechanize.http_responses"
+ "mechanize.cookies" (or "cookielib" if running Python 2.4)
+
+ To turn on everything:
+
+ import sys, logging
+ logger = logging.getLogger("mechanize")
+ logger.addHandler(logging.StreamHandler(sys.stdout))
+ logger.setLevel(logging.INFO)
+
+ """
+ self._set_handler("_debug_redirect", handle)
+ def set_debug_responses(self, handle):
+ """Log HTTP response bodies.
+
+ See docstring for .set_debug_redirects() for details of logging.
+
+ """
+ self._set_handler("_debug_response_body", handle)
+ def set_debug_http(self, handle):
+ """Print HTTP headers to sys.stdout."""
+ level = int(bool(handle))
+ for scheme in "http", "https":
+ h = self._ua_handlers.get(scheme)
+ if h is not None:
+ h.set_http_debuglevel(level)
+
+ def _set_handler(self, name, handle=None, obj=None,
+ constructor_args=(), constructor_kwds={}):
+ if handle is None:
+ handle = obj is not None
+ if handle:
+ handler_class = self.handler_classes[name]
+ if obj is not None:
+ newhandler = handler_class(obj)
+ else:
+ newhandler = handler_class(*constructor_args, **constructor_kwds)
+ else:
+ newhandler = None
+ self._replace_handler(name, newhandler)
+
+ def _replace_handler(self, name, newhandler=None):
+ # first, if handler was previously added, remove it
+ if name is not None:
+ handler = self._ua_handlers.get(name)
+ if handler:
+ try:
+ self.handlers.remove(handler)
+ except ValueError:
+ pass
+ # then add the replacement, if any
+ if newhandler is not None:
+ self.add_handler(newhandler)
+ self._ua_handlers[name] = newhandler
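A typical configuration sketch using only the methods defined above
(UserAgent assumed importable from the mechanize package; "https" is only
available when httplib was built with SSL support):

    import mechanize

    ua = mechanize.UserAgent()
    ua.set_handled_schemes(["http", "https"])
    ua.set_handle_robots(False)              # don't consult robots.txt
    ua.set_handle_refresh(True, max_time=5)  # follow quick Refresh headers
    ua.set_proxies({"http": "proxy.example.com:3128"})
    ua.add_password("http://example.com/admin/", "joe", "secret")
    response = ua.open("http://example.com/")
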
Property changes on: Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_useragent.py
___________________________________________________________________
Name: svn:keywords
+ Id
Name: svn:eol-style
+ native
Added: Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_util.py
===================================================================
--- Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_util.py 2006-06-19 15:22:02 UTC (rev 68754)
+++ Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_util.py 2006-06-19 15:38:18 UTC (rev 68755)
@@ -0,0 +1,650 @@
+"""Python backwards-compat., date/time routines, seekable file object wrapper.
+
+ Copyright 2002-2006 John J Lee <jjl at pobox.com>
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the BSD or ZPL 2.1 licenses (see the file
+COPYING.txt included with the distribution).
+
+"""
+
+import re, string, time, copy, urllib, mimetools
+from types import TupleType
+from cStringIO import StringIO
+
+def startswith(string, initial):
+ if len(initial) > len(string): return False
+ return string[:len(initial)] == initial
+
+def endswith(string, final):
+ if len(final) > len(string): return False
+ return string[-len(final):] == final
+
+def isstringlike(x):
+ try: x+""
+ except: return False
+ else: return True
+
+SPACE_DICT = {}
+for c in string.whitespace:
+ SPACE_DICT[c] = None
+del c
+def isspace(string):
+ for c in string:
+ if not SPACE_DICT.has_key(c): return False
+ return True
+
+## def caller():
+## try:
+## raise SyntaxError
+## except:
+## import sys
+## return sys.exc_traceback.tb_frame.f_back.f_back.f_code.co_name
+
+
+# This is here, rather than in _headersutil, because it exists only for
+# compatibility with old Python versions, not because it is new code.
+def getheaders(msg, name):
+ """Get all values for a header.
+
+ This returns a list of values for headers given more than once; each
+ value in the result list is stripped in the same way as the result of
+ getheader(). If the header is not given, return an empty list.
+ """
+ result = []
+ current = ''
+ have_header = 0
+ for s in msg.getallmatchingheaders(name):
+ if isspace(s[0]):
+ if current:
+ current = "%s\n %s" % (current, string.strip(s))
+ else:
+ current = string.strip(s)
+ else:
+ if have_header:
+ result.append(current)
+ current = string.strip(s[string.find(s, ":") + 1:])
+ have_header = 1
+ if have_header:
+ result.append(current)
+ return result
+
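A minimal sketch of getheaders(), run at module level:

    import mimetools
    from cStringIO import StringIO

    # A header given more than once yields one list entry per occurrence.
    msg = mimetools.Message(StringIO(
        "Set-Cookie: a=1\r\nSet-Cookie: b=2\r\n\r\n"))
    print getheaders(msg, "Set-Cookie")   # -> ['a=1', 'b=2']
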
+from calendar import timegm
+
+# Date/time conversion routines for formats used by the HTTP protocol.
+
+EPOCH = 1970
+def my_timegm(tt):
+ year, month, mday, hour, min, sec = tt[:6]
+ if ((year >= EPOCH) and (1 <= month <= 12) and (1 <= mday <= 31) and
+ (0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)):
+ return timegm(tt)
+ else:
+ return None
+
+days = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
+months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
+ "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
+months_lower = []
+for month in months: months_lower.append(string.lower(month))
+
+
+def time2isoz(t=None):
+ """Return a string representing time in seconds since epoch, t.
+
+ If the function is called without an argument, it will use the current
+ time.
+
+ The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ",
+ representing Universal Time (UTC, aka GMT). An example of this format is:
+
+ 1994-11-24 08:49:37Z
+
+ """
+ if t is None: t = time.time()
+ year, mon, mday, hour, min, sec = time.gmtime(t)[:6]
+ return "%04d-%02d-%02d %02d:%02d:%02dZ" % (
+ year, mon, mday, hour, min, sec)
+
+def time2netscape(t=None):
+ """Return a string representing time in seconds since epoch, t.
+
+ If the function is called without an argument, it will use the current
+ time.
+
+ The format of the returned string is like this:
+
+ Wed, DD-Mon-YYYY HH:MM:SS GMT
+
+ """
+ if t is None: t = time.time()
+ year, mon, mday, hour, min, sec, wday = time.gmtime(t)[:7]
+ return "%s %02d-%s-%04d %02d:%02d:%02d GMT" % (
+ days[wday], mday, months[mon-1], year, hour, min, sec)
+
+
+UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}
+
+timezone_re = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$")
+def offset_from_tz_string(tz):
+ offset = None
+ if UTC_ZONES.has_key(tz):
+ offset = 0
+ else:
+ m = timezone_re.search(tz)
+ if m:
+ offset = 3600 * int(m.group(2))
+ if m.group(3):
+ offset = offset + 60 * int(m.group(3))
+ if m.group(1) == '-':
+ offset = -offset
+ return offset
+
+def _str2time(day, mon, yr, hr, min, sec, tz):
+ # translate month name to number
+ # month numbers start with 1 (January)
+ try:
+ mon = months_lower.index(string.lower(mon))+1
+ except ValueError:
+ # maybe it's already a number
+ try:
+ imon = int(mon)
+ except ValueError:
+ return None
+ if 1 <= imon <= 12:
+ mon = imon
+ else:
+ return None
+
+ # make sure clock elements are defined
+ if hr is None: hr = 0
+ if min is None: min = 0
+ if sec is None: sec = 0
+
+ yr = int(yr)
+ day = int(day)
+ hr = int(hr)
+ min = int(min)
+ sec = int(sec)
+
+ if yr < 1000:
+ # find "obvious" year
+ cur_yr = time.localtime(time.time())[0]
+ m = cur_yr % 100
+ tmp = yr
+ yr = yr + cur_yr - m
+ m = m - tmp
+ if abs(m) > 50:
+ if m > 0: yr = yr + 100
+ else: yr = yr - 100
+
+ # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
+ t = my_timegm((yr, mon, day, hr, min, sec, tz))
+
+ if t is not None:
+ # adjust time using timezone string, to get absolute time since epoch
+ if tz is None:
+ tz = "UTC"
+ tz = string.upper(tz)
+ offset = offset_from_tz_string(tz)
+ if offset is None:
+ return None
+ t = t - offset
+
+ return t
+
+
+strict_re = re.compile(r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) (\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$")
+wkday_re = re.compile(
+ r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I)
+loose_http_re = re.compile(
+ r"""^
+ (\d\d?) # day
+ (?:\s+|[-\/])
+ (\w+) # month
+ (?:\s+|[-\/])
+ (\d+) # year
+ (?:
+ (?:\s+|:) # separator before clock
+ (\d\d?):(\d\d) # hour:min
+ (?::(\d\d))? # optional seconds
+ )? # optional clock
+ \s*
+ ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
+ \s*
+ (?:\(\w+\))? # ASCII representation of timezone in parens.
+ \s*$""", re.X)
+def http2time(text):
+ """Returns time in seconds since epoch of time represented by a string.
+
+ Return value is an integer.
+
+ None is returned if the string's format is unrecognized, the time is outside
+ the representable range, or the timezone string is not recognized. If the
+ string contains no timezone, UTC is assumed.
+
+ The timezone in the string may be numerical (like "-0800" or "+0100") or a
+ string timezone (like "UTC", "GMT", "BST" or "EST"). Currently, only the
+ timezone strings equivalent to UTC (zero offset) are known to the function.
+
+ The function loosely parses the following formats:
+
+ Wed, 09 Feb 1994 22:23:32 GMT -- HTTP format
+ Tuesday, 08-Feb-94 14:15:29 GMT -- old rfc850 HTTP format
+ Tuesday, 08-Feb-1994 14:15:29 GMT -- broken rfc850 HTTP format
+ 09 Feb 1994 22:23:32 GMT -- HTTP format (no weekday)
+ 08-Feb-94 14:15:29 GMT -- rfc850 format (no weekday)
+ 08-Feb-1994 14:15:29 GMT -- broken rfc850 format (no weekday)
+
+ The parser ignores leading and trailing whitespace. The time may be
+ absent.
+
+ If the year is given with only 2 digits, the function will select the
+ century that makes the year closest to the current date.
+
+ """
+ # fast exit for strictly conforming string
+ m = strict_re.search(text)
+ if m:
+ g = m.groups()
+ mon = months_lower.index(string.lower(g[1])) + 1
+ tt = (int(g[2]), mon, int(g[0]),
+ int(g[3]), int(g[4]), float(g[5]))
+ return my_timegm(tt)
+
+ # No, we need some messy parsing...
+
+ # clean up
+ text = string.lstrip(text)
+ text = wkday_re.sub("", text, 1) # Useless weekday
+
+ # tz is time zone specifier string
+ day, mon, yr, hr, min, sec, tz = [None]*7
+
+ # loose regexp parse
+ m = loose_http_re.search(text)
+ if m is not None:
+ day, mon, yr, hr, min, sec, tz = m.groups()
+ else:
+ return None # bad format
+
+ return _str2time(day, mon, yr, hr, min, sec, tz)
+
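A sketch of the parser in use, run at module level so that http2time and
time2isoz are in scope:

    # Two legacy spellings of the same instant parse to the same value.
    t1 = http2time("Wed, 09 Feb 1994 22:23:32 GMT")
    t2 = http2time("09-Feb-94 22:23:32 GMT")
    assert t1 == t2
    print time2isoz(t1)   # -> 1994-02-09 22:23:32Z
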
+
+iso_re = re.compile(
+ """^
+ (\d{4}) # year
+ [-\/]?
+ (\d\d?) # numerical month
+ [-\/]?
+ (\d\d?) # day
+ (?:
+ (?:\s+|[-:Tt]) # separator before clock
+ (\d\d?):?(\d\d) # hour:min
+ (?::?(\d\d(?:\.\d*)?))? # optional seconds (and fractional)
+ )? # optional clock
+ \s*
+ ([-+]?\d\d?:?(:?\d\d)?
+ |Z|z)? # timezone (Z is "zero meridian", i.e. GMT)
+ \s*$""", re.X)
+def iso2time(text):
+ """
+ As for http2time, but parses the ISO 8601 formats:
+
+ 1994-02-03 14:15:29 -0100 -- ISO 8601 format
+ 1994-02-03 14:15:29 -- zone is optional
+ 1994-02-03 -- only date
+ 1994-02-03T14:15:29 -- Use T as separator
+ 19940203T141529Z -- ISO 8601 compact format
+ 19940203 -- only date
+
+ """
+ # clean up
+ text = string.lstrip(text)
+
+ # tz is time zone specifier string
+ day, mon, yr, hr, min, sec, tz = [None]*7
+
+ # loose regexp parse
+ m = iso_re.search(text)
+ if m is not None:
+ # XXX there's an extra bit of the timezone I'm ignoring here: is
+ # this the right thing to do?
+ yr, mon, day, hr, min, sec, tz, _ = m.groups()
+ else:
+ return None # bad format
+
+ return _str2time(day, mon, yr, hr, min, sec, tz)
+
+
+# XXX Andrew Dalke kindly sent me a similar class in response to my request on
+# comp.lang.python, which I then proceeded to lose. I wrote this class
+# instead, but I think he's released his code publicly since, could pinch the
+# tests from it, at least...
+
+# For testing seek_wrapper invariant (note that
+# test_urllib2.HandlerTest.test_seekable is expected to fail when this
+# invariant checking is turned on). The invariant checking is done by module
+# ipdc, which is available here:
+# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/436834
+## from ipdbc import ContractBase
+## class seek_wrapper(ContractBase):
+class seek_wrapper:
+ """Adds a seek method to a file object.
+
+ This is only designed for seeking on readonly file-like objects.
+
+ Wrapped file-like object must have a read method. The readline method is
+ only supported if that method is present on the wrapped object. The
+ readlines method is always supported. xreadlines and iteration are
+ supported only for Python 2.2 and above.
+
+ Public attribute: wrapped (the wrapped file object).
+
+ WARNING: All other attributes of the wrapped object (i.e. those that are not
+ one of wrapped, read, readline, readlines, xreadlines, __iter__ and next)
+ are passed through unaltered, which may or may not make sense for your
+ particular file object.
+
+ """
+ # General strategy is to check that cache is full enough, then delegate to
+ # the cache (self.__cache, which is a cStringIO.StringIO instance). A seek
+ # position (self.__pos) is maintained independently of the cache, in order
+ # that a single cache may be shared between multiple seek_wrapper objects.
+ # Copying using module copy shares the cache in this way.
+
+ def __init__(self, wrapped):
+ self.wrapped = wrapped
+ self.__have_readline = hasattr(self.wrapped, "readline")
+ self.__cache = StringIO()
+ self.__pos = 0 # seek position
+
+ def invariant(self):
+ # The end of the cache is always at the same place as the end of the
+ # wrapped file.
+ return self.wrapped.tell() == len(self.__cache.getvalue())
+
+ def __getattr__(self, name):
+ wrapped = self.__dict__.get("wrapped")
+ if wrapped:
+ return getattr(wrapped, name)
+ return getattr(self.__class__, name)
+
+ def seek(self, offset, whence=0):
+ assert whence in [0,1,2]
+
+ # how much data, if any, do we need to read?
+ if whence == 2: # 2: relative to end of *wrapped* file
+ if offset < 0: raise ValueError("negative seek offset")
+ # since we don't know yet where the end of that file is, we must
+ # read everything
+ to_read = None
+ else:
+ if whence == 0: # 0: absolute
+ if offset < 0: raise ValueError("negative seek offset")
+ dest = offset
+ else: # 1: relative to current position
+ pos = self.__pos
+ if pos + offset < 0:
+ raise ValueError("seek to before start of file")
+ dest = pos + offset
+ end = len(self.__cache.getvalue())
+ to_read = dest - end
+ if to_read < 0:
+ to_read = 0
+
+ if to_read != 0:
+ self.__cache.seek(0, 2)
+ if to_read is None:
+ assert whence == 2
+ self.__cache.write(self.wrapped.read())
+ self.__pos = self.__cache.tell() - offset
+ else:
+ self.__cache.write(self.wrapped.read(to_read))
+ # Don't raise an exception even if we've seek()ed past the end
+ # of .wrapped, since fseek() doesn't complain in that case.
+ # Also like fseek(), pretend we have seek()ed past the end,
+ # i.e. not:
+ #self.__pos = self.__cache.tell()
+ # but rather:
+ self.__pos = dest
+ else:
+ self.__pos = dest
+
+ def tell(self):
+ return self.__pos
+
+ def __copy__(self):
+ cpy = self.__class__(self.wrapped)
+ cpy.__cache = self.__cache
+ return cpy
+
+ def get_data(self):
+ pos = self.__pos
+ try:
+ self.seek(0)
+ return self.read(-1)
+ finally:
+ self.__pos = pos
+
+ def read(self, size=-1):
+ pos = self.__pos
+ end = len(self.__cache.getvalue())
+ available = end - pos
+
+ # enough data already cached?
+ if size <= available and size != -1:
+ self.__cache.seek(pos)
+ self.__pos = pos+size
+ return self.__cache.read(size)
+
+ # no, so read sufficient data from wrapped file and cache it
+ self.__cache.seek(0, 2)
+ if size == -1:
+ self.__cache.write(self.wrapped.read())
+ else:
+ to_read = size - available
+ assert to_read > 0
+ self.__cache.write(self.wrapped.read(to_read))
+ self.__cache.seek(pos)
+
+ data = self.__cache.read(size)
+ self.__pos = self.__cache.tell()
+ assert self.__pos == pos + len(data)
+ return data
+
+ def readline(self, size=-1):
+ if not self.__have_readline:
+ raise NotImplementedError("no readline method on wrapped object")
+
+ # line we're about to read might not be complete in the cache, so
+ # read another line first
+ pos = self.__pos
+ self.__cache.seek(0, 2)
+ self.__cache.write(self.wrapped.readline())
+ self.__cache.seek(pos)
+
+ data = self.__cache.readline()
+ if size != -1:
+ r = data[:size]
+ self.__pos = pos+size
+ else:
+ r = data
+ self.__pos = pos+len(data)
+ return r
+
+ def readlines(self, sizehint=-1):
+ pos = self.__pos
+ self.__cache.seek(0, 2)
+ self.__cache.write(self.wrapped.read())
+ self.__cache.seek(pos)
+ data = self.__cache.readlines(sizehint)
+ self.__pos = self.__cache.tell()
+ return data
+
+ def __iter__(self): return self
+ def next(self):
+ line = self.readline()
+ if line == "": raise StopIteration
+ return line
+
+ xreadlines = __iter__
+
+ def __repr__(self):
+ return ("<%s at %s whose wrapped object = %r>" %
+ (self.__class__.__name__, hex(id(self)), self.wrapped))
+
+
+class response_seek_wrapper(seek_wrapper):
+
+ """
+ Supports copying response objects and setting response body data.
+
+ """
+
+ def __init__(self, wrapped):
+ seek_wrapper.__init__(self, wrapped)
+ self._headers = self.wrapped.info()
+
+ def __copy__(self):
+ cpy = seek_wrapper.__copy__(self)
+ # copy headers from delegate
+ cpy._headers = copy.copy(self.info())
+ return cpy
+
+ def info(self):
+ return self._headers
+
+ def set_data(self, data):
+ self.seek(0)
+ self.read()
+ self.close()
+ cache = self._seek_wrapper__cache = StringIO()
+ cache.write(data)
+ self.seek(0)
+
+
+class eoffile:
+ # file-like object that always claims to be at end-of-file...
+ def read(self, size=-1): return ""
+ def readline(self, size=-1): return ""
+ def __iter__(self): return self
+ def next(self): return ""
+ def close(self): pass
+
+class eofresponse(eoffile):
+ def __init__(self, url, headers, code, msg):
+ self._url = url
+ self._headers = headers
+ self.code = code
+ self.msg = msg
+ def geturl(self): return self._url
+ def info(self): return self._headers
+
+
+class closeable_response:
+ """Avoids unnecessarily clobbering urllib.addinfourl methods on .close().
+
+ Only supports responses returned by mechanize.HTTPHandler.
+
+ After .close(), the following methods are supported:
+
+ .read()
+ .readline()
+ .readlines()
+ .seek()
+ .tell()
+ .info()
+ .geturl()
+ .__iter__()
+ .next()
+ .close()
+
+ and the following attributes are supported:
+
+ .code
+ .msg
+
+ Also supports pickling (but the stdlib currently does something to prevent
+ it: http://python.org/sf/1144636).
+
+ """
+ # presence of this attr indicates the object is usable after .close()
+ closeable_response = None
+
+ def __init__(self, fp, headers, url, code, msg):
+ self._set_fp(fp)
+ self._headers = headers
+ self._url = url
+ self.code = code
+ self.msg = msg
+
+ def _set_fp(self, fp):
+ self.fp = fp
+ self.read = self.fp.read
+ self.readline = self.fp.readline
+ if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
+ if hasattr(self.fp, "fileno"):
+ self.fileno = self.fp.fileno
+ else:
+ self.fileno = lambda: None
+ if hasattr(self.fp, "__iter__"):
+ self.__iter__ = self.fp.__iter__
+ if hasattr(self.fp, "next"):
+ self.next = self.fp.next
+
+ def __repr__(self):
+ return '<%s at %s whose fp = %r>' % (
+ self.__class__.__name__, hex(id(self)), self.fp)
+
+ def info(self):
+ return self._headers
+
+ def geturl(self):
+ return self._url
+
+ def close(self):
+ wrapped = self.fp
+ wrapped.close()
+ new_wrapped = eofresponse(
+ self._url, self._headers, self.code, self.msg)
+ self._set_fp(new_wrapped)
+
+ def __getstate__(self):
+ # There are three obvious options here:
+ # 1. truncate
+ # 2. read to end
+ # 3. close socket, pickle state including read position, then open
+ # again on unpickle and use Range header
+ # XXXX um, 4. refuse to pickle unless .close()d. This is better,
+ # actually ("errors should never pass silently"). Pickling doesn't
+ # work anyway ATM, because of http://python.org/sf/1144636 so fix
+ # this later
+
+ # 2 breaks pickle protocol, because one expects the original object
+ # to be left unscathed by pickling. 3 is too complicated and
+ # surprising (and too much work ;-) to happen in a sane __getstate__.
+ # So we do 1.
+
+ state = self.__dict__.copy()
+ new_wrapped = eofresponse(
+ self._url, self._headers, self.code, self.msg)
+ state["wrapped"] = new_wrapped
+ return state
+
+def make_response(data, headers, url, code, msg):
+ """Convenient factory for objects implementing response interface.
+
+ data: string containing response body data
+ headers: sequence of (name, value) pairs
+ url: URL of response
+ code: integer response code (e.g. 200)
+ msg: string response code message (e.g. "OK")
+
+ """
+ hdr_text = []
+ for name_value in headers:
+ hdr_text.append("%s: %s" % name_value)
+ mime_headers = mimetools.Message(StringIO("\n".join(hdr_text)))
+ r = closeable_response(StringIO(data), mime_headers, url, code, msg)
+ return response_seek_wrapper(r)
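A usage sketch, run at module level:

    # Build a canned response and exercise the seekable response interface.
    r = make_response("hello world", [("Content-type", "text/plain")],
                      "http://example.com/", 200, "OK")
    print r.read()                    # -> hello world
    r.seek(0)
    print r.info()["Content-type"]    # -> text/plain
    print r.geturl(), r.code, r.msg   # -> http://example.com/ 200 OK
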
Property changes on: Zope3/branches/benji-integrate-new-mechanize/src/mechanize/_util.py
___________________________________________________________________
Name: svn:keywords
+ Id
Name: svn:eol-style
+ native
Modified: Zope3/branches/benji-integrate-new-mechanize/src/zope/testbrowser/testing.py
===================================================================
--- Zope3/branches/benji-integrate-new-mechanize/src/zope/testbrowser/testing.py 2006-06-19 15:22:02 UTC (rev 68754)
+++ Zope3/branches/benji-integrate-new-mechanize/src/zope/testbrowser/testing.py 2006-06-19 15:38:18 UTC (rev 68755)
@@ -127,21 +127,16 @@
class PublisherMechanizeBrowser(mechanize.Browser):
"""Special ``mechanize`` browser using the Zope Publisher HTTP handler."""
- default_schemes = ["http"]
- default_others = ["_http_error", "_http_request_upgrade",
- "_http_default_error"]
- default_features = ["_authen", "_redirect", "_cookies", "_seek"]
+ default_schemes = ['http']
+ default_others = ['_http_error', '_http_request_upgrade',
+ '_http_default_error']
+ default_features = ['_redirect', '_cookies', '_referer', '_refresh',
+ '_equiv', '_basicauth', '_digestauth', '_seek' ]
- default_features = ["_redirect", "_cookies", "_referer",
- "_refresh", "_equiv",
- "_basicauth", "_digestauth",
- "_seek",
- ]
-
def __init__(self, *args, **kws):
inherited_handlers = ['_unknown', '_http_error',
'_http_request_upgrade', '_http_default_error', '_basicauth',
- '_digestauth', '_authen', '_redirect', '_cookies', '_referer',
+ '_digestauth', '_redirect', '_cookies', '_referer',
'_refresh', '_equiv', '_seek', '_gzip']
self.handler_classes = {"http": PublisherHTTPHandler}