[Zope3-dev] making ldapadapter search into a generator and support sizelimit

Laurence Rowe l at lrowe.co.uk
Tue Mar 28 11:52:57 EST 2006


Hi there,

I've run into a few problems using ldapadapter with large search 
results that take a long time. The attached patch makes LDAPAdapter.search 
support ldap.LDAPObject.search_ext's sizelimit parameter, and also turns 
the method into a generator, so partial search results (i.e. when you 
specify sizelimit) get returned.

This is an important feature for working with large ldap servers, but 
I'm unsure if this change could cause problems for current users of 
ldapadapter. Perhaps I should put the extra functionality in a new 
method (itersearch perhaps)? Anyway, I do not currently have check-in 
privileges so I shall defer to someone who does to make a decision ;-)

Laurence
-------------- next part --------------
Index: tests/fakeldap.py
===================================================================
--- tests/fakeldap.py	(revision 65768)
+++ tests/fakeldap.py	(working copy)
@@ -159,6 +159,31 @@
             raise NO_SUCH_OBJECT
         return res
 
+    def search_ext(self, base, scope=SCOPE_SUBTREE, filter='(objectClass=*)',
+                   attrs=[], sizelimit=0):
+        #
+        # Async search
+        #
+        self._sizelimit = sizelimit
+        self._async_results = self.search_s(base, scope, filter, attrs)
+        self._async_counter = 0
+        return 1 # result msgid (unused here)
+
+    def result(self, msgid=1, all=1):
+        #
+        # Retrieve result from the async result list
+        #
+        if all:
+            return (101, self._async_results[self._async_counter:])
+        else:
+            try:
+                r = self._async_results[self._async_counter]
+                self._async_counter += 1
+                return (100, [r])
+            except IndexError:
+                return (101, [])
+        
+
     def modify_s(self, dn, mod_list):
         dnl = tuple(dn.split(','))
         entry = the_data.get(dnl)
Index: interfaces.py
===================================================================
--- interfaces.py	(revision 65768)
+++ interfaces.py	(working copy)
@@ -134,7 +134,8 @@
         May raise NoSuchObject.
         """
 
-    def search(base, scope='one', filter='(objectClass=*)', attrs=None):
+    def search(base, scope='one', filter='(objectClass=*)', attrs=None,
+               sizelimit=0):
         """Search an LDAP server.
 
         - base is a unicode dn.
@@ -146,7 +147,12 @@
         - attrs may be a list of entry attributes to return, or None to
           return them all.
 
-        Returns a sequence of (dn, entry), where dn is unicode and entry
+        - sizelimit is the client-side search limit. If non-zero not more than
+          sizelimit results are returned by the server. If there exist more than
+          sizelimit results then ldap.SIZELIMIT_EXCEEDED will be raised when
+          iterating to the sizelimit+1 result.
+
+        Returns an iterator of (dn, entry), where dn is unicode and entry
         is a mapping whose values are lists of unicode strings.
 
         May raise NoSuchObject.
Index: utility.py
===================================================================
--- utility.py	(revision 65768)
+++ utility.py	(working copy)
@@ -94,7 +94,7 @@
 
     def modify(self, dn, entry):
         # Get current entry
-        res = self.search(dn, 'base')
+        res = list(self.search(dn, 'base'))
         if not res:
             raise interfaces.NoSuchObject(dn)
         cur_dn, cur_entry = res[0]
@@ -119,7 +119,7 @@
         self.conn.modify_s(dn.encode('utf-8'), mod_list)
 
     def search(self, base, scope='sub', filter='(objectClass=*)',
-               attrs=None):
+               attrs=None, sizelimit=0):
         # Convert from unicode to UTF-8, and attrs must be ASCII strings.
         base = base.encode('utf-8')
         scope = convertScope(scope)
@@ -127,25 +127,32 @@
         if attrs is not None:
             attrs = [str(attr) for attr in attrs]
         try:
-            ldap_entries = self.conn.search_s(base, scope, filter, attrs)
+            result_id = self.conn.search_ext(base, scope, filter, attrs,
+                                                sizelimit=sizelimit)
+            # get the results one at a time
+            while 1:
+                result_type, result_data = self.conn.result(result_id, all=0)
+                if result_data == []:
+                    break
+                else:
+                    # Convert returned values from utf-8 to unicode.
+                    for dn, entry in result_data: # list should be 1 long
+                        dn = unicode(dn, 'utf-8')
+                        for key, values in entry.items():
+                            # TODO: Can key be non-ascii? Check LDAP spec.
+                            # FIXME: there may be non-textual binary values.
+                            try:
+                                values[:] = [unicode(v, 'utf-8') for v in values]
+                            except UnicodeDecodeError:
+                                # Not all data is unicode, so decoding does not always work.
+                                pass
+                        yield (dn, entry)
+    
         except ldap.NO_SUCH_OBJECT:
             raise interfaces.NoSuchObject(base)
-        # May raise SIZELIMIT_EXCEEDED
+        # May raise SIZELIMIT_EXCEEDED, for instance if sizelimit is non zero and
+        # there are more than sizelimit total results.
 
-        # Convert returned values from utf-8 to unicode.
-        results = []
-        for dn, entry in ldap_entries:
-            dn = unicode(dn, 'utf-8')
-            for key, values in entry.items():
-                # TODO: Can key be non-ascii? Check LDAP spec.
-                # FIXME: there may be non-textual binary values.
-                try:
-                    values[:] = [unicode(v, 'utf-8') for v in values]
-                except UnicodeDecodeError:
-                    # Not all data is unicode, so decoding does not always work.
-                    pass
-            results.append((dn, entry))
-        return results
 
 
 class ManageableLDAPAdapter(LDAPAdapter, persistent.Persistent, Contained):
Index: README.txt
===================================================================
--- README.txt	(revision 65768)
+++ README.txt	(working copy)
@@ -100,13 +100,14 @@
 Search
 ------
 
-Let's now search for entries. The scope argument controls what kind of
-search is done. You can choose to return a subset of the attributes.
+Let's now search for entries. Searching is asynchronous, returning a generator
+object. The scope argument controls what kind of search is done. You can choose
+to return a subset of the attributes.
 
-  >>> conn.search('dc=test', scope='base')
+  >>> list(conn.search('dc=test', scope='base'))
   [(u'dc=test', {'dc': [u'test']})]
 
-  >>> res = conn.search('dc=test', scope='one', attrs=['cn'])
+  >>> res = list(conn.search('dc=test', scope='one', attrs=['cn']))
   >>> pprint(res)
   [(u'cn=foo,dc=test', {'cn': [u'foo']}),
    (u'cn=bar,dc=test', {'cn': [u'bar']}),
@@ -114,7 +115,7 @@
 
 The default scope is 'sub':
 
-  >>> res = conn.search('dc=test', attrs=['givenName'])
+  >>> res = list(conn.search('dc=test', attrs=['givenName']))
   >>> pprint(res)
   [(u'cn=foo,dc=test', {'givenName': [u'John']}),
    (u'cn=bar,dc=test', {'givenName': [u'Joey']}),
@@ -123,15 +124,15 @@
 
 You can use a search filter to filter the entries returned:
 
-  >>> res = conn.search('dc=test', scope='sub', filter='(cn=ba*)',
-  ...                   attrs=['cn'])
+  >>> res = list(conn.search('dc=test', scope='sub', filter='(cn=ba*)',
+  ...                   attrs=['cn']))
   >>> pprint(res)
   [(u'cn=bar,dc=test', {'cn': [u'bar']}),
    (u'cn=baz,dc=test', {'cn': [u'baz']})]
 
 Searching on an base that doesn't exist returns an exception:
 
-  >>> conn.search('dc=bzzt')
+  >>> list(conn.search('dc=bzzt'))
   Traceback (most recent call last):
   ...
   NoSuchObject: dc=bzzt
@@ -142,17 +143,17 @@
 When modifying an entry, you pass new values for some attributes:
 
   >>> conn.modify('cn=foo,dc=test', {'givenName': ['Pete']})
-  >>> conn.search('cn=foo,dc=test', attrs=['givenName'])
+  >>> list(conn.search('cn=foo,dc=test', attrs=['givenName']))
   [(u'cn=foo,dc=test', {'givenName': [u'Pete']})]
 
   >>> conn.modify('cn=foo,dc=test', {'givenName': ['Bob', 'Robert']})
-  >>> conn.search('cn=foo,dc=test', attrs=['givenName'])
+  >>> list(conn.search('cn=foo,dc=test', attrs=['givenName']))
   [(u'cn=foo,dc=test', {'givenName': [u'Bob', u'Robert']})]
 
 Passing an empty value for an attribute remove it from the entry:
 
   >>> conn.modify('cn=foo,dc=test', {'givenName': []})
-  >>> conn.search('cn=foo,dc=test')
+  >>> list(conn.search('cn=foo,dc=test'))
   [(u'cn=foo,dc=test', {'cn': [u'foo']})]
 
 Delete
@@ -161,7 +162,7 @@
 You can delete an entry.
 
   >>> conn.delete('cn=foo,dc=test')
-  >>> conn.search('cn=foo,dc=test')
+  >>> list(conn.search('cn=foo,dc=test'))
   Traceback (most recent call last):
   ...
   NoSuchObject: cn=foo,dc=test


More information about the Zope3-dev mailing list