[Zope] Problem with external method locking threads

David Swift dswift at apmindsf.com
Mon Jan 24 16:02:47 EST 2005


I am running an External Method to convert some hard-coded absolute links 
in our ZWiki back to relative links.  On two of our three ZWikis, this script
runs well.

 On the third ZWiki, which has about 530 pages, this script causes the 
Zope instance to stop responding to any requests, after it
processes about 140 pages.  I have tried deleting the pages it freezes 
on, but it still freezes at about the same place each time.

After examining the lists, and running strace, I am certain that one of 
the Python/Zope threads refuses to respond to SIGRTMIN, and then
somehow locks up all threads - however, this is deeper than my knowledge 
goes, and I am hoping someone on this list can point me towards
solutions I have not yet tried.

My environment is:
2.5Ghz Intel single processor
1GB RAM
Fedora 1 kernel - Linux our.server.com 2.4.22-1.2199.nptl #1 Wed Aug 4 
12:21:48 EDT 2004 i686 i686 i386 GNU/Linux
Software RAID 1, using a 36GB partition for Data.fs.  Directory 
instance/var is a symlink to this partition.
Python 2.3.3 and Python 2.4
Zope 2.7.3
ZWiki ZWiki-0-37-0
glibc-2.3.4-3

Below, I have copied:
The External Method script I am using
A brief description of the various things I have tried
A sample of the environment, from within zopectl
A copy of the strace run I am getting during script execution

I use the command 'setuidgid zope bin/zopectl' to run the Zope instance.

Any help is greatly appreciated!
David

-------------------- The script looks like this:
from StringIO import StringIO
import zLOG
import re

# Link patterns, compiled once at import time.
#
# The URL prefix is matched with a single negated character class
# ([^"\n]*) rather than a nested quantifier such as (?:.*?/)+.  A nested
# quantifier lets the regex engine try exponentially many ways of splitting
# a slash-heavy line whenever the tail of the pattern fails to match, and
# CPython's regex matching does not release the GIL, so a single such line
# can stall every thread in the process.  Excluding '"' from the prefix
# also keeps a match from running past the end of the quoted URL.

# href="name.ext" -- a bare file name with no path component.
_FILE_ONLY_LINK = re.compile(r'href=\"([- .\w]+\.\w+)\"')
# https://zwiki.../name.ext" -- absolute link whose last path segment has an
# extension.  The dot is escaped so extension-less page names do not match.
_FILE_LINK = re.compile(r'https://zwiki[^"\n]*/([- .%\w]+\.\w+)"')
# https://zwiki.../PageName" -- absolute link to anything else.
_OTHER_LINK = re.compile(r'https://zwiki[^"\n]*/([- %\w]+)"')


def addpage(self, o):
    """Rewrite absolute/bare links in one page's text to relative links.

    Three substitutions are applied in order:

    1. ``href="name.ext"``            becomes ``href="uploads/name.ext"``
    2. ``https://zwiki.../name.ext"`` becomes ``uploads/name.ext"``
    3. ``https://zwiki.../PageName"`` becomes ``PageName"``

    The rewritten text is stored back on the object, a WARNING-level zLOG
    entry naming the object is written, and the object is reindexed.  This
    happens for every call, whether or not any link was changed.

    Parameters:
        self -- not used by this function.
        o    -- object providing getId(), read(), setText(text) and
                reindex_object().

    Returns None.
    """
    nid = o.getId()

    pageText = o.read()

    # Order matters: bare file names first, then absolute file links, and
    # only then the remaining absolute links.
    pageText = _FILE_ONLY_LINK.sub(r'href="uploads/\1"', pageText)
    pageText = _FILE_LINK.sub(r'uploads/\1"', pageText)
    pageText = _OTHER_LINK.sub(r'\1"', pageText)

    o.setText(pageText)
    zLOG.LOG('addpage', zLOG.WARNING, 'working on %s' % nid)
    o.reindex_object()

--------------------

tried Python 2.4 and Zope 2.7.3-0
tried Python 2.3.3 and Zope 2.7.3-0
tried refreshing glibc, to version glibc-2.3.4-3
tried LD_ASSUME_KERNEL=2.4.1 in runzope and zopectl
tried LD_ASSUME_KERNEL=2.2.5 in runzope and zopectl
tried deleting data for pages it stops on
tried THREAD_STACK_SIZE of 0x20000 when compiling Python
tried compiling Python with ./configure --prefix=/usr/local/python-2.4 
--with-threads --with-signal-module --with-libs='rt'
tried compiling Python with ./configure --prefix=/usr/local/python-2.4 
--with-threads --with-signal-module
tried compiling Python with ./configure --prefix=/usr/local/python-2.4 
--with-threads
tried compiling Python with ./configure --prefix=/usr/local/python-2.3.3 
--with-threads --with-signal-module --with-libs='rt'
tried compiling Python with ./configure --prefix=/usr/local/python-2.3.3 
--with-threads --with-signal-module
tried compiling Python with ./configure --prefix=/usr/local/python-2.3.3 
--with-threads

-------------------- environment of zopectl

zopectl> stop
. . . . . . . . . . . daemon process stopped
zopectl> shell
bash: /root/.bashrc: Permission denied
bash-2.05b$ set
BASH=/bin/bash
BASH_ENV=/root/.bashrc
BASH_VERSINFO=([0]="2" [1]="05b" [2]="0" [3]="1" [4]="release" 
[5]="i386-redhat-linux-gnu")
BASH_VERSION='2.05b.0(1)-release'
CLIENT_HOME=/usr/local/zope/apmindsf/var
COLUMNS=140
DIRSTACK=()
EUID=88
GROUPS=()
G_BROKEN_FILENAMES=1
HISTFILE=/root/.bash_history
HISTFILESIZE=1000
HISTSIZE=1000
HOME=/root
HOSTNAME=rope.apmindsf.com
HOSTTYPE=i386
IFS=$' \t\n'
INPUTRC=/etc/inputrc
INSTANCE_HOME=/usr/local/zope/apmindsf
LANG=en_US.UTF-8
LD_ASSUME_KERNEL=2.4.1
LESSOPEN='|/usr/bin/lesspipe.sh %s'
LINES=61
LOGNAME=root
MACHTYPE=i386-redhat-linux-gnu
MAIL=/var/spool/mail/root
MAILCHECK=60
OPTERR=1
OPTIND=1
OSTYPE=linux-gnu
PATH=/usr/kerberos/sbin:/usr/kerberos/bin:/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin:/usr/X11R6/bin:/root/bin
PPID=8735
PS1='\s-\v\$ '
PS2='> '
PS4='+ '
PWD=/usr/local/zope/apmindsf
PYTHONPATH=/usr/local/zope/lib/python
SHELL=/bin/bash
SHELLOPTS=braceexpand:emacs:hashall:histexpand:history:interactive-comments:monitor
SHLVL=3
SOFTWARE_HOME=/usr/local/zope/lib/python
SSH_CLIENT='192.168.1.20 3660 22'
SSH_CONNECTION='192.168.1.20 3660 192.168.1.72 22'
SSH_TTY=/dev/pts/1
TERM=cygwin
UID=88
USER=root
USERNAME=root
ZMANAGED=1
ZOPE_HOME=/usr/local/zope
_=/bin/bash
bash-2.05b$


--------------------  strace of script execution


[root at server Python-2.4]# strace -s2000 -p 8738
Process 8738 attached - interrupt to quit
select(9, [3 4 8], [], [3 4 8], {4, 460000}) = 0 (Timeout)
select(9, [3 4 8], [], [3 4 8], {30, 0}) = 1 (in [3], left {12, 590000})
accept(3, {sa_family=AF_INET, sin_port=htons(3429), 
sin_addr=inet_addr("192.168.1.20")}, [16]) = 14
fcntl64(14, F_GETFL)                    = 0x2 (flags O_RDWR)
fcntl64(14, F_SETFL, O_RDWR|O_NONBLOCK) = 0
getpeername(14, {sa_family=AF_INET, sin_port=htons(3429), 
sin_addr=inet_addr("192.168.1.20")}, [16]) = 0
gettimeofday({1106285562, 100163}, NULL) = 0
fcntl64(14, F_SETFD, FD_CLOEXEC)        = 0
select(15, [3 4 8 14], [], [3 4 8 14], {30, 0}) = 1 (in [14], left {30, 0})
recv(14, "GET /am/convertLinks2local/ZScriptHTML_tryForm 
HTTP/1.1\r\nHost: our.server.com:8080\r\nUser-Agent: Mozilla/5.0 
(Windows; U; Wi
ndows NT 5.1; en-US; rv:1.7.5) Gecko/20041107 Firefox/1.0\r\nAccept: 
text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/pla
in;q=0.8,image/png,*/*;q=0.5\r\nAccept-Language: 
en-us,en;q=0.5\r\nAccept-Encoding: gzip,deflate\r\nAccept-Charset: 
ISO-8859-1,utf-8;q=0.7,*
;q=0.7\r\nKeep-Alive: 300\r\nConnection: keep-alive\r\nReferer: 
http://our.server.com:8080/am/convertLinks2local/ZPythonScriptHTML_editFo
rm\r\nCookie: 
tree-s=\"eJzTyCkw5NLIKTDiClZ3hANXW3WuAmOuxEQ9AIOOB9Q\"\r\nAuthorization: 
Basic ZHN3aWZ0OjR0aGZsMDBy\r\n\r\n", 4096, 0) = 643
gettimeofday({1106285562, 102930}, NULL) = 0
kill(8744, SIGRTMIN)                    = 0
kill(8744, SIGRTMIN)                    = 0
rt_sigprocmask(SIG_SETMASK, NULL, [RTMIN], 8) = 0
rt_sigsuspend([] <unfinished ...>
--- SIGRTMIN (Unknown signal 32) @ 0 (0) ---
<... rt_sigsuspend resumed> )           = -1 EINTR (Interrupted system call)
sigreturn()                             = ? (mask now [RTMIN])
select(15, [3 4 8 14], [], [3 4 8 14], {30, 0}) = 1 (in [8], left {29, 
900000})
read(8, "x", 8192)                      = 1
select(15, [3 4 8 14], [14], [3 4 8 14], {30, 0}) = 1 (out [14], left 
{30, 0})
send(14, "HTTP/1.1 302 Moved Temporarily\r\nServer: Zope/(Zope 2.7.3-0, 
python 2.4.0, linux2) ZServer/1.1\r\nDate: Fri, 21 Jan 2005 05:32:42
 GMT\r\nBobo-Exception-Line: 56\r\nContent-Length: 
0\r\nBobo-Exception-Value: See the server error log for 
details\r\nBobo-Exception-File: D
T_Raise.py\r\nBobo-Exception-Type: Redirect\r\nConnection: 
close\r\nLocation: 
http://our.server.com:8080/am/convertLinks2local\r\nContent
-Type: text/html;charset=iso-8859-1\r\n\r\n", 429, 0) = 429
select(15, [3 4 8 14], [], [3 4 8 14], {30, 0}) = 1 (in [14], left {30, 0})
rt_sigprocmask(SIG_SETMASK, NULL, [RTMIN], 8) = 0
rt_sigsuspend([] <unfinished ...>
--- SIGRTMIN (Unknown signal 32) @ 0 (0) ---
<... rt_sigsuspend resumed> )           = -1 EINTR (Interrupted system call)
sigreturn()                             = ? (mask now [RTMIN])
recv(14, "", 4096, 0)                   = 0
close(14)                               = 0
select(9, [3 4 8], [], [3 4 8], {30, 0}) = 1 (in [8], left {30, 0})
read(8, "x", 8192)                      = 1
select(9, [3 4 8], [], [3 4 8], {30, 0}) = 1 (in [3], left {29, 990000})
accept(3, {sa_family=AF_INET, sin_port=htons(3436), 
sin_addr=inet_addr("192.168.1.20")}, [16]) = 14
fcntl64(14, F_GETFL)                    = 0x2 (flags O_RDWR)
fcntl64(14, F_SETFL, O_RDWR|O_NONBLOCK) = 0
getpeername(14, {sa_family=AF_INET, sin_port=htons(3436), 
sin_addr=inet_addr("192.168.1.20")}, [16]) = 0
gettimeofday({1106285562, 261677}, NULL) = 0
fcntl64(14, F_SETFD, FD_CLOEXEC)        = 0
select(15, [3 4 8 14], [], [3 4 8 14], {30, 0}) = 1 (in [14], left {30, 0})
recv(14, "GET /am/convertLinks2local HTTP/1.1\r\nHost: 
our.server.com:8080\r\nUser-Agent: Mozilla/5.0 (Windows; U; Windows NT 
5.1; en-US;
 rv:1.7.5) Gecko/20041107 Firefox/1.0\r\nAccept: 
text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*
/*;q=0.5\r\nAccept-Language: en-us,en;q=0.5\r\nAccept-Encoding: 
gzip,deflate\r\nAccept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7\r\nKeep-Alive
: 300\r\nConnection: keep-alive\r\nReferer: 
http://our.server.com:8080/am/convertLinks2local/ZPythonScriptHTML_editForm\r\nCookie: 
tree-s
=\"eJzTyCkw5NLIKTDiClZ3hANXW3WuAmOuxEQ9AIOOB9Q\"\r\nAuthorization: Basic 
ZHN3aWZ0OjR0aGZsMDBy\r\n\r\n", 4096, 0) = 623
gettimeofday({1106285562, 264328}, NULL) = 0
kill(8744, SIGRTMIN)                    = 0
select(15, [3 4 8 14], [], [3 4 8 14], {30, 0}) = 0 (Timeout)
rt_sigprocmask(SIG_SETMASK, NULL, [RTMIN], 8) = 0
rt_sigsuspend([] <unfinished ...>
--- SIGRTMIN (Unknown signal 32) @ 0 (0) ---
<... rt_sigsuspend resumed> )           = -1 EINTR (Interrupted system call)
sigreturn()                             = ? (mask now [RTMIN])
select(15, [3 4 8 14], [], [3 4 8 14], {30, 0}) = 0 (Timeout)
rt_sigprocmask(SIG_SETMASK, NULL, [RTMIN], 8) = 0
rt_sigsuspend([] <unfinished ...>
--- SIGTERM (Terminated) @ 0 (0) ---
<... rt_sigsuspend resumed> )           = -1 EINTR (Interrupted system call)
getpid()                                = 8738
rt_sigaction(SIGTERM, {0x80e070, [], SA_RESTORER, 0x28da08}, {0x80e070, 
[], SA_RESTORER, 0x28da08}, 8) = 0
sigreturn()                             = ? (mask now [RTMIN])
rt_sigsuspend([] <unfinished ...>
+++ killed by SIGKILL +++
You have new mail in /var/spool/mail/root
[root at server Python-2.4]#




More information about the Zope mailing list