[Zope] Separate Log Files
emf
mindlace@imeme.net
Mon, 26 Nov 2001 21:58:26 -0700
This is a multi-part message in MIME format.
--------------070206010409060309000507
Content-Type: text/plain; charset=us-ascii; format=flowed
Content-Transfer-Encoding: 7bit
Vid Bijelic wrote:
> Hi,
>
> I have a two (for now) sites running on the same Zope.
> Is it possible to have Zope store log files
> separately for each site, so webalizer can easily
> generate site statistics for each site separately.
I generate the log files from Apache, as it seems more flexible. The
main things I had to do were to store the hostname and to separate the
entries with a delimiter I found useful. The entry I use in Apache is:
LogFormat
"%V|%h|%l|%u|%t|\"%r\"|%>s|%b|\"%{Referer}i\"|\"%{User-Agent}i\"" vcombined
Then I use a CustomLog directive to make sure vcombined gets used.
Finally, I use a script I wrote called LumberJack (it splits up logs :)
to separate and webalize the logs individually.
I've attached it. Hope that helps!
--
ethan mindlace fremen | iMeme - The most full featured Zope Host
http://mindlace.net | Root, ZEO, MySQL, Mailman, Unlimited Domains
iMeme Partner | http://iMeme.net
"It is our desire to remain what we are that limits us. -- Project 2501"
--------------070206010409060309000507
Content-Type: text/plain;
name="LumberJack.py"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
filename="LumberJack.py"
#!/usr/local/bin/python
# This is the LumberJack:
# It splits up logs,
# and wears high heels,
# suspenders and a bra.
import os, sys, string, re
def createDirs(base_path, domains):
    """Make sure every domain has a directory of its own under base_path.

    The directory is where that domain's split log file and its
    webalizer output end up.  base_path is used as a plain string
    prefix, so it has to end with a path separator already.
    """
    for name in domains:
        target = base_path + name
        if os.path.isdir(target):
            # left over from an earlier run; nothing to do
            continue
        os.mkdir(target)
class NSLookup:
    """Ask the nslookup command line tool about a single domain.

    Attributes set while the object is used:
      domain      -- the name passed to the constructor
      querytype   -- the record type of the most recent query
      lookup      -- lower-cased nslookup output of the most recent query
      lines       -- that output split into lines (set by wildCard)
      real_domain -- the parent domain found by wildCard, else None
    """

    # Kept as a class attribute so the tool can be relocated without
    # touching the methods.
    NSLOOKUP_BINARY = '/usr/sbin/nslookup'

    def __init__(self, domain):
        self.domain = domain
        self.querytype = None
        self.lookup = ''
        self.lines = []
        self.real_domain = None

    def ourDNS(self):
        """Return 1 if the NS query output mentions ns.imeme.net, else 0."""
        self.querytype = "ns"
        self._lookup()
        if self.lookup.find('ns.imeme.net') != -1:
            return 1
        return 0

    def wildCard(self):
        """Return 1 and set real_domain if this host maps onto a parent domain.

        Two cases count: a host named 'lists.<domain>', and a host whose
        TXT query output contains 'wildcard' together with a 'nameserver'
        line (the text before the first tab on that line becomes
        real_domain).  Returns 0 in every other case, so a true result
        always comes with a usable real_domain.
        """
        # Compare the full 'lists.' label: checking only five characters
        # also matched hosts such as 'listserv.example.com' and chopped
        # them to 'erv.example.com'.
        if self.domain[:6] == 'lists.' and len(self.domain) > 6:
            self.real_domain = self.domain[6:]
            return 1
        self.querytype = "txt"
        self._lookup()
        if self.lookup.find('wildcard') == -1:
            return 0
        self.lines = self.lookup.split('\n')
        for line in self.lines:
            if line.find('nameserver') != -1:
                self.real_domain = line.split('\t')[0]
                return 1
        # 'wildcard' was mentioned but no parent domain could be read
        return 0

    def _lookup(self):
        """Run nslookup for self.domain and store the lower-cased output.

        self.lookup is left as '' when the query is not run or the tool
        cannot be started.
        """
        import subprocess

        self.lookup = ''
        # The domain comes out of web server logs.  An empty name would
        # leave nslookup without a host argument and a leading '-' would
        # be read as an option, so neither is passed on.
        if not self.domain or self.domain[:1] == '-':
            return
        # An argument list (no shell) keeps shell metacharacters in the
        # domain from being interpreted.
        command = [self.NSLOOKUP_BINARY, '-norecurse',
                   '-querytype=' + self.querytype, self.domain]
        try:
            child = subprocess.Popen(command, stdout=subprocess.PIPE,
                                     universal_newlines=True)
        except OSError:
            # tool missing or not executable: treat as "no answer"
            return
        # communicate() reads everything, waits for the child and closes
        # the pipe.
        output = child.communicate()[0]
        self.lookup = output.lower()
def webalize(base_path):
    """run webalizer on each generated logfile

    Every entry of base_path is treated as a domain directory, except
    for the known housekeeping files listed below.  For each domain
    /usr/local/bin/webalizer is started with the same options the
    script has always used, reading <base_path><domain>/log and writing
    its output into <base_path><domain>.

    base_path is used as a plain string prefix and must end with a path
    separator.  A webalizer that cannot be started is reported on
    stderr and the remaining domains are still processed.
    """
    import subprocess

    # Filter by membership: removing these one after another inside a
    # single try block stopped at the first missing name and left the
    # later ones (e.g. raw_log) in the list to be webalized as domains.
    not_domains = ('cronlog.log', 'empty.file', 'raw_log', 'zope_log')
    domains = [entry for entry in os.listdir(base_path)
               if entry not in not_domains]
    for domain in domains:
        print("webalizing: %s" % domain)
        # Passed as an argument list rather than a shell string, so the
        # '*<domain>' pattern reaches webalizer untouched instead of
        # being glob-expanded by the shell against the current directory.
        command = [
            '/usr/local/bin/webalizer',
            '-n', domain,
            '-p',
            '-t', 'iMeme logs for ',
            '-o', base_path + domain,
            '-r', domain + '/',
            '-s', '*' + domain,
            base_path + domain + '/log',
        ]
        try:
            subprocess.call(command)
        except OSError:
            sys.stderr.write(
                "could not run /usr/local/bin/webalizer for %s\n" % domain)
def writeLogs(base_path, source_log, domains):
    """split the IP based log file into a series of
    domain based log files

    Each line of <base_path><source_log> is expected to be
    '|'-delimited with the host name in the first field.  The remaining
    fields are joined with single spaces and appended to
    <base_path><domain>/log for every entry of domains that the host
    equals or is a subdomain of.  The per-domain directories must exist
    already.  Empty domain names are ignored.
    """
    # An empty name would match every host and write to <base_path>/log.
    targets = [domain for domain in domains if domain]
    branchfiles = {}
    logfile = open(base_path + source_log, 'r')
    try:
        for domain in targets:
            branchfiles[domain] = open(base_path + domain + '/log', 'a')
        for line in logfile:
            line_items = line.split('|')
            if len(line_items) < 2:
                # no delimiter: nothing that could be written out
                continue
            host = line_items[0].lower()
            clean_line = ' '.join(line_items[1:])
            for domain in targets:
                # Match the whole name or a '.<domain>' suffix.  A plain
                # substring test also sent 'notspam.com' traffic into
                # the 'spam.com' log.
                if host == domain or host.endswith('.' + domain):
                    branchfiles[domain].write(clean_line)
    finally:
        # close everything that was opened, even after an error
        for branch in branchfiles.values():
            branch.close()
        logfile.close()
def collectDomains(base_path, source_log):
    """generate a unique list of accessed domains

    Reads <base_path><source_log> and takes the text before the first
    '|' of every line, lower-cased, as a host name.  Returns a dict
    whose keys are the distinct non-empty names; every value is None.
    """
    domains = {}
    log = open(base_path + source_log, 'r')
    try:
        for line in log:
            domain = line.split('|')[0].lower()
            if domain != '':
                domains[domain] = None
    finally:
        # the handle used to be left open for the rest of the run
        log.close()
    return domains
def cleanDomains(domains, base_path):
    """drop host names that contain characters other than letters,
    digits, '.' and '-'

    domains is a dict keyed by host name.  It is modified in place and
    also returned.  Each removed name is printed.  base_path is accepted
    for interface compatibility and is not used.

    Note: this does not fold subdomains into their parent domain.
    """
    # compiled once instead of once per domain
    illegal = re.compile('[^a-zA-Z0-9.-]')
    # iterate over a snapshot, since entries are deleted along the way
    for domain in list(domains.keys()):
        if illegal.search(domain):
            print('deleting ' + domain)
            del domains[domain]
    return domains
def realDomains(domains, path):
    """reduce domains to the names that logs should be kept for

    domains is a dict keyed by host name; it is modified in place.
    path is the log base directory, used as a plain string prefix.

    A name is kept without further checks when <path><name> already
    exists.  Otherwise it is kept only if NSLookup reports that we
    serve DNS for it; a name NSLookup identifies as a wildcard host is
    replaced by its real (parent) domain.  Empty names are always
    dropped.  Returns the remaining names as a list.
    """
    # iterate over a snapshot, since entries are added and deleted
    for domain in list(domains.keys()):
        # Checked first: <path> + '' is the base directory itself, which
        # exists, so the empty name used to slip through the test below.
        if domain == '':
            del domains[domain]
            continue
        # first we check if we've already got logs. If so,
        # we can assume we are safe. initial maintenance required.
        if os.path.exists(path + domain):
            continue
        dns = NSLookup(domain)
        # if we don't do DNS for it, don't make logs.
        if not dns.ourDNS():
            del domains[domain]
            # Stop here: falling through to the wildcard check deleted
            # the same key a second time and raised KeyError.
            continue
        # also check if this dns is wildcarded.
        if dns.wildCard():
            del domains[domain]
            real_domain = getattr(dns, 'real_domain', None)
            if real_domain:
                domains[real_domain] = None
    return list(domains.keys())
def createLogs(base_path, source_log):
    """Split source_log, found inside base_path, into per-domain logs
    and run the statistics over them."""
    # collect the host names, weed out the malformed ones, then keep
    # only those that pass the realDomains checks
    seen = collectDomains(base_path, source_log)
    hosted = realDomains(cleanDomains(seen, base_path), base_path)
    # realDomains hands back a list, so it can be joined directly
    print('\n'.join(hosted))
    createDirs(base_path, hosted)
    writeLogs(base_path, source_log, hosted)
    # arguably, "webalize" should be broken out of this,
    # because which analysis program you want is independent
    # of the rest of this code. whatever.
    webalize(base_path)
def test():
    """Run only the domain discovery steps on the default iMeme log
    and return the resulting list of domains."""
    log_dir = '/var/log/imeme/'
    log_name = 'raw_log'
    candidates = cleanDomains(collectDomains(log_dir, log_name), log_dir)
    return realDomains(candidates, log_dir)
def createAllLogs():
    """Process the default iMeme raw log in its default location."""
    default_dir = '/var/log/imeme/'
    default_log = 'raw_log'
    createLogs(default_dir, default_log)
def main(argv):
    """Command line entry point; argv is the full sys.argv style list.

    No arguments          -- process /var/log/imeme/raw_log
    help, -h or -?        -- print the usage text
    here                  -- process raw_log in the current directory
    <base dir> <log name> -- process that log inside that directory

    Returns the process exit status: 0 on success, 2 when the
    arguments are incomplete.
    """
    usage = """
The LumberJack can accept a
base directory (don't forget trailing / )
and a source log name ( must be inside base dir )
as the first and second argument.
Otherwise,
/usr/local/iMeme_scripts/LumberJack.py here
will create logs in the directory you invoked it in
(must have a raw_log file available)"""
    if len(argv) == 1:
        createLogs('/var/log/imeme/', 'raw_log')
        return 0
    command = argv[1]
    # A membership test: ('help' or '-h' or '-?') evaluates to just
    # 'help', so the two dash forms never matched.
    if command in ('help', '-h', '-?'):
        print(usage)
        return 0
    if command == 'here':
        # createLogs takes the directory (with trailing separator) and
        # the log name as two separate arguments.
        createLogs(os.getcwd() + '/', 'raw_log')
        return 0
    if len(argv) < 3:
        # a base directory without a log name used to end in IndexError
        sys.stderr.write("missing source log name; try 'help'\n")
        return 2
    createLogs(command, argv[2])
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
--------------070206010409060309000507--