[Zope] Separate Log Files

Mon, 26 Nov 2001 21:58:26 -0700

This is a multi-part message in MIME format.
--------------070206010409060309000507
Content-Type: text/plain; charset=us-ascii; format=flowed
Content-Transfer-Encoding: 7bit

Vid Bijelic wrote:

> Hi,
> 
> I have a two (for now) sites running on the same Zope.
>  Is it possible to have Zope store log files
> separately for each site, so webalizer can easily
> generate site statistics for each site separately.

I generate the log files from apache, as it seems more flexible. The 
main thing I had to do was: Store the hostname, and separate the entries 
by some delimiter I found useful. The entry I use in apache is:

LogFormat 
"%V|%h|%l|%u|%t|\"%r\"|%>s|%b|\"%{Referer}i\"|\"%{User-Agent}i\"" vcombined

Then I use a CustomLog directive to make sure vcombined gets used.

Finally, I use a script I wrote called LumberJack (it splits up logs :) 
to separate and webalize the logs individually.

I've attached it.  Hope that helps!
-- 
ethan mindlace fremen  |  iMeme - The most full featured Zope Host
http://mindlace.net    |  Root, ZEO, MySQL, Mailman, Unlimited Domains
iMeme Partner          |  http://iMeme.net
"It is our desire to remain what we are that limits us. -- Project 2501"

--------------070206010409060309000507
Content-Type: text/plain;
 name="LumberJack.py"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="LumberJack.py"

#!/usr/local/bin/python
# This is the LumberJack:
# It splits up logs, 
# and wears high heels,
# suspenders and a bra.

import os, sys, string, re

def createDirs(base_path, domains):
    """Create one directory per domain for its log and webalizer files.

    base_path -- directory prefix; it is concatenated directly with each
                 domain name, so it must end with a path separator.
    domains   -- iterable of domain names.

    Directories that already exist are left untouched.
    """
    for domain in domains:
        target = base_path + domain
        if not os.path.isdir(target):
            os.mkdir(target)

class NSLookup:
    """Answer DNS questions about one domain by shelling out to nslookup.

    The lower-cased output of the most recent query is kept in
    self.lookup.  When wildCard() succeeds, the domain that should
    receive the log entries is stored in self.real_domain.
    """

    # Location of the nslookup binary and the name server whose presence
    # in an NS answer marks a domain as ours.  Class attributes so a
    # subclass or instance can override them.
    nslookup_path = '/usr/sbin/nslookup'
    our_nameserver = 'ns.imeme.net'

    # The domain is interpolated into a shell command line, so only
    # plain host-name characters are accepted, and never a leading '-'
    # (nslookup would read it as an option).
    _safe_domain = re.compile(r'^[a-zA-Z0-9][a-zA-Z0-9.-]*\Z')

    def __init__(self,domain):
        self.domain = domain
        self.lookup = ''

    def ourDNS(self):
        """Return 1 if the NS lookup output names our name server, else 0."""
        self.querytype = "ns"
        self._lookup()
        if self.lookup.find(self.our_nameserver.lower()) != -1:
            return 1
        return 0

    def wildCard(self):
        """Return 1 if this name's traffic belongs to another domain.

        A name of the form 'lists.<domain>' belongs to <domain>.
        Otherwise a TXT lookup is made; if its output contains
        'wildcard', the first tab-separated field of the first output
        line containing 'nameserver' is taken as the real domain.

        On success self.real_domain is set.  Returns 0 otherwise.
        """
        prefix = 'lists.'
        # Compare the whole prefix including the dot: checking only
        # 'lists' would also catch names such as 'listserv.example.com'
        # and cut them in the wrong place.
        remainder = self.domain[len(prefix):]
        if self.domain[:len(prefix)] == prefix and remainder != '':
            self.real_domain = remainder
            return 1
        self.querytype = "txt"
        self._lookup()
        if self.lookup.find('wildcard') == -1:
            return 0
        self.lines = self.lookup.split('\n')
        for line in self.lines:
            if line.find('nameserver') != -1:
                self.real_domain = line.split('\t')[0]
                return 1
        return 0

    def _lookup(self):
        """Run nslookup for self.domain with self.querytype.

        The lower-cased output is stored in self.lookup.  Names that are
        not plain host names are never passed to the shell; they yield
        an empty result instead.
        """
        if not self._safe_domain.match(self.domain):
            self.lookup = ''
            return
        command = '%s -norecurse -querytype=%s %s' % (
            self.nslookup_path, self.querytype, self.domain)
        pipe = os.popen(command)
        try:
            output = pipe.read()
        finally:
            pipe.close()
        self.lookup = output.lower()

def _webalizerCommand(base_path, domain):
    """Build the webalizer command line for one domain directory."""
    target = base_path + domain
    return ('/usr/local/bin/webalizer -n ' + domain +
            ' -p ' +
            ' -t "iMeme logs for "' +
            ' -o ' + target +
            ' -r ' + domain + '/' +
            # Quoted so the shell cannot expand the '*' against files in
            # the current directory before webalizer sees the pattern.
            ' -s "*' + domain + '"' +
            ' ' + target + '/log')


def webalize(base_path):
    """Run webalizer on each generated logfile.

    base_path -- directory prefix (must end with a path separator).
                 Every entry in it, apart from the known bookkeeping
                 files, is treated as a domain directory that contains
                 a file named 'log'.

    Output for each domain is written into that domain's directory.
    """
    # Entries of base_path that are not domain directories.  Each name
    # is skipped independently, so a missing one does not leave the
    # others in the list.
    not_domains = ('cronlog.log', 'empty.file', 'raw_log', 'zope_log')
    domains = [name for name in os.listdir(base_path)
               if name not in not_domains]
    for domain in domains:
        print("webalizing: %s" % domain)
        os.system(_webalizerCommand(base_path, domain))

def _hostInDomain(host, domain):
    """Return true if host is domain itself or one of its subdomains."""
    return host == domain or host.endswith('.' + domain)


def writeLogs(base_path, source_log, domains):
    """Split the combined log file into a series of per-domain log files.

    base_path  -- directory prefix (must end with a path separator).
    source_log -- name of the '|'-delimited log inside base_path; the
                  first field of each line is the requested host name.
    domains    -- domain names; base_path + domain must already be a
                  directory.

    Each entry is appended to <base_path><domain>/log with the host
    field dropped and the remaining fields joined by single spaces.  An
    entry is written for every listed domain that the (lower-cased)
    host equals or is a subdomain of.
    """
    logfile = open(base_path + source_log, 'r')
    branchfiles = {}
    try:
        for domain in domains:
            branchfiles[domain] = open(base_path + domain + '/log', 'a')
        while 1:
            line = logfile.readline()
            if not line:
                break
            fields = line.split('|')
            host = fields[0].lower()
            entry = ' '.join(fields[1:])
            for domain in domains:
                # Match on a label boundary: a plain substring test
                # would file 'notfoo.com' under 'foo.com'.
                if _hostInDomain(host, domain):
                    branchfiles[domain].write(entry)
    finally:
        # Close everything that was opened, even after an error.
        logfile.close()
        for branch in branchfiles.values():
            branch.close()

def collectDomains(base_path, source_log):
    """Generate a unique collection of accessed domains.

    base_path  -- directory prefix (must end with a path separator).
    source_log -- name of the '|'-delimited log inside base_path.

    Returns a dictionary used as a set: the keys are the lower-cased
    first fields of the log lines, the values are all None.  Lines
    without a delimiter and lines with an empty first field are ignored.
    """
    log = open(base_path + source_log, 'r')
    domains = {}
    try:
        while 1:
            line = log.readline()
            if not line:
                break
            if line.find('|') == -1:
                # No delimiter means no host field (e.g. a blank line).
                continue
            domain = line.split('|')[0].lower()
            if domain != '':
                domains[domain] = None
    finally:
        log.close()
    return domains

def cleanDomains(domains, base_path):
    """Remove names that cannot be host names from domains.

    domains   -- dictionary keyed by domain name; modified in place.
    base_path -- unused; kept so existing callers keep working.

    Any key containing a character other than letters, digits, '.' or
    '-' is reported on stdout and deleted.  Subdomains are not
    collapsed here.  Returns the same dictionary.
    """
    illegal = re.compile('[^a-zA-Z0-9.-]')
    # Iterate over a snapshot of the keys: the dictionary shrinks as
    # bad names are removed.
    for domain in list(domains.keys()):
        if illegal.search(domain):
            print('deleting ' + domain)
            del domains[domain]
    return domains

def realDomains(domains, path):
    """Reduce domains to the names that should get their own logs.

    domains -- dictionary keyed by domain name; modified in place.
    path    -- directory prefix (must end with a path separator) that
               holds the per-domain log directories.

    A name whose directory already exists under path is kept without
    any lookup.  Any other name is dropped unless NSLookup reports our
    DNS for it; a surviving name that is wildcarded is replaced by the
    real domain NSLookup found.  The empty name is always dropped.

    Returns the remaining names as a list.
    """
    # Iterate over a snapshot of the keys: entries are deleted and
    # added while walking.
    for domain in list(domains.keys()):
        if domain == '':
            del domains[domain]
            continue
        # first we check if we've already got logs. If so,
        # we can assume we are safe. initial maintenance required.
        if os.path.exists(path + domain):
            continue
        dns = NSLookup(domain)
        # if we don't do DNS for it, don't make logs.
        if not dns.ourDNS():
            del domains[domain]
            # Already removed; falling through to the wildcard check
            # would delete the same key a second time.
            continue
        # also check if this dns is wildcarded.
        if dns.wildCard():
            del domains[domain]
            if dns.real_domain != '':
                domains[dns.real_domain] = None
    return list(domains.keys())

def createLogs( base_path, source_log):
    """Split source_log, found in base_path, into per-domain logs.

    Collects the domains named in the log, filters them, prints the
    surviving names one per line, creates their directories, writes
    the per-domain logs and finally runs webalize on base_path.
    """
    candidates = cleanDomains(collectDomains(base_path, source_log),
                              base_path)
    # realDomains hands back a list, not the dictionary.
    real_domains = realDomains(candidates, base_path)
    print('\n'.join(real_domains))
    createDirs(base_path, real_domains)
    writeLogs(base_path, source_log, real_domains)
    # Arguably "webalize" should be broken out of this, because which
    # analysis program you want is independent of the rest of this
    # code.  Whatever.
    webalize(base_path)

def test():
    """Return the real domains found in /var/log/imeme/raw_log.

    Runs only the collecting and filtering steps of createLogs.
    """
    base_path = '/var/log/imeme/'
    source_log = 'raw_log'
    candidates = cleanDomains(collectDomains(base_path, source_log),
                              base_path)
    return realDomains(candidates, base_path)

def createAllLogs():
    """Run createLogs on raw_log in the default /var/log/imeme/ directory."""
    base_path = '/var/log/imeme/'
    source_log = 'raw_log'
    createLogs(base_path, source_log)

if __name__ == '__main__':
    # Command-line entry point:
    #   no arguments       -> process /var/log/imeme/raw_log
    #   help | -h | -?     -> print the usage text
    #   here               -> process raw_log in the current directory
    #   <base_dir> <log>   -> process <log> inside <base_dir>
    usage = """
The LumberJack can accept a 
base directory (don't forget trailing / )
and a source log name ( must be inside base dir )
as the first and second argument.
Otherwise, 
/usr/local/iMeme_scripts/LumberJack.py here
will create logs in the directory you invoked it in 
(must have a raw_log file available)"""

    args = sys.argv[1:]
    if not args:
        createLogs('/var/log/imeme/', 'raw_log')
    elif args[0] in ('help', '-h', '-?'):
        # A membership test is needed here: comparing against
        # ('help' or '-h' or '-?') only ever matches 'help'.
        print(usage)
    elif args[0] == 'here':
        # createLogs takes the directory (with trailing separator) and
        # the log name as two separate arguments.
        createLogs(os.getcwd() + '/', 'raw_log')
    elif len(args) >= 2:
        createLogs(args[0], args[1])
    else:
        # A base directory without a log name: explain instead of
        # failing with an IndexError.
        print(usage)
        sys.exit(1)

--------------070206010409060309000507--