That's it ... it took me a while to fiddle this code apart but it worked. Thanks to all of you for your help. --- "Farrell, Troy" <troy.farrell@wilcom.com> wrote:
I am writing a log file parser to do (buzzword alert) "Streaming Media Metrics". I have many logfiles from streaming video/audio servers. My Streaming Provider makes the logs available on an ftp server. I import the logfiles (by hand for now, soon by Xron), and parse them with this python script (not external method):
"""
This is a set of Python functions that parse and report the
information contained in NetShow Server log files.
"""
# We begin by splitting each line on whitespace.
# Unlike Real Media Servers, NetShow Server log
# files are entirely separated by spaces. This
# makes the code really easy.
import string
# Parse a NetShow Server log held in the string `logfile` and hand every
# well-formed record to the SQL method context.sqlStreamInsertIntoNetShowRaw.
#
# Inputs (supplied by the hosting environment, not defined here):
#   logfile -- the complete text of one log file
#   context -- object providing sqlStreamInsertIntoNetShowRaw(**columns)
# Result:
#   badline -- number of lines that were not inserted because they did not
#              have exactly FIELD_COUNT whitespace-separated elements
#
# The names `line`, `e`, `loe` and `badline` are kept from the original
# script because the script continues beyond this excerpt.

# Column names of one log record, in the order the values appear on a line.
# Each name is also the keyword under which its value is passed to the SQL
# method, so this tuple is the single place where the record layout is
# defined.  A 45th column, cs_uri_query (e[44]), was already disabled in the
# original script and is still not read.
NETSHOW_FIELDS = (
    "c_ip",
    "date",
    "time",
    "c_dns",
    "cs_uri_stem",
    "c_starttime",
    "x_duration",
    "c_rate",
    "c_status",
    "c_playerid",
    "c_playerversion",
    "c_playerlanguage",
    "cs_user_agent",
    "cs_referer",
    "c_hostexe",
    "c_hostexever",
    "c_os",
    "c_osversion",
    "c_cpu",
    "filelength",
    "filesize",
    "avgbandwidth",
    "protocol",
    "transport",
    "audiocodec",
    "videocodec",
    "channel_url",
    "sc_bytes",
    "c_bytes",
    "s_pkts_sent",
    "c_pkts_received",
    "c_pkts_lost_client",
    "c_pkts_lost_net",
    "c_pkts_lost_cont_net",
    "c_resendreqs",
    "c_pkts_recovered_ecc",
    "c_pkts_recovered_resent",
    "c_buffercount",
    "c_totalbuffertime",
    "c_quality",
    "s_ip",
    "s_dns",
    "s_totalclients",
    "s_cpu_util",
)

# A complete record has exactly this many elements (44).
FIELD_COUNT = len(NETSHOW_FIELDS)

# number of bad lines in the log file
badline = 0

for line in logfile.split("\n"):  # process each line of the log
    e = line.split()  # e is a list of each element, split by whitespace
    loe = len(e)      # number of elements on this line

    # split() never yields empty strings, so e[0][0] is safe whenever loe > 0.
    is_comment = loe > 0 and e[0][0] == '#'

    if loe == FIELD_COUNT:
        # A comment line can, by coincidence, have the right element count;
        # it is skipped silently and is not counted as bad.
        if not is_comment:
            # insert all that junk into PostgreSQL
            record = dict(zip(NETSHOW_FIELDS, e))
            # For debugging, print each name/value pair of `record` here.
            context.sqlStreamInsertIntoNetShowRaw(**record)
    else:
        # loe != FIELD_COUNT: we have an error
        if loe == 0:
            print("*** An empty line in the log file! ***")
            print("*** Ususally this is the end of the log ***")
        elif not is_comment:
            # Comment lines are expected in a log, so they are not reported.
            outline = "###A faulty line of log file: %s with %d units" % (e[0], loe)
            print(outline)
        # NOTE(review): the original indentation was lost in the mail
        # archive.  The counter is assumed to cover every line of the wrong
        # length (empty, comment or faulty) -- confirm against the original.
        badline = badline + 1
=== message truncated === __________________________________________________ Do You Yahoo!? Yahoo! Auctions - Buy the things you want at great prices! http://auctions.yahoo.com/