#!/usr/bin/env python3
#=========================================================================================
# Read an HTTP access log one record at a time
# Build a MySQL "insert into web_log" statement from each record that contains searchFor
# Print each statement (writing the statements to the output file is currently disabled)
#=========================================================================================
 
# Input file: the access log that is scanned.
fileIn = "/var/log/httpd/access_log-20170813"
# Output file: passed to read_write alongside the input file.
fileOut = "/home/sultans/web/data/web_log.txt"

# Only records containing this text are processed.
searchFor = "Apr/2018"

#========================================================================================
# format_date: format HTTP date to MySQL date
#========================================================================================
def format_date(date):
    """Convert an HTTP log timestamp into a ``yyyy/mm/dd hh:mm:ss`` string.

    The input is read by fixed character positions, matching the layout
    ``dd/Mon/yyyy:hh:mm:ss``; anything from position 20 onwards is ignored.

    Raises KeyError when characters 3-5 are not one of the English
    three-letter month abbreviations (``Jan`` .. ``Dec``).
    """
    month_names = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    # Map each abbreviation to its zero-padded month number ('Jan' -> '01').
    month_numbers = {abbr: '%02d' % pos
                     for pos, abbr in enumerate(month_names, start=1)}

    month = month_numbers[date[3:6]]
    day, year, clock = date[0:2], date[7:11], date[12:20]
    return '{}/{}/{} {}'.format(year, month, day, clock)
    
#========================================================================================
# read_write: read data, search and write
#========================================================================================
def _insert_statement(rec):
    """Build the ``insert into web_log`` statement for one access-log record.

    Fields are located by scanning for their delimiters in order: the first
    space (end of the client address), ``[`` and ``]`` (timestamp), the first
    ``"`` and the following ``HTTP`` (request line), then the first ``http``
    after that and the space that follows it (requesting page).

    Returns the statement text, or None when any of those delimiters is
    missing from the record.

    Raises KeyError (from format_date) when the bracketed timestamp does not
    carry a recognised month abbreviation.
    """
    field_limit = 250                       # longest requested/requestor value kept

    # Client address: everything before the first space.
    end = rec.find(' ')
    if end == -1:
        return None
    ip = rec[:end]

    # Request date: the text between '[' and ']'.
    start = rec.find('[', end)
    end = rec.find(']', start) if start != -1 else -1
    if end == -1:
        return None
    date = format_date(rec[start + 1:end])

    # Request line: from the opening quote up to the character before 'HTTP'.
    start = rec.find('"', end)
    end = rec.find('HTTP', start) if start != -1 else -1
    if end == -1:
        return None
    # Split on the first space so that methods of any length (GET, POST,
    # OPTIONS, ...) are separated cleanly from the requested resource.
    # NOTE(review): the method is the whole first token rather than a fixed
    # four characters - confirm the web_log column is wide enough.
    req_method, _, requested = rec[start + 1:end - 1].partition(' ')

    # Requesting page: the first 'http' after the request line, up to the
    # next space; the character before that space (closing quote) is dropped.
    start = rec.find('http', end)
    end = rec.find(' ', start) if start != -1 else -1
    if end == -1:
        return None
    scheme = 'https://' if rec.startswith('https://', start) else 'http://'
    requestor = rec[start + len(scheme):end - 1]

    # O/S and browser: the remainder of the record minus its last two
    # characters.
    # NOTE(review): the 26-character skip is a fixed offset inherited from
    # the original code - confirm which prefix it is meant to drop.
    os_browser = rec[end + 26:len(rec) - 2]

    # Neutralise characters that would break the quoted SQL literals.
    # NOTE(review): backslashes are passed through unchanged; verify how the
    # target database treats them inside string literals.
    req_method = req_method.replace("'", ' ')
    requested = requested.replace('%', ' ').replace("'", ' ')[:field_limit]
    requestor = requestor.replace("'", ' ')[:field_limit]
    os_browser = (os_browser.replace('"', ' ')
                            .replace("'", ' ')
                            .replace(';', ' '))

    values = "','".join((ip, date, requestor, req_method, requested, os_browser))
    return "insert into web_log values('" + values + "');"


def read_write(fileIn, fileOut):
    """Print an SQL insert statement for every matching record of a log file.

    Each line of ``fileIn`` that contains the module-level ``searchFor`` text
    is parsed into client address, date, requesting page, request method,
    requested resource and O/S-browser text; the resulting statement is
    printed to standard output.  Records that lack one of the expected
    delimiters are skipped.

    Parameters:
        fileIn:  path of the log file to read.
        fileOut: path of the output file.  Currently unused - statements are
                 only printed; the parameter is kept so existing callers
                 keep working.

    Raises:
        OSError:  if ``fileIn`` cannot be opened.
        KeyError: if a matching record has an unrecognised month in its
                  timestamp (raised by format_date).
    """
    # The with-statement closes the file even if parsing raises.
    with open(fileIn, 'r') as log_file:
        for rec in log_file:
            if searchFor not in rec:
                continue
            statement = _insert_statement(rec)
            if statement is not None:
                print(statement)

#===================================================================================
# main code
#===================================================================================
# Run the conversion using the input and output paths defined at the top of the file.
read_write(fileIn=fileIn, fileOut=fileOut)