#!/usr/bin/env python3
#============================================================================================
# Join any 2 mongo collections
# parameters: collect1, key1, collect2, key2
# can be supplied after the url?collect1=...   or via command line args
# call web service to obtain collections 
# convert collections 2 objects (list of dictionaries) 
# join the 2 objects on the 2 keys
# return the joined data using JSON format
#============================================================================================
import sys                                 #to obtain command args
import urllib.request                      #urllib.request module allows fetching of urls 
import urllib.parse                        #urllib.parse module allows you to url encode
import re                                  #for regular expression 
import json                                #to convert list of dictionary to JSON
import cgi                                 #import cgi 
import cgitb                               #import cgi traceback for error handling
cgitb.enable()

print("Content-Type: text/html \n")               #required cgi response header(for web only)

param = cgi.FieldStorage()                        #get ONLINE web parameters (if any)
collect1 = param.getvalue('collect1')             #get value for web param collect1=
key1     = param.getvalue('key1')
collect2 = param.getvalue('collect2')
key2     = param.getvalue('key2')

if len(sys.argv) > 1:                             #BATCH command line arguments were entered
    if len(sys.argv) < 5:                             
        print("<p style='color:red'>Please provide name for collect1, key1, collect2, key2") 
        exit()
    collect1 = sys.argv[1]                        #input arguments   
    key1     = sys.argv[2]                              
    collect2 = sys.argv[3]                              
    key2     = sys.argv[4]                              
    
url = "https://workshop.sps.nyu.edu/~sultans/util/mongo/mongo.php"    #The mongo web service

#-------------------------------------------------------------------------------------
#encode64: encrypt user:pswd to base64
#          this is not needed if your website is not htaccess secured
#-------------------------------------------------------------------------------------
def encode64(input):
    import base64                              #encode base64

    input = input.replace('Basic','')          #remove 'Basic'
    input = input.strip()                      #trim whitespace
    bArray      = input.encode()               #convert the string to byte array
    encoded_arr = base64.b64encode(bArray)     #encode it base64
    encoded_str = encoded_arr.decode()         #convert the byte array back to string
    encoded_str = 'Basic ' + encoded_str       #re-add 'Basic'
    return(encoded_str)

authValue    = 'Basic student:123'
encryptValue = encode64(authValue)             # Basic c3R1ZGVudDoxMjM=

#-------------------------------------------------------------------------------------
#getMongoData: Retrieve Mongo data by calling a web service
#              https://workshop.sps.nyu.edu/~sultans/util/mongo/mongo.php?param
#       param: user=&pswd=&db=demo&collect=&api=native&format=native&cmd=mongoCmd
#    mongoCmd: cmd=db.instructor.find({lname:"Sultan"})
#-------------------------------------------------------------------------------------
def getMongoData(collection, query=''):

    global url

    user     = ''                                         #various parameters to supply to the web service           
    pswd     = ''           
    db       = 'demo'              
    collect  = ''              
    api      = 'native'        
    format   = 'native'

    mongoCmd = F'db.{collection}.find({query})'          #perform specific find for 1 instructor
    mongoCmd = urllib.parse.quote(mongoCmd)              #URL encode the mongoCmd

    url += F"?user={user}&pswd={pswd}&db={db}&collect={collect}&api={api}&format={format}"
    url += F"&cmd={mongoCmd}"

    try:
        req  = urllib.request.Request(url)               #create a request object
        req.add_header('Authorization', encryptValue)    #add 'Authorization: value' header (optional) 
        resp = urllib.request.urlopen(req)               #send the request
        json = resp.read()                               #read the returned data
        json = json.decode('utf-8')                      #convert it from array of bytes to string 
    except:
        print(F"<p style='color:red'>Error calling web service</p>{url}")
        sys.exit() 

    json = json.replace("ObjectId", "")                  #get rid of ObjectId
    json = json.replace("ISODate",  "")                  #get rid of ISODate
    json= '[' + json+ ']'                                #for multiple documents, make it a list
    json = re.sub("}\s*{", "},\n{", json)                #for every }{  make it },{

    try:
        obj = eval(json)                                 #convert JSON to Python list of dictionaries
    except:
        print(F"<p style='color:red'>Error converting your returned data</p>{json}")
        sys.exit()

    return(obj)        

#-------------------------------------------------------------------------------------
#join: Join 2 objects based on provided keys
#-------------------------------------------------------------------------------------
def join(object1, key1, object2, key2):

    key1a = ''
    key2a = ''
    
    try:
        key1list = key1.split('.')                            #if compound key includes . (e.g stu.ssn) 
        key1  = key1list[0]                                   #split and make it multi-part [stu][ssn]
        key1a = key1list[1]
    except: pass
    try:
        key2list = key2.split('.')
        key2  = key2list[0]
        key2a = key2list[1]
    except: pass

    list_dict = []                                           #create an empty list
    
    try:
        for obj1 in object1:                                 #loop through object1
            if key1a: key1join = obj1[key1][key1a]
            else    : key1join = obj1[key1]

            for obj2 in object2:                             #loop through object2            
                if key2a: key2join = obj2[key2][key2a]
                else    : key2join = obj2[key2]

                if key1join == key2join:                     #join on the keys
                    dict = {}                                #create an empty dictionary
                    dict.update(obj1)                        #append it with obj1
                    dict.update(obj2)                        #append it with obj2
                    list_dict.append(dict)                   #append the dictionary to the list  
    except:
        pass
        
    return(list_dict)  

#-------------------------------------------------------------------------------------
# main code
#-------------------------------------------------------------------------------------

object1 = getMongoData(collect1)                            #get mongo data from instructor collection                   
object2 = getMongoData(collect2)                            #get mongo data from class collection

joinedObj = join(object1,key1,object2,key2)                 #join the 2 objects

if not joinedObj:
    print(F"<p style='color:red'>Could not join collections {collect1} and {collect2} on {key1}={key2}")
    exit()

#json = json.dumps(joinedObj, indent=4)                     #convert list of dictionary to JSON
json = json.dumps(joinedObj)

json = json.replace("[{", "[\n{", 1)                        #for better presentation
json = json.replace("}, {", "},\n{")                        #for better presentation

print('<br><pre>')
print(json)

print("Total Number of Rows:", len(joinedObj))





#=== link to see the python code =================================================
import os, sys
sys.path.insert(0,'/home/s/sultans/web/python/demo')
import zCode                          #import func to display the Python code
filename = os.path.abspath(__file__)  #get absolute file name 
zCode.display(filename)               #call it
#=================================================================================