#!/usr/bin/env python3

# Program to import already-converted, Mongo-friendly xref JSON metadata files
# (a single file or a whole directory of them) into a MongoDB collection

import pymongo, argparse
import json, re, os, sys
import threading, concurrent.futures
import alive_progress

import logging

# Module-level placeholder for a logger object; starts out unset (None).
Logger  = None
# Verbosity level; main() overwrites it with the number of -v/--verbose flags
# given on the command line (it is also used as that option's default).
Verbose = 0

                
def openMongoDBCollection(mongodb_uri,mongodb_database,mongodb_collection):
    """Create a MongoDB client and return a handle to one collection.

    mongodb_uri        -- connection string passed to pymongo.MongoClient
    mongodb_database   -- name used to select the database on the client
    mongodb_collection -- name used to select the collection in that database

    Returns the object obtained by subscripting the client with the database
    name and then the database with the collection name.
    """
    logging.info("Connecting to MongoDB Database")

    client = pymongo.MongoClient(mongodb_uri)
    return client[mongodb_database][mongodb_collection]


def readMongoJsonFile(full_input_filename):
    """Load a JSON file and return its items as one comma-separated string.

    The file is parsed as JSON and every item of the top-level value is
    re-serialised with json.dumps; the pieces are joined with "," so the
    result has the form ``{item},{item},{item}`` (no surrounding brackets).

    full_input_filename -- path of the JSON file to read

    Returns the joined string, or None when the top-level value is empty or
    when reading/parsing fails (the failure is logged at CRITICAL level
    rather than raised, so one bad file does not abort a batch run).
    """
    try:
        # Read explicitly as UTF-8 rather than relying on the platform's
        # default text encoding, which differs between systems.
        with open(full_input_filename, "r", encoding="utf-8") as inputFile:
            json_items = json.load(inputFile)

        # len() is used on purpose: a top-level value without a length
        # (e.g. null) raises here and is reported by the handler below.
        if len(json_items) == 0:
            return None

        # Single join instead of repeated string concatenation plus a
        # trailing-comma trim.
        return ",".join(json.dumps(item) for item in json_items)

    except Exception as inst:
        # Best-effort: report the failure and let the caller carry on.
        logging.critical("Failed to process " + full_input_filename + " with exception " + str(inst))
        return None


def processMongoJsonFile(full_input_filename,mongodb_collection):
    """Read one Mongo-friendly JSON file and insert its records.

    full_input_filename -- path of the JSON file to import
    mongodb_collection  -- collection object providing insert_many(), or None

    Returns the string "Processed: <filename>" once the file has been
    handled, whether or not the insert succeeded (failures are logged, not
    raised). Exits the process with status 1 when no collection is given.
    """
    # Check the collection first so nothing is read when there is nowhere to
    # store it. Compare by identity: PyMongo asks callers to test collection
    # objects against None explicitly rather than via truthiness/equality.
    if mongodb_collection is None:
        logging.critical("No MongoDB collection URL to connect to.")
        sys.exit(1)

    converted_md_json_string = readMongoJsonFile(full_input_filename)

    # readMongoJsonFile returns None for an empty or unreadable file; without
    # this guard the string concatenation below fails with a TypeError that
    # would be misreported as an insert failure.
    if converted_md_json_string is None:
        logging.warning("   Skipped " + full_input_filename + " (no records could be read)")
        return "Processed: " + full_input_filename

    try:
        # The reader returns "{item},{item}"; wrap in brackets to get a list
        converted_md = json.loads("[" + converted_md_json_string + "]")
        mongodb_collection.insert_many(converted_md)
        logging.info("   Inserted " + full_input_filename)
    except Exception as inst:
        # Best-effort per file: report and continue with the remaining files
        logging.critical("Failed to insert " + full_input_filename + " with exception " + str(inst))

    return "Processed: " + full_input_filename


def listDirFiles(input_dirname,maxfile_count):
    """Collect the paths of the ".json" entries found directly in a directory.

    input_dirname -- directory to scan (not recursive)
    maxfile_count -- stop once this many paths have been collected; a value
                     that the running count never equals (such as None or 0)
                     means every matching entry is returned

    Returns a list of paths formed by joining input_dirname with each
    matching entry name, in the order os.listdir yields them.
    """
    matches = []

    for entry in os.listdir(input_dirname):
        if not entry.endswith(".json"):
            continue

        matches.append(os.path.join(input_dirname, entry))

        # The list length doubles as the running file count
        if len(matches) == maxfile_count:
            break

    return matches

    
#process a directory of Mongo-friendly json metadata files
def processDir(input_dirname, num_threads,maxfile_count, mongodb_collection):
    """Import every JSON file of a directory using a pool of worker threads.

    input_dirname      -- directory whose ".json" files are imported
    num_threads        -- max_workers value for the thread pool
    maxfile_count      -- cap on the number of files, passed to listDirFiles
    mongodb_collection -- collection handed to each processMongoJsonFile call

    One task per file is submitted up front; as tasks finish, their result is
    printed (or their exception logged at CRITICAL level) and the progress
    bar advances by one. Returns None.
    """
    pending_files = listDirFiles(input_dirname,maxfile_count)

    # Thread-pool pattern based on:
    #   https://www.digitalocean.com/community/tutorials/how-to-use-threadpoolexecutor-in-python-3
    with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as pool:
        jobs = [
            pool.submit(processMongoJsonFile, path, mongodb_collection)
            for path in pending_files
        ]

        with alive_progress.alive_bar(len(jobs)) as tick:
            for finished in concurrent.futures.as_completed(jobs):
                try:
                    outcome = finished.result()
                    print(outcome)
                except Exception as err:
                    logging.critical(str(err))

                # Advance the bar whether the task succeeded or failed
                tick()

# main entry point and argument parser    
def main():
    """Parse the command line and import a file or directory into MongoDB.

    Positional arguments are the input path (a directory or a single JSON
    file) and the MongoDB connection URL; options select verbosity, a file
    cap, the worker-thread count, and the database/collection names.

    Exits with status 1 when the connection URL does not use a supported
    scheme; argparse itself exits on malformed arguments.
    """
    # Both are module-level names; without the declaration the assignments
    # below would only create locals and leave the globals untouched.
    global Verbose, Logger

    parser = argparse.ArgumentParser(
        prog="import-mongodb-json.py",
        description="Imports the already processed Mongo-JSON friendly files into MongoDB"
    )
    parser.add_argument("inputDirOrFile", metavar="input-directory|inputfile.json",
                        help="input directory (or specific file) of Mongo-JSON friendly metadata records")
    parser.add_argument("dbConnection", metavar="MongoDB URL connection",
                        help="MongoDB URL connection in the form mongodb://localhost:27017/ to stream the processed JSON data to")

    parser.add_argument("-v", "--verbose",              dest="verbose",                 default=Verbose, action="count",
                        help="enable verbose output")
    parser.add_argument("-c", "--maxfile-count",        dest="maxfile_count",           metavar="<int>", type=int,
                        help="number of files to convert if working with full metadata archive")
    parser.add_argument("-t", "--num-threads",          dest="num_threads",             metavar="<int>", type=int, default=1,
                        help="number of worker threads to use when importing a directory")
    parser.add_argument("-db", "--mongodb-database",    dest="mongodb_database_name",   metavar="<db-name>", default="MetadataQuality",
                        help="export directly to mongodb database (requires collection)")
    parser.add_argument("-col", "--mongodb-collection", dest="mongodb_collection_name", metavar="<col-name>", default="xref",
                        help="the collection to store JSON metadata records, in the specified database")

    # when maxfile_count not specified, equals None
    parsedArgs = parser.parse_args()

    input_dir_or_file = parsedArgs.inputDirOrFile
    db_connection     = parsedArgs.dbConnection

    # set up logger so that verbosity works
    # NOTE(review): the functions in this file log through the root logger
    # (logging.info / logging.critical), so the level set on this named
    # logger does not appear to influence their output -- confirm intent.
    Verbose = parsedArgs.verbose
    logging.basicConfig(level=logging.INFO)
    Logger = logging.getLogger("verbosityLogger")
    Logger.setLevel(Verbose * 10)

    # Fail fast on an unusable connection URL instead of discovering the
    # missing collection later, once per file, inside the workers.
    if not db_connection.startswith(("mongodb://", "mongodb+srv://")):
        logging.critical("Invalid MongoDB connection URL '" + db_connection
                         + "': expected it to start with mongodb:// or mongodb+srv://")
        sys.exit(1)

    # Initial variables optional argument
    maxfile_count           = parsedArgs.maxfile_count
    num_threads             = parsedArgs.num_threads
    mongodb_database_name   = parsedArgs.mongodb_database_name
    mongodb_collection_name = parsedArgs.mongodb_collection_name

    mongodb_collection = openMongoDBCollection(db_connection,mongodb_database_name,mongodb_collection_name)

    # determine if dealing with an input dir or file
    if os.path.isdir(input_dir_or_file):
        processDir(input_dir_or_file, num_threads,maxfile_count, mongodb_collection)
    else:
        # Single-file import goes through the same per-file routine that the
        # directory workers use; report its result the same way.
        print(processMongoJsonFile(input_dir_or_file,mongodb_collection))

# Run the importer only when this file is executed as a script, so that
# importing it as a module does not trigger argument parsing.
if __name__ == "__main__": 
    main()
