# mongoDB.py

import json
import DataBase.JsonParser as parser
import os
from dotenv import load_dotenv
from pymongo import MongoClient


# Process-wide MongoClient, created lazily by get_db() and reused afterwards.
_mongo_client = None


def get_db():
    """
    Return the database of goodReads crawler

    The MongoClient is created on the first call (after loading the .env file
    and reading MONGODB_URL) and reused by every later call, so that the
    helpers in this module share one client instead of opening a new one per
    operation.
    :return: the "crawler_db" database of the shared client
    """
    global _mongo_client
    if _mongo_client is None:
        load_dotenv()
        url = os.getenv('MONGODB_URL')
        _mongo_client = MongoClient(url)
    return _mongo_client.get_database("crawler_db")


def insert_document(docu, opt):
    """
    Insert a single document into the books or authors collection
    :param docu: the document to insert
    :param opt: =0 means books collection; =1 means authors collection
    :return: no return value
    """
    database = get_db()
    # Guard clause: any opt other than 0 or 1 is reported and ignored.
    if opt not in (0, 1):
        print("failed to get json file: wrong opt for selecting collection")
        return
    collection = database.test_books if opt == 0 else database.test_authors
    collection.insert_one(docu)


def insert_dicts(dictionary, opt):
    """
    Insert books or authors collection in database
    :param dictionary: the dictionary to insert to collection; it is first
    converted with the matching JsonParser function
    :param opt: =0 means books collection; =1 means authors collection
    :return: no return value
    """
    db = get_db()
    # Select the target collection and the matching parser in a single dispatch.
    if opt == 0:
        records = db.test_books
        json_list = parser.parse_book_dict_to_json(dictionary)
    elif opt == 1:
        records = db.test_authors
        json_list = parser.parse_author_dict_to_json(dictionary)
    else:
        print("failed to get json file: wrong opt for selecting collection")
        return
    if not json_list:
        # insert_many rejects an empty document list, so an empty parse result
        # is treated as "nothing to insert" rather than an error.
        return
    records.insert_many(json_list)


def update_dicts(opt, identifier, content):
    """
    Update one document in the chosen collection, inserting it if no match exists
    :param opt: =0 means books collection; =1 means authors collection
    :param identifier: the filter used to locate the document to update
    :param content: the fields to apply with "$set"
    :return: no return value; matched and modified counts are printed
    """
    database = get_db()
    if opt == 0:
        collection_name = "test_books"
    elif opt == 1:
        collection_name = "test_authors"
    else:
        print("failed to get json file: wrong opt for selecting collection")
        return
    collection = getattr(database, collection_name)
    # upsert=True: a document is created when the filter matches nothing.
    changes = {"$set": content}
    outcome = collection.update_one(identifier, changes, upsert=True)
    print("matched documentation: " + str(outcome.matched_count))
    print("modified documentation: " + str(outcome.modified_count))

def get_documents_json(opt, identifier):
    """
    Look up the documents matching the identifier and serialize them to JSON
    :param opt: =0 means books collection; =1 means authors collection
    :param identifier: filter for the wanted documents, {} selects the whole collection
    :return: JSON string of the form {"books": [...]} or {"authors": [...]};
    "{}" when opt is neither 0 nor 1
    """
    database = get_db()
    if opt not in (0, 1):
        print("failed to get json file: wrong opt for selecting collection")
        return json.dumps({})
    if opt == 0:
        collection, type_name = database.test_books, "books"
    else:
        collection, type_name = database.test_authors, "authors"
    documents = []
    for document in collection.find(identifier):
        # The "_id" field is removed from each document before serializing.
        del document["_id"]
        documents.append(document)
    return json.dumps({type_name: documents})


def download_collection(opt, identifier, name):
    """
    download books collection or authors collection into a JSON file
    :param opt: =0 means books collection; =1 means authors collection
    :param identifier: identifier of the documents we want to download;
    empty({}) means selected all documents in given collection
    :param name: file name of downloaded json, without the ".json" extension
    :return: no return value; the JSON text is written to
    FILE_ROOT + name + ".json"
    """
    json_file = get_documents_json(opt, identifier)
    load_dotenv()
    # FILE_ROOT is used as a raw prefix (no path separator is added here).
    # When it is unset, fall back to an empty prefix so the file is written
    # relative to the working directory instead of failing on None + str.
    file_root = os.getenv('FILE_ROOT', '')
    with open(file_root + name + ".json", "w") as output:
        output.write(json_file)


def clean(opt, identifier):
    """
    Remove every document matching the identifier from the chosen collection
    :param opt: =0 means books collection; =1 means authors collection
    :param identifier: filter for the documents to delete;
    empty({}) selects all documents in the given collection
    :return: no return value
    """
    database = get_db()
    if opt != 0 and opt != 1:
        print("failed to get json file: wrong opt for selecting collection")
        return
    target = database.test_books if opt == 0 else database.test_authors
    target.delete_many(identifier)