import json
import DataBase.JsonParser as parser
import os
from dotenv import load_dotenv
from pymongo import MongoClient

# Cache of MongoClient instances keyed by (process id, connection URL).
# A MongoClient owns its own connection pool, so building a new one for every
# operation is wasteful and the old ones are never closed. The process id is
# part of the key because a client must not be shared with a forked child.
_clients = {}


def get_db():
    """
    return the database of goodReads crawler

    The connection URL is read from the MONGODB_URL environment variable
    (after loading the .env file). The underlying client is created once per
    process and URL, then reused on later calls.
    :return: the "crawler_db" database object
    """
    load_dotenv()
    url = os.getenv('MONGODB_URL')
    key = (os.getpid(), url)
    client = _clients.get(key)
    if client is None:
        client = MongoClient(url)
        _clients[key] = client
    return client.get_database("crawler_db")


def _select_collection(opt):
    """
    Pick the collection that opt refers to
    :param opt: =0 means books collection; =1 means authors collection
    :return: the selected collection, or None (after printing an error
             message) when opt is neither 0 nor 1
    """
    if opt == 0:
        return get_db().test_books
    if opt == 1:
        return get_db().test_authors
    print("failed to get json file: wrong opt for selecting collection")
    return None


def insert_document(docu, opt):
    """
    Insert a single document into the books or authors collection
    :param docu: the document to insert
    :param opt: =0 means books collection; =1 means authors collection
    :return: no return value
    """
    records = _select_collection(opt)
    # Compare against None explicitly: collection objects do not support
    # truth-value testing.
    if records is None:
        return
    records.insert_one(docu)


def insert_dicts(dictionary, opt):
    """
    Insert books or authors collection in database
    :param dictionary: the dictionary to insert to collection
    :param opt: =0 means books collection; =1 means authors collection
    :return: no return value
    """
    records = _select_collection(opt)
    if records is None:
        return
    if opt == 0:
        json_list = parser.parse_book_dict_to_json(dictionary)
    else:
        json_list = parser.parse_author_dict_to_json(dictionary)
    # insert_many rejects an empty document list, so there is nothing to do
    # when the parser produced no documents.
    if not json_list:
        return
    records.insert_many(json_list)


def update_dicts(opt, identifier, content):
    """
    Update a document in a given collection
    The first document matching the identifier is updated; because the
    update is an upsert, a new document is inserted when nothing matches.
    :param opt: =0 means books collection; =1 means authors collection
    :param identifier: the identifier of the document we want to find
    :param content: the content to update
    :return: no return value
    """
    records = _select_collection(opt)
    if records is None:
        return
    result = records.update_one(
        identifier,
        {"$set": content},
        upsert=True
    )
    print("matched documentation: " + str(result.matched_count))
    print("modified documentation: " + str(result.modified_count))


def get_documents_json(opt, identifier):
    """
    find documents specified by the identifier and output a json data
    :param opt: =0 means books collection; =1 means authors collection
    :param identifier: identifier of the documents we want, {} means locate
                       the whole collection
    :return: json string of the selected documents, stored under the key
             "books" or "authors"; the json of an empty object when opt is
             invalid
    """
    records = _select_collection(opt)
    if records is None:
        return json.dumps({})
    type_name = "books" if opt == 0 else "authors"
    # The "_id" field is dropped from every document before serialising.
    documents = [
        {key: value for key, value in item.items() if key != "_id"}
        for item in records.find(identifier)
    ]
    return json.dumps({type_name: documents})


def download_collection(opt, identifier, name):
    """
    download books collection or authors collection
    The file is written to FILE_ROOT + name + ".json", where FILE_ROOT is an
    environment variable used as a plain string prefix (no path separator is
    added). When FILE_ROOT is not set, an empty prefix is used.
    :param opt: =0 means books collection; =1 means authors collection
    :param identifier: identifier of the documents we want to download;
                       empty({}) means selected all documents in given
                       collection
    :param name: file name of downloaded json
    :return: no return value; the JSON is written to the file
    """
    json_file = get_documents_json(opt, identifier)
    load_dotenv()
    # os.getenv returns None for an unset variable, which cannot be
    # concatenated with a string.
    file_root = os.getenv('FILE_ROOT') or ""
    with open(file_root + name + ".json", "w", encoding="utf-8") as output:
        output.write(json_file)


def clean(opt, identifier):
    """
    delete specific documents in given collection
    :param opt: =0 means books collection; =1 means authors collection
    :param identifier: identifier of the documents we want to delete;
                       empty({}) means selected all documents in given
                       collection
    :return: no return value
    """
    records = _select_collection(opt)
    if records is None:
        return
    records.delete_many(identifier)