"""MongoDB access helpers for the goodReads crawler (books and authors collections)."""
import json
import DataBase.JsonParser as parser
import os
from dotenv import load_dotenv
from pymongo import MongoClient
def get_db():
    """
    Return the database of the goodReads crawler.

    The ``.env`` file is loaded first, then the connection string is read
    from the ``MONGODB_URL`` environment variable and used to open the
    ``crawler_db`` database.
    :return: the "crawler_db" database object
    """
    load_dotenv()
    mongo_url = os.getenv('MONGODB_URL')
    return MongoClient(mongo_url).get_database("crawler_db")
def insert_document(docu, opt):
    """
    Insert a single document into the books or authors collection
    :param docu: the document (dictionary) to insert
    :param opt: =0 means books collection; =1 means authors collection
    :return: no return value
    """
    # Validate opt before connecting so a bad selector never touches the database.
    # NOTE(review): the collection names "books"/"authors" are inferred from this
    # module's docstrings -- confirm they match the real collections.
    if opt == 0:
        collection_name = "books"
    elif opt == 1:
        collection_name = "authors"
    else:
        print("failed to get json file: wrong opt for selecting collection")
        return
    records = get_db()[collection_name]
    records.insert_one(docu)
def insert_dicts(dictionary, opt):
    """
    Insert books or authors collection in database
    :param dictionary: the dictionary to insert to collection
    :param opt: =0 means books collection; =1 means authors collection
    :return: no return value
    """
    # Pick the target collection and the matching parser in a single dispatch.
    # NOTE(review): the collection names "books"/"authors" are inferred from this
    # module's docstrings -- confirm they match the real collections.
    if opt == 0:
        collection_name = "books"
        to_json_list = parser.parse_book_dict_to_json
    elif opt == 1:
        collection_name = "authors"
        to_json_list = parser.parse_author_dict_to_json
    else:
        print("failed to get json file: wrong opt for selecting collection")
        return
    json_list = to_json_list(dictionary)
    if not json_list:
        # pymongo's insert_many rejects an empty document list, so there is
        # nothing to insert and no reason to open a connection.
        return
    records = get_db()[collection_name]
    records.insert_many(json_list)
def update_dicts(opt, identifier, content):
    """
    Update documentations in a given collection
    :param opt: =0 means books collection; =1 means authors collection
    :param identifier: the identifier of the documentation we want to find
    :param content: the content to update
    :return: no return value
    """
    # Validate opt before connecting so a bad selector never touches the database.
    # NOTE(review): the collection names "books"/"authors" are inferred from this
    # module's docstrings -- confirm they match the real collections.
    if opt == 0:
        collection_name = "books"
    elif opt == 1:
        collection_name = "authors"
    else:
        print("failed to get json file: wrong opt for selecting collection")
        return
    records = get_db()[collection_name]
    # NOTE(review): content is assumed to be a plain {field: value} mapping and is
    # therefore wrapped in "$set"; if callers already pass update operators,
    # pass content through unchanged instead -- confirm against callers.
    result = records.update_one(
        identifier,
        {"$set": content},
    )
    print("matched documentation: " + str(result.matched_count))
    print("modified documentation: " + str(result.modified_count))
def get_documents_json(opt, identifier):
    """
    find documentations specified by the identifier and output a json data
    :param opt: =0 means books collection; =1 means authors collection
    :param identifier: identifier of the documents we want, {} means locate the whole collection
    :return: json string of the form {"books": [...]} or {"authors": [...]};
             "{}" when opt is not a valid selector
    """
    # type_name is both the top-level key of the JSON output and the collection
    # that is queried.
    # NOTE(review): using "books"/"authors" as collection names is inferred from
    # this module's docstrings -- confirm they match the real collections.
    if opt == 0:
        type_name = "books"
    elif opt == 1:
        type_name = "authors"
    else:
        print("failed to get json file: wrong opt for selecting collection")
        return json.dumps({})
    records = get_db()[type_name]
    documents = []
    for item in records.find(identifier):
        # Drop the "_id" field before dumping; the default keeps this safe for
        # documents that carry no "_id".
        item.pop("_id", None)
        documents.append(item)
    return json.dumps({type_name: documents})
def download_collection(opt, identifier, name):
    """
    download books collection or authors collection
    :param opt: =0 means books collection; =1 means authors collection
    :param identifier: identifier of the documents we want to download;
                        empty({}) means selected all documents in given collection
    :param name: file name of downloaded json (without the ".json" extension)
    :return: no return value; the JSON is written to FILE_ROOT + name + ".json"
    """
    json_file = get_documents_json(opt, identifier)
    load_dotenv()
    # FILE_ROOT is used as a plain string prefix of the file name. Fall back to
    # an empty prefix when it is unset so the concatenation below does not fail
    # with a TypeError on None + str.
    file_root = os.getenv('FILE_ROOT', "")
    with open(file_root + name + ".json", "w") as output:
        output.write(json_file)
def clean(opt, identifier):
"""
delete specific documents in given collection
:param opt: =0 means books collection; =1 means authors collection
:param identifier: identifier of the documents we want to delete;
empty({}) means selected all documents in given collection
:return: no return value
"""
db = get_db()
if opt == 0: