Commit 1e7d0c41 authored by pkim62

Merge branch 'week1' into 'main'

Implementation of Week1

See merge request jim20/fa21-cs242-project!1
parents 4ac40373 75dd7252
# fa21-cs242-project
The week 1 deliverable of fa21-cs242-project consists of Python modules and classes
that cover the core functionality of the project: web scraping, transferring data to an
external database, and serving our own API.
The Scraper class uses the Beautiful Soup library to scrape data from op.gg and
return the desired information in a structured format.
The database module uses MongoDB to store the data fetched by the Scraper class.
The api module handles every supported request type (GET, PUT, POST, DELETE).
Finally, the 20 required unit tests live in the test module and cover the scraper, API,
and database functionality.
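As a quick illustration, the endpoints can be exercised with `requests`. The sketch below assumes the Flask server is running locally at `http://127.0.0.1:5000` (the address used by the unit tests) and that the database has already been populated by the scraper.

```python
import requests

BASE = 'http://127.0.0.1:5000'

# List every stored champion.
all_champs = requests.get(BASE + '/champions').json()

# Fetch a single champion by name.
aatrox = requests.get(BASE + '/champion', params={'name': 'Aatrox'}).json()

# Insert a new champion document; all six fields checked by valid_champ() are required.
new_champ = {
    "name": "Lex",
    "pick_rate": "5.7%",
    "win_rate": "79%",
    "champ_tier": "Tier 0",
    "counter_champs": ["Alistar", "Akali", "Zed"],
    "strong_against": ["Yasuo", "Yone", "Riven"],
}
requests.post(BASE + '/champion', json=new_champ)

# Remove it again.
requests.delete(BASE + '/champion', params={'name': 'Lex'})
```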
SECRET_KEY = "mongodb+srv://JohnIm:4MY7jaApcsPmj4Kl@cluster0.0fsik.mongodb.net/Cluster0?ssl=true&ssl_cert_reqs=CERT_NONE"
import unittest
import sys
sys.path.insert(0, '../src')
from database import valid_champ
from scraper import Scraper
import requests
import json
class TestFinal(unittest.TestCase):
#unittest.mock for db testing
def setUp(self):
print("Starting another test.")
"""Test valid_champ() in scrape tell if authors are valid"""
def test_valid_champ(self):
doc = { "name":"Lex",
"pick_rate" : "5.7%",
"win_rate" : "79%",
"champ_tier" :"Tier 0",
"counter_champs": ["Alistar", "Akali", "Zed"],
"strong_against" :["Yasuo", "Yone", "Riven"]
}
self.assertTrue(valid_champ(doc))
"""Test valid_champ() in scrape can tell if authors are invalid """
def test_valid_champ_invalid(self):
doc = {
"name":"Lex",
"pick_rate" : "5.7%",
"win_rate" : "79%",
"champ_tier" :"Tier 0",
"counter_champs": ["Alistar", "Akali", "Zed"]
}
self.assertFalse(valid_champ(doc))
def test_add_champ(self):
"""Test API POST Request"""
test_champ = {
"name":"Lex",
"pick_rate" : "5.7%",
"win_rate" : "79%",
"champ_tier" :"Tier 0",
"counter_champs": ["Alistar", "Akali", "Zed"]
}
response = requests.post('http://127.0.0.1:5000/champion', json=test_champ)
self.assertEqual(response.status_code, 200)
def test_invalid_add_champ(self):
"""Test Invalid API POST Request"""
test_champ = {
"name":"Lex",
"pick_rate" : "5.7%",
"win_rate" : "79%",
"champ_tier" :"Tier 0",
"counter_champs": ["Alistar", "Akali", "Zed"],
"kekW" : "lol"
}
response = requests.post('http://127.0.0.1:5000/champion', json=test_champ)
self.assertEqual(response.json()['status'], 400)
def test_get_champs(self):
"""Test API GET All Champs Request"""
response = requests.get('http://127.0.0.1:5000/champions')
self.assertEqual(response.status_code, 200)
def test_invalid_get_champs(self):
"""Test invalid API GET All Champs Request"""
response = requests.get('http://127.0.0.1:5000/champions', params = {'name' : "Aatrox"})
self.assertEqual(response.json()['status'], 400)
def test_get_champ(self):
"""Test API GET Request"""
response = requests.get('http://127.0.0.1:5000/champion', params = {'name' : "Aatrox"})
self.assertEqual(response.status_code, 200)
def test_invalid_get_champ(self):
"""Test invalid API GET Request"""
response = requests.get('http://127.0.0.1:5000/champion', params = {'name' : "invalid_name"})
self.assertEqual(response.json()['result'][0]['status'], 400)
def test_update_champ(self):
"""Test API PUT Request"""
test_champ = {
"name":"Lex",
"pick_rate" : "5.7%",
"win_rate" : "79%",
"champ_tier" :"Tier 0",
"counter_champs": ["Alistar", "Akali", "Zed"]
}
response = requests.put('http://127.0.0.1:5000/champion', params = {'name' : "Lex"}, json=test_champ)
self.assertEqual(response.status_code, 200)
def test_invalid_update_champ(self):
"""Test Invalid API PUT Request"""
test_champ = {
"name":"Lex",
"pick_rate" : "5.7%",
"win_rate" : "79%",
"champ_tier" :"Tier 0",
"counter_champs": ["Alistar", "Akali", "Zed"]
}
response = requests.put('http://127.0.0.1:5000/champion', json=test_champ)
self.assertEqual(response.json()['status'], 400)
def test_delete_champ(self):
"""Test API DELETE Request"""
response = requests.delete('http://127.0.0.1:5000/champion', params = {'name' : "Lex"})
self.assertEqual(response.status_code, 200)
def test_invalid_delete_champ(self):
"""Test invalid API DELETE Request"""
response = requests.delete('http://127.0.0.1:5000/champion', params = {'name' : "Lex", 'invalid': 'invalid'})
self.assertEqual(response.json()['status'], 400)
def test_scrape_champ1(self):
"""Test Scraping a Champion"""
s = Scraper()
url = 'https://na.op.gg/champion/nasus/statistics/top/build'
ret_arr = s.scrape_champion_page(url)
self.assertEqual(ret_arr[0], 'Nasus')
def test_scrape_champ2(self):
"""Test Scraping a Champion"""
s = Scraper()
url = 'https://na.op.gg/champion/aatrox/statistics/top/build'
ret_arr = s.scrape_champion_page(url)
self.assertEqual(ret_arr[0], 'Aatrox')
def test_scrape_champ3(self):
"""Test Scraping a Champion"""
s = Scraper()
url = 'https://na.op.gg/champion/graves/statistics/top/build'
ret_arr = s.scrape_champion_page(url)
self.assertEqual(ret_arr[0], 'Graves')
def test_scrape_champ4(self):
"""Test Scraping a Champion"""
s = Scraper()
url = 'https://na.op.gg/champion/akali/statistics/mid/build'
ret_arr = s.scrape_champion_page(url)
self.assertEqual(ret_arr[0], 'Akali')
def test_scrape_champ5(self):
"""Test Scraping a Champion"""
s = Scraper()
url = 'https://na.op.gg/champion/zed/statistics/mid/build'
ret_arr = s.scrape_champion_page(url)
self.assertEqual(ret_arr[0], 'Zed')
def test_scrape_champ6(self):
"""Test Scraping a Champion"""
s = Scraper()
url = 'https://na.op.gg/champion/lucian/statistics/mid/build'
ret_arr = s.scrape_champion_page(url)
self.assertEqual(ret_arr[0], 'Lucian')
def test_scrape_champ7(self):
"""Test Scraping a Champion"""
s = Scraper()
url = 'https://na.op.gg/champion/kennen/statistics/top/build'
ret_arr = s.scrape_champion_page(url)
self.assertEqual(ret_arr[0], 'Kennen')
def test_scrape_champ8(self):
"""Test Scraping a Champion"""
s = Scraper()
url = 'https://na.op.gg/champion/zoe/statistics/mid/build'
ret_arr = s.scrape_champion_page(url)
self.assertEqual(ret_arr[0], 'Zoe')
if __name__ == '__main__':
unittest.main()
'''
set FLASK_APP=src/api
flask run
api for our champions application
'''
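# Summary of the routes implemented below (derived from the handlers in this module):
#   GET    /champions            -> list every stored champion
#   GET    /champion?name=<name> -> fetch one champion by name
#   POST   /champion             -> insert a new champion (JSON body)
#   PUT    /champion?name=<name> -> update an existing champion (JSON body)
#   DELETE /champion?name=<name> -> delete a champion by name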
import datetime
from flask import Flask, request, jsonify
from database import get_key, get_collection, valid_champ
app = Flask(__name__)
@app.route('/champions', methods=['GET'])
def getAllChampions():
"""
Function for an API Get Request
"""
if len(request.args) > 0 :
bad_input_error = {
'status': 400,
"error":"Bad Request"
}
return bad_input_error
champions = get_collection()
output = []
    if champions.count_documents({}) == 0:
output.append({'time' : datetime.datetime.now(), 'status' : 400,
'message' : 'Get Failed.'})
return jsonify(output)
for champ in champions.find():
output.append({"name" : champ['name'],
'win_rate' : champ['win_rate'],
'pick_rate' : champ['pick_rate'],
'counter_champs' : champ['counter_champs'],
'strong_against' : champ['strong_against']})
return jsonify({'result' : output})
@app.route('/champion', methods=['GET'])
def getChampion():
"""
Function for an API GET Request by name
"""
champ_name = request.args.get("name")
champions = get_collection()
output = []
champ = champions.find_one({'name' : champ_name})
if champ:
output.append({"name" : champ['name'],
'win_rate' : champ['win_rate'],
'pick_rate' : champ['pick_rate'],
'counter_champs' : champ['counter_champs'],
'strong_against' : champ['strong_against']})
return jsonify(output)
output.append({'time' : datetime.datetime.now(), 'status' : 400,
'message' : 'Get Failed.'})
return jsonify({'result' : output})
"""PUT request for champion."""
@app.route('/champion', methods=['PUT'])
def put_champion():
id_ = request.args.get("name") # @UndefinedVariable
update_values = request.get_json()
if id_ is None or update_values is None or len(request.args) > 1:
bad_input_error = {
"status": 400,
"error":"Bad Request"
}
return bad_input_error
result = get_collection().update({'name' : id_}, {"$set": update_values})
if not result["updatedExisting"]:
error = {
"status": 500,
"error":"Internal Server Error",
"message":"No champions found with given name"
}
return error
return "updated champion entry: " + id_
"""POST request for champion."""
@app.route('/champion', methods=['POST'])
def make_new_champion():
update_values = request.get_json()
if update_values is None or len(request.args) > 0 or not valid_champ(request.get_json()):
bad_input_error = {
"status": 400,
"error":"Bad Request"
}
return bad_input_error
r = get_collection().update({'name' : update_values['name']}, {"$setOnInsert":update_values}, upsert=True)
if r["updatedExisting"]:
error = {
"status": 500,
"error":"Internal Server Error",
"message":"Champion entry already exists"
}
return error
return jsonify({'result' : update_values})
"""DELETE a champion."""
@app.route('/champion', methods=['DELETE'])
def delete_champion():
_id = request.args.get("name") # @UndefinedVariable
if _id is None or len(request.args) > 1:
bad_input_error = {
"status": 400,
"error":"Bad Request"
}
return bad_input_error
get_collection().remove({"name" : _id})
return "deleted champion with name " + _id
'''
Database handler that loads the MongoDB key from environment variables so it stays out of GitHub.
'''
import pymongo
import os
from dotenv import load_dotenv
from scraper import Scraper
import time
def get_key():
"""
This function gets the unique key to access the MongoDB database collection
be in src in terminal
"""
load_dotenv('./key.env')
return os.getenv("SECRET_KEY")
def database_handler(ret_arr1):
"""
    This function takes the scraped data for one champion and stores it in the MongoDB database.
"""
if ret_arr1 is not None:
client = pymongo.MongoClient(get_key())
database = client.Collection
collection = database.Champions
champ = {"name": ret_arr1[0],
"win_rate" : ret_arr1[1],
"pick_rate" : ret_arr1[2],
"champ_tier" : ret_arr1[3],
"counter_champs" : ret_arr1[4],
"strong_against" : ret_arr1[5],
}
        # Upsert by champion name so refreshed stats update the existing entry instead of creating a duplicate.
        collection.update({"name": champ["name"]}, {"$set": champ}, upsert=True)
def get_collection():
"""
this function returns the collection of database
"""
client = pymongo.MongoClient(get_key())
data_base = client['Collection']
champions = data_base["Champions"]
return champions
# Check that doc contains every field required for a champion document.
def valid_champ(doc):
return (bool(doc.get('name')) and bool(doc.get('pick_rate')) and bool(doc.get('win_rate'))
and bool(doc.get('champ_tier')) and bool(doc.get("counter_champs"))
and bool(doc.get("strong_against")))
from scraper import Scraper
import time
from database import database_handler
"""
Scrapes the data for every champion on op.gg.
Each champion's data is based on their current meta role,
e.g. Aatrox's meta role is top lane, so we scrape his top-lane stats.
"""
def main():
    s = Scraper()
    links = s.scrape_champion_links()
    for link in links:
        ret_arr = s.scrape_champion_page(link)
        time.sleep(10)  # throttle requests so op.gg is not hammered
        if ret_arr is None:
            continue
        database_handler(ret_arr)
if __name__ == "__main__":
main()
"""
Implements the scraping of data from op.gg
"""
import re
import requests
from bs4 import BeautifulSoup as bs
class Scraper:
    # Constructor of the Scraper class.
def __init__(self):
print("scraper start")
    # Scrape the build-page links for every champion listed on op.gg.
def scrape_champion_links(self):
url = 'https://na.op.gg/champion/statistics'
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
request = requests.get(url, headers=headers)
soup = bs(request.text, 'html.parser')
champion_links = soup.find('div', class_="champion-index__champion-list").findAll('a')
temp = []
retArr = []
for x in champion_links:
temp.append(x['href'] + "/" + x.find('span').text.strip().lower() + "/build")
for link in temp:
retArr.append('http://op.gg'+ link)
return retArr
    # Scrape a single champion page on op.gg and return its name, rates, tier, and matchup lists.
def scrape_champion_page(self, url):
print("we are scraping " + url)
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
request = requests.get(url, headers=headers)
soup = bs(request.text, 'html.parser')
soup.find('h1', class_= "champion-stats-header-info__name").find('span').extract()
name = soup.find('h1', class_= "champion-stats-header-info__name").text.strip()
rate = soup.findAll('div', class_= 'champion-stats-trend-rate')
win_rate = rate[0].text.strip()
pick_rate = rate[1].text.strip()
champ_tier = soup.find('div', class_='champion-stats-header-info__tier').b.text
counter_tmp = soup.find('table', class_='champion-stats-header-matchup__table champion-stats-header-matchup__table--strong tabItem').tbody
counter_list = counter_tmp.findAll('tr')
counter = []
for i in counter_list:
i.find('img').extract()
counter.append(i.find('td', class_='champion-stats-header-matchup__table__champion').text.strip())
strong_tmp = soup.find('table', class_='champion-stats-header-matchup__table champion-stats-header-matchup__table--weak tabItem').tbody
strong_list = strong_tmp.findAll('tr')
strong = []
for i in strong_list:
i.find('img').extract()
strong.append(i.find('td', class_='champion-stats-header-matchup__table__champion').text.strip())
return name, win_rate, pick_rate, champ_tier, counter, strong