Commit 1e7d0c41 authored by pkim62

Merge branch 'week1' into 'main'

Implementation of Week1

See merge request jim20/fa21-cs242-project!1
parents 4ac40373 75dd7252
# fa21-cs242-project
The week 1 deliverable of fa21-cs242-project consists of Python modules and classes
that cover the core functionality of the project: web scraping, transferring data to an
external database, and serving our own API.
The Scraper class uses the Beautiful Soup library to scrape data from op.gg and
return the desired information in a structured format.
The database module uses MongoDB to store the data fetched by the Scraper class.
The api module handles every supported request type (GET, PUT, POST, DELETE).
Finally, the 20 required unit tests live in the test module and cover the scraper, API,
and database functionality.
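As a quick illustration, the endpoints can be exercised with `requests`. The sketch below assumes the Flask server is running locally at `http://127.0.0.1:5000` (the address used by the unit tests) and that the database has already been populated by the scraper.

```python
import requests

BASE = 'http://127.0.0.1:5000'

# List every stored champion.
all_champs = requests.get(BASE + '/champions').json()

# Fetch a single champion by name.
aatrox = requests.get(BASE + '/champion', params={'name': 'Aatrox'}).json()

# Insert a new champion document; all six fields checked by valid_champ() are required.
new_champ = {
    "name": "Lex",
    "pick_rate": "5.7%",
    "win_rate": "79%",
    "champ_tier": "Tier 0",
    "counter_champs": ["Alistar", "Akali", "Zed"],
    "strong_against": ["Yasuo", "Yone", "Riven"],
}
requests.post(BASE + '/champion', json=new_champ)

# Remove it again.
requests.delete(BASE + '/champion', params={'name': 'Lex'})
```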
SECRET_KEY = "mongodb+srv://JohnIm:4MY7jaApcsPmj4Kl@cluster0.0fsik.mongodb.net/Cluster0?ssl=true&ssl_cert_reqs=CERT_NONE"
import unittest
import sys
sys.path.insert(0, '../src')
from database import valid_champ
from scraper import Scraper
import requests
import json
class TestFinal(unittest.TestCase):
#unittest.mock for db testing
def setUp(self):
print("Starting another test.")
"""Test valid_champ() in scrape tell if authors are valid"""
def test_valid_champ(self):
doc = { "name":"Lex",
"pick_rate" : "5.7%",
"win_rate" : "79%",
"champ_tier" :"Tier 0",
"counter_champs": ["Alistar", "Akali", "Zed"],
"strong_against" :["Yasuo", "Yone", "Riven"]
}
self.assertTrue(valid_champ(doc))
"""Test valid_champ() in scrape can tell if authors are invalid """
def test_valid_champ_invalid(self):
doc = {
"name":"Lex",
"pick_rate" : "5.7%",
"win_rate" : "79%",
"champ_tier" :"Tier 0",
"counter_champs": ["Alistar", "Akali", "Zed"]
}
self.assertFalse(valid_champ(doc))
def test_add_champ(self):
"""Test API POST Request"""
test_champ = {
"name":"Lex",
"pick_rate" : "5.7%",
"win_rate" : "79%",
"champ_tier" :"Tier 0",
"counter_champs": ["Alistar", "Akali", "Zed"]
}
response = requests.post('http://127.0.0.1:5000/champion', json=test_champ)
self.assertEqual(response.status_code, 200)
def test_invalid_add_champ(self):
"""Test Invalid API POST Request"""
test_champ = {
"name":"Lex",
"pick_rate" : "5.7%",
"win_rate" : "79%",
"champ_tier" :"Tier 0",
"counter_champs": ["Alistar", "Akali", "Zed"],
"kekW" : "lol"
}
response = requests.post('http://127.0.0.1:5000/champion', json=test_champ)
self.assertEqual(response.json()['status'], 400)
def test_get_champs(self):
"""Test API GET All Champs Request"""
response = requests.get('http://127.0.0.1:5000/champions')
self.assertEqual(response.status_code, 200)
def test_invalid_get_champs(self):
"""Test invalid API GET All Champs Request"""
response = requests.get('http://127.0.0.1:5000/champions', params = {'name' : "Aatrox"})
self.assertEqual(response.json()['status'], 400)
def test_get_champ(self):
"""Test API GET Request"""
response = requests.get('http://127.0.0.1:5000/champion', params = {'name' : "Aatrox"})
self.assertEqual(response.status_code, 200)
def test_invalid_get_champ(self):
"""Test invalid API GET Request"""
response = requests.get('http://127.0.0.1:5000/champion', params = {'name' : "invalid_name"})
self.assertEqual(response.json()['result'][0]['status'], 400)
def test_update_champ(self):
"""Test API PUT Request"""
test_champ = {
"name":"Lex",
"pick_rate" : "5.7%",
"win_rate" : "79%",
"champ_tier" :"Tier 0",
"counter_champs": ["Alistar", "Akali", "Zed"]
}
response = requests.put('http://127.0.0.1:5000/champion', params = {'name' : "Lex"}, json=test_champ)
self.assertEqual(response.status_code, 200)
def test_invalid_update_champ(self):
"""Test Invalid API PUT Request"""
test_champ = {
"name":"Lex",
"pick_rate" : "5.7%",
"win_rate" : "79%",
"champ_tier" :"Tier 0",
"counter_champs": ["Alistar", "Akali", "Zed"]
}
response = requests.put('http://127.0.0.1:5000/champion', json=test_champ)
self.assertEqual(response.json()['status'], 400)
def test_delete_champ(self):
"""Test API DELETE Request"""
response = requests.delete('http://127.0.0.1:5000/champion', params = {'name' : "Lex"})
self.assertEqual(response.status_code, 200)
def test_invalid_delete_champ(self):
"""Test invalid API DELETE Request"""
response = requests.delete('http://127.0.0.1:5000/champion', params = {'name' : "Lex", 'invalid': 'invalid'})
self.assertEqual(response.json()['status'], 400)
def test_scrape_champ1(self):
"""Test Scraping a Champion"""
s = Scraper()
url = 'https://na.op.gg/champion/nasus/statistics/top/build'
ret_arr = s.scrape_champion_page(url)
self.assertEqual(ret_arr[0], 'Nasus')
def test_scrape_champ2(self):
"""Test Scraping a Champion"""
s = Scraper()
url = 'https://na.op.gg/champion/aatrox/statistics/top/build'
ret_arr = s.scrape_champion_page(url)
self.assertEqual(ret_arr[0], 'Aatrox')
def test_scrape_champ3(self):
"""Test Scraping a Champion"""
s = Scraper()
url = 'https://na.op.gg/champion/graves/statistics/top/build'
ret_arr = s.scrape_champion_page(url)
self.assertEqual(ret_arr[0], 'Graves')
def test_scrape_champ4(self):
"""Test Scraping a Champion"""
s = Scraper()
url = 'https://na.op.gg/champion/akali/statistics/mid/build'
ret_arr = s.scrape_champion_page(url)
self.assertEqual(ret_arr[0], 'Akali')
def test_scrape_champ5(self):
"""Test Scraping a Champion"""
s = Scraper()
url = 'https://na.op.gg/champion/zed/statistics/mid/build'
ret_arr = s.scrape_champion_page(url)
self.assertEqual(ret_arr[0], 'Zed')
def test_scrape_champ6(self):
"""Test Scraping a Champion"""
s = Scraper()
url = 'https://na.op.gg/champion/lucian/statistics/mid/build'
ret_arr = s.scrape_champion_page(url)
self.assertEqual(ret_arr[0], 'Lucian')
def test_scrape_champ7(self):
"""Test Scraping a Champion"""
s = Scraper()
url = 'https://na.op.gg/champion/kennen/statistics/top/build'
ret_arr = s.scrape_champion_page(url)
self.assertEqual(ret_arr[0], 'Kennen')
def test_scrape_champ8(self):
"""Test Scraping a Champion"""
s = Scraper()
url = 'https://na.op.gg/champion/zoe/statistics/mid/build'
ret_arr = s.scrape_champion_page(url)
self.assertEqual(ret_arr[0], 'Zoe')
if __name__ == '__main__':
unittest.main()
'''
set FLASK_APP=src/api
flask run
api for our champions application
'''
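# Summary of the routes implemented below (derived from the handlers in this module):
#   GET    /champions            -> list every stored champion
#   GET    /champion?name=<name> -> fetch one champion by name
#   POST   /champion             -> insert a new champion (JSON body)
#   PUT    /champion?name=<name> -> update an existing champion (JSON body)
#   DELETE /champion?name=<name> -> delete a champion by name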
import datetime
from flask import Flask, request, jsonify
from database import get_key, get_collection, valid_champ
app = Flask(__name__)
@app.route('/champions', methods=['GET'])
def getAllChampions():
"""
Function for an API Get Request
"""
if len(request.args) > 0 :
bad_input_error = {
'status': 400,
"error":"Bad Request"
}
return bad_input_error
champions = get_collection()
output = []
    if champions.count_documents({}) == 0:
output.append({'time' : datetime.datetime.now(), 'status' : 400,
'message' : 'Get Failed.'})
return jsonify(output)
for champ in champions.find():
output.append({"name" : champ['name'],
'win_rate' : champ['win_rate'],
'pick_rate' : champ['pick_rate'],
'counter_champs' : champ['counter_champs'],
'strong_against' : champ['strong_against']})
return jsonify({'result' : output})
@app.route('/champion', methods=['GET'])
def getChampion():
"""
Function for an API GET Request by name
"""
champ_name = request.args.get("name")
champions = get_collection()
output = []
champ = champions.find_one({'name' : champ_name})
if champ:
output.append({"name" : champ['name'],
'win_rate' : champ['win_rate'],
'pick_rate' : champ['pick_rate'],
'counter_champs' : champ['counter_champs'],
'strong_against' : champ['strong_against']})
return jsonify(output)
output.append({'time' : datetime.datetime.now(), 'status' : 400,
'message' : 'Get Failed.'})
return jsonify({'result' : output})
"""PUT request for champion."""
@app.route('/champion', methods=['PUT'])
def put_champion():
id_ = request.args.get("name") # @UndefinedVariable
update_values = request.get_json()
if id_ is None or update_values is None or len(request.args) > 1:
bad_input_error = {
"status": 400,
"error":"Bad Request"
}
return bad_input_error
result = get_collection().update({'name' : id_}, {"$set": update_values})
if not result["updatedExisting"]:
error = {
"status": 500,
"error":"Internal Server Error",
"message":"No champions found with given name"
}
return error
return "updated champion entry: " + id_
"""POST request for champion."""
@app.route('/champion', methods=['POST'])
def make_new_champion():
update_values = request.get_json()
if update_values is None or len(request.args) > 0 or not valid_champ(request.get_json()):
bad_input_error = {
"status": 400,
"error":"Bad Request"
}
return bad_input_error
r = get_collection().update({'name' : update_values['name']}, {"$setOnInsert":update_values}, upsert=True)
if r["updatedExisting"]:
error = {
"status": 500,
"error":"Internal Server Error",
"message":"Champion entry already exists"
}
return error
return jsonify({'result' : update_values})
"""DELETE a champion."""
@app.route('/champion', methods=['DELETE'])
def delete_champion():
_id = request.args.get("name") # @UndefinedVariable
if _id is None or len(request.args) > 1:
bad_input_error = {
"status": 400,
"error":"Bad Request"
}
return bad_input_error
get_collection().remove({"name" : _id})
return "deleted champion with name " + _id
'''
Database handler that loads the MongoDB key from environment variables so it stays out of GitHub.
'''
import pymongo
import os
from dotenv import load_dotenv
from scraper import Scraper
import time
def get_key():
"""
This function gets the unique key to access the MongoDB database collection
be in src in terminal
"""
load_dotenv('./key.env')
return os.getenv("SECRET_KEY")
def database_handler(ret_arr1):
"""
    This function takes the scraped data for one champion and stores it in the MongoDB database.
"""
if ret_arr1 is not None:
client = pymongo.MongoClient(get_key())
database = client.Collection
collection = database.Champions
champ = {"name": ret_arr1[0],
"win_rate" : ret_arr1[1],
"pick_rate" : ret_arr1[2],
"champ_tier" : ret_arr1[3],
"counter_champs" : ret_arr1[4],
"strong_against" : ret_arr1[5],
}
        # Upsert by champion name so refreshed stats update the existing entry instead of creating a duplicate.
        collection.update({"name": champ["name"]}, {"$set": champ}, upsert=True)
def get_collection():
"""
this function returns the collection of database
"""
client = pymongo.MongoClient(get_key())
data_base = client['Collection']
champions = data_base["Champions"]
return champions
# Check that doc contains every field required for a champion document.
def valid_champ(doc):
return (bool(doc.get('name')) and bool(doc.get('pick_rate')) and bool(doc.get('win_rate'))
and bool(doc.get('champ_tier')) and bool(doc.get("counter_champs"))
and bool(doc.get("strong_against")))
from scraper import Scraper
import time
from database import database_handler
"""
Scrapes the data for every champion on op.gg.
Each champion's data is based on their current meta role,
e.g. Aatrox's meta role is top lane, so we scrape his top-lane stats.
"""
def main():
    s = Scraper()
    links = s.scrape_champion_links()
    for link in links:
        ret_arr = s.scrape_champion_page(link)
        time.sleep(10)  # throttle requests so op.gg is not hammered
        if ret_arr is None:
            continue
        database_handler(ret_arr)
if __name__ == "__main__":
main()
"""
Implements the scraping of data from op.gg
"""
import re
import requests
from bs4 import BeautifulSoup as bs
class Scraper:
    # Constructor of the Scraper class.
def __init__(self):
print("scraper start")
    # Scrape the build-page links for every champion listed on op.gg.
def scrape_champion_links(self):
url = 'https://na.op.gg/champion/statistics'
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
request = requests.get(url, headers=headers)
soup = bs(request.text, 'html.parser')
champion_links = soup.find('div', class_="champion-index__champion-list").findAll('a')
temp = []
retArr = []
for x in champion_links:
temp.append(x['href'] + "/" + x.find('span').text.strip().lower() + "/build")
for link in temp:
retArr.append('http://op.gg'+ link)
return retArr
    # Scrape a single champion page on op.gg and return its name, rates, tier, and matchup lists.
def scrape_champion_page(self, url):
print("we are scraping " + url)
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
request = requests.get(url, headers=headers)
soup = bs(request.text, 'html.parser')
soup.find('h1', class_= "champion-stats-header-info__name").find('span').extract()
name = soup.find('h1', class_= "champion-stats-header-info__name").text.strip()
rate = soup.findAll('div', class_= 'champion-stats-trend-rate')
win_rate = rate[0].text.strip()
pick_rate = rate[1].text.strip()
champ_tier = soup.find('div', class_='champion-stats-header-info__tier').b.text
counter_tmp = soup.find('table', class_='champion-stats-header-matchup__table champion-stats-header-matchup__table--strong tabItem').tbody
counter_list = counter_tmp.findAll('tr')
counter = []
for i in counter_list:
i.find('img').extract()
counter.append(i.find('td', class_='champion-stats-header-matchup__table__champion').text.strip())
strong_tmp = soup.find('table', class_='champion-stats-header-matchup__table champion-stats-header-matchup__table--weak tabItem').tbody
strong_list = strong_tmp.findAll('tr')
strong = []
for i in strong_list:
i.find('img').extract()
strong.append(i.find('td', class_='champion-stats-header-matchup__table__champion').text.strip())
return name, win_rate, pick_rate, champ_tier, counter, strong