Skip to content
Snippets Groups Projects
Commit 842e3b69 authored by nwp2
Browse files

Update file data.ipynb

parent eee90c8d
Branches main
No related tags found
No related merge requests found
%% Cell type:code id:b95ed24f-1e5f-402a-baac-30acb133e051 tags:
``` python
import boto3
```
%% Cell type:code id:30bd7123-0500-46bd-bc95-5064c8786a08 tags:
``` python
import os

# SECURITY: an AWS access key pair used to be hard-coded in this cell and was
# committed to version control. Treat that key as compromised: revoke it in IAM
# and issue a new one. Secrets must never be stored in the notebook.
#
# Credentials are now taken from the standard AWS environment variables. When a
# variable is unset the value is None, and boto3 then falls back to its default
# credential chain (shared credentials file, role credentials, ...).
AWS_ACCESS_KEY = os.environ.get("AWS_ACCESS_KEY_ID")
AWS_SECRET_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")
AWS_REGION = "us-east-2"

# Athena client shared by every query cell below.
athena_client = boto3.client(
    "athena",
    aws_access_key_id=AWS_ACCESS_KEY,
    aws_secret_access_key=AWS_SECRET_KEY,
    region_name=AWS_REGION,
)
```
%% Cell type:code id:4feb0d13-add5-4c99-a871-23b7da2e6c1b tags:
``` python
import time
import numpy as np
def run_query(query, poll_interval=1.0):
    """Run an Athena SQL query and return its result as numbers.

    Args:
        query: SQL text to execute. Every selected column must be numeric,
            because each cell is converted with ``float``.
        poll_interval: Seconds to wait between status checks while the query
            is still queued or running.

    Returns:
        A ``(headers, rows)`` tuple: ``headers`` is the list of column names
        taken from the first result row, ``rows`` is a NumPy array with one
        row per data row. NULL cells become ``nan``.

    Raises:
        RuntimeError: If the query ends in the FAILED or CANCELLED state.
    """
    # Use the function argument; the previous version silently ran whatever
    # the global QUERY happened to hold.
    # NOTE(review): "Database" receives a fully qualified, quoted table path
    # rather than a plain database name -- kept as-is, confirm it is intended.
    query_response = athena_client.start_query_execution(
        QueryString=query,
        QueryExecutionContext={"Database": '"AwsDataCatalog"."crawled-database"."myawsbucket_npiphi"'},
        ResultConfiguration={
            "OutputLocation": "s3://myawsbucket-npiphi/staging",
            "EncryptionConfiguration": {"EncryptionOption": "SSE_S3"},
        },
    )
    execution_id = query_response["QueryExecutionId"]

    # Poll until the query reaches a terminal state instead of assuming that a
    # fixed two-second sleep is always long enough.
    while True:
        status = athena_client.get_query_execution(
            QueryExecutionId=execution_id
        )["QueryExecution"]["Status"]
        state = status["State"]
        if state == "SUCCEEDED":
            break
        if state in ("FAILED", "CANCELLED"):
            reason = status.get("StateChangeReason", "no reason reported")
            raise RuntimeError(f"Athena query {execution_id} {state}: {reason}")
        time.sleep(poll_interval)

    # Results are paginated; follow NextToken so large result sets are not
    # truncated to the first page.
    result_rows = []
    request = {"QueryExecutionId": execution_id}
    while True:
        page = athena_client.get_query_results(**request)
        result_rows.extend(page["ResultSet"]["Rows"])
        next_token = page.get("NextToken")
        if not next_token:
            break
        request["NextToken"] = next_token

    if not result_rows:
        return ([], np.array([]))

    # The first row of the first page carries the column names.
    headers = [cell["VarCharValue"] for cell in result_rows[0]["Data"]]
    # A NULL cell has no "VarCharValue" key; map it to nan rather than crash.
    rows = [
        [float(cell.get("VarCharValue", "nan")) for cell in row["Data"]]
        for row in result_rows[1:]
    ]
    return (headers, np.array(rows))
```
%% Cell type:code id:fde037bf-cc90-480d-b526-62209b94a3b8 tags:
``` python
# Submit a 1000-row noise/fuel sample query by hand; the result is fetched
# in the following cell via query_response["QueryExecutionId"].
QUERY = (
    'SELECT vehicle_noise, vehicle_fuel '
    'FROM "AwsDataCatalog"."crawled-database"."myawsbucket_npiphi" '
    'where vehicle_fuel is Not null limit 1000'
)
query_response = athena_client.start_query_execution(
    ResultConfiguration={
        "EncryptionConfiguration": {"EncryptionOption": "SSE_S3"},
        "OutputLocation": "s3://myawsbucket-npiphi/staging",
    },
    QueryExecutionContext={
        "Database": '"AwsDataCatalog"."crawled-database"."myawsbucket_npiphi"'
    },
    QueryString=QUERY,
)
```
%% Cell type:code id:42e949ac-c298-4704-80fb-44344a320d30 tags:
``` python
import time

# Athena runs queries asynchronously: asking for results before the query has
# finished fails. Wait for a terminal state before fetching.
execution_id = query_response["QueryExecutionId"]
while True:
    state = athena_client.get_query_execution(
        QueryExecutionId=execution_id
    )["QueryExecution"]["Status"]["State"]
    if state not in ("QUEUED", "RUNNING"):
        break
    time.sleep(1)
if state != "SUCCEEDED":
    raise RuntimeError(f"Athena query {execution_id} ended in state {state}")

# First page of results for the query submitted in the previous cell.
result = athena_client.get_query_results(
    QueryExecutionId=execution_id
)
```
%% Cell type:code id:2f8f7d17-fc59-4314-aaf5-819f0e4a5ac3 tags:
``` python
import matplotlib.pyplot as plt
# Scatter plot of the two selected columns (noise on x, fuel on y) for rows
# where vehicle_fuel is present.
QUERY = (
    'SELECT vehicle_noise, vehicle_fuel '
    'FROM "AwsDataCatalog"."crawled-database"."myawsbucket_npiphi" '
    'where vehicle_fuel is Not null limit 10000'
)
headers, rows = run_query(QUERY)

# Axis labels come straight from the returned column names.
plt.ylabel(ylabel=headers[1])
plt.xlabel(xlabel=headers[0])
plt.scatter(x=rows[:, 0], y=rows[:, 1])
```
%% Output
<matplotlib.collections.PathCollection at 0x7feef9de2620>
%% Cell type:code id:b1332efb-cfff-498b-a32d-348206890d0a tags:
``` python
# Scatter plot of vehicle_PMx (x) against vehicle_speed (y).
# NOTE(review): the filter is on vehicle_NOx, which is not one of the plotted
# columns -- confirm this is intended rather than a copy/paste leftover.
QUERY = (
    'SELECT vehicle_PMx, vehicle_speed '
    'FROM "AwsDataCatalog"."crawled-database"."myawsbucket_npiphi" '
    'where vehicle_NOx is Not null and vehicle_NOx != 0 limit 10000'
)
headers, rows = run_query(QUERY)

# Label each axis with the column name reported by the query result.
plt.ylabel(ylabel=headers[1])
plt.xlabel(xlabel=headers[0])
plt.scatter(x=rows[:, 0], y=rows[:, 1])
```
%% Output
<matplotlib.collections.PathCollection at 0x7feef9e46590>
%% Cell type:code id:5e5e93f6-d44b-4986-bd90-2f4f26c94b03 tags:
``` python
# Scatter plot of vehicle_PMx (x) against vehicle_NOx (y), restricted to rows
# whose vehicle_NOx is present and non-zero.
QUERY = (
    'SELECT vehicle_PMx, vehicle_NOx '
    'FROM "AwsDataCatalog"."crawled-database"."myawsbucket_npiphi" '
    'where vehicle_NOx is Not null and vehicle_NOx != 0 limit 10000'
)
headers, rows = run_query(QUERY)

# Column names from the result double as axis labels.
plt.ylabel(ylabel=headers[1])
plt.xlabel(xlabel=headers[0])
plt.scatter(x=rows[:, 0], y=rows[:, 1])
```
%% Output
<matplotlib.collections.PathCollection at 0x7feef9b78760>
%% Cell type:code id:a39dc881-28fb-46dc-b290-5c97427c477e tags:
``` python
# Show the column names held in `headers`, as assigned by the most recently
# executed run_query cell.
headers
```
%% Output
['vehicle_noise', 'vehicle_fuel']
%% Cell type:code id:7100e61b-3888-4dc2-be1e-fd64b0897f2c tags:
``` python
```
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment