Skip to content
Snippets Groups Projects
Commit e607aebb authored by Jiaqi Zhao (Roy)
Browse files

edited notebook

parent 1acd2096
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id:cb73c637-68cf-4617-940b-eb8cf781fb0c tags:
``` python
!pip install pyathena
```
%% Output
Collecting pyathena
Downloading pyathena-3.12.2-py3-none-any.whl.metadata (6.3 kB)
Collecting boto3>=1.26.4 (from pyathena)
Downloading boto3-1.37.33-py3-none-any.whl.metadata (6.7 kB)
Collecting botocore>=1.29.4 (from pyathena)
Downloading botocore-1.37.33-py3-none-any.whl.metadata (5.7 kB)
Requirement already satisfied: fsspec in /opt/homebrew/lib/python3.11/site-packages (from pyathena) (2023.10.0)
Requirement already satisfied: python-dateutil in /opt/homebrew/lib/python3.11/site-packages (from pyathena) (2.8.2)
Collecting tenacity>=4.1.0 (from pyathena)
Downloading tenacity-9.1.2-py3-none-any.whl.metadata (1.2 kB)
Collecting jmespath<2.0.0,>=0.7.1 (from boto3>=1.26.4->pyathena)
Using cached jmespath-1.0.1-py3-none-any.whl.metadata (7.6 kB)
Collecting s3transfer<0.12.0,>=0.11.0 (from boto3>=1.26.4->pyathena)
Using cached s3transfer-0.11.4-py3-none-any.whl.metadata (1.7 kB)
Requirement already satisfied: urllib3!=2.2.0,<3,>=1.25.4 in /opt/homebrew/lib/python3.11/site-packages (from botocore>=1.29.4->pyathena) (2.0.7)
Requirement already satisfied: six>=1.5 in /opt/homebrew/lib/python3.11/site-packages (from python-dateutil->pyathena) (1.16.0)
Downloading pyathena-3.12.2-py3-none-any.whl (75 kB)
Downloading boto3-1.37.33-py3-none-any.whl (139 kB)
Downloading botocore-1.37.33-py3-none-any.whl (13.5 MB)
 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 13.5/13.5 MB 8.2 MB/s eta 0:00:00
Downloading tenacity-9.1.2-py3-none-any.whl (28 kB)
Using cached jmespath-1.0.1-py3-none-any.whl (20 kB)
Using cached s3transfer-0.11.4-py3-none-any.whl (84 kB)
Installing collected packages: tenacity, jmespath, botocore, s3transfer, boto3, pyathena
Successfully installed boto3-1.37.33 botocore-1.37.33 jmespath-1.0.1 pyathena-3.12.2 s3transfer-0.11.4 tenacity-9.1.2
[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python3.11 -m pip install --upgrade pip
%% Cell type:code id:9dac8beb-e37a-4a84-b1b7-79df228ce9f8 tags:
``` python
import pandas as pd
from pyathena import connect
import matplotlib.pyplot as plt
import boto3
from pyathena import connect
# Establish the connection to Athena.
# Only one connection is created: a connect() call whose result is rebound
# straight away just builds an AWS client that is never used or closed.
conn = connect(s3_staging_dir='s3://step3.3/', region_name='us-east-2')
```
%% Cell type:code id:b9fd614aafe049e tags:
``` python
# Query Athena
query = 'SELECT device_id, data.vehicle_CO2 FROM vehicledatabase.lab4data_mylesai2'  # change this!

# Run the query through the PyAthena DB-API cursor instead of pd.read_sql():
# pandas warns that raw DBAPI connections other than sqlite3 are untested and,
# when the query fails, it calls conn.rollback(), which PyAthena does not
# support -- the real error then gets wrapped in "unable to rollback".
cursor = conn.cursor()
try:
    cursor.execute(query)
    # Column names are the first field of each DB-API description entry.
    columns = [col[0] for col in cursor.description]
    df = pd.DataFrame.from_records(cursor.fetchall(), columns=columns, coerce_float=True)
finally:
    # Release the cursor whether or not the query succeeded.
    cursor.close()

print("Column names in the DataFrame:", df.columns)
```
%% Output
/var/folders/jm/rgjqkd2s5j5d309wnmpsvddr0000gn/T/ipykernel_75159/1256295774.py:3: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.
df = pd.read_sql(query, conn)
Failed to execute query.
Traceback (most recent call last):
File "/opt/homebrew/lib/python3.11/site-packages/pyathena/common.py", line 586, in _execute
query_id = retry_api_call(
^^^^^^^^^^^^^^^
File "/opt/homebrew/lib/python3.11/site-packages/pyathena/util.py", line 84, in retry_api_call
return retry(func, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/homebrew/lib/python3.11/site-packages/tenacity/__init__.py", line 477, in __call__
do = self.iter(retry_state=retry_state)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/homebrew/lib/python3.11/site-packages/tenacity/__init__.py", line 378, in iter
result = action(retry_state)
^^^^^^^^^^^^^^^^^^^
File "/opt/homebrew/lib/python3.11/site-packages/tenacity/__init__.py", line 400, in <lambda>
self._add_action_func(lambda rs: rs.outcome.result())
^^^^^^^^^^^^^^^^^^^
File "/opt/homebrew/Cellar/python@3.11/3.11.11/Frameworks/Python.framework/Versions/3.11/lib/python3.11/concurrent/futures/_base.py", line 449, in result
return self.__get_result()
^^^^^^^^^^^^^^^^^^^
File "/opt/homebrew/Cellar/python@3.11/3.11.11/Frameworks/Python.framework/Versions/3.11/lib/python3.11/concurrent/futures/_base.py", line 401, in __get_result
raise self._exception
File "/opt/homebrew/lib/python3.11/site-packages/tenacity/__init__.py", line 480, in __call__
result = fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "/opt/homebrew/lib/python3.11/site-packages/botocore/client.py", line 570, in _api_call
return self._make_api_call(operation_name, kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/homebrew/lib/python3.11/site-packages/botocore/context.py", line 124, in wrapper
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/opt/homebrew/lib/python3.11/site-packages/botocore/client.py", line 1031, in _make_api_call
raise error_class(parsed_response, operation_name)
botocore.exceptions.ClientError: An error occurred (AccessDeniedException) when calling the StartQueryExecution operation: You are not authorized to perform: athena:StartQueryExecution on the resource. After your AWS administrator or you have updated your permissions, please try again.
---------------------------------------------------------------------------
ClientError Traceback (most recent call last)
File /opt/homebrew/lib/python3.11/site-packages/pyathena/common.py:586, in BaseCursor._execute(self, operation, parameters, work_group, s3_staging_dir, cache_size, cache_expiration_time, result_reuse_enable, result_reuse_minutes, paramstyle)
585 try:
--> 586 query_id = retry_api_call(
587 self._connection.client.start_query_execution,
588 config=self._retry_config,
589 logger=_logger,
590 **request,
591 ).get("QueryExecutionId")
592 except Exception as e:
File /opt/homebrew/lib/python3.11/site-packages/pyathena/util.py:84, in retry_api_call(func, config, logger, *args, **kwargs)
69 retry = tenacity.Retrying(
70 retry=retry_if_exception(
71 lambda e: getattr(e, "response", {}).get("Error", {}).get("Code") in config.exceptions
(...)
82 reraise=True,
83 )
---> 84 return retry(func, *args, **kwargs)
File /opt/homebrew/lib/python3.11/site-packages/tenacity/__init__.py:477, in Retrying.__call__(self, fn, *args, **kwargs)
476 while True:
--> 477 do = self.iter(retry_state=retry_state)
478 if isinstance(do, DoAttempt):
File /opt/homebrew/lib/python3.11/site-packages/tenacity/__init__.py:378, in BaseRetrying.iter(self, retry_state)
377 for action in self.iter_state.actions:
--> 378 result = action(retry_state)
379 return result
File /opt/homebrew/lib/python3.11/site-packages/tenacity/__init__.py:400, in BaseRetrying._post_retry_check_actions.<locals>.<lambda>(rs)
399 if not (self.iter_state.is_explicit_retry or self.iter_state.retry_run_result):
--> 400 self._add_action_func(lambda rs: rs.outcome.result())
401 return
File /opt/homebrew/Cellar/python@3.11/3.11.11/Frameworks/Python.framework/Versions/3.11/lib/python3.11/concurrent/futures/_base.py:449, in Future.result(self, timeout)
448 elif self._state == FINISHED:
--> 449 return self.__get_result()
451 self._condition.wait(timeout)
File /opt/homebrew/Cellar/python@3.11/3.11.11/Frameworks/Python.framework/Versions/3.11/lib/python3.11/concurrent/futures/_base.py:401, in Future.__get_result(self)
400 try:
--> 401 raise self._exception
402 finally:
403 # Break a reference cycle with the exception in self._exception
File /opt/homebrew/lib/python3.11/site-packages/tenacity/__init__.py:480, in Retrying.__call__(self, fn, *args, **kwargs)
479 try:
--> 480 result = fn(*args, **kwargs)
481 except BaseException: # noqa: B902
File /opt/homebrew/lib/python3.11/site-packages/botocore/client.py:570, in ClientCreator._create_api_method.<locals>._api_call(self, *args, **kwargs)
569 # The "self" in this scope is referring to the BaseClient.
--> 570 return self._make_api_call(operation_name, kwargs)
File /opt/homebrew/lib/python3.11/site-packages/botocore/context.py:124, in with_current_context.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
123 hook()
--> 124 return func(*args, **kwargs)
File /opt/homebrew/lib/python3.11/site-packages/botocore/client.py:1031, in BaseClient._make_api_call(self, operation_name, api_params)
1030 error_class = self.exceptions.from_code(error_code)
-> 1031 raise error_class(parsed_response, operation_name)
1032 else:
ClientError: An error occurred (AccessDeniedException) when calling the StartQueryExecution operation: You are not authorized to perform: athena:StartQueryExecution on the resource. After your AWS administrator or you have updated your permissions, please try again.
The above exception was the direct cause of the following exception:
DatabaseError Traceback (most recent call last)
File /opt/homebrew/lib/python3.11/site-packages/pandas/io/sql.py:2262, in SQLiteDatabase.execute(self, sql, params)
2261 try:
-> 2262 cur.execute(sql, *args)
2263 return cur
File /opt/homebrew/lib/python3.11/site-packages/pyathena/cursor.py:89, in Cursor.execute(self, operation, parameters, work_group, s3_staging_dir, cache_size, cache_expiration_time, result_reuse_enable, result_reuse_minutes, paramstyle, **kwargs)
88 self._reset_state()
---> 89 self.query_id = self._execute(
90 operation,
91 parameters=parameters,
92 work_group=work_group,
93 s3_staging_dir=s3_staging_dir,
94 cache_size=cache_size,
95 cache_expiration_time=cache_expiration_time,
96 result_reuse_enable=result_reuse_enable,
97 result_reuse_minutes=result_reuse_minutes,
98 paramstyle=paramstyle,
99 )
100 query_execution = cast(AthenaQueryExecution, self._poll(self.query_id))
File /opt/homebrew/lib/python3.11/site-packages/pyathena/common.py:594, in BaseCursor._execute(self, operation, parameters, work_group, s3_staging_dir, cache_size, cache_expiration_time, result_reuse_enable, result_reuse_minutes, paramstyle)
593 _logger.exception("Failed to execute query.")
--> 594 raise DatabaseError(*e.args) from e
595 return query_id
DatabaseError: An error occurred (AccessDeniedException) when calling the StartQueryExecution operation: You are not authorized to perform: athena:StartQueryExecution on the resource. After your AWS administrator or you have updated your permissions, please try again.
During handling of the above exception, another exception occurred:
NotSupportedError Traceback (most recent call last)
File /opt/homebrew/lib/python3.11/site-packages/pandas/io/sql.py:2266, in SQLiteDatabase.execute(self, sql, params)
2265 try:
-> 2266 self.con.rollback()
2267 except Exception as inner_exc: # pragma: no cover
File /opt/homebrew/lib/python3.11/site-packages/pyathena/connection.py:368, in Connection.rollback(self)
367 def rollback(self) -> None:
--> 368 raise NotSupportedError
NotSupportedError:
The above exception was the direct cause of the following exception:
DatabaseError Traceback (most recent call last)
Cell In[4], line 3
1 # Query Athena
2 query = 'SELECT device_id, data.vehicle_CO2 FROM vehicledatabase.lab4data_mylesai2'
----> 3 df = pd.read_sql(query, conn)
4 print("Column names in the DataFrame:", df.columns)
File /opt/homebrew/lib/python3.11/site-packages/pandas/io/sql.py:654, in read_sql(sql, con, index_col, coerce_float, params, parse_dates, columns, chunksize, dtype_backend, dtype)
652 with pandasSQL_builder(con) as pandas_sql:
653 if isinstance(pandas_sql, SQLiteDatabase):
--> 654 return pandas_sql.read_query(
655 sql,
656 index_col=index_col,
657 params=params,
658 coerce_float=coerce_float,
659 parse_dates=parse_dates,
660 chunksize=chunksize,
661 dtype_backend=dtype_backend,
662 dtype=dtype,
663 )
665 try:
666 _is_table_name = pandas_sql.has_table(sql)
File /opt/homebrew/lib/python3.11/site-packages/pandas/io/sql.py:2326, in SQLiteDatabase.read_query(self, sql, index_col, coerce_float, parse_dates, params, chunksize, dtype, dtype_backend)
2315 def read_query(
2316 self,
2317 sql,
(...)
2324 dtype_backend: DtypeBackend | Literal["numpy"] = "numpy",
2325 ) -> DataFrame | Iterator[DataFrame]:
-> 2326 cursor = self.execute(sql, params)
2327 columns = [col_desc[0] for col_desc in cursor.description]
2329 if chunksize is not None:
File /opt/homebrew/lib/python3.11/site-packages/pandas/io/sql.py:2271, in SQLiteDatabase.execute(self, sql, params)
2267 except Exception as inner_exc: # pragma: no cover
2268 ex = DatabaseError(
2269 f"Execution failed on sql: {sql}\n{exc}\nunable to rollback"
2270 )
-> 2271 raise ex from inner_exc
2273 ex = DatabaseError(f"Execution failed on sql '{sql}': {exc}")
2274 raise ex from exc
DatabaseError: Execution failed on sql: SELECT device_id, data.vehicle_CO2 FROM vehicledatabase.lab4data_mylesai2
An error occurred (AccessDeniedException) when calling the StartQueryExecution operation: You are not authorized to perform: athena:StartQueryExecution on the resource. After your AWS administrator or you have updated your permissions, please try again.
unable to rollback
%% Cell type:code id:f7bfa8412b804272 tags:
``` python
```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment