import pandas as pd
from sqlalchemy import create_engine
from dkutils.util import FileNameGenerator
[docs]class DataFrameWrapper:
"""
Class that facilitates running a query in a database via a Pandas Dataframe and then writing the data to files.
A number of formats are supported including png, html and text files.
"""
def __init__(self, connection_string: str):
self.engine = create_engine(connection_string)
self.file_name_generator = FileNameGenerator()
[docs] @classmethod
def snowflake(
cls, username: str, password: str, snowflake_account: str, database: str, warehouse: str
):
"""
Create a DataFrame with a connection to a Snowflake database
Parameters
----------
username: str
Snowflake username
password: str
Snowflake password
snowflake_account: str
Snowflake account name
database: str
Snowflake database name
warehouse
Snowflake warehouse name
Returns
-------
DataFrameWrapper
a DataFramewrapper that can be used to execute queries against a snowflake database
"""
connection_string = f"snowflake://{username}:{password}@{snowflake_account}/{database}?warehouse={warehouse}"
return DataFrameWrapper(connection_string)
[docs] @classmethod
def postgresql(
cls, username: str, password: str, hostname: str, database: str, port: int = 5432
):
"""
Create a DataFrame with a connection to a postgreSQL database
Parameters
----------
username: str
PostgreSQL username
password: str
PostgreSQL password
hostname: str
PostgreSQL host name
database: str
PostgreSQL database name
port: int, optional
PostgreSQL port
Returns
-------
DataFrameWrapper
a DataFramewrapper that can be used to execute queries against a PostgreSQL database
"""
connection_string = f"postgresql://{username}:{password}@{hostname}:{port}/{database}"
return DataFrameWrapper(connection_string)
[docs] @classmethod
def mssql(cls, username: str, password: str, hostname: str, database: str, port: int = 1433):
"""
Create a DataFrame with a connection to a mssql database
Parameters
----------
username: str
mssql username
password: str
mssql password
hostname: str
mssql host name
database: str
mssql database name
port: int, optional
mssql port
Returns
-------
DataFrameWrapper
a DataFramewrapper that can be used to execute queries against a PostgreSQL database
"""
connection_string = f"mssql+pymssql://{username}:{password}@{hostname}:{port}/{database}"
return DataFrameWrapper(connection_string)
[docs] def create_report(self, query: str, query_type: str, filename=None, additional_parms=None):
"""
This method will execute the given query and then write out the results as an html, png or txt file depending on
the given query_type. The file will named with the given filename. If the the filename parameter then the file
will be named file_ddd.ext. Where ddd will be a three digit number with two leading zeros that is incremented
for each time this method is called with a particular file type. The extension will be determined by the
query_type as follows:
html = html
plot = png
text = txt
Parameters
----------
query: str
A string containing the query to be executed
query_type: str
A string containing the type of file to be created. Must be either html, ping or text
filename: str, optional
The filename to be assigned when saving the query results
additional_parms: dict, optional
A dictionary containing additional the will be passed to the either to to_html, plot or to_string
methods of the dataframe
Returns
-------
int
A integer containing the number of rows returned by the query
str
A string containing the name of the file created
"""
def handle_html(file_name):
if not file_name:
file_name = self.file_name_generator.getFileName('html')
if additional_parms:
df.to_html(file_name, **additional_parms)
else:
df.to_html(file_name)
return file_name
def handle_plot(file_name):
if not file_name:
file_name = self.file_name_generator.getFileName('png')
if additional_parms:
fig = df.plot(**additional_parms).get_figure()
else:
fig = df.plot().get_figure()
fig.savefig(file_name)
return file_name
def handle_text(file_name):
if not file_name:
file_name = self.file_name_generator.getFileName('txt')
if additional_parms:
df.to_string(file_name, **additional_parms)
else:
df.to_string(file_name)
return file_name
handlers = {'html': handle_html, 'plot': handle_plot, 'text': handle_text}
if query_type not in handlers.keys():
raise TypeError(f"query_type must be one of {[*handlers]} but was: {query_type}")
df = pd.read_sql(query, self.engine)
total_rows = len(df.index)
return (total_rows, handlers[query_type](filename))