Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updates to sqlalchemy and queries #154

Merged
merged 8 commits into from
Apr 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ dist/*
.idea/*

venv*
tests/test.db-*
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,17 @@ You can now navigate to the notebooks directory and start using the detective pa
## Try out detective online
You can try out the latest version of detective from PyPI without installing anything. If you click on the 'launch binder' button above, detective will be started in a Docker container online using the [BinderHub](https://binderhub.readthedocs.io) service. Run the example notebook to explore detective, and use the `Upload` button to upload your own `home-assistant_v2.db` database file for analysis. Note that all data is deleted when the container closes down, so this service is just for trying out detective.

## Development
## Development (VS Code)
* Create a venv: `python3 -m venv venv`
* Activate venv: `source venv/bin/activate`
* Install requirements: `pip3 install -r requirements.txt`
* Install detective in development mode: `pip3 install -e .`
* Optionally, install Jupyter to run the notebooks: `pip3 install jupyterlab`
* Run Jupyter, making sure to use the one from the venv: `venv/bin/jupyter lab`
* Install JupyterLab to run the notebooks: `pip3 install jupyterlab`
* Open the notebook at `notebooks/Getting started with detective.ipynb`

### Running tests
* Install dependencies: `pip3 install -r requirements_test.txt`
* Run: `pytest .`
* Run: `pytest tests`

## Contributors
Big thanks to [@balloob](https://github.com/balloob) and [@frenck](https://github.com/frenck), check out their profiles!
85 changes: 35 additions & 50 deletions detective/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def __init__(self, url, *, fetch_entities=True):
try:
self.engine = create_engine(url)
print("Successfully connected to database", stripped_db_url(url))
self.con = self.engine.connect()
if fetch_entities:
self.fetch_entities()
except Exception as exc:
Expand All @@ -70,7 +71,8 @@ def __init__(self, url, *, fetch_entities=True):
def perform_query(self, query, **params):
"""Perform a query."""
try:
return self.engine.execute(query, params)
with self.engine.connect() as conn:
return conn.execute(query, params)
except:
print(f"Error with query: {query}")
raise
Expand All @@ -79,7 +81,7 @@ def fetch_entities(self) -> None:
"""Fetch entities for which we have data."""
query = text(
"""
SELECT DISTINCT(entity_id) FROM states
SELECT DISTINCT(entity_id) FROM states_meta
"""
)
response = self.perform_query(query)
Expand All @@ -88,7 +90,7 @@ def fetch_entities(self) -> None:
self.entities = [e[0] for e in response]
print(f"There are {len(self.entities)} entities with data")

def fetch_all_sensor_data(self, limit=50000, get_attributes=False) -> pd.DataFrame:
def fetch_all_sensor_data(self, limit=50000) -> pd.DataFrame:
"""
Fetch data for all sensor entities.

Expand All @@ -97,38 +99,28 @@ def fetch_all_sensor_data(self, limit=50000, get_attributes=False) -> pd.DataFra
If None, there is no limit.
- get_attributes: If True, LEFT JOIN the attributes table to retrieve event's attributes.
"""

if get_attributes:
query = """
SELECT entity_id, state, last_updated, shared_attrs
"""
else:
query = """
SELECT entity_id, state, last_updated
"""

query += "FROM states"

if get_attributes:
query += """
LEFT JOIN state_attributes ON states.attributes_id = state_attributes.attributes_id
"""

query += """

query = """
SELECT states.state, states.last_updated_ts, states_meta.entity_id
FROM states
JOIN states_meta
ON states.metadata_id = states_meta.metadata_id
WHERE
entity_id LIKE '%sensor%'
states_meta.entity_id LIKE '%sensor%'
AND
state NOT IN ('unknown', 'unavailable')
ORDER BY last_updated DESC
"""
states.state NOT IN ('unknown', 'unavailable')
ORDER BY last_updated_ts DESC
"""

if limit is not None:
query += f"LIMIT {limit}"
df = pd.read_sql_query(query, self.url)
print(query)
query = text(query)
df = pd.read_sql_query(query, con=self.con)
print(f"The returned Pandas dataframe has {df.shape[0]} rows of data.")
return df

def fetch_all_data_of(self, sensors: Tuple[str], limit=50000, get_attributes=False) -> pd.DataFrame:
def fetch_all_data_of(self, sensors: Tuple[str], limit=50000) -> pd.DataFrame:
"""
Fetch data for sensors.

Expand All @@ -141,33 +133,26 @@ def fetch_all_data_of(self, sensors: Tuple[str], limit=50000, get_attributes=Fal
if len(sensors) == 1:
sensors_str = sensors_str.replace(",", "")

if get_attributes:
query = """
SELECT entity_id, state, last_updated, shared_attrs
"""
else:
query = """
SELECT entity_id, state, last_updated
"""

query += "FROM states"

if get_attributes:
query += """
LEFT JOIN state_attributes ON states.attributes_id = state_attributes.attributes_id
"""

query += f"""
WHERE
query = f"""
WITH combined_states AS (
SELECT states.state, states.last_updated_ts, states_meta.entity_id
FROM states
JOIN states_meta
ON states.metadata_id = states_meta.metadata_id
)
SELECT *
FROM combined_states
WHERE
entity_id IN {sensors_str}
AND
state NOT IN ('unknown', 'unavailable')
ORDER BY last_updated DESC
"""
ORDER BY last_updated_ts DESC
"""

if limit is not None:
query += f"LIMIT {limit}"

df = pd.read_sql_query(query, self.url)
print(query)
query = text(query)
df = pd.read_sql_query(query, con=self.con)
print(f"The returned Pandas dataframe has {df.shape[0]} rows of data.")
return df
4 changes: 2 additions & 2 deletions detective/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
def format_dataframe(df: pd.DataFrame) -> pd.DataFrame:
"""Convert states to numeric where possible and format the last_changed."""
df["state"] = pd.to_numeric(df["state"], errors="coerce")
df["last_updated"] = pd.to_datetime(
df["last_updated"].values, errors="ignore", utc=True
df["last_updated_ts"] = pd.to_datetime(
df["last_updated_ts"].values, errors="ignore", utc=True
).tz_localize(None)
df = df.dropna()
return df
Loading