Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,10 @@ Follow these steps to set up Apple Health MCP Server in your environment.

4. Lastly, if you're going to be using DuckDB:
- Run `make duckdb` to create a parquet file with your exported XML data
- If you want to connect to the file through http(s):
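- Set `DUCKDB_FILENAME` in your `.env` file to the file's URL instead of a local path, e.g. `localhost:8080/applehealth.parquet` (a value starting with `localhost` is automatically prefixed with `http://`)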
- For an example of how to host the file locally, run `uv run tests/fileserver.py` (it serves on port 8080 by default; pass `-p <port>` to use a different port)


### Configuration Files

Expand Down Expand Up @@ -235,6 +239,7 @@ The Apple Health MCP Server provides a suite of tools for exploring, searching,
| `search_health_records_es` | Flexible search for health records in Elasticsearch with advanced filtering and query options. |
| `get_statistics_by_type_es` | Get comprehensive statistics (count, min, max, avg, sum) for a specific health record type. |
| `get_trend_data_es` | Analyze trends for a health record type over time (daily, weekly, monthly, yearly aggregations). |
| `search_values_es` | Search for records with exactly matching values (including text). |

### ClickHouse Tools (`ch_reader`)

Expand All @@ -244,6 +249,7 @@ The Apple Health MCP Server provides a suite of tools for exploring, searching,
| `search_health_records_ch` | Flexible search for health records in ClickHouse with advanced filtering and query options. |
| `get_statistics_by_type_ch` | Get comprehensive statistics (count, min, max, avg, sum) for a specific health record type. |
| `get_trend_data_ch` | Analyze trends for a health record type over time (daily, weekly, monthly, yearly aggregations). |
| `search_values_ch` | Search for records with exactly matching values (including text). |

### DuckDB Tools (`duckdb_reader`)

Expand All @@ -253,6 +259,7 @@ The Apple Health MCP Server provides a suite of tools for exploring, searching,
| `search_health_records_duckdb` | Flexible search for health records in DuckDB with advanced filtering and query options. |
| `get_statistics_by_type_duckdb` | Get comprehensive statistics (count, min, max, avg, sum) for a specific health record type. |
| `get_trend_data_duckdb` | Analyze trends for a health record type over time (daily, weekly, monthly, yearly aggregations). |
| `search_values_duckdb` | Search for records with exactly matching values (including text). |

All tools are accessible via MCP-compatible clients and can be used with natural language or programmatic queries to explore and analyze your Apple Health data.

Expand Down
4 changes: 2 additions & 2 deletions app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class Settings(BaseSettings):
CH_DB_NAME: str = "applehealth"
CH_TABLE_NAME: str = "data"

DUCKDB_FILENAME: str = "applehealth"
DUCKDB_FILENAME: str = "applehealth.parquet"

CHUNK_SIZE: int = 50_000

Expand All @@ -54,7 +54,7 @@ def assemble_cors_origins(cls, v: str | list[str]) -> list[str] | str:

@lru_cache
def get_settings() -> Settings:
return Settings() # type: ignore[call-arg
return Settings() # type: ignore[call-arg]


settings = get_settings()
19 changes: 15 additions & 4 deletions app/services/duckdb_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,30 @@
from pathlib import Path
from typing import Any

import duckdb
from duckdb import DuckDBPyRelation

from app.config import settings


@dataclass
class DuckDBClient:
def __init__(self):
self.parquetpath: Path = Path(f"{settings.DUCKDB_FILENAME}.parquet")
path: Path | str = f"{settings.DUCKDB_FILENAME}"

def __post_init__(self):
if not self.parquetpath.exists():
raise FileNotFoundError(f"Parquet file not found: {self.parquetpath}")
if self.path.startswith("localhost"):
self.path = "http://" + self.path

if self.path.startswith(("http://", "https://")):
duckdb.sql("""
INSTALL httpfs;
LOAD httpfs;
""")
else:
self.path = Path(self.path)

if isinstance(self.path, Path) and not self.path.exists():
raise FileNotFoundError(f"Parquet file not found: {self.path}")

@staticmethod
def format_response(response: DuckDBPyRelation) -> list[dict[str, Any]]:
Expand Down
11 changes: 6 additions & 5 deletions app/services/health/duckdb_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,16 @@

def get_health_summary_from_duckdb() -> list[dict[str, Any]]:
response = duckdb.sql(
f"SELECT type, COUNT(*) AS count FROM read_parquet('{client.parquetpath}') GROUP BY ALL",
f"""SELECT type, COUNT(*) AS count FROM read_parquet('{client.path}')
GROUP BY type ORDER BY count DESC""",
)
return client.format_response(response)


def search_health_records_from_duckdb(
params: HealthRecordSearchParams,
) -> list[dict[str, Any]]:
query: str = f"SELECT * FROM read_parquet('{client.parquetpath}')"
query: str = f"SELECT * FROM read_parquet('{client.path}')"
query += fill_query(params)
response = duckdb.sql(query)
return client.format_response(response)
Expand All @@ -31,7 +32,7 @@ def get_statistics_by_type_from_duckdb(
result = duckdb.sql(f"""
SELECT type, COUNT(*) AS count, AVG(value) AS average,
SUM(value) AS sum, MIN(value) AS min, MAX(value) AS max
FROM read_parquet('{client.parquetpath}')
FROM read_parquet('{client.path}')
WHERE type = '{record_type}' GROUP BY type
""")
return client.format_response(result)
Expand All @@ -47,7 +48,7 @@ def get_trend_data_from_duckdb(
SELECT device, time_bucket(INTERVAL '1 {interval}', startDate) AS interval,
AVG(value) AS average, SUM(value) AS sum,
MIN(value) AS min, MAX(value) AS max, COUNT(*) AS count
FROM read_parquet('{client.parquetpath}')
FROM read_parquet('{client.path}')
WHERE type = '{record_type}'
{f"AND startDate >= '{date_from}'" if date_from else ""}
{f"AND startDate <= '{date_to}'" if date_to else ""}
Expand All @@ -63,7 +64,7 @@ def search_values_from_duckdb(
date_to: str | None = None,
) -> list[dict[str, Any]]:
result = duckdb.sql(f"""
SELECT * FROM read_parquet('{client.parquetpath}') WHERE textvalue = '{value}'
SELECT * FROM read_parquet('{client.path}') WHERE textvalue = '{value}'
{f"AND type = '{record_type}'" if record_type else ""}
{f"AND startDate >= '{date_from}'" if date_from else ""}
{f"AND startDate <= '{date_to}'" if date_to else ""}
Expand Down
2 changes: 1 addition & 1 deletion config/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@ ES_HOST="localhost"
CH_DIRNAME="applehealth.chdb"
CH_DB_NAME="applehealth"
CH_TABLE_NAME="data"
DUCKDB_FILENAME="applehealth"
DUCKDB_FILENAME="applehealth.parquet"
CHUNK_SIZE="50000"
RAW_XML_PATH="raw.xml"
2 changes: 1 addition & 1 deletion scripts/duckdb_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def exportxml(self) -> None:
chunk_dfs.append(df)

combined_df = pl.concat(chunk_dfs)
combined_df.write_parquet(f"{self.parquetpath}", compression="zstd")
combined_df.write_parquet(f"{self.path}", compression="zstd")

for f in chunkfiles:
os.remove(f)
Expand Down
4 changes: 2 additions & 2 deletions scripts/xml_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

class XMLExporter:
def __init__(self):
self.path: Path = Path(settings.RAW_XML_PATH)
self.xmlpath: Path = Path(settings.RAW_XML_PATH)
self.chunk_size: int = settings.CHUNK_SIZE

DATE_FIELDS: tuple[str, ...] = ("startDate", "endDate", "creationDate")
Expand Down Expand Up @@ -62,7 +62,7 @@ def parse_xml(self) -> Generator[DataFrame, Any, None]:
"""
records: list[dict[str, Any]] = []

for event, elem in ET.iterparse(self.path, events=("start",)):
for event, elem in ET.iterparse(self.xmlpath, events=("start",)):
if elem.tag == "Record" and event == "start":
if len(records) >= self.chunk_size:
yield DataFrame(records).reindex(columns=self.COLUMN_NAMES)
Expand Down
32 changes: 32 additions & 0 deletions tests/fileserver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import argparse
from pathlib import Path

import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse

# ASGI application object; the file-serving route is registered on it and it is
# the object handed to uvicorn when this module is run as a script.
app = FastAPI()


@app.get("/{filename}")
async def serve_file(filename: str) -> FileResponse:
    """Serve one file from the current working directory.

    Args:
        filename: Name of the file to send, taken from the URL path.

    Returns:
        A ``FileResponse`` streaming the requested file.

    Raises:
        HTTPException: 404 when ``filename`` is not a plain file name or does
            not refer to an existing regular file.
    """
    candidate = Path(filename)

    # Only bare names are served: anything containing a path separator or a
    # parent reference is rejected so requests cannot leave this directory.
    is_plain_name = candidate.name == filename and filename not in (".", "..")

    # FileResponse raises RuntimeError (surfacing as a 500) when the path is
    # missing or not a regular file, so check up front and answer with a 404.
    if not is_plain_name or not candidate.is_file():
        raise HTTPException(status_code=404, detail=f"File not found: {filename}")

    return FileResponse(candidate)


# Command-line interface for the example file server: a single optional
# -p/--port flag selecting the TCP port (8080 unless overridden).
parser = argparse.ArgumentParser(
    description="Host local files in this directory on localhost",
    prog="Filesystem server",
)
parser.add_argument(
    "-p",
    "--port",
    dest="port",
    action="store",
    default=8080,
    type=int,
    help="Port on which to serve",
)

if __name__ == "__main__":
    # Script entry point: read the requested port from the command line and
    # serve the app on localhost until interrupted.
    args = parser.parse_args()
    uvicorn.run(app, port=args.port, host="localhost")