Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
e049b53
agent connects with http mcp
czajkub Sep 24, 2025
a0a71a2
lint
czajkub Sep 24, 2025
2f34a70
import
czajkub Sep 24, 2025
cd54621
idek
czajkub Sep 24, 2025
f523311
afb
czajkub Sep 24, 2025
5b246ad
w/e
czajkub Sep 24, 2025
39d3e06
added workout support for duckdb
czajkub Sep 29, 2025
76d6b58
w/e
czajkub Sep 30, 2025
423a6d9
update imports to new scheme
czajkub Sep 30, 2025
5722ff3
lint
czajkub Sep 30, 2025
642b3e6
format response
czajkub Sep 30, 2025
e992103
DRY and adding workout type
czajkub Sep 30, 2025
cc45a2c
prettty
czajkub Oct 1, 2025
411599c
lint
czajkub Oct 3, 2025
e5d5b6f
Merge branch 'main' into import
czajkub Oct 3, 2025
fccc25a
ensure conditions only contain str
czajkub Oct 3, 2025
00c7468
Merge branch 'import' of https://github.com/czajkub/apple-health-mcp-…
czajkub Oct 3, 2025
89a0bbe
exclude docs from linter
czajkub Oct 3, 2025
627f7b5
changes to importer and query logic
czajkub Oct 6, 2025
f2d4aaa
improve docstrings and stats by type tools
czajkub Oct 6, 2025
4e78030
cleanup importer
czajkub Oct 7, 2025
8b3b473
add logging and rename duckdb services
czajkub Oct 7, 2025
9be2982
run linter
czajkub Oct 7, 2025
9496ea3
fixed imports
czajkub Oct 7, 2025
c97d646
further fix imports
czajkub Oct 7, 2025
23ff176
uncomment mcp routerg imports
czajkub Oct 7, 2025
5e4c83b
fix mcp import
czajkub Oct 7, 2025
caec8c5
config changes
czajkub Oct 7, 2025
efd6ede
fix importer
czajkub Oct 7, 2025
7feb6f5
lint
czajkub Oct 7, 2025
a70c762
correct import display
czajkub Oct 8, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,10 @@ repos:
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer

# Paths skipped by every hook. The pattern is a verbose-mode (?x) regex, so
# whitespace and newlines are ignored and each alternative must be separated
# by an explicit `|`; without it the lines concatenate into one pattern that
# can never match.
exclude: |
  (?x)(
      ^tests/|
      ^docs/|
      ^README\.md$
  )
2 changes: 1 addition & 1 deletion app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class Settings(BaseSettings):
CH_DB_NAME: str = "applehealth"
CH_TABLE_NAME: str = "data"

DUCKDB_FILENAME: str = "applehealth.parquet"
DUCKDB_FILENAME: str = "applehealth.duckdb"

CHUNK_SIZE: int = 50_000

Expand Down
41 changes: 25 additions & 16 deletions app/mcp/v1/tools/duckdb_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from fastmcp import FastMCP

from app.schemas.record import HealthRecordSearchParams, IntervalType, RecordType
from app.schemas.record import HealthRecordSearchParams, IntervalType, RecordType, WorkoutType
from app.services.health.duckdb_queries import (
get_health_summary_from_duckdb,
get_statistics_by_type_from_duckdb,
Expand All @@ -11,7 +11,7 @@
search_values_from_duckdb,
)

duckdb_reader_router = FastMCP(name="CH Reader MCP")
duckdb_reader_router = FastMCP(name="DuckDB Reader MCP")


@duckdb_reader_router.tool
Expand All @@ -23,14 +23,16 @@ def get_health_summary_duckdb() -> list[dict[str, Any]]:

Notes for LLM:
- IMPORTANT - Do not guess, autofill, or assume any missing data.
- If there are multiple databases available (DuckDB, ClickHouse, Elasticsearch):
- Use this tool if you're not certain which record type
should be queried
- If there are multiple databases available (DuckDB, Elasticsearch):
first, ask the user which one he wants to use. DO NOT call any tools before
the user specifies his intent.
- If the user decides on an option, only use tools from this database,
do not switch over to another until the user specifies that he wants
to use a different one. You do not have to keep asking whether
the user wants to use the same database that he used before.
- If there is only one database available (DuckDB, ClickHouse, Elasticsearch):
- If there is only one database available (DuckDB, Elasticsearch):
you can use the tools from this database without the user specifying it.
"""
try:
Expand All @@ -46,10 +48,11 @@ def search_health_records_duckdb(params: HealthRecordSearchParams) -> list[dict[

Parameters:
- params: HealthRecordSearchParams object containing all search/filter parameters.
(required parameters: record_type)

Notes for LLMs:
- This function should return a list of health record documents (dicts)
matching the search criteria.
matching the search criteria ordered by date from most to least recent.
- Each document in the list should represent a single health record as stored in DuckDB.
- If an error occurs, the function should return a list with a single dict
containing an 'error' key and the error message.
Expand All @@ -58,14 +61,16 @@ def search_health_records_duckdb(params: HealthRecordSearchParams) -> list[dict[
- Example date_from/date_to: "2020-01-01T00:00:00+00:00"
- Example value_min/value_max: "10", "100.5"
- IMPORTANT - Do not guess, autofill, or assume any missing data.
- If there are multiple databases available (DuckDB, ClickHouse, Elasticsearch):
- This tool can be used to search for most recent records of a given type,
in which case you should use this tool with a limit of 1.
- If there are multiple databases available (DuckDB, Elasticsearch):
first, ask the user which one he wants to use. DO NOT call any tools before
the user specifies his intent.
- If the user decides on an option, only use tools from this database,
do not switch over to another until the user specifies that he wants
to use a different one. You do not have to keep asking whether
the user wants to use the same database that he used before.
- If there is only one database available (DuckDB, ClickHouse, Elasticsearch):
- If there is only one database available (DuckDB, Elasticsearch):
you can use the tools from this database without the user specifying it.
"""
try:
Expand All @@ -75,7 +80,9 @@ def search_health_records_duckdb(params: HealthRecordSearchParams) -> list[dict[


@duckdb_reader_router.tool
def get_statistics_by_type_duckdb(record_type: RecordType | str) -> list[dict[str, Any]]:
def get_statistics_by_type_duckdb(
record_type: RecordType | WorkoutType | str,
) -> list[dict[str, Any]]:
"""
Get comprehensive statistics for a specific health record type from DuckDB.

Expand Down Expand Up @@ -105,17 +112,19 @@ def get_statistics_by_type_duckdb(record_type: RecordType | str) -> list[dict[st
specific health metrics.
- The function is useful for health analysis, identifying outliers, and
understanding data quality.
- This tool can also be used to figure out the value of the record with
the shortest/longest duration or highest/lowest value
- The date_range key in the query is commented out because it contained a hardcoded
from-date, but you can still use it if you replace startDate with your own value.
- IMPORTANT - Do not guess, autofill, or assume any missing data.
- If there are multiple databases available (DuckDB, ClickHouse, Elasticsearch):
- If there are multiple databases available (DuckDB, Elasticsearch):
first, ask the user which one he wants to use. DO NOT call any tools before
the user specifies his intent.
- If the user decides on an option, only use tools from this database,
do not switch over to another until the user specifies that he wants
to use a different one. You do not have to keep asking whether
the user wants to use the same database that he used before.
- If there is only one database available (DuckDB, ClickHouse, Elasticsearch):
- If there is only one database available (DuckDB, Elasticsearch):
you can use the tools from this database without the user specifying it.
"""
try:
Expand All @@ -126,7 +135,7 @@ def get_statistics_by_type_duckdb(record_type: RecordType | str) -> list[dict[st

@duckdb_reader_router.tool
def get_trend_data_duckdb(
record_type: RecordType | str,
record_type: RecordType | WorkoutType | str,
interval: IntervalType = "month",
date_from: str | None = None,
date_to: str | None = None,
Expand Down Expand Up @@ -166,14 +175,14 @@ def get_trend_data_duckdb(
- IMPORTANT - interval must be one of: "day", "week", "month", or "year".
Do not use other values.
- Do not guess, autofill, or assume any missing data.
- If there are multiple databases available (DuckDB, ClickHouse, Elasticsearch):
- If there are multiple databases available (DuckDB, Elasticsearch):
first, ask the user which one he wants to use. DO NOT call any tools before
the user specifies his intent.
- If the user decides on an option, only use tools from this database,
do not switch over to another until the user specifies that he wants
to use a different one. You do not have to keep asking whether
the user wants to use the same database that he used before.
- If there is only one database available (DuckDB, ClickHouse, Elasticsearch):
- If there is only one database available (DuckDB, Elasticsearch):
you can use the tools from this database without the user specifying it.
"""
try:
Expand All @@ -184,7 +193,7 @@ def get_trend_data_duckdb(

@duckdb_reader_router.tool
def search_values_duckdb(
record_type: RecordType | str | None,
record_type: RecordType | WorkoutType | str | None,
value: str,
date_from: str | None = None,
date_to: str | None = None,
Expand All @@ -204,14 +213,14 @@ def search_values_duckdb(
records with the value of "HKCategoryValueSleepAnalysisAsleepDeep"
- The function automatically handles date filtering if date_from/date_to are provided
- Do not guess, autofill, or assume any missing data.
- If there are multiple databases available (DuckDB, ClickHouse, Elasticsearch):
- If there are multiple databases available (DuckDB, Elasticsearch):
first, ask the user which one he wants to use. DO NOT call any tools before
the user specifies his intent.
- If the user decides on an option, only use tools from this database,
do not switch over to another until the user specifies that he wants
to use a different one. You do not have to keep asking whether
the user wants to use the same database that he used before.
- If there is only one database available (DuckDB, ClickHouse, Elasticsearch):
- If there is only one database available (DuckDB, Elasticsearch):
you can use the tools from this database without the user specifying it.
"""
try:
Expand Down
15 changes: 14 additions & 1 deletion app/schemas/record.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,27 @@
"HKQuantityTypeIdentifierEnvironmentalAudioExposure",
]

# Workout activity identifiers ("HKWorkoutActivityType...") that callers can
# pass as a record_type. The tool signatures and HealthRecordSearchParams
# union this with `str`, so identifiers not listed here are still accepted.
WorkoutType = Literal[
"HKWorkoutActivityTypeRunning",
"HKWorkoutActivityTypeWalking",
"HKWorkoutActivityTypeHiking",
"HKWorkoutActivityTypeTraditionalStrengthTraining",
"HKWorkoutActivityTypeCycling",
"HKWorkoutActivityTypeMixedMetabolicCardioTraining",
"HKWorkoutActivityTypeHighIntensityIntervalTraining",
"HKWorkoutActivityTypeHockey",
]

# Allowed aggregation intervals for trend queries.
IntervalType = Literal["day", "week", "month", "year"]


class HealthRecordSearchParams(BaseModel):
record_type: RecordType | str | None = None
record_type: RecordType | WorkoutType | str | None = None
source_name: str | None = None
date_from: str | None = None
date_to: str | None = None
min_workout_duration: str | None = None
max_workout_duration: str | None = None
value_min: str | None = None
value_max: str | None = None
limit: int = 10
12 changes: 7 additions & 5 deletions app/services/duckdb_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,11 @@ def __post_init__(self):
else:
self.path = Path(self.path)

if isinstance(self.path, Path) and not self.path.exists():
raise FileNotFoundError(f"Parquet file not found: {self.path}")

@staticmethod
def format_response(
    response: DuckDBPyRelation | list[DuckDBPyRelation],
) -> list[dict[str, Any]]:
    """Convert one or more DuckDB relations into a flat list of row dicts.

    Parameters:
    - response: a single DuckDBPyRelation, or a list of them.

    Returns:
    - One dict per row, produced by ``relation.df().to_dict(orient="records")``.
      When several relations are given, their rows are concatenated in the
      order the relations appear. An empty list yields an empty list.
    """
    relations = [response] if isinstance(response, DuckDBPyRelation) else response
    # Flatten in a single pass: ``sum(lists, [])`` re-copies the accumulated
    # list for every relation, which is quadratic in the total row count.
    return [
        row
        for relation in relations
        for row in relation.df().to_dict(orient="records")
    ]
Loading