From 8a7b91980bdd5921d46a45dbee091f1973099eda Mon Sep 17 00:00:00 2001
From: czajkub <czajkub@gmail.com>
Date: Fri, 12 Sep 2025 10:51:43 +0200
Subject: [PATCH 01/13] added sum to trend data

---
 app/services/health/clickhouse.py     | 3 ++-
 app/services/health/duckdb_queries.py | 7 ++++---
 app/services/health/elasticsearch.py  | 2 ++
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/app/services/health/clickhouse.py b/app/services/health/clickhouse.py
index d80d87b..b3803a1 100644
--- a/app/services/health/clickhouse.py
+++ b/app/services/health/clickhouse.py
@@ -33,7 +33,8 @@ def get_trend_data_from_ch(
 ) -> dict[str, Any]:
     return ch.inquire(f"""
         SELECT toStartOfInterval(startDate, INTERVAL 1 {interval}) AS interval,
-        AVG(value), MIN(value), MAX(value), COUNT(*) FROM {ch.db_name}.{ch.table_name}
+        AVG(value) AS average, SUM(value) AS sum, MIN(value) AS min,
+        MAX(value) AS max, COUNT(*) AS count FROM {ch.db_name}.{ch.table_name}
         WHERE type = '{record_type}'
         {f"AND startDate >= '{date_from}'" if date_from else ""}
         {f"AND startDate <= '{date_to}'" if date_to else ""}
diff --git a/app/services/health/duckdb_queries.py b/app/services/health/duckdb_queries.py
index 6b90fae..7047106 100644
--- a/app/services/health/duckdb_queries.py
+++ b/app/services/health/duckdb_queries.py
@@ -45,11 +45,12 @@ def get_trend_data_from_duckdb(
 ) -> list[dict[str, Any]]:
     result = duckdb.sql(f"""
         SELECT time_bucket(INTERVAL '1 {interval}', startDate) AS interval,
-            AVG(value) AS average, MIN(value) AS min, MAX(value) AS max, COUNT(*) AS count
+        AVG(value) AS average, SUM(value) AS sum,
+        MIN(value) AS min, MAX(value) AS max, COUNT(*) AS count
         FROM read_parquet('{client.parquetpath}')
         WHERE type = '{record_type}'
-            {f"AND startDate >= '{date_from}'" if date_from else ""}
-            {f"AND startDate <= '{date_to}'" if date_to else ""}
+        {f"AND startDate >= '{date_from}'" if date_from else ""}
+        {f"AND startDate <= '{date_to}'" if date_to else ""}
         GROUP BY interval ORDER BY interval ASC
     """)
     return client.format_response(result)
diff --git a/app/services/health/elasticsearch.py b/app/services/health/elasticsearch.py
index 9f0d76a..dca5fb1 100644
--- a/app/services/health/elasticsearch.py
+++ b/app/services/health/elasticsearch.py
@@ -106,6 +106,7 @@ def get_trend_data_logic(
                     "avg_value": {"avg": {"field": "value"}},
                     "min_value": {"min": {"field": "value"}},
                     "max_value": {"max": {"field": "value"}},
+                    "value_sum": {"sum": {"field": "value"}},
                     "count": {"value_count": {"field": "value"}},
                 },
             },
@@ -121,6 +122,7 @@ def get_trend_data_logic(
                 "avg_value": bucket["avg_value"]["value"],
                 "min_value": bucket["min_value"]["value"],
                 "max_value": bucket["max_value"]["value"],
+                "value_sum": bucket["value_sum"]["value"],
                 "count": bucket["count"]["value"],
             },
         )

From 7795bf7008dc7a11d4ee2e7b0160c6a7293cfe9e Mon Sep 17 00:00:00 2001
From: czajkub <czajkub@gmail.com>
Date: Fri, 12 Sep 2025 11:16:14 +0200
Subject: [PATCH 02/13] added device grouping to duckdb for test

---
 app/services/health/duckdb_queries.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/services/health/duckdb_queries.py b/app/services/health/duckdb_queries.py
index 7047106..9714628 100644
--- a/app/services/health/duckdb_queries.py
+++ b/app/services/health/duckdb_queries.py
@@ -51,7 +51,7 @@ def get_trend_data_from_duckdb(
         WHERE type = '{record_type}'
         {f"AND startDate >= '{date_from}'" if date_from else ""}
         {f"AND startDate <= '{date_to}'" if date_to else ""}
-        GROUP BY interval ORDER BY interval ASC
+        GROUP BY interval, device ORDER BY interval ASC
     """)
     return client.format_response(result)
 

From a70831cad29a2ef0b1341b5521502b10bda60ebb Mon Sep 17 00:00:00 2001
From: czajkub <czajkub@gmail.com>
Date: Fri, 12 Sep 2025 11:23:50 +0200
Subject: [PATCH 03/13] added device as well to query

---
 app/services/health/clickhouse.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/services/health/clickhouse.py b/app/services/health/clickhouse.py
index b3803a1..e7b3497 100644
--- a/app/services/health/clickhouse.py
+++ b/app/services/health/clickhouse.py
@@ -32,7 +32,7 @@ def get_trend_data_from_ch(
     date_to: str | None = None,
 ) -> dict[str, Any]:
     return ch.inquire(f"""
-        SELECT toStartOfInterval(startDate, INTERVAL 1 {interval}) AS interval,
+        SELECT device, toStartOfInterval(startDate, INTERVAL 1 {interval}) AS interval,
         AVG(value) AS average, SUM(value) AS sum, MIN(value) AS min,
         MAX(value) AS max, COUNT(*) AS count FROM {ch.db_name}.{ch.table_name}
         WHERE type = '{record_type}'

From 74c5428cc181f3a077904f80dc7472619a8b95df Mon Sep 17 00:00:00 2001
From: czajkub <czajkub@gmail.com>
Date: Fri, 12 Sep 2025 11:33:09 +0200
Subject: [PATCH 04/13] ch and duck device/interval grouping

---
 app/services/health/clickhouse.py     | 5 ++++-
 app/services/health/duckdb_queries.py | 5 ++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/app/services/health/clickhouse.py b/app/services/health/clickhouse.py
index e7b3497..7cf09f5 100644
--- a/app/services/health/clickhouse.py
+++ b/app/services/health/clickhouse.py
@@ -38,7 +38,7 @@ def get_trend_data_from_ch(
         WHERE type = '{record_type}'
         {f"AND startDate >= '{date_from}'" if date_from else ""}
         {f"AND startDate <= '{date_to}'" if date_to else ""}
-        GROUP BY interval ORDER BY interval ASC
+        GROUP BY interval, device ORDER BY interval ASC
     """)
 
 
@@ -54,3 +54,6 @@ def search_values_from_ch(
         {f"AND startDate >= '{date_from}'" if date_from else ""}
         {f"AND startDate <= '{date_to}'" if date_to else ""}
     """)
+
+if __name__ == "__main__":
+    print(get_trend_data_from_ch("HKQuantityTypeIdentifierStepCount", "week", "2023-03-01", "2023-04-01"))
\ No newline at end of file
diff --git a/app/services/health/duckdb_queries.py b/app/services/health/duckdb_queries.py
index 9714628..d4b0156 100644
--- a/app/services/health/duckdb_queries.py
+++ b/app/services/health/duckdb_queries.py
@@ -44,7 +44,7 @@ def get_trend_data_from_duckdb(
     date_to: str | None = None,
 ) -> list[dict[str, Any]]:
     result = duckdb.sql(f"""
-        SELECT time_bucket(INTERVAL '1 {interval}', startDate) AS interval,
+        SELECT device, time_bucket(INTERVAL '1 {interval}', startDate) AS interval,
         AVG(value) AS average, SUM(value) AS sum,
         MIN(value) AS min, MAX(value) AS max, COUNT(*) AS count
         FROM read_parquet('{client.parquetpath}')
@@ -69,3 +69,6 @@ def search_values_from_duckdb(
         {f"AND startDate <= '{date_to}'" if date_to else ""}
     """)
     return client.format_response(result)
+
+if __name__ == "__main__":
+    print(get_trend_data_from_duckdb("HKQuantityTypeIdentifierStepCount", "week", "2023-03-01", "2023-04-01"))
\ No newline at end of file

From 954654c2adca352fff2b3526db59b7164b43dfe5 Mon Sep 17 00:00:00 2001
From: czajkub <czajkub@gmail.com>
Date: Fri, 12 Sep 2025 11:53:32 +0200
Subject: [PATCH 05/13] docstring tweak

---
 app/mcp/v1/tools/ch_reader.py     | 5 +++++
 app/mcp/v1/tools/duckdb_reader.py | 5 +++++
 2 files changed, 10 insertions(+)

diff --git a/app/mcp/v1/tools/ch_reader.py b/app/mcp/v1/tools/ch_reader.py
index 6ccf2de..09788a8 100644
--- a/app/mcp/v1/tools/ch_reader.py
+++ b/app/mcp/v1/tools/ch_reader.py
@@ -142,9 +142,11 @@ def get_trend_data_ch(
 
     Returns:
     - record_type: The analyzed record type
+    - device: The device on which the data was recorded
     - interval: The time interval used
     - trend_data: List of time buckets with statistics for each period:
       * date: The time period (ISO string)
+      * value_sum: Sum of values for the period
       * avg_value: Average value for the period
       * min_value: Minimum value for the period
       * max_value: Maximum value for the period
@@ -152,6 +154,9 @@ def get_trend_data_ch(
 
     Notes for LLMs:
     - Use this to analyze trends, patterns, and seasonal variations in health data
+    - Keep in mind that when there is data from multiple devices spanning the same
+      time period, there is a possibility of data being duplicated. Inform the user
+      of this possibility if you see multiple devices in the same time period.
     - The function automatically handles date filtering if date_from/date_to are provided
     - IMPORTANT - interval must be one of: "day", "week", "month", or "year".
       Do not use other values.
diff --git a/app/mcp/v1/tools/duckdb_reader.py b/app/mcp/v1/tools/duckdb_reader.py
index f142ea9..e34bb5b 100644
--- a/app/mcp/v1/tools/duckdb_reader.py
+++ b/app/mcp/v1/tools/duckdb_reader.py
@@ -142,9 +142,11 @@ def get_trend_data_duckdb(
 
     Returns:
     - record_type: The analyzed record type
+    - device: The device on which the data was recorded
     - interval: The time interval used
     - trend_data: List of time buckets with statistics for each period:
       * date: The time period (ISO string)
+      * value_sum: Sum of values for the period
       * avg_value: Average value for the period
       * min_value: Minimum value for the period
       * max_value: Maximum value for the period
@@ -152,6 +154,9 @@ def get_trend_data_duckdb(
 
     Notes for LLMs:
     - Use this to analyze trends, patterns, and seasonal variations in health data
+    - Keep in mind that when there is data from multiple devices spanning the same
+      time period, there is a possibility of data being duplicated. Inform the user
+      of this possibility if you see multiple devices in the same time period.
     - The function automatically handles date filtering if date_from/date_to are provided
     - IMPORTANT - interval must be one of: "day", "week", "month", or "year".
       Do not use other values.

From 1afd7fea514020d2aea9bc9cf80572e5a512414d Mon Sep 17 00:00:00 2001
From: czajkub <czajkub@gmail.com>
Date: Fri, 12 Sep 2025 14:58:55 +0200
Subject: [PATCH 06/13] docstring improving

---
 app/mcp/v1/tools/ch_reader.py     | 2 ++
 app/mcp/v1/tools/duckdb_reader.py | 2 ++
 app/mcp/v1/tools/es_reader.py     | 7 +++++++
 3 files changed, 11 insertions(+)

diff --git a/app/mcp/v1/tools/ch_reader.py b/app/mcp/v1/tools/ch_reader.py
index 09788a8..71b6a3d 100644
--- a/app/mcp/v1/tools/ch_reader.py
+++ b/app/mcp/v1/tools/ch_reader.py
@@ -157,6 +157,8 @@ def get_trend_data_ch(
     - Keep in mind that when there is data from multiple devices spanning the same
       time period, there is a possibility of data being duplicated. Inform the user
       of this possibility if you see multiple devices in the same time period.
+    - If a user asks you to sum up some values from their health records, DO NOT
+      search for records and write a script to sum them, instead, use this tool.
     - The function automatically handles date filtering if date_from/date_to are provided
     - IMPORTANT - interval must be one of: "day", "week", "month", or "year".
       Do not use other values.
diff --git a/app/mcp/v1/tools/duckdb_reader.py b/app/mcp/v1/tools/duckdb_reader.py
index e34bb5b..7e367c3 100644
--- a/app/mcp/v1/tools/duckdb_reader.py
+++ b/app/mcp/v1/tools/duckdb_reader.py
@@ -157,6 +157,8 @@ def get_trend_data_duckdb(
     - Keep in mind that when there is data from multiple devices spanning the same
       time period, there is a possibility of data being duplicated. Inform the user
       of this possibility if you see multiple devices in the same time period.
+    - If a user asks you to sum up some values from their health records, DO NOT
+      search for records and write a script to sum them, instead, use this tool.
     - The function automatically handles date filtering if date_from/date_to are provided
     - IMPORTANT - interval must be one of: "day", "week", "month", or "year".
       Do not use other values.
diff --git a/app/mcp/v1/tools/es_reader.py b/app/mcp/v1/tools/es_reader.py
index 2281819..a092bd2 100644
--- a/app/mcp/v1/tools/es_reader.py
+++ b/app/mcp/v1/tools/es_reader.py
@@ -142,9 +142,11 @@ def get_trend_data_es(
 
     Returns:
     - record_type: The analyzed record type
+    - device: The device on which the data was recorded
     - interval: The time interval used
     - trend_data: List of time buckets with statistics for each period:
       * date: The time period (ISO string)
+      * value_sum: Sum of values for the period
       * avg_value: Average value for the period
       * min_value: Minimum value for the period
       * max_value: Maximum value for the period
@@ -152,6 +154,11 @@ def get_trend_data_es(
 
     Notes for LLMs:
     - Use this to analyze trends, patterns, and seasonal variations in health data
+    - Keep in mind that when there is data from multiple devices spanning the same
+      time period, there is a possibility of data being duplicated. Inform the user
+      of this possibility if you see multiple devices in the same time period.
+    - If a user asks you to sum up some values from their health records, DO NOT
+      search for records and write a script to sum them, instead, use this tool.
     - The function automatically handles date filtering if date_from/date_to are provided
     - IMPORTANT - interval must be one of: "day", "week", "month", or "year".
       Do not use other values.

From 0e42919cb32879e3b53999896fb2d1bd0135b4c0 Mon Sep 17 00:00:00 2001
From: czajkub <czajkub@gmail.com>
Date: Fri, 12 Sep 2025 15:34:01 +0200
Subject: [PATCH 07/13] remove debug code

---
 app/services/health/clickhouse.py     | 3 ---
 app/services/health/duckdb_queries.py | 3 ---
 2 files changed, 6 deletions(-)

diff --git a/app/services/health/clickhouse.py b/app/services/health/clickhouse.py
index 7cf09f5..0a5bcd5 100644
--- a/app/services/health/clickhouse.py
+++ b/app/services/health/clickhouse.py
@@ -54,6 +54,3 @@ def search_values_from_ch(
         {f"AND startDate >= '{date_from}'" if date_from else ""}
         {f"AND startDate <= '{date_to}'" if date_to else ""}
     """)
-
-if __name__ == "__main__":
-    print(get_trend_data_from_ch("HKQuantityTypeIdentifierStepCount", "week", "2023-03-01", "2023-04-01"))
\ No newline at end of file
diff --git a/app/services/health/duckdb_queries.py b/app/services/health/duckdb_queries.py
index d4b0156..72b8bcf 100644
--- a/app/services/health/duckdb_queries.py
+++ b/app/services/health/duckdb_queries.py
@@ -69,6 +69,3 @@ def search_values_from_duckdb(
         {f"AND startDate <= '{date_to}'" if date_to else ""}
     """)
     return client.format_response(result)
-
-if __name__ == "__main__":
-    print(get_trend_data_from_duckdb("HKQuantityTypeIdentifierStepCount", "week", "2023-03-01", "2023-04-01"))
\ No newline at end of file

From c3cbcb6e1641b23838608142086926d73b6c9c88 Mon Sep 17 00:00:00 2001
From: czajkub <czajkub@gmail.com>
Date: Mon, 15 Sep 2025 09:26:55 +0200
Subject: [PATCH 08/13] standardise errors and change trend docstrings

---
 app/mcp/v1/tools/ch_reader.py     |  9 ++++++---
 app/mcp/v1/tools/duckdb_reader.py | 11 +++++++----
 app/mcp/v1/tools/es_reader.py     |  9 ++++++---
 3 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/app/mcp/v1/tools/ch_reader.py b/app/mcp/v1/tools/ch_reader.py
index 71b6a3d..9043fc7 100644
--- a/app/mcp/v1/tools/ch_reader.py
+++ b/app/mcp/v1/tools/ch_reader.py
@@ -36,7 +36,7 @@ def get_health_summary_ch() -> dict[str, Any]:
     try:
         return get_health_summary_from_ch()
     except Exception as e:
-        return {"error": str(e)}
+        return {"error": f"Failed to get health summary: {str(e)}"}
 
 
 @ch_reader_router.tool
@@ -71,7 +71,7 @@ def search_health_records_ch(params: HealthRecordSearchParams) -> dict[str, Any]
     try:
         return search_health_records_from_ch(params)
     except Exception as e:
-        return {"error": str(e)}
+        return {"error": f"Failed to search health records: {str(e)}"}
 
 
 @ch_reader_router.tool
@@ -158,7 +158,10 @@ def get_trend_data_ch(
       time period, there is a possibility of data being duplicated. Inform the user
       of this possibility if you see multiple devices in the same time period.
     - If a user asks you to sum up some values from their health records, DO NOT
-      search for records and write a script to sum them, instead, use this tool.
+      search for records and write a script to sum them, instead, use this tool:
+      if they ask to sum data from a year, use this tool with date_from set as the
+      beginning of the year and date_to as the end of the year, with an interval
+      of 'year'
     - The function automatically handles date filtering if date_from/date_to are provided
     - IMPORTANT - interval must be one of: "day", "week", "month", or "year".
       Do not use other values.
diff --git a/app/mcp/v1/tools/duckdb_reader.py b/app/mcp/v1/tools/duckdb_reader.py
index 7e367c3..b064cd1 100644
--- a/app/mcp/v1/tools/duckdb_reader.py
+++ b/app/mcp/v1/tools/duckdb_reader.py
@@ -36,7 +36,7 @@ def get_health_summary_duckdb() -> list[dict[str, Any]]:
     try:
         return get_health_summary_from_duckdb()
     except Exception as e:
-        return [{"error": str(e)}]
+        return [{"error": f"Failed to get health summary: {str(e)}"}]
 
 
 @duckdb_reader_router.tool
@@ -71,7 +71,7 @@ def search_health_records_duckdb(params: HealthRecordSearchParams) -> list[dict[
     try:
         return search_health_records_from_duckdb(params)
     except Exception as e:
-        return [{"error": str(e)}]
+        return [{"error": f"Failed to search health records: {str(e)}"}]
 
 
 @duckdb_reader_router.tool
@@ -158,7 +158,10 @@ def get_trend_data_duckdb(
       time period, there is a possibility of data being duplicated. Inform the user
       of this possibility if you see multiple devices in the same time period.
     - If a user asks you to sum up some values from their health records, DO NOT
-      search for records and write a script to sum them, instead, use this tool.
+      search for records and write a script to sum them, instead, use this tool:
+      if they ask to sum data from a year, use this tool with date_from set as the
+      beginning of the year and date_to as the end of the year, with an interval
+      of 'year'
     - The function automatically handles date filtering if date_from/date_to are provided
     - IMPORTANT - interval must be one of: "day", "week", "month", or "year".
       Do not use other values.
@@ -214,4 +217,4 @@ def search_values_duckdb(
     try:
         return search_values_from_duckdb(record_type, value, date_from, date_to)
     except Exception as e:
-        return [{"error": f"Failed to get trend data: {str(e)}"}]
+        return [{"error": f"Failed to search for values: {str(e)}"}]
diff --git a/app/mcp/v1/tools/es_reader.py b/app/mcp/v1/tools/es_reader.py
index a092bd2..6cbcccd 100644
--- a/app/mcp/v1/tools/es_reader.py
+++ b/app/mcp/v1/tools/es_reader.py
@@ -36,7 +36,7 @@ def get_health_summary_es() -> dict[str, Any]:
     try:
         return get_health_summary_from_es()
     except Exception as e:
-        return {"error": f"Failed to get health summary from ES: {str(e)}"}
+        return {"error": f"Failed to get health summary: {str(e)}"}
 
 
 @es_reader_router.tool
@@ -158,7 +158,10 @@ def get_trend_data_es(
       time period, there is a possibility of data being duplicated. Inform the user
       of this possibility if you see multiple devices in the same time period.
     - If a user asks you to sum up some values from their health records, DO NOT
-      search for records and write a script to sum them, instead, use this tool.
+      search for records and write a script to sum them, instead, use this tool:
+      if they ask to sum data from a year, use this tool with date_from set as the
+      beginning of the year and date_to as the end of the year, with an interval
+      of 'year'
     - The function automatically handles date filtering if date_from/date_to are provided
     - IMPORTANT - interval must be one of: "day", "week", "month", or "year".
       Do not use other values.
@@ -214,4 +217,4 @@ def search_values_es(
     try:
         return search_values_logic(record_type, value, date_from, date_to)
     except Exception as e:
-        return [{"error": f"Failed to get trend data: {str(e)}"}]
+        return [{"error": f"Failed to search for values: {str(e)}"}]

From 18e16d0c0337bccb129b9a3bc9c5ce9ff9cb1196 Mon Sep 17 00:00:00 2001
From: czajkub <czajkub@gmail.com>
Date: Wed, 17 Sep 2025 10:13:13 +0200
Subject: [PATCH 09/13] add localhost support for parquet

also change parquetpath to path and add .parquet suffix to the path in config
---
 app/config.py                         |  4 ++--
 app/services/duckdb_client.py         | 20 ++++++++++++++++----
 app/services/health/duckdb_queries.py | 11 ++++++-----
 config/.env.example                   |  2 +-
 scripts/duckdb_importer.py            |  2 +-
 scripts/xml_exporter.py               |  4 ++--
 6 files changed, 28 insertions(+), 15 deletions(-)

diff --git a/app/config.py b/app/config.py
index b2270dc..a2fe6c3 100644
--- a/app/config.py
+++ b/app/config.py
@@ -30,7 +30,7 @@ class Settings(BaseSettings):
     CH_DB_NAME: str = "applehealth"
     CH_TABLE_NAME: str = "data"
 
-    DUCKDB_FILENAME: str = "applehealth"
+    DUCKDB_FILENAME: str = "applehealth.parquet"
 
     CHUNK_SIZE: int = 50_000
 
@@ -54,7 +54,7 @@ def assemble_cors_origins(cls, v: str | list[str]) -> list[str] | str:
 
 @lru_cache
 def get_settings() -> Settings:
-    return Settings()  # type: ignore[call-arg
+    return Settings()  # type: ignore[call-arg]
 
 
 settings = get_settings()
diff --git a/app/services/duckdb_client.py b/app/services/duckdb_client.py
index c2e7659..c83c8ff 100644
--- a/app/services/duckdb_client.py
+++ b/app/services/duckdb_client.py
@@ -2,6 +2,7 @@
 from pathlib import Path
 from typing import Any
 
+import duckdb
 from duckdb import DuckDBPyRelation
 
 from app.config import settings
@@ -9,12 +10,23 @@
 
 @dataclass
 class DuckDBClient:
-    def __init__(self):
-        self.parquetpath: Path = Path(f"{settings.DUCKDB_FILENAME}.parquet")
+    path: Path | str = f"{settings.DUCKDB_FILENAME}"
 
     def __post_init__(self):
-        if not self.parquetpath.exists():
-            raise FileNotFoundError(f"Parquet file not found: {self.parquetpath}")
+        print("__post_init__")
+        if self.path.startswith("localhost"):
+            self.path = "http://" + self.path
+
+        if self.path.startswith(("http://", "https://")):
+            duckdb.sql("""
+                    INSTALL httpfs;
+                    LOAD httpfs;
+                """)
+        else:
+            self.path = Path(self.path)
+
+        if isinstance(self.path, Path) and not self.path.exists():
+            raise FileNotFoundError(f"Parquet file not found: {self.path}")
 
     @staticmethod
     def format_response(response: DuckDBPyRelation) -> list[dict[str, Any]]:
diff --git a/app/services/health/duckdb_queries.py b/app/services/health/duckdb_queries.py
index 72b8bcf..78686c9 100644
--- a/app/services/health/duckdb_queries.py
+++ b/app/services/health/duckdb_queries.py
@@ -11,7 +11,8 @@
 
 def get_health_summary_from_duckdb() -> list[dict[str, Any]]:
     response = duckdb.sql(
-        f"SELECT type, COUNT(*) AS count FROM read_parquet('{client.parquetpath}') GROUP BY ALL",
+        f"""SELECT type, COUNT(*) AS count FROM read_parquet('{client.path}')"
+         GROUP BY type ORDER BY count DESC""",
     )
     return client.format_response(response)
 
@@ -19,7 +20,7 @@ def get_health_summary_from_duckdb() -> list[dict[str, Any]]:
 def search_health_records_from_duckdb(
     params: HealthRecordSearchParams,
 ) -> list[dict[str, Any]]:
-    query: str = f"SELECT * FROM read_parquet('{client.parquetpath}')"
+    query: str = f"SELECT * FROM read_parquet('{client.path}')"
     query += fill_query(params)
     response = duckdb.sql(query)
     return client.format_response(response)
@@ -31,7 +32,7 @@ def get_statistics_by_type_from_duckdb(
     result = duckdb.sql(f"""
                     SELECT type, COUNT(*) AS count, AVG(value) AS average,
                     SUM(value) AS sum, MIN(value) AS min, MAX(value) AS max
-                    FROM read_parquet('{client.parquetpath}')
+                    FROM read_parquet('{client.path}')
                     WHERE type = '{record_type}' GROUP BY type
                     """)
     return client.format_response(result)
@@ -47,7 +48,7 @@ def get_trend_data_from_duckdb(
         SELECT device, time_bucket(INTERVAL '1 {interval}', startDate) AS interval,
         AVG(value) AS average, SUM(value) AS sum,
         MIN(value) AS min, MAX(value) AS max, COUNT(*) AS count
-        FROM read_parquet('{client.parquetpath}')
+        FROM read_parquet('{client.path}')
         WHERE type = '{record_type}'
         {f"AND startDate >= '{date_from}'" if date_from else ""}
         {f"AND startDate <= '{date_to}'" if date_to else ""}
@@ -63,7 +64,7 @@ def search_values_from_duckdb(
     date_to: str | None = None,
 ) -> list[dict[str, Any]]:
     result = duckdb.sql(f"""
-        SELECT * FROM read_parquet('{client.parquetpath}') WHERE textvalue = '{value}'
+        SELECT * FROM read_parquet('{client.path}') WHERE textvalue = '{value}'
         {f"AND type = '{record_type}'" if record_type else ""}
         {f"AND startDate >= '{date_from}'" if date_from else ""}
         {f"AND startDate <= '{date_to}'" if date_to else ""}
diff --git a/config/.env.example b/config/.env.example
index 7f9d2a5..035f920 100644
--- a/config/.env.example
+++ b/config/.env.example
@@ -4,6 +4,6 @@ ES_HOST="localhost"
 CH_DIRNAME="applehealth.chdb"
 CH_DB_NAME="applehealth"
 CH_TABLE_NAME="data"
-DUCKDB_FILENAME="applehealth"
+DUCKDB_FILENAME="applehealth.parquet"
 CHUNK_SIZE="50000"
 RAW_XML_PATH="raw.xml"
diff --git a/scripts/duckdb_importer.py b/scripts/duckdb_importer.py
index b39929c..138fabb 100644
--- a/scripts/duckdb_importer.py
+++ b/scripts/duckdb_importer.py
@@ -35,7 +35,7 @@ def exportxml(self) -> None:
             chunk_dfs.append(df)
 
         combined_df = pl.concat(chunk_dfs)
-        combined_df.write_parquet(f"{self.parquetpath}", compression="zstd")
+        combined_df.write_parquet(f"{self.path}", compression="zstd")
 
         for f in chunkfiles:
             os.remove(f)
diff --git a/scripts/xml_exporter.py b/scripts/xml_exporter.py
index 5fffcf1..b7d0c64 100644
--- a/scripts/xml_exporter.py
+++ b/scripts/xml_exporter.py
@@ -10,7 +10,7 @@
 
 class XMLExporter:
     def __init__(self):
-        self.path: Path = Path(settings.RAW_XML_PATH)
+        self.xmlpath: Path = Path(settings.RAW_XML_PATH)
         self.chunk_size: int = settings.CHUNK_SIZE
 
     DATE_FIELDS: tuple[str, ...] = ("startDate", "endDate", "creationDate")
@@ -62,7 +62,7 @@ def parse_xml(self) -> Generator[DataFrame, Any, None]:
         """
         records: list[dict[str, Any]] = []
 
-        for event, elem in ET.iterparse(self.path, events=("start",)):
+        for event, elem in ET.iterparse(self.xmlpath, events=("start",)):
             if elem.tag == "Record" and event == "start":
                 if len(records) >= self.chunk_size:
                     yield DataFrame(records).reindex(columns=self.COLUMN_NAMES)

From fab22eb91507081e5a1c39009a507425bfc7c753 Mon Sep 17 00:00:00 2001
From: czajkub <czajkub@gmail.com>
Date: Wed, 17 Sep 2025 11:44:23 +0200
Subject: [PATCH 10/13] remove debug from client

---
 app/services/duckdb_client.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/app/services/duckdb_client.py b/app/services/duckdb_client.py
index c83c8ff..5ebb4be 100644
--- a/app/services/duckdb_client.py
+++ b/app/services/duckdb_client.py
@@ -13,7 +13,6 @@ class DuckDBClient:
     path: Path | str = f"{settings.DUCKDB_FILENAME}"
 
     def __post_init__(self):
-        print("__post_init__")
         if self.path.startswith("localhost"):
             self.path = "http://" + self.path
 

From b619143de480fd6b36ae8e4151cf530b8186e47c Mon Sep 17 00:00:00 2001
From: czajkub <czajkub@gmail.com>
Date: Wed, 17 Sep 2025 13:33:25 +0200
Subject: [PATCH 11/13] unterminated string

---
 app/services/health/duckdb_queries.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/app/services/health/duckdb_queries.py b/app/services/health/duckdb_queries.py
index 78686c9..52fe471 100644
--- a/app/services/health/duckdb_queries.py
+++ b/app/services/health/duckdb_queries.py
@@ -11,7 +11,7 @@
 
 def get_health_summary_from_duckdb() -> list[dict[str, Any]]:
     response = duckdb.sql(
-        f"""SELECT type, COUNT(*) AS count FROM read_parquet('{client.path}')"
+        f"""SELECT type, COUNT(*) AS count FROM read_parquet('{client.path}')
          GROUP BY type ORDER BY count DESC""",
     )
     return client.format_response(response)
@@ -70,3 +70,6 @@ def search_values_from_duckdb(
         {f"AND startDate <= '{date_to}'" if date_to else ""}
     """)
     return client.format_response(result)
+
+if __name__=="__main__":
+    print(get_health_summary_from_duckdb())
\ No newline at end of file

From 1c1678f2d35f8e9354012e87cc54fc7ecce89a03 Mon Sep 17 00:00:00 2001
From: czajkub <czajkub@gmail.com>
Date: Wed, 17 Sep 2025 14:31:00 +0200
Subject: [PATCH 12/13] remove debug and add fileserver example

---
 app/services/health/duckdb_queries.py |  3 ---
 tests/fileserver.py                   | 32 +++++++++++++++++++++++++++
 2 files changed, 32 insertions(+), 3 deletions(-)
 create mode 100644 tests/fileserver.py

diff --git a/app/services/health/duckdb_queries.py b/app/services/health/duckdb_queries.py
index 52fe471..4cd9937 100644
--- a/app/services/health/duckdb_queries.py
+++ b/app/services/health/duckdb_queries.py
@@ -70,6 +70,3 @@ def search_values_from_duckdb(
         {f"AND startDate <= '{date_to}'" if date_to else ""}
     """)
     return client.format_response(result)
-
-if __name__=="__main__":
-    print(get_health_summary_from_duckdb())
\ No newline at end of file
diff --git a/tests/fileserver.py b/tests/fileserver.py
new file mode 100644
index 0000000..462d82d
--- /dev/null
+++ b/tests/fileserver.py
@@ -0,0 +1,32 @@
+import argparse
+
+import uvicorn
+from fastapi import FastAPI
+from fastapi.responses import FileResponse
+
+app = FastAPI()
+
+
+@app.get("/{filename}")
+async def serve_file(filename: str) -> FileResponse:
+    return FileResponse(filename)
+
+
+parser = argparse.ArgumentParser(
+    prog="Filesystem server",
+    description="Host local files in this directory on localhost",
+)
+parser.add_argument(
+    "-p",
+    "--port",
+    type=int,
+    help="Port on which to serve",
+    default=8080,
+    dest="port",
+    action="store",
+)
+
+if __name__ == "__main__":
+    args = parser.parse_args()
+    port = args.port
+    uvicorn.run(app, host="localhost", port=port)

From bd0bb509a4f6854613c07efb909a5b76730921f1 Mon Sep 17 00:00:00 2001
From: Jakub Czajka <czajkub@gmail.com>
Date: Wed, 17 Sep 2025 14:37:53 +0200
Subject: [PATCH 13/13] Update README.md

---
 README.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/README.md b/README.md
index 6db3c71..0e8064d 100644
--- a/README.md
+++ b/README.md
@@ -105,6 +105,10 @@ Follow these steps to set up Apple Health MCP Server in your environment.
 
 4. Lastly, if you're going to be using DuckDB:
    - Run `make duckdb` to create a parquet file with your exported XML data
+   - If you want to read the parquet file over http(s):
+     - Set `DUCKDB_FILENAME` in your `.env` to the file's URL, e.g. `localhost:8080/applehealth.parquet`
+     - For an example of how to host the file locally, run `uv run tests/fileserver.py`
+
 
 ### Configuration Files
 
@@ -235,6 +239,7 @@ The Apple Health MCP Server provides a suite of tools for exploring, searching,
 | `search_health_records_es`  | Flexible search for health records in Elasticsearch with advanced filtering and query options.        |
 | `get_statistics_by_type_es` | Get comprehensive statistics (count, min, max, avg, sum) for a specific health record type.          |
 | `get_trend_data_es`         | Analyze trends for a health record type over time (daily, weekly, monthly, yearly aggregations).     |
+| `search_values_es`          | Search for records with exactly matching values (including text).     |
 
 ### ClickHouse Tools (`ch_reader`)
 
@@ -244,6 +249,7 @@ The Apple Health MCP Server provides a suite of tools for exploring, searching,
 | `search_health_records_ch`  | Flexible search for health records in ClickHouse with advanced filtering and query options.        |
 | `get_statistics_by_type_ch` | Get comprehensive statistics (count, min, max, avg, sum) for a specific health record type.          |
 | `get_trend_data_ch`         | Analyze trends for a health record type over time (daily, weekly, monthly, yearly aggregations).     |
+| `search_values_ch`          | Search for records with exactly matching values (including text).     |
 
 ### DuckDB Tools (`duckdb_reader`)
 
@@ -253,6 +259,7 @@ The Apple Health MCP Server provides a suite of tools for exploring, searching,
 | `search_health_records_duckdb`  | Flexible search for health records in DuckDB with advanced filtering and query options.        |
 | `get_statistics_by_type_duckdb` | Get comprehensive statistics (count, min, max, avg, sum) for a specific health record type.          |
 | `get_trend_data_duckdb`         | Analyze trends for a health record type over time (daily, weekly, monthly, yearly aggregations).     |
+| `search_values_duckdb`          | Search for records with exactly matching values (including text).     |
 
 All tools are accessible via MCP-compatible clients and can be used with natural language or programmatic queries to explore and analyze your Apple Health data.