the-momentum · czajkub · Sep 12, 2025 · Sep 12, 2025 · Sep 12, 2025 · Sep 12, 2025
diff --git a/.github/workflows/mcp-composite-action/action.yml b/.github/workflows/mcp-composite-action/action.yml
@@ -0,0 +1,34 @@
+# Composite action shared by the CI jobs: installs Python and uv, syncs the
+# dev dependency group, then starts the test file server and the MCP server
+# as background processes.
+name: 'MCP server setup'
+description: 'Sync uv dependencies and run MCP server'
+
+
+inputs:
+  # Passed to the "Run mcp server" step as the DUCKDB_FILENAME environment variable.
+  DUCKDB_FILENAME:
+    description: 'path to duckdb file'
+    required: false
+    default: 'tests/duckdb.example'
+
+runs:
+  using: "composite"
+  steps:
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: '3.13'
+    - name: Install uv
+      run: curl -LsSf https://astral.sh/uv/install.sh | sh
+      shell: bash
+    - name: Install dependencies
+      run: uv sync --group dev
+      shell: bash
+    - name: Run fileserver
+      # The trailing `&` backgrounds the process so the following steps can run.
+      run: uv run --directory tests/ fileserver.py &
+      shell: bash
+    - name: Run mcp server
+      # Also backgrounded; served over HTTP (`-t http`).
+      run: uv run fastmcp run -t http app/main.py &
+      env:
+        DUCKDB_FILENAME: ${{ inputs.DUCKDB_FILENAME }}
+      shell: bash
+    - name: Wait for mcp initialization
+      # Fixed delay rather than a readiness check.
+      # NOTE(review): may be flaky on slow runners - consider polling the server port.
+      run: sleep 5
+      shell: bash
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -0,0 +1,41 @@
+# CI workflow: on every push, runs the unit tests, an MCP Inspector smoke
+# check and the Opik experiments, each in its own job.
+name: tests
+
+on: [push]
+
+env:
+  DUCKDB_FILENAME: tests/duckdb.example
+  # Read from the repository's secrets.
+  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+  OPIK_API_KEY: ${{ secrets.OPIK_API_KEY }}
+  OPIK_WORKSPACE: ${{ secrets.OPIK_WORKSPACE }}
+
+jobs:
+  # Unit tests (pytest) run from the tests/ directory.
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      # Same checkout version as the other jobs in this workflow.
+      - uses: actions/checkout@v5
+      - uses: ./.github/workflows/mcp-composite-action
+        with:
+          DUCKDB_FILENAME: 'tests/duckdb.example'
+      - name: Run tests
+        run: uv run --directory tests/ pytest query_tests.py
+  # Lists the running server's tools through the MCP Inspector CLI.
+  inspector:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v5
+      - uses: actions/setup-node@v4
+        with:
+          node-version: '20'
+      - uses: ./.github/workflows/mcp-composite-action
+        with:
+          DUCKDB_FILENAME: 'tests/duckdb.example'
+      - name: Run inspector
+        run: npx @modelcontextprotocol/inspector --cli http://127.0.0.1:8000/mcp --method tools/list
+  # Runs the Opik experiment script against the running server.
+  opik:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v5
+      - uses: ./.github/workflows/mcp-composite-action
+        with:
+          DUCKDB_FILENAME: 'tests/duckdb.example'
+      - name: Run opik experiments
+        run: uv run tests/opik/tool_calls.py
diff --git a/README.md b/README.md
@@ -53,6 +53,7 @@ Want to try it out? **[🚀 Getting Started](docs/getting-started.md)**
 - **[🔧 Configuration](docs/configuration.md)** - Environment variables and settings
 - **[🛠️ MCP Tools](docs/mcp-tools.md)** - All available tools
 - **[🗺️ Roadmap](docs/roadmap.md)** - Upcoming features and roadmap
+- **[🧪 Testing](docs/tests.md)** - Instructions on tests and how to run them
 
 **Need help?** Looking for guidance on use cases or implementation? Don't hesitate to ask your question in our [GitHub discussion forum](https://github.com/the-momentum/apple-health-mcp-server/discussions)! You'll also find interesting use cases, tips, and community insights there.
 

diff --git a/app/config.py b/app/config.py
@@ -37,6 +37,11 @@ class Settings(BaseSettings):
     RAW_XML_PATH: str = "raw.xml"
     XML_SAMPLE_SIZE: int = 1000
 
+    # Opik tests
+    OPENAI_API_KEY: str = ""
+    OPIK_WORKSPACE: str = ""
+    OPIK_API_KEY: str = ""
+
     @field_validator("BACKEND_CORS_ORIGINS", mode="after")
     @classmethod
     def assemble_cors_origins(cls, v: str | list[str]) -> list[str] | str:

diff --git a/app/services/health/clickhouse.py b/app/services/health/clickhouse.py
@@ -49,7 +49,7 @@ def search_values_from_ch(
     date_to: str | None = None,
 ) -> dict[str, Any]:
     return ch.inquire(f"""
-        SELECT * FROM {ch.db_name}.{ch.table_name} WHERE textvalue = '{value}'
+        SELECT * FROM {ch.db_name}.{ch.table_name} WHERE textValue = '{value}'
         {f"AND type = '{record_type}'" if record_type else ""}
         {f"AND startDate >= '{date_from}'" if date_from else ""}
         {f"AND startDate <= '{date_to}'" if date_to else ""}

diff --git a/app/services/health/duckdb_queries.py b/app/services/health/duckdb_queries.py
@@ -141,8 +141,8 @@ def main() -> None:
     pars = HealthRecordSearchParams(
         limit=20,
         record_type="HKWorkoutActivityTypeRunning",
-        date_from="2016-01-01T00:00:00+00:00",
-        date_to="2016-12-31T23:59:59+00:00",
+        min_workout_duration="45",
+        max_workout_duration="53",
     )
     logger.info(
         f"records for search_health_records_from_duckdb: {search_health_records_from_duckdb(pars)}",

diff --git a/config/.env.example b/config/.env.example
@@ -4,3 +4,6 @@ ES_HOST="localhost"
 DUCKDB_FILENAME="applehealth.duckdb"
 CHUNK_SIZE="50000"
 RAW_XML_PATH="raw.xml"
+OPENAI_API_KEY="sk-proj-***"
+OPIK_WORKSPACE="username"
+OPIK_API_KEY="abcdef12345"
diff --git a/docs/getting-started.md b/docs/getting-started.md
@@ -41,8 +41,8 @@ Follow these steps to set up Apple Health MCP Server in your environment.
    - Run `make duckdb` to create a parquet file with your exported XML data
    - If you want to connect to the file through http(s):
      - The only thing you need to do is change the .env path, e.g. `localhost:8080/applehealth.parquet`
-     - If you want an example on how to host the files locally, run `uv run tests/fileserver.py` 
-   
+     - If you want an example on how to host the files locally, run `uv run tests/fileserver.py`
+
 
 ## Configuration Files
 

diff --git a/docs/tests.md b/docs/tests.md
@@ -0,0 +1,72 @@
+[← Back to README](../README.md)
+
+## Testing 🧪
+
+There are 3 types of tests in this project, all of which are included in the pipeline: unit tests, MCP Inspector tests and Opik tests.
+
+Every test is run against [pre-prepared mock Apple Health data](https://gist.github.com/czajkub/7ee7a01c35990f910f034f46dbf83b66).
+
+
+## Unit tests 🔧:
+  - Testing the importing of XML data to .duckdb and database calls to DuckDB
+
+## MCP Inspector tests 🔍:
+  - Uses the [MCP Inspector](https://modelcontextprotocol.io/docs/tools/inspector) provided by Anthropic to test connection to the server hosted with streamable HTTP
+  - Mainly used in the pipeline, but can be run locally
+
+## Opik tests 🤖:
+  - End-to-End tests using an agent created from [this](https://github.com/the-momentum/python-ai-kit) AI development kit
+  - Two types of tests:
+    -  Checking whether the correct tool was called
+    -  Judging the answer from an LLM by three metrics:
+       - Answer relevancy: whether the answer is relevant to the user's question 🎯
+       - Hallucination: whether the answer contains misleading or false information 🚫
+       - Levenshtein ratio: Heuristic checking the text structure similarity 📊
+
+# How to run tests locally 💻:
+- ### Unit tests 🔧:
+```bash
+pytest tests/query_tests.py
+```
+
+Before running the next tests, make sure you have the server up and running:
+```bash
+uv run fastmcp run -t http app/main.py
+```
+
+- ### Inspector tests 🔍:
+```bash
+npx @modelcontextprotocol/inspector --cli http://localhost:8000/mcp --transport http --method tools/list
+```
+
+- ### Opik tests 🤖:
+Make sure your `OPENAI_API_KEY`, `OPIK_WORKSPACE` and `OPIK_API_KEY` environment variables are set
+(the Opik workspace is your profile name, not the project name).
+```bash
+uv run tests/opik/tool_calls.py
+```
+
+### How to run Opik tests in pipeline:
+- Create an account on Opik if you haven't already
+- Add your `OPENAI_API_KEY`, `OPIK_API_KEY` and `OPIK_WORKSPACE` to the repository's GitHub secrets
+
+
+To add new tests, you can either do it in the code ([example from opik](https://www.comet.com/docs/opik/evaluation/manage_datasets)):
+```python
+import opik
+# Get or create a dataset
+client = opik.Opik()
+dataset = client.get_or_create_dataset(name="My dataset")
+# Add dataset items to it
+dataset.insert([
+    {"user_question": "Hello, world!", "expected_output": {"assistant_answer": "Hello, world!"}},
+    {"user_question": "What is the capital of France?", "expected_output": {"assistant_answer": "Paris"}},
+])
+```
+
+Or add it on the website:
+<img width="1919" height="873" alt="image" src="https://github.com/user-attachments/assets/dc9f3807-40b4-4227-b4c2-5a1ea44396e7" />
+
+When adding tool call questions, make sure the `input` and `tool_call` values are present, and when adding output checks make sure `input` and `expected_output` are set correctly.
+
+[← Back to README](../README.md)
diff --git a/pyproject.toml b/pyproject.toml
@@ -31,6 +31,8 @@ module-name = "app"
 [dependency-groups]
 dev = [
     "fastapi>=0.116.2",
+    "opik>=1.8.56",
+    "pydantic-ai>=1.0.10",
     "pytest>=8.4.2",
     "pytest-asyncio>=1.0.0",
     "pytest-cov>=6.2.1",
@@ -50,7 +52,7 @@ exclude = ["./tests/", "./docs/", "./README.md"]
 [tool.ruff]
 line-length = 100
 target-version = "py313"
-extend-exclude = ["tests/", "./docs/", "./README.md"]
+extend-exclude = ["./tests/", "./docs/", "./README.md"]
 
 [tool.ruff.lint]
 select = [

diff --git a/scripts/clickhouse_importer.py b/scripts/clickhouse_importer.py
@@ -26,7 +26,7 @@ def create_table(self) -> None:
                        creationDate DateTime,
                        unit String,
                        value Float32,
-                       textvalue String,
+                       textValue String,
                    )
                        ENGINE = MergeTree
                        ORDER BY startDate

diff --git a/tests/agent.py b/tests/agent.py
@@ -0,0 +1,81 @@
+import asyncio
+import os
+
+from pydantic_ai.models.openai import OpenAIChatModel
+from pydantic_ai.providers.openai import OpenAIProvider
+from pydantic_ai import Agent
+from pydantic_ai.tools import Tool
+from pydantic_ai.mcp import MCPServerStreamableHTTP
+
+from dotenv import load_dotenv
+import opik
+
+load_dotenv()
+
+class AgentManager:
+    def __init__(self):
+        self.agent: Agent | None = None
+        self.mcp_client: MCPServerStreamableHTTP | None = None
+        self.tools: list[Tool] | None = None
+        self._initialized = False
+
+    async def initialize(self, model: str = "gpt-4o",
+                         system_prompt: str | None = None):
+        if self._initialized:
+            return
+        try:
+            self.mcp_client = MCPServerStreamableHTTP("http://localhost:8000/mcp")
+        except Exception as e:
+            self.mcp_client = None
+            raise ConnectionError("Could not connect to MCP server") from e
+
+        if system_prompt is None:
+            system_prompt = "You are an AI assistant to help the user as best as you can. You can use the tools provided to you to help the user."
+
+        self.agent = self._create_agent(model, system_prompt)
+        self._initialized = True
+
+    def _create_agent(self, model: str, system_prompt: str) -> Agent:
+        model = OpenAIChatModel(model, provider=OpenAIProvider(api_key=os.getenv("openai_api_key")))
+        return Agent(
+            model=model,
+            deps_type=dict[str, str],
+            system_prompt=system_prompt,
+            toolsets=[self.mcp_client],
+            output_type=str,
+        )
+
+    @opik.track
+    async def handle_message(self, message: str) -> str:
+        if not self._initialized:
+            raise RuntimeError("Agent not initialized. Call initialize() first.")
+
+        async with self.agent:
+            result = await self.agent.run(message)
+            return result.output
+
+    def is_initialized(self) -> bool:
+        return self._initialized
+
+
+
+# Module-level shared instance; it is not usable until initialize() has been awaited.
+agent_manager = AgentManager()
+
+
+async def main():
+    await agent_manager.initialize()
+
+    try:
+        while True:
+            user_input = input("Enter your message: ")
+            if user_input == "exit":
+                break
+            print("User: ", user_input)
+            response = await agent_manager.handle_message(user_input)
+            print("Agent: ", response)
+    finally:
+        print("Closing agent")
+
+
+# Start the interactive loop only when this file is executed as a script.
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/tests/duckdb.example b/tests/duckdb.example