Merged

Commits (36)
8580e8e
first draft
GTimothee Apr 7, 2025
75db905
tmp change for test
GTimothee Apr 7, 2025
9f5e454
tmp change for test
GTimothee Apr 7, 2025
1ebc028
update tags in template, little updates in the testset class
GTimothee Apr 8, 2025
1f12a0c
add get_config usage in llmclient
GTimothee Apr 8, 2025
f054c3e
fix little inconsistency with get_config
GTimothee Apr 8, 2025
bb656bd
add unit tests
GTimothee Apr 8, 2025
be6e661
update tests
GTimothee Apr 8, 2025
1d6cc7f
update tests
GTimothee Apr 8, 2025
41acbce
update tests
GTimothee Apr 8, 2025
e007e91
test get_config for litellm
GTimothee Apr 8, 2025
1e5a580
fix test get_config for litellm
GTimothee Apr 8, 2025
632b939
fix test get_config for other llm
GTimothee Apr 8, 2025
7b9d58d
fix test get_config for mistralllm
GTimothee Apr 8, 2025
23fe214
add documentation
GTimothee Apr 8, 2025
1c0edea
pdm lock updated
GTimothee Apr 9, 2025
2da300e
add API reference
GTimothee Apr 9, 2025
b80d2b4
update method names + add logo to card template
GTimothee Apr 11, 2025
87920d6
merge updated main
GTimothee Apr 11, 2025
7e79f75
fixed test - tests passing
GTimothee Apr 11, 2025
b6e5739
update pdm.lock
GTimothee Apr 11, 2025
9893f13
isort - black
GTimothee Apr 11, 2025
e4134f8
update pdm.lock
henchaves Apr 11, 2025
0d1638f
small fix for backward compatibility
GTimothee Apr 15, 2025
ddf00fb
Merge branch 'main' into qatest_push_to_hub
davidberenstein1957 Apr 16, 2025
ce1d60c
Merge branch 'main' into qatest_push_to_hub
davidberenstein1957 Apr 22, 2025
4457516
fix workflow for testing pydantic v1
Apr 28, 2025
7763077
Update QATestset.md
davidberenstein1957 Jun 11, 2025
2f62912
Update build-python.yml
davidberenstein1957 Jun 11, 2025
460fd27
Delete pdm.lock
davidberenstein1957 Jun 11, 2025
eea7afd
update pdm.lock
henchaves Jun 11, 2025
085abe6
Merge branch 'main' into qatest_push_to_hub
henchaves Jun 11, 2025
e7e4aaa
Update testset.py
davidberenstein1957 Jun 11, 2025
86ebea5
Update testset.py
davidberenstein1957 Jun 11, 2025
dc0596c
Update testset.py
davidberenstein1957 Jun 11, 2025
cb74007
Update giskard/rag/testset.py
davidberenstein1957 Jun 11, 2025
53 changes: 53 additions & 0 deletions docs/integrations/huggingface/QATestset.md
@@ -0,0 +1,53 @@
# 📤 Push a QATestset to the Hugging Face Hub

**Learn how to upload and manage your QATestset on the Hugging Face Hub using the `push_to_hf_hub` feature.**

This tutorial will guide you through the steps to push a dataset to the Hugging Face Hub and load it back for reuse.

## Install Required Dependencies

Before you begin, ensure you have the necessary libraries installed. Run the following command to install the `datasets` and `huggingface_hub` packages:

```bash
pip install datasets huggingface_hub
```

## Authenticate with Hugging Face

To give the client access to your account, set your Hugging Face authentication token in the `HF_TOKEN` environment variable. You can generate a token from your [Hugging Face account settings](https://huggingface.co/settings/tokens).
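
Alternatively, you can authenticate directly from Python. Here is a minimal sketch using the `huggingface_hub` login helper, assuming the token is already exported as `HF_TOKEN`:

```python
import os

from huggingface_hub import login

# Log in with the token stored in the HF_TOKEN environment variable
login(token=os.environ["HF_TOKEN"])
```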

## Push Your Dataset to the Hub

Use the `push_to_hf_hub` method to upload your dataset to the Hugging Face Hub. The example below loads a `QATestset` from the file `test_set.jsonl` and pushes it to the Hub; replace `<username>` with your Hugging Face username and `<dataset_name>` with the desired name for your dataset:

```python
from giskard.rag.testset import QATestset

# Load a previously saved test set and upload it to the Hub
test_set = QATestset.load("test_set.jsonl")
test_set.push_to_hf_hub("<username>/<dataset_name>")
```

Once the dataset is successfully pushed, it will be available on your Hugging Face profile.

## Load the Dataset from the Hub

To reuse the dataset, you can load it back using the `load_from_hf_hub` method. This example demonstrates how to load the dataset and convert it to a pandas DataFrame for inspection:

```python
from giskard.rag.testset import QATestset

# Download the dataset from the Hub and inspect the first rows
dset = QATestset.load_from_hf_hub("<username>/<dataset_name>")
dset.to_pandas().head()
```

Replace `<username>` and `<dataset_name>` with the appropriate values.

## Benefits of Using the Hugging Face Hub

By leveraging this integration, you can:

- Seamlessly share datasets across projects and collaborators.
- Reuse datasets without the need for manual file transfers.
- Access datasets directly from the Hugging Face Hub for streamlined workflows.

Start pushing your datasets today and take advantage of the collaborative power of the Hugging Face Hub!
6 changes: 6 additions & 0 deletions docs/integrations/huggingface/index.md
@@ -8,6 +8,7 @@
:hidden:

./evaluator.md
./QATestset.md

```

@@ -17,3 +18,8 @@
:text-align: center
:link: ./evaluator.md
::::

::::{grid-item-card} <br/><h3>📤 Push a QATestset to the Hugging Face Hub</h3>
:text-align: center
:link: ./QATestset.md
::::
@@ -279,6 +279,8 @@ from giskard.rag import QATestset
loaded_testset = QATestset.load("my_testset.jsonl")
```

You can push your generated test set to the Hugging Face Hub or load an existing dataset from it using [`QATestset.push_to_hf_hub`](giskard.rag.QATestset.push_to_hf_hub) and [`QATestset.load_from_hf_hub`](giskard.rag.QATestset.load_from_hf_hub). This allows you to share and reuse datasets easily. For detailed instructions, refer to the [Hugging Face Integration Documentation](../../../integrations/huggingface/QATestset.md).
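
For instance, a minimal sketch (with `<username>/<dataset_name>` as a placeholder repository ID):

```py
from giskard.rag import QATestset

# Push the test set loaded above to the Hub...
loaded_testset.push_to_hf_hub("<username>/<dataset_name>")

# ...and later retrieve it from the Hub
testset_from_hub = QATestset.load_from_hf_hub("<username>/<dataset_name>")
```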

You can also convert it to a pandas DataFrame, for quick inspection or further processing:

```py
5 changes: 5 additions & 0 deletions giskard/llm/client/base.py
@@ -31,3 +31,8 @@ def complete(
format=None,
) -> ChatMessage:
...

@abstractmethod
def get_config(self) -> dict:
"""Return the configuration of the LLM client."""
...
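
To illustrate the contract this abstract method adds, here is a hypothetical subclass (not part of this change; the base-class import path and the `complete` signature are assumptions based on the diff above):

```python
from typing import Optional, Sequence

from giskard.llm.client import ChatMessage
from giskard.llm.client.base import LLMClient  # assumed location of the base class


class EchoClient(LLMClient):
    """Toy client that echoes the last user message back."""

    def __init__(self, model: str = "echo-1"):
        self.model = model

    def complete(
        self,
        messages: Sequence[ChatMessage],
        temperature: float = 1.0,
        max_tokens: Optional[int] = None,
        caller_id: Optional[str] = None,
        seed: Optional[int] = None,
        format=None,
    ) -> ChatMessage:
        # No real backend: simply echo the content of the last message
        return ChatMessage(role="assistant", content=messages[-1].content)

    def get_config(self) -> dict:
        """Return the configuration of the LLM client."""
        return {"client_type": self.__class__.__name__, "model": self.model}
```
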
4 changes: 4 additions & 0 deletions giskard/llm/client/bedrock.py
@@ -61,6 +61,10 @@ def complete(

return self._parse_completion(completion, caller_id)

def get_config(self) -> dict:
"""Return the configuration of the LLM client."""
return {"client_type": self.__class__.__name__, "model": self.model}


@deprecated("ClaudeBedrockClient is deprecated: https://docs.giskard.ai/en/latest/open_source/setting_up/index.html")
class ClaudeBedrockClient(BaseBedrockClient):
4 changes: 4 additions & 0 deletions giskard/llm/client/gemini.py
@@ -57,6 +57,10 @@ def __init__(self, model: str = "gemini-pro", _client=None):
self.model = model
self._client = _client or genai.GenerativeModel(self.model)

def get_config(self) -> dict:
"""Return the configuration of the LLM client."""
return {"client_type": self.__class__.__name__, "model": self.model}

def complete(
self,
messages: Sequence[ChatMessage],
9 changes: 9 additions & 0 deletions giskard/llm/client/litellm.py
@@ -151,3 +151,12 @@ def complete(
continue

return ChatMessage(role=response_message.role, content=response_message.content)

def get_config(self) -> dict:
"""Return the configuration of the LLM client."""
return {
"client_type": self.__class__.__name__,
"model": self.model,
"disable_structured_output": self.disable_structured_output,
"completion_params": self.completion_params,
}
4 changes: 4 additions & 0 deletions giskard/llm/client/mistral.py
@@ -24,6 +24,10 @@ def __init__(self, model: str = "mistral-large-latest", client: Mistral = None):
self.model = model
self._client = client or Mistral(api_key=os.getenv("MISTRAL_API_KEY", ""))

def get_config(self) -> dict:
"""Return the configuration of the LLM client."""
return {"client_type": self.__class__.__name__, "model": self.model}

def complete(
self,
messages: Sequence[ChatMessage],
8 changes: 8 additions & 0 deletions giskard/llm/client/openai.py
@@ -37,6 +37,14 @@ def __init__(
self._client = client or openai.OpenAI()
self.json_mode = json_mode if json_mode is not None else _supports_json_format(model)

def get_config(self) -> dict:
"""Return the configuration of the LLM client."""
return {
"client_type": self.__class__.__name__,
"model": self.model,
"json_mode": self.json_mode,
}

def complete(
self,
messages: Sequence[ChatMessage],
46 changes: 46 additions & 0 deletions giskard/rag/dataset_card_template.md
@@ -0,0 +1,46 @@
---
tags:
- giskard
- synthetic

task_categories:
- text-generation
- text2text-generation
---

# Dataset Card for {repo_id}
This dataset was created using the [giskard](https://github.com/Giskard-AI/giskard) library, an open-source Python framework designed to evaluate and test AI systems. Giskard helps identify performance, bias, and security issues in AI applications, supporting both LLM-based systems like RAG agents and traditional machine learning models for tabular data.

This is a QA dataset containing {num_items} question/answer pairs.

## Usage

You can load this dataset using the following code:

```python
from giskard.rag.testset import QATestset
test_set = QATestset.load_from_hf_hub("{repo_id}")
```

Refer to the following tutorial to use it for evaluating your RAG engine: [RAG evaluation tutorial](https://docs.giskard.ai/en/stable/open_source/testset_generation/rag_evaluation/index.html).

## Configuration

The configuration used to generate the dataset:

```json
{config}
```

---

<h2 style="text-align: center;">
<span style="display: inline-flex; align-items: center; gap: 8px;">
Built with
<a href="https://giskard.ai" target="_blank" style="display: inline-flex;">
<img src="https://cdn.prod.website-files.com/601d6f7d0b9c984f07bf10bc/62983fa8ef716259c397a57d_logo.svg"
alt="Giskard Logo"
width="100">
</a>
</span>
</h2>
84 changes: 83 additions & 1 deletion giskard/rag/testset.py
@@ -1,14 +1,24 @@
from typing import Any, Dict, Optional, Sequence
from typing import TYPE_CHECKING, Any, Dict, Optional, Sequence

import json
import logging
from dataclasses import dataclass
from pathlib import Path

import pandas as pd
from datasets import Dataset as HFDataset
from datasets import load_dataset
from huggingface_hub import DatasetCard

if TYPE_CHECKING:
from huggingface_hub import CommitInfo

from ..core.suite import Suite
from ..datasets.base import Dataset
from ..testing.tests.llm import test_llm_correctness

logger = logging.getLogger(__name__)


@dataclass
class QuestionSample:
Expand Down Expand Up @@ -110,6 +120,78 @@ def load(cls, path):
dataframe = pd.read_json(path, orient="records", lines=True)
return cls.from_pandas(dataframe)

def push_to_hf_hub(
self,
repo_id: str,
token: str = None,
private: bool = False,
**kwargs: Any,
) -> "CommitInfo":
"""Push the QATestset to the Hugging Face Hub.

Parameters
----------
repo_id : str
The repository ID on the Hugging Face Hub.
token : str, optional
Authentication token for private repositories. Defaults to None.
private : bool
Whether to create a private repository. Defaults to False.
**kwargs : Any
Additional arguments passed to Dataset.push_to_hub().

Returns
-------
CommitInfo
The commit information.
"""

# Conversion to Dataset from the datasets library
dataset = HFDataset.from_pandas(self._dataframe)
dataset.push_to_hub(repo_id, token=token, private=private, **kwargs)

# Load the dataset card template
template_path = Path(__file__).parent / "dataset_card_template.md"
template = template_path.read_text()

# Make and push the dataset card
try:
from ..llm.client import get_default_client

config = {"metadata": get_default_client().get_config()}
except Exception:
config = {}
content = template.format(repo_id=repo_id, num_items=len(self._dataframe), config=json.dumps(config, indent=4))
return DatasetCard(content=content).push_to_hub(repo_id=repo_id, token=token, repo_type="dataset")

@classmethod
def load_from_hf_hub(cls, repo_id: str, token: str = None, **kwargs: Any) -> "QATestset":
"""
Load an instance of the class from the Hugging Face Hub.

Parameters
----------
repo_id : str
The repository ID on the Hugging Face Hub.
token : str, optional
Authentication token for private repositories. Defaults to None.
**kwargs : Any
Additional arguments passed to `load_dataset`.

Returns
-------
QATestset
An instance of the class itself loaded from the Hub.

Raises
------
ImportError
If required dependencies are not installed.
"""
dataset = load_dataset(repo_id, token=token, split="train", **kwargs)
dataframe = pd.DataFrame(dataset)
return cls.from_pandas(dataframe)

def to_test_suite(self, name=None, slicing_metadata: Optional[Sequence[str]] = None):
"""
Convert the testset to a Giskard test suite.
1 change: 1 addition & 0 deletions pyproject.toml
@@ -91,6 +91,7 @@ test = [
"pytest-memray; sys_platform == 'linux' or sys_platform == 'darwin'",
"pytest-reportlog>=0.4.0",
"pytest-xdist>=3.3.1",
"pytest-mock",
"ragas>=0.1.5, <=0.2.7", # ragas 0.2.8 introduces abstract classes
"shap<0.45", # fixing this to avoid changed on insights
]
33 changes: 27 additions & 6 deletions tests/llm/test_llm_client.py
@@ -60,9 +60,16 @@ def test_litellm_client(completion):
completion.return_value = DEMO_OPENAI_RESPONSE
client = Mock()
client.chat.completions.create.return_value = DEMO_OPENAI_RESPONSE
res = LiteLLMClient("gpt-4o", True, completion_params={"api_key": "api_key"}).complete(
[ChatMessage(role="system", content="Hello")], temperature=0.11, max_tokens=1
)
llm_client = LiteLLMClient("gpt-4o", True, completion_params={"api_key": "api_key"})
cfg = llm_client.get_config()
assert cfg == {
"client_type": "LiteLLMClient",
"model": "gpt-4o",
"disable_structured_output": True,
"completion_params": {"api_key": "api_key"},
}

res = llm_client.complete([ChatMessage(role="system", content="Hello")], temperature=0.11, max_tokens=1)

completion.assert_called_once()
assert completion.call_args[1]["messages"] == [{"role": "system", "content": "Hello"}]
@@ -105,6 +112,14 @@ def completion(self, model: str, messages: list, api_key: str, **kwargs) -> lite
set_llm_model("mock/faux-bot", api_key=API_KEY)

llm_client = get_default_client()
cfg = llm_client.get_config()
assert cfg == {
"client_type": "LiteLLMClient",
"model": "mock/faux-bot",
"disable_structured_output": False,
"completion_params": {"api_key": API_KEY},
}

message = "Mock input"
response = llm_client.complete([ChatMessage(role="user", content=message)])
assert f"Mock response - {message}" == response.content
@@ -134,9 +149,11 @@ def test_mistral_client():

from giskard.llm.client.mistral import MistralClient

res = MistralClient(model="mistral-large", client=client).complete(
[ChatMessage(role="user", content="Hello")], temperature=0.11, max_tokens=12
)
llm_client = MistralClient(model="mistral-large", client=client)
cfg = llm_client.get_config()
assert cfg == {"client_type": "MistralClient", "model": "mistral-large"}

res = llm_client.complete([ChatMessage(role="user", content="Hello")], temperature=0.11, max_tokens=12)

client.chat.complete.assert_called_once()
assert client.chat.complete.call_args[1]["messages"] == [{"role": "user", "content": "Hello"}]
@@ -177,6 +194,8 @@ def test_claude_bedrock_client():
client = ClaudeBedrockClient(
bedrock_runtime_client, model="anthropic.claude-3-sonnet-20240229-v1:0", anthropic_version="bedrock-2023-05-31"
)
cfg = client.get_config()
assert cfg == {"client_type": "ClaudeBedrockClient", "model": "anthropic.claude-3-sonnet-20240229-v1:0"}

# Call the complete method
res = client.complete([ChatMessage(role="user", content="Hello")], temperature=0.11, max_tokens=12)
@@ -203,6 +222,8 @@

# Initialize the GeminiClient with the mocked gemini_api_client
client = GeminiClient(model="gemini-pro", _client=gemini_api_client)
cfg = client.get_config()
assert cfg == {"client_type": "GeminiClient", "model": "gemini-pro"}

# Call the complete method
res = client.complete([ChatMessage(role="user", content="Hello")], temperature=0.11, max_tokens=12)