Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
8580e8e
first draft
GTimothee Apr 7, 2025
75db905
tmp change for test
GTimothee Apr 7, 2025
9f5e454
tmp change for test
GTimothee Apr 7, 2025
1ebc028
update tags in template, litlle updates in the testset class
GTimothee Apr 8, 2025
1f12a0c
add get_config usage in llmclient
GTimothee Apr 8, 2025
f054c3e
fix little inconsistency with get_config
GTimothee Apr 8, 2025
bb656bd
add unit tests
GTimothee Apr 8, 2025
be6e661
update tests
GTimothee Apr 8, 2025
1d6cc7f
update tests
GTimothee Apr 8, 2025
41acbce
update tests
GTimothee Apr 8, 2025
e007e91
test get_config for litellm
GTimothee Apr 8, 2025
1e5a580
fix test get_config for litellm
GTimothee Apr 8, 2025
632b939
fix test get_config for other llm
GTimothee Apr 8, 2025
7b9d58d
fix test get_config for mistralllm
GTimothee Apr 8, 2025
23fe214
add documentation
GTimothee Apr 8, 2025
1c0edea
pdm lock updated
GTimothee Apr 9, 2025
2da300e
add API reference
GTimothee Apr 9, 2025
b80d2b4
update method names + add logo to card template
GTimothee Apr 11, 2025
87920d6
merge updated main
GTimothee Apr 11, 2025
7e79f75
fixed test - tests passing
GTimothee Apr 11, 2025
b6e5739
update pdm.lock
GTimothee Apr 11, 2025
9893f13
isort - black
GTimothee Apr 11, 2025
e4134f8
update pdm.lock
henchaves Apr 11, 2025
0d1638f
small fix for backward compatibility
GTimothee Apr 15, 2025
ddf00fb
Merge branch 'main' into qatest_push_to_hub
davidberenstein1957 Apr 16, 2025
ce1d60c
Merge branch 'main' into qatest_push_to_hub
davidberenstein1957 Apr 22, 2025
4457516
fix workflow for testing pydantic v1
Apr 28, 2025
7763077
Update QATestset.md
davidberenstein1957 Jun 11, 2025
2f62912
Update build-python.yml
davidberenstein1957 Jun 11, 2025
460fd27
Delete pdm.lock
davidberenstein1957 Jun 11, 2025
eea7afd
update pdm.lock
henchaves Jun 11, 2025
085abe6
Merge branch 'main' into qatest_push_to_hub
henchaves Jun 11, 2025
e7e4aaa
Update testset.py
davidberenstein1957 Jun 11, 2025
86ebea5
Update testset.py
davidberenstein1957 Jun 11, 2025
dc0596c
Update testset.py
davidberenstein1957 Jun 11, 2025
cb74007
Update giskard/rag/testset.py
davidberenstein1957 Jun 11, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions giskard/rag/dataset_card_template.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
---
tags:
- giskard
- QATestset
- qa-dataset
- dataset
---

# Dataset Card for {repo_id}
This dataset was created using the [giskard](https://github.com/Giskard-AI/giskard) library, an open-source Python framework designed to evaluate and test AI systems. Giskard helps identify performance, bias, and security issues in AI applications, supporting both LLM-based systems like RAG agents and traditional machine learning models for tabular data.

This dataset is a QA (Question/Answer) dataset, containing {num_items} pairs.

## Usage

You can load this dataset using the following code:

```python
from giskard.rag.testset import QATestset
test_set = QATestset.load_from_hub("{repo_id}")
```

Refer to the following tutorial to use it for evaluating your RAG engine: [RAG evaluation tutorial](https://docs.giskard.ai/en/stable/open_source/testset_generation/rag_evaluation/index.html).

## Configuration

The configuration used to generate this dataset:

```json
{config}
```
96 changes: 96 additions & 0 deletions giskard/rag/testset.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,27 @@
from pathlib import Path
import tempfile
from typing import Any, Dict, Optional, Sequence

import json
from dataclasses import dataclass

import pandas as pd

from typing import TYPE_CHECKING
if TYPE_CHECKING:
from huggingface_hub import CommitInfo


from ..core.suite import Suite
from ..datasets.base import Dataset
from ..testing.tests.llm import test_llm_correctness


# Shared error raised by the Hub helpers in this module (both pushing and
# loading) when the optional Hugging Face dependencies cannot be imported.
_HUB_IMPORT_ERROR = ImportError(
    "`datasets` and `huggingface_hub` are required to push to or load from the Hugging Face Hub. "
    "Please install them with `pip install datasets huggingface_hub`"
)


@dataclass
class QuestionSample:
id: str
Expand Down Expand Up @@ -110,6 +122,90 @@ def load(cls, path):
dataframe = pd.read_json(path, orient="records", lines=True)
return cls.from_pandas(dataframe)

def push_to_hub(
    self,
    repo_id: str,
    token: Optional[str] = None,
    private: bool = False,
    **kwargs: Any,
) -> "CommitInfo":
    """Push the QATestset to the Hugging Face Hub.

    The testset dataframe is first uploaded as a Hugging Face dataset, then a
    dataset card rendered from ``dataset_card_template.md`` (located next to
    this module) is pushed to the same repository.

    Parameters
    ----------
    repo_id : str
        The repository ID on the Hugging Face Hub.
    token : str, optional
        Authentication token for private repositories. Defaults to None.
    private : bool
        Whether to create a private repository. Defaults to False.
    **kwargs : Any
        Additional arguments passed to Dataset.push_to_hub().

    Returns
    -------
    CommitInfo
        The commit information returned by the dataset card upload.

    Raises
    ------
    ImportError
        If `datasets` or `huggingface_hub` cannot be imported.
    """
    # Optional dependencies are imported lazily so that the rest of the module
    # stays usable without them.
    try:
        from datasets import Dataset as HFDataset
        from huggingface_hub import DatasetCard
    except ImportError as err:
        raise _HUB_IMPORT_ERROR from err

    # Conversion to Dataset from the datasets library
    dataset = HFDataset.from_pandas(self._dataframe)
    dataset.push_to_hub(repo_id, token=token, private=private, **kwargs)

    # Load the dataset card template; read it as UTF-8 explicitly so the result
    # does not depend on the platform's default encoding.
    template_path = Path(__file__).parent / "dataset_card_template.md"
    template = template_path.read_text(encoding="utf-8")

    # NOTE(review): this configuration is hard-coded rather than derived from
    # the settings actually used to generate the testset - confirm before
    # relying on the "Configuration" section of the published card.
    config = {
        "metadata": {
            "model": "gpt-4o",
        }
    }

    # Make and push the dataset card
    content = template.format(
        repo_id=repo_id,
        num_items=len(self._dataframe),
        config=json.dumps(config, indent=4),
    )
    return DatasetCard(content=content).push_to_hub(repo_id=repo_id, token=token, repo_type="dataset")

@classmethod
def load_from_hub(cls, repo_id: str, token: Optional[str] = None, **kwargs: Any) -> "QATestset":
    """
    Load an instance of the class from the Hugging Face Hub.

    The ``train`` split of the dataset is downloaded, converted to a pandas
    dataframe and handed to ``from_pandas``.

    Parameters
    ----------
    repo_id : str
        The repository ID on the Hugging Face Hub.
    token : str, optional
        Authentication token for private repositories. Defaults to None.
    **kwargs : Any
        Additional arguments passed to `load_dataset`. ``split`` is already
        set to ``"train"`` by this method and must not be passed again.

    Returns
    -------
    QATestset
        An instance of the class itself loaded from the Hub.

    Raises
    ------
    ImportError
        If required dependencies are not installed.
    """
    # `datasets` is an optional dependency, hence the lazy import.
    try:
        from datasets import load_dataset
    except ImportError as err:
        raise _HUB_IMPORT_ERROR from err

    # Load dataset and extract items
    dataset = load_dataset(repo_id, token=token, split="train", **kwargs)
    dataframe = pd.DataFrame(dataset)
    return cls.from_pandas(dataframe)

def to_test_suite(self, name=None, slicing_metadata: Optional[Sequence[str]] = None):
"""
Convert the testset to a Giskard test suite.
Expand Down