Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions rdagent/app/data_science/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,9 @@ class DataScienceBasePropSetting(KaggleBasePropSetting):

model_architecture_suggestion_time_percent: float = 0.75
allow_longer_timeout: bool = False
coder_longer_timeout_multiplier: int = 3
runner_longer_timeout_multiplier: int = 2
coder_longer_timeout_multiplier_upper: int = 3
runner_longer_timeout_multiplier_upper: int = 2
timeout_increase_stage: float = 0.3

#### hypothesis critique and rewrite
enable_hypo_critique_rewrite: bool = True
Expand Down
54 changes: 18 additions & 36 deletions rdagent/components/coder/CoSTEER/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,11 @@
from rdagent.components.coder.CoSTEER.evaluators import CoSTEERMultiFeedback
from rdagent.components.coder.CoSTEER.evolvable_subjects import EvolvingItem
from rdagent.components.coder.CoSTEER.knowledge_management import (
CoSTEERKnowledgeBaseV1,
CoSTEERKnowledgeBaseV2,
CoSTEERRAGStrategyV1,
CoSTEERRAGStrategyV2,
)
from rdagent.core.developer import Developer
from rdagent.core.evaluation import Evaluator, Feedback
from rdagent.core.evaluation import Evaluator
from rdagent.core.evolving_agent import EvolvingStrategy, RAGEvoAgent
from rdagent.core.exception import CoderError
from rdagent.core.experiment import Experiment
Expand All @@ -26,8 +24,8 @@ def __init__(
settings: CoSTEERSettings,
eva: Evaluator,
es: EvolvingStrategy,
evolving_version: int,
*args,
evolving_version: int = 2,
max_seconds: int | None = None,
with_knowledge: bool = True,
with_feedback: bool = True,
Expand All @@ -37,6 +35,8 @@ def __init__(
**kwargs,
) -> None:
super().__init__(*args, **kwargs)
self.settings = settings

self.max_loop = settings.max_loop if max_loop is None else max_loop
self.max_seconds = max_seconds
self.knowledge_base_path = (
Expand All @@ -54,37 +54,22 @@ def __init__(
self.evaluator = eva
self.evolving_version = evolving_version

# init knowledge base
self.knowledge_base = self.load_or_init_knowledge_base(
former_knowledge_base_path=self.knowledge_base_path,
component_init_list=[],
)
# init rag method
self.rag = (
CoSTEERRAGStrategyV2(self.knowledge_base, settings=settings)
CoSTEERRAGStrategyV2(
settings=settings,
former_knowledge_base_path=self.knowledge_base_path,
dump_knowledge_base_path=self.new_knowledge_base_path,
evolving_version=self.evolving_version,
)
if self.evolving_version == 2
else CoSTEERRAGStrategyV1(self.knowledge_base, settings=settings)
)

def load_or_init_knowledge_base(self, former_knowledge_base_path: Path = None, component_init_list: list = []):
if former_knowledge_base_path is not None and former_knowledge_base_path.exists():
knowledge_base = pickle.load(open(former_knowledge_base_path, "rb"))
if self.evolving_version == 1 and not isinstance(knowledge_base, CoSTEERKnowledgeBaseV1):
raise ValueError("The former knowledge base is not compatible with the current version")
elif self.evolving_version == 2 and not isinstance(
knowledge_base,
CoSTEERKnowledgeBaseV2,
):
raise ValueError("The former knowledge base is not compatible with the current version")
else:
knowledge_base = (
CoSTEERKnowledgeBaseV2(
init_component_list=component_init_list,
)
if self.evolving_version == 2
else CoSTEERKnowledgeBaseV1()
else CoSTEERRAGStrategyV1(
settings=settings,
former_knowledge_base_path=self.knowledge_base_path,
dump_knowledge_base_path=self.new_knowledge_base_path,
evolving_version=self.evolving_version,
)
return knowledge_base
)

def develop(self, exp: Experiment) -> Experiment:

Expand All @@ -98,6 +83,8 @@ def develop(self, exp: Experiment) -> Experiment:
with_knowledge=self.with_knowledge,
with_feedback=self.with_feedback,
knowledge_self_gen=self.knowledge_self_gen,
enable_filelock=self.settings.enable_filelock,
filelock_path=self.settings.filelock_path,
)

start_datetime = datetime.now()
Expand All @@ -116,11 +103,6 @@ def develop(self, exp: Experiment) -> Experiment:
if self.with_feedback and self.filter_final_evo:
evo_exp = self._exp_postprocess_by_feedback(evo_exp, self.evolve_agent.evolving_trace[-1].feedback)

# save new knowledge base
if self.new_knowledge_base_path is not None:
with self.new_knowledge_base_path.open("wb") as f:
pickle.dump(self.knowledge_base, f)
logger.info(f"New knowledge base saved to {self.new_knowledge_base_path}")
exp.sub_workspace_list = evo_exp.sub_workspace_list
exp.experiment_workspace = evo_exp.experiment_workspace
return exp
Expand Down
3 changes: 3 additions & 0 deletions rdagent/components/coder/CoSTEER/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ class Config:
new_knowledge_base_path: Union[str, None] = None
"""Path to the new knowledge base"""

enable_filelock: bool = False
filelock_path: Union[str, None] = None

max_seconds_multiplier: int = 10**6


Expand Down
7 changes: 4 additions & 3 deletions rdagent/components/coder/CoSTEER/evolving_strategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,16 +75,18 @@ def evolve(
evolving_trace: list[EvoStep] = [],
**kwargs,
) -> EvolvingItem:
code_list = [None for _ in range(len(evo.sub_tasks))]

# 1.找出需要evolve的task
to_be_finished_task_index: list[int] = []
for index, target_task in enumerate(evo.sub_tasks):
target_task_desc = target_task.get_task_information()
if target_task_desc in queried_knowledge.success_task_to_knowledge_dict:
# NOTE: very weird logic:
# it depends on the knowledge to set the already finished task
evo.sub_workspace_list[index] = queried_knowledge.success_task_to_knowledge_dict[
code_list[index] = queried_knowledge.success_task_to_knowledge_dict[
target_task_desc
].implementation
].implementation.file_dict
elif (
target_task_desc not in queried_knowledge.success_task_to_knowledge_dict
and target_task_desc not in queried_knowledge.failed_task_info_set
Expand All @@ -111,7 +113,6 @@ def evolve(
],
n=RD_AGENT_SETTINGS.multi_proc_n,
)
code_list = [None for _ in range(len(evo.sub_tasks))]
for index, target_index in enumerate(to_be_finished_task_index):
code_list[target_index] = result[index]

Expand Down
77 changes: 66 additions & 11 deletions rdagent/components/coder/CoSTEER/knowledge_management.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import copy
import json
import pickle
import random
import re
from itertools import combinations
Expand Down Expand Up @@ -51,6 +52,53 @@ def get_implementation_and_feedback_str(self) -> str:
"""


class CoSTEERRAGStrategy(RAGStrategy):
    """Base RAG strategy for CoSTEER that owns knowledge-base persistence.

    Adds pickle-based load/init/dump of the evolving knowledge base on top of
    the generic ``RAGStrategy`` interface shared by the V1/V2 subclasses.
    """

    def __init__(self, *args, dump_knowledge_base_path: Path = None, **kwargs):
        super().__init__(*args, **kwargs)
        # Destination for dump_knowledge_base(); None disables persistence.
        self.dump_knowledge_base_path = dump_knowledge_base_path

    def load_or_init_knowledge_base(
        self, former_knowledge_base_path: Path = None, component_init_list: list = [], evolving_version: int = 2
    ) -> EvolvingKnowledgeBase:
        """Load a pickled knowledge base from disk, or initialize a fresh one.

        :param former_knowledge_base_path: existing pickle to resume from; a fresh
            base is created when None or missing.
        :param component_init_list: initial components for a new V2 base.
        :param evolving_version: 1 or 2; selects the expected knowledge-base class.
        :raises ValueError: if the pickled base does not match ``evolving_version``.
        """
        if former_knowledge_base_path is not None and former_knowledge_base_path.exists():
            # Use a context manager so the file handle is closed promptly
            # (the previous `pickle.load(open(...))` leaked the handle).
            with open(former_knowledge_base_path, "rb") as f:
                knowledge_base = pickle.load(f)
            if evolving_version == 1 and not isinstance(knowledge_base, CoSTEERKnowledgeBaseV1):
                raise ValueError("The former knowledge base is not compatible with the current version")
            elif evolving_version == 2 and not isinstance(
                knowledge_base,
                CoSTEERKnowledgeBaseV2,
            ):
                raise ValueError("The former knowledge base is not compatible with the current version")
        else:
            knowledge_base = (
                CoSTEERKnowledgeBaseV2(
                    init_component_list=component_init_list,
                )
                if evolving_version == 2
                else CoSTEERKnowledgeBaseV1()
            )
        return knowledge_base

    def dump_knowledge_base(self):
        """Pickle the current knowledge base to ``dump_knowledge_base_path`` (warn and skip if unset)."""
        if self.dump_knowledge_base_path is None:
            logger.warning("Dump knowledge base path is not set, skip dumping.")
        else:
            if not self.dump_knowledge_base_path.parent.exists():
                self.dump_knowledge_base_path.parent.mkdir(parents=True, exist_ok=True)
            with open(self.dump_knowledge_base_path, "wb") as f:
                pickle.dump(self.knowledgebase, f)

    def load_dumped_knowledge_base(self, *args, **kwargs):
        """Restore the knowledge base previously written by ``dump_knowledge_base``, if any."""
        if self.dump_knowledge_base_path is None:
            # Fix: this is the loading path — the message previously said "skip dumping."
            logger.warning("Dump knowledge base path is not set, skip loading.")
        elif not Path(self.dump_knowledge_base_path).exists():
            logger.info(f"Dumped knowledge base {self.dump_knowledge_base_path} does not exist, skip loading.")
        else:
            with open(self.dump_knowledge_base_path, "rb") as f:
                self.knowledgebase = pickle.load(f)
            logger.info(f"Loaded dumped knowledge base from {self.dump_knowledge_base_path}")


class CoSTEERQueriedKnowledge(QueriedKnowledge):
def __init__(self, success_task_to_knowledge_dict: dict = {}, failed_task_info_set: set = set()) -> None:
self.success_task_to_knowledge_dict = success_task_to_knowledge_dict
Expand Down Expand Up @@ -85,9 +133,9 @@ def __init__(
super().__init__(*args, **kwargs)


class CoSTEERRAGStrategyV1(RAGStrategy):
def __init__(self, knowledgebase: CoSTEERKnowledgeBaseV1, settings: CoSTEERSettings) -> None:
super().__init__(knowledgebase)
class CoSTEERRAGStrategyV1(CoSTEERRAGStrategy):
def __init__(self, settings: CoSTEERSettings, *args, **kwargs) -> None:
super().__init__(*args, **kwargs)
self.current_generated_trace_count = 0
self.settings = settings

Expand Down Expand Up @@ -213,9 +261,9 @@ def __init__(
)


class CoSTEERRAGStrategyV2(RAGStrategy):
def __init__(self, knowledgebase: CoSTEERKnowledgeBaseV2, settings: CoSTEERSettings) -> None:
super().__init__(knowledgebase)
class CoSTEERRAGStrategyV2(CoSTEERRAGStrategy):
def __init__(self, settings: CoSTEERSettings, *args, **kwargs) -> None:
super().__init__(*args, **kwargs)
self.current_generated_trace_count = 0
self.settings = settings

Expand Down Expand Up @@ -249,6 +297,12 @@ def generate_knowledge(
target_task_information not in self.knowledgebase.success_task_to_knowledge_dict
and implementation is not None
):
if target_task_information not in self.knowledgebase.task_to_component_nodes:
self.knowledgebase.task_to_component_nodes[target_task_information] = (
self.analyze_component(
target_task_information,
)
)
self.knowledgebase.working_trace_knowledge.setdefault(target_task_information, []).append(
single_knowledge,
) # save to working trace
Expand Down Expand Up @@ -465,7 +519,6 @@ def component_query(
self.knowledgebase.task_to_component_nodes[target_task_information] = self.analyze_component(
target_task_information,
)

component_analysis_result = self.knowledgebase.task_to_component_nodes[target_task_information]

if len(component_analysis_result) > 1:
Expand Down Expand Up @@ -557,12 +610,14 @@ def component_query(
queried_from_gt_knowledge_list = [
knowledge
for knowledge in queried_knowledge_list
if knowledge.feedback is not None and knowledge.feedback.final_decision_based_on_gt == True
if knowledge.feedback is not None
and (
hasattr(knowledge.feedback, "final_decision_based_on_gt")
and knowledge.feedback.final_decision_based_on_gt == True
)
]
queried_without_gt_knowledge_list = [
knowledge
for knowledge in queried_knowledge_list
if knowledge.feedback is not None and knowledge.feedback.final_decision_based_on_gt == False
knowledge for knowledge in queried_knowledge_list if knowledge not in queried_from_gt_knowledge_list
]
queried_from_gt_knowledge_count = max(
min((v2_query_component_limit // 2 + 1), len(queried_from_gt_knowledge_list)),
Expand Down
6 changes: 0 additions & 6 deletions rdagent/components/coder/data_science/pipeline/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,6 @@ def implement_one_task(
data_folder_info = self.scen.processed_data_folder_description
pipeline_task_info = target_task.get_task_information()

queried_similar_successful_knowledge = (
queried_knowledge.task_to_similar_task_successful_knowledge[pipeline_task_info]
if queried_knowledge is not None
else []
)
queried_former_failed_knowledge = (
queried_knowledge.task_to_former_failed_traces[pipeline_task_info] if queried_knowledge is not None else []
)
Expand All @@ -82,7 +77,6 @@ def implement_one_task(

system_prompt = T(".prompts:pipeline_coder.system").r(
task_desc=pipeline_task_info,
queried_similar_successful_knowledge=queried_similar_successful_knowledge,
queried_former_failed_knowledge=queried_former_failed_knowledge[0],
out_spec=PythonAgentOut.get_spec(),
runtime_environment=self.scen.get_runtime_environment(),
Expand Down
7 changes: 7 additions & 0 deletions rdagent/components/coder/data_science/pipeline/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,10 +158,17 @@ def evaluate(
else:
eda_output = implementation.file_dict.get("EDA.md", None)

queried_similar_successful_knowledge = (
queried_knowledge.task_to_similar_task_successful_knowledge[target_task.get_task_information()]
if queried_knowledge is not None
else []
)

system_prompt = T(".prompts:pipeline_eval.system").r(
is_sub_enabled=test_eval.is_sub_enabled(self.scen.competition),
debug_mode=DS_RD_SETTING.sample_data_by_LLM,
mle_check=(DS_RD_SETTING.sample_data_by_LLM and test_eval.is_sub_enabled(self.scen.competition)),
queried_similar_successful_knowledge=queried_similar_successful_knowledge,
)
user_prompt = T(".prompts:pipeline_eval.user").r(
scenario=self.scen.get_scenario_all_desc(eda_output=eda_output),
Expand Down
38 changes: 24 additions & 14 deletions rdagent/components/coder/data_science/pipeline/prompts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,19 +28,6 @@ pipeline_coder:
# Specification your code should follow
{% include "scenarios.data_science.share:component_spec.Pipeline" %}

{% if queried_similar_successful_knowledge|length != 0 or queried_former_failed_knowledge|length != 0 %}
# Relevant Information for This Task
{% endif %}

{% if queried_similar_successful_knowledge|length != 0 %}
## Successful Implementations for Similar Models
====={% for similar_successful_knowledge in queried_similar_successful_knowledge %} Model {{ loop.index }}:=====
{{ similar_successful_knowledge.target_task.get_task_information() }}
=====Code:=====
{{ similar_successful_knowledge.implementation.all_codes }}
{% endfor %}
{% endif %}

{% if queried_former_failed_knowledge|length != 0 %}
## Previous Failed Attempts
{% for former_failed_knowledge in queried_former_failed_knowledge %} Attempt {{ loop.index }}:
Expand Down Expand Up @@ -102,6 +89,16 @@ pipeline_coder:
```bash
python main.py --debug
```
Please simulate the following code to check whether the code is running in debug mode:
```python
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--debug', action='store_true', help='Run in debug mode')
args = parser.parse_args()
DEBUG = False
if args.debug:
DEBUG = True
```
In debug mode, you should only sample ten percent of the training data and run the minimum epochs to quickly test the correctness of the code.
In debug mode, you should implement a timer to measure the time taken for your debug configuration and estimate the time required for the full run. Your timer should only measure the time taken for the training part, not the data loading or feature engineering part.
For example:
Expand All @@ -115,7 +112,8 @@ pipeline_coder:
```
In debug mode, your code should run faster, so the environment will set a shorter time limit than the standard time limit for your code.
For example, you can sample ten percent of the training data and run for one epoch, then the full run with ten epochs will take one hundred times the time taken for the debug run. The scale is calculated by yourself depending on the data sampling and epoch number you choose. If your full run enables early stopping, the scale should be smaller considering the early stopping will stop the training earlier than the full epochs.
You should sample the data after train valid split. When you split the data after sampling, you might get a class with only one sample which might cause the split strategy to fail.
Be careful about the train-valid split strategy. StratifiedShuffleSplit is highly risky since the data may have some categories with only one sample. If you use StratifiedShuffleSplit, you should consider using a try-except block to catch the error and fall back to a different split strategy if the error occurs.
You should sample the data after train valid split. When you split the data after sampling, you might get a class with only one sample which might cause the split strategy to fail.
Your debug code should run exactly the same as the full run, except for the data sampling and epoch number, to ensure the correctness of the code.
You should print total time and estimated time in standard output using print function in the following schema:
=== Start of Debug Information ===
Expand Down Expand Up @@ -269,6 +267,18 @@ pipeline_eval:
- If the submission check returns an error message, you should set the "final_decision" to false and clearly document the issues in the "return_checking" field.
{% endif %}

{% if queried_similar_successful_knowledge|length != 0 %}
### Step 6: Similar Successful Implementations to Help Code Improvement
The user has completed several similar tasks and obtained some successful implementations. These implementations might not target exactly the same task, but they are similar to your task and might work well on your dataset.
Please refer to these successful implementations and provide suggestions in your response on how to correct your current code based on them.
## Successful Implementations for Similar Tasks
====={% for similar_successful_knowledge in queried_similar_successful_knowledge %} Similar Task {{ loop.index }}:=====
{{ similar_successful_knowledge.target_task.get_task_information() }}
=====Code:=====
{{ similar_successful_knowledge.implementation.all_codes }}
{% endfor %}
{% endif %}

## Output Format
Please respond with your feedback in the following JSON format without anything else.
```json
Expand Down
Loading
Loading