6 changes: 3 additions & 3 deletions rdagent/components/coder/CoSTEER/config.py
```diff
@@ -17,12 +17,12 @@ class Config:

     fail_task_trial_limit: int = 20

-    v1_query_former_trace_limit: int = 5
-    v1_query_similar_success_limit: int = 5
+    v1_query_former_trace_limit: int = 3
+    v1_query_similar_success_limit: int = 3

     v2_query_component_limit: int = 1
     v2_query_error_limit: int = 1
-    v2_query_former_trace_limit: int = 1
+    v2_query_former_trace_limit: int = 3
     v2_add_fail_attempt_to_latest_successful_execution: bool = False
     v2_error_summary: bool = False
     v2_knowledge_sampler: float = 1.0
```
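These limits cap how many knowledge items CoSTEER pulls into a single query. As a minimal sketch of the intent, assuming the limits simply truncate the retrieved lists (the names `former_traces`, `similar_successes`, and `build_v1_query` are illustrative, not from the repo):

```python
# Illustrative sketch only: assumes the limits truncate retrieved knowledge lists.
from dataclasses import dataclass


@dataclass
class CoSTEERSettings:  # hypothetical stand-in for the real config class
    v1_query_former_trace_limit: int = 3
    v1_query_similar_success_limit: int = 3
    v2_query_former_trace_limit: int = 3


def build_v1_query(former_traces: list[str], similar_successes: list[str],
                   conf: CoSTEERSettings) -> list[str]:
    # Keep only the most recent failed traces and the closest successes,
    # so the assembled prompt stays within a bounded size.
    return (former_traces[-conf.v1_query_former_trace_limit:]
            + similar_successes[:conf.v1_query_similar_success_limit])
```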
42 changes: 0 additions & 42 deletions rdagent/scenarios/data_science/proposal/exp_gen/prompts_v2.yaml
```diff
@@ -106,48 +106,6 @@ feedback_problem:
     # Current SOTA Implementation
     {{ sota_exp_desc }}

-scenario_description: |-
-  {% if use_raw_description -%}
-  ====== Background ======
-  {{ raw_description }}
-
-  {% else %}
-  ====== Background ======
-  {{ background }}
-  {% endif %}
-
-  {% if eda_output is not none %}The following is the output of the exploratory data analysis (EDA) performed on the dataset, You should carefully analyze it to better craft your feature engineering and model training strategies.
-  ====== Data Overview (EDA) ======
-  {{ eda_output }}
-  {% endif %}
-
-  ====== Submission Format ======
-  Please ensure your submission adheres to the following specifications:
-  {{ submission_specifications }}
-
-  ====== Important Guidelines ======
-  Before submitting your results, please note the following:
-  - We have numerous tests in place to check your code.
-  - Ensure your submission is genuine.
-  - Do not manipulate data or return values solely to pass preliminary tests, as this will not lead to successful final evaluation.
-
-  ====== Evaluation ======
-  {% if metric_name %}
-  The primary evaluation metric for this task is: **{{ metric_name }}**.
-  {% endif %}
-  This metric is considered better when it is **{% if metric_direction %}larger{% else %}smaller{% endif %}**.
-
-  {% if evaluation is not none %}
-  Additional Evaluation Details:
-  {{ evaluation }}
-  {% endif %}
-
-  {% if time_limit %}
-  ====== Time Limit ======
-  Your code's execution is limited to **{{ time_limit }}**. After this time limit, your code will be terminated. But remember your main target is to achieve the best performance and you have several times to modify your code. So please be bold to make the best use of all the time limit and don't be too conservative.
-  During this time limit, you have all the resources available to you. Please fully leverage all the computational resources(CPUs and GPUs) to achieve the best performance like choose a powerful model, use a large batch size, enable data sampler with big parallel.
-  {% endif %}
-
 hypothesis_gen:
   system: |-
     {% include "scenarios.data_science.share:scen.role" %}
```
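The deleted block duplicated the `scenario_description` template that now lives only in `rdagent/scenarios/data_science/scen/prompts.yaml`. As a minimal sketch of how such a Jinja2 template renders, using an inline template and made-up values rather than the repo's `T(...)` loader:

```python
# Illustrative: renders a trimmed-down scenario_description with Jinja2.
from jinja2 import Template

template = Template(
    "{% if use_raw_description -%}\n"
    "====== Background ======\n"
    "{{ raw_description }}\n"
    "{% else %}\n"
    "====== Background ======\n"
    "{{ background }}\n"
    "{% endif %}"
)

# With use_raw_description=False, only the {{ background }} branch is emitted.
print(template.render(use_raw_description=False,
                      raw_description="(raw text)",
                      background="Predict house prices from tabular features."))
```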
15 changes: 1 addition & 14 deletions rdagent/scenarios/data_science/proposal/exp_gen/proposal.py
```diff
@@ -788,19 +788,6 @@ def task_gen(
         exp.pending_tasks_list.append([workflow_task])
         return exp

-    def get_scenario_all_desc(self, trace: DSTrace, eda_output=None) -> str:
-        return T(".prompts_v2:scenario_description").r(
-            background=trace.scen.background,
-            submission_specifications=trace.scen.submission_specifications,
-            evaluation=trace.scen.metric_description,
-            metric_name=trace.scen.metric_name,
-            metric_direction=trace.scen.metric_direction,
-            raw_description=trace.scen.raw_description,
-            use_raw_description=DS_RD_SETTING.use_raw_description,
-            time_limit=f"{DS_RD_SETTING.full_timeout / 60 / 60 : .2f} hours",
-            eda_output=eda_output,
-        )
-
     def get_all_hypotheses(self, problem_dict: dict, hypothesis_dict: dict) -> list[DSHypothesis]:
         result = []
         for name, data in hypothesis_dict.items():
@@ -844,7 +831,7 @@ def gen(
             eda_output = None
         else:
             eda_output = sota_exp.experiment_workspace.file_dict.get("EDA.md", None)
-        scenario_desc = self.get_scenario_all_desc(trace, eda_output=eda_output)
+        scenario_desc = self.scen.get_scenario_all_desc(eda_output=eda_output)

         sota_exp_desc = T("scenarios.data_science.share:describe.exp").r(
             exp=sota_exp, heading="Best of previous exploration of the scenario"
```
4 changes: 4 additions & 0 deletions rdagent/scenarios/data_science/scen/__init__.py
```diff
@@ -142,6 +142,8 @@ def get_competition_full_desc(self) -> str:
             use_raw_description=DS_RD_SETTING.use_raw_description,
             time_limit=None,
             eda_output=None,
+            sample_data_by_LLM=None,
+            debug_time_limit=None,
         )

     def get_scenario_all_desc(self, eda_output=None) -> str:
@@ -158,6 +160,8 @@ def get_scenario_all_desc(self, eda_output=None) -> str:
             use_raw_description=DS_RD_SETTING.use_raw_description,
             time_limit=f"{DS_RD_SETTING.full_timeout / 60 / 60 : .2f} hours",
             eda_output=eda_output,
+            sample_data_by_LLM=DS_RD_SETTING.sample_data_by_LLM,
+            debug_time_limit=f"{DS_RD_SETTING.debug_timeout / 60 / 60 : .2f} hours",
         )

     def get_runtime_environment(self) -> str:
```
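The timeouts are configured in seconds and rendered as hours for the prompt, using the same format string as above. A quick sketch of the conversion (the 7200/3600 second values are example settings, not defaults from the repo):

```python
# Example of the hour formatting used above; the second values are illustrative.
full_timeout = 7200      # seconds, e.g. DS_RD_SETTING.full_timeout
debug_timeout = 3600     # seconds, e.g. DS_RD_SETTING.debug_timeout

time_limit = f"{full_timeout / 60 / 60 : .2f} hours"
debug_time_limit = f"{debug_timeout / 60 / 60 : .2f} hours"

print(time_limit)        # " 2.00 hours" (the leading space comes from ": .2f")
print(debug_time_limit)  # " 1.00 hours"
```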
47 changes: 29 additions & 18 deletions rdagent/scenarios/data_science/scen/prompts.yaml
```diff
@@ -1,36 +1,47 @@
 scenario_description: |-
-  {% if use_raw_description %}
-  ------Background of the scenario------
+  {% if use_raw_description -%}
+  ====== Background of the scenario ======
   {{ raw_description }}
+
   {% else %}
-  ------Background of the scenario------
+  ====== Background of the scenario ======
   {{ background }}
   {% endif %}

-  ------ Guidelines for participating in the competition ----
-  Before submitting your results, we have numerous tests ready to check your code. Please ensure your submission is genuine and do not manipulate data or return values just to pass the tests, as this will not lead to successful final results.
+  {% if eda_output is not none %}The following is the output of the exploratory data analysis (EDA) performed on the dataset. You should carefully analyze it to better craft your feature engineering and model training strategies.
+  ====== Data Overview (EDA) ======
+  {{ eda_output }}
+  {% endif %}

-  ------The expected output & submission format specifications------
+  ====== Submission Format ======
   Please ensure your submission adheres to the following specifications:
   {{ submission_specifications }}

-  ------The name of the evaluation metric used------
-  `{{ metric_name }}`
+  ====== Important Guidelines ======
+  Before submitting your results, please note the following:
+  - We have numerous tests in place to check your code.
+  - Ensure your submission is genuine.
+  - Do not manipulate data or return values solely to pass preliminary tests, as this will not lead to successful final evaluation.

-  {% if time_limit %}------The time limit to your code------
-  You code running is limit to {{ time_limit }}, after this time limit, your code will be terminated. But remember your main target is to achieve the best performance and you have several times to modify your code. So please be bold to make the best use of all the time limit and don't be too conservative.
-  During this time limit, you have all the resources available to you. Please fully leverage all the computational resources(CPUs and GPUs) to achieve the best performance like choose a powerful model, use a large batch size, enable data sampler with big parallel.
+  ====== Evaluation ======
+  {% if metric_name %}
+  The primary evaluation metric for this task is: **{{ metric_name }}**.
   {% endif %}
-  {% if evaluation is not none %}------Evaluation------
-  {{ evaluation }}
+  This metric is considered better when it is **{% if metric_direction %}larger{% else %}smaller{% endif %}**.
+
+  {% if evaluation is not none %}
+  Additional Evaluation Details:
+  {{ evaluation }}
   {% endif %}
-  The evaluation metrics used is directed as:
-  The metric is better when it is {% if metric_direction %}bigger{% else %}smaller{% endif %}.

-  {% if eda_output is not none %}------Data Overview(EDA)------
-  {{ eda_output }}
+  {% if time_limit %}
+  ====== Time Limit On Full Code Execution ======
+  Your full code's execution is limited to **{{ time_limit }}**. After this time limit, your code will be terminated. But remember your main target is to achieve the best performance, and you have several chances to modify your code. So be bold: make the best use of the full time limit and don't be too conservative.
+  During this time limit, you have all the resources available to you. Please fully leverage all the computational resources (CPUs and GPUs) to achieve the best performance, e.g. choose a powerful model, use a large batch size, and enable a data sampler with high parallelism.
+  {% if sample_data_by_LLM is not none and sample_data_by_LLM is true %}
+  ====== Time Limit On Debug Mode Code Execution ======
+  You are also required to include a debug mode in your code; the debug code's execution is limited to **{{ debug_time_limit }}**. You should set your debug parameters (e.g. epoch number) accordingly to ensure your debug code can run within this time limit.
+  {% endif %}
   {% endif %}

 competition_description_template:
```
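The new guard only emits the debug-mode section when `sample_data_by_LLM` is explicitly `True`; passing `None` or `False` leaves it out. A small sketch of the toggle, using an inline template instead of the repo's `T(...)` loader (values are illustrative):

```python
# Illustrative toggle: the debug section appears only when the flag is True.
from jinja2 import Template

snippet = Template(
    "{% if time_limit %}Full run limit: {{ time_limit }}\n"
    "{% if sample_data_by_LLM is not none and sample_data_by_LLM is true %}"
    "Debug run limit: {{ debug_time_limit }}\n"
    "{% endif %}{% endif %}"
)

print(snippet.render(time_limit=" 2.00 hours",
                     sample_data_by_LLM=True,
                     debug_time_limit=" 1.00 hours"))
# Both limits are printed; with sample_data_by_LLM=None, only the full-run line.
```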
1 change: 0 additions & 1 deletion rdagent/utils/env.py
```diff
@@ -234,7 +234,6 @@ def __run_with_retry(
                     f"The running time exceeds {self.conf.running_timeout_period} seconds, so the process is killed."
                 )
                 log_output += f"\n\nThe running time exceeds {self.conf.running_timeout_period} seconds, so the process is killed."
-                log_output += f"\nTotal running time: {end - start:.3f} seconds."
                 return EnvResult(log_output, return_code, end - start)
             except Exception as e:
                 if retry_index == self.conf.retry_count:
```
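The deleted line wrote the total running time into the log text even though the same value is already returned as the third argument of `EnvResult(log_output, return_code, end - start)`, so callers can read the duration from the result object instead. A minimal sketch of that pattern, with an assumed `EnvResult` shape:

```python
# Sketch only: EnvResult's field names here are assumed, not copied from the repo.
import time
from dataclasses import dataclass


@dataclass
class EnvResult:
    stdout: str
    exit_code: int
    running_time: float  # seconds; callers read the duration here, not from the log


def run_and_time(cmd_fn) -> EnvResult:
    start = time.time()
    log_output, return_code = cmd_fn()
    end = time.time()
    # No need to also append the duration to log_output.
    return EnvResult(log_output, return_code, end - start)


result = run_and_time(lambda: ("ok\n", 0))
print(f"Total running time: {result.running_time:.3f} seconds.")
```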