diff --git a/rdagent/app/utils/ws.py b/rdagent/app/utils/ws.py index ab1221a55..c56ddb84a 100644 --- a/rdagent/app/utils/ws.py +++ b/rdagent/app/utils/ws.py @@ -10,7 +10,7 @@ @app.command() -def run(competition: str, cmd: str, local_path: str = "./"): +def run(competition: str, cmd: str, local_path: str = "./", mount_path: str | None = None): """ Launch the data-science environment for a specific competition and run the provided command. @@ -44,6 +44,9 @@ def run(competition: str, cmd: str, local_path: str = "./"): enable_cache=False, ) + if mount_path is not None: + env.conf.mount_path = mount_path + env.run(entry=cmd, local_path=local_path) diff --git a/rdagent/components/coder/data_science/conf.py b/rdagent/components/coder/data_science/conf.py index 3719b86c8..e5d9ba7ff 100644 --- a/rdagent/components/coder/data_science/conf.py +++ b/rdagent/components/coder/data_science/conf.py @@ -19,7 +19,7 @@ class DSCoderCoSTEERSettings(CoSTEERSettings): class Config: env_prefix = "DS_Coder_CoSTEER_" - max_seconds: int = 2400 + max_seconds: int = DS_RD_SETTING.debug_timeout * 4 env_type: str = "docker" # TODO: extract a function for env and conf. diff --git a/rdagent/components/coder/data_science/pipeline/prompts.yaml b/rdagent/components/coder/data_science/pipeline/prompts.yaml index 19bb1333e..0b7322fb8 100644 --- a/rdagent/components/coder/data_science/pipeline/prompts.yaml +++ b/rdagent/components/coder/data_science/pipeline/prompts.yaml @@ -82,6 +82,7 @@ pipeline_coder: ``` In debug mode, you should only sample ten percent of the training data and run the minimum epochs to quickly test the correctness of the code. In debug mode, you should implement a timer to measure the time taken for your debug configuration and estimate the time required for the full run. + In debug mode, your code should run faster, so the environment will set a shorter time limit than the standard time limit for your code. 
For example, you can sample ten percent of the training data and run for one epoch, then the full run with ten epochs will take one hundred times the time taken for the debug run. The scale is calculated by yourself depending on the data sampling and epoch number you choose. If your full run enables early stopping, the scale should be smaller considering the early stopping will stop the training earlier than the full epochs. You should sample the data after train valid split. When you split the data after sampling, you might get a class with only one sample which might cause the split strategy to fail. Your debug code should run exactly the same as the full run, except for the data sampling and epoch number, to ensure the correctness of the code. @@ -133,7 +134,9 @@ pipeline_coder: {% if latest_code %} # Former code + ``` {{ latest_code }} + ``` {% if latest_code_feedback is not none %} ## Feedback to former code {{ latest_code_feedback }} @@ -270,7 +273,11 @@ pipeline_eval: {{ spec }} # Code + ``` {{ code }} + ``` ## Execution Output + ``` {{ stdout }} + ``` diff --git a/rdagent/core/scenario.py b/rdagent/core/scenario.py index a9ff6b83f..b333c069b 100644 --- a/rdagent/core/scenario.py +++ b/rdagent/core/scenario.py @@ -52,6 +52,12 @@ def get_scenario_all_desc( The scenario description varies based on the task being performed. 
""" + @abstractmethod + def get_runtime_environment(self) -> str: + """ + Get the runtime environment information + """ + @property def experiment_setting(self) -> str | None: """Get experiment setting and return as rich text string""" diff --git a/rdagent/scenarios/data_science/dev/runner/__init__.py b/rdagent/scenarios/data_science/dev/runner/__init__.py index ceef3e5e6..47972362e 100644 --- a/rdagent/scenarios/data_science/dev/runner/__init__.py +++ b/rdagent/scenarios/data_science/dev/runner/__init__.py @@ -31,7 +31,7 @@ class DSRunnerCoSTEERSettings(DSCoderCoSTEERSettings): class Config: env_prefix = "DS_Runner_CoSTEER_" - max_seconds: int = 3600 + max_seconds: int = DS_RD_SETTING.full_timeout env_type: str = "docker" # TODO: extract a function for env and conf. diff --git a/rdagent/scenarios/data_science/dev/runner/eval.py b/rdagent/scenarios/data_science/dev/runner/eval.py index 1b0fe1cf5..3ba631998 100644 --- a/rdagent/scenarios/data_science/dev/runner/eval.py +++ b/rdagent/scenarios/data_science/dev/runner/eval.py @@ -133,6 +133,7 @@ def evaluate( scenario=self.scen.get_scenario_all_desc(eda_output=implementation.file_dict.get("EDA.md", None)), is_sub_enabled=test_eval.is_sub_enabled(self.scen.competition), task_desc=target_task.get_task_information(), + runtime_environment=self.scen.get_runtime_environment(), ) user_prompt = T(".prompts:DSCoSTEER_eval.user").r( code=implementation.all_codes, diff --git a/rdagent/scenarios/data_science/dev/runner/prompts.yaml b/rdagent/scenarios/data_science/dev/runner/prompts.yaml index 4a7fee8ab..79c1386a0 100644 --- a/rdagent/scenarios/data_science/dev/runner/prompts.yaml +++ b/rdagent/scenarios/data_science/dev/runner/prompts.yaml @@ -9,6 +9,9 @@ DSCoSTEER_eval: The task is as follows: {{ task_desc }} + You have following environment to run the code: + {{ runtime_environment }} + The whole workflow includes multiple stages, such as: - Data loading - Feature engineering @@ -16,7 +19,11 @@ DSCoSTEER_eval: - Ensembling 
The user will provide you the time spent on the whole code execution and the timeout of the code execution. You should decide whether the hyperparameter is reasonable based on the time. - For example, if the code only spent ten percent of the timeout and the hyperparameter like `n_estimators` or 'epochs' is very small or batch size is small you should suggest to increase these hyperparameter. + For example, if the code uses only a small portion of the allowed time, and hyperparameters like `n_estimators` or `epochs` have low values, with early stopping not being triggered and possible signs of underfitting, you should suggest increasing these hyperparameters. + + You should also pay attention to other resource-utilization hyperparameters. + For example, if you are using a GPU with large memory, and the batch size is set very low, you should suggest increasing the batch size if it is not reasonable. + Please provide your feedback in two key-value pairs: "hyperparameter_tuning_decision": "hyperparameter_tuning_suggestion": diff --git a/rdagent/scenarios/data_science/scen/runtime_info.py b/rdagent/scenarios/data_science/scen/runtime_info.py index 5a754c514..d4bb953bc 100644 --- a/rdagent/scenarios/data_science/scen/runtime_info.py +++ b/rdagent/scenarios/data_science/scen/runtime_info.py @@ -62,6 +62,8 @@ def get_gpu_info(): "numpy", "scikit-learn", "scipy", + "xgboost", + "sklearn", "lightgbm", "vtk", "opencv-python", diff --git a/rdagent/scenarios/data_science/test_eval.py b/rdagent/scenarios/data_science/test_eval.py index ba3174cd5..d91fcd57e 100644 --- a/rdagent/scenarios/data_science/test_eval.py +++ b/rdagent/scenarios/data_science/test_eval.py @@ -23,7 +23,7 @@ def valid(self, competition: str, workspace: FBWorkspace) -> tuple[str, int]: @abstractmethod def enabled(self, competition) -> bool: - """able to eval or not""" + """support `eval` & `valid` or not""" @abstractmethod def get_sample_submission_name(self, competition: str) -> str: