Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions src/Agent.Sdk/Knob/AgentKnobs.cs
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,15 @@ public class AgentKnobs
new EnvironmentKnobSource("AZP_AGENT_USE_HOST_GROUP_ID"),
new BuiltInDefaultKnobSource("true"));

// Name of the variable that both the runtime source and the environment source
// of the DockerActionRetries knob are keyed by.
public const string DockerActionRetriesVariableName = "VSTSAGENT_DOCKER_ACTION_RETRIES";

// Knob that controls whether failed docker steps are retried.
// It is configured with a runtime source and an environment source (both using
// DockerActionRetriesVariableName) plus a built-in default of "false".
public static readonly Knob DockerActionRetries = new Knob(
nameof(DockerActionRetries),
"When enabled, the agent retries docker steps if failed",
new RuntimeKnobSource(DockerActionRetriesVariableName),
new EnvironmentKnobSource(DockerActionRetriesVariableName),
new BuiltInDefaultKnobSource("false"));

// Directory structure
public static readonly Knob AgentToolsDirectory = new Knob(
nameof(AgentToolsDirectory),
Expand Down
89 changes: 81 additions & 8 deletions src/Agent.Worker/Container/DockerCommandManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,9 @@ public override void Initialize(IHostContext hostContext)
public async Task<DockerVersion> DockerVersion(IExecutionContext context)
{
ArgUtil.NotNull(context, nameof(context));
string serverVersionStr = (await ExecuteDockerCommandAsync(context, "version", "--format '{{.Server.APIVersion}}'")).FirstOrDefault();
var action = new Func<Task<List<string>>>(async () => await ExecuteDockerCommandAsync(context, "version", "--format '{{.Server.APIVersion}}'"));
const string command = "Docker version";
string serverVersionStr = (await ExecuteDockerCommandAsyncWithRetries(context, action, command)).FirstOrDefault();
ArgUtil.NotNullOrEmpty(serverVersionStr, "Docker.Server.Version");
context.Output($"Docker daemon API version: {serverVersionStr}");

Expand Down Expand Up @@ -98,12 +100,14 @@ public async Task<int> DockerLogin(IExecutionContext context, string server, str
ArgUtil.NotNull(username, nameof(username));
ArgUtil.NotNull(password, nameof(password));

if (PlatformUtil.RunningOnWindows)
{
var action = new Func<Task<int>>(async () => PlatformUtil.RunningOnWindows
// Wait for 17.07 to switch using stdin for docker registry password.
return await ExecuteDockerCommandAsync(context, "login", $"--username \"{username}\" --password \"{password.Replace("\"", "\\\"")}\" {server}", new List<string>() { password }, context.CancellationToken);
}
return await ExecuteDockerCommandAsync(context, "login", $"--username \"{username}\" --password-stdin {server}", new List<string>() { password }, context.CancellationToken);
? await ExecuteDockerCommandAsync(context, "login", $"--username \"{username}\" --password \"{password.Replace("\"", "\\\"")}\" {server}", new List<string>() { password }, context.CancellationToken)
: await ExecuteDockerCommandAsync(context, "login", $"--username \"{username}\" --password-stdin {server}", new List<string>() { password }, context.CancellationToken)
);

const string command = "Docker login";
return await ExecuteDockerCommandAsyncWithRetries(context, action, command);
}

public async Task<int> DockerLogout(IExecutionContext context, string server)
Expand All @@ -119,7 +123,9 @@ public async Task<int> DockerPull(IExecutionContext context, string image)
ArgUtil.NotNull(context, nameof(context));
ArgUtil.NotNull(image, nameof(image));

return await ExecuteDockerCommandAsync(context, "pull", image, context.CancellationToken);
var action = new Func<Task<int>>(async () => await ExecuteDockerCommandAsync(context, "pull", image, context.CancellationToken));
const string command = "Docker pull";
return await ExecuteDockerCommandAsyncWithRetries(context, action, command);
}

public async Task<string> DockerCreate(IExecutionContext context, ContainerInfo container)
Expand Down Expand Up @@ -194,7 +200,9 @@ public async Task<int> DockerStart(IExecutionContext context, string containerId
ArgUtil.NotNull(context, nameof(context));
ArgUtil.NotNull(containerId, nameof(containerId));

return await ExecuteDockerCommandAsync(context, "start", containerId, context.CancellationToken);
var action = new Func<Task<int>>(async () => await ExecuteDockerCommandAsync(context, "start", containerId, context.CancellationToken));
const string command = "Docker start";
return await ExecuteDockerCommandAsyncWithRetries(context, action, command);
}

public async Task<int> DockerRemove(IExecutionContext context, string containerId)
Expand Down Expand Up @@ -443,5 +451,70 @@ await processInvoker.ExecuteAsync(

return output;
}

/// <summary>
/// Runs a docker action that reports an exit code and, when the
/// DockerActionRetries knob is enabled, retries it after a fixed back-off
/// while it keeps returning a non-zero exit code.
/// </summary>
/// <param name="context">Execution context used to read the knob, write output and warnings, and observe cancellation while backing off.</param>
/// <param name="action">Delegate that executes the docker command and yields its exit code.</param>
/// <param name="command">Display name of the command, used only in the warning message.</param>
/// <returns>The exit code of the last attempt that was made (0 as soon as an attempt succeeds).</returns>
private static async Task<int> ExecuteDockerCommandAsyncWithRetries(IExecutionContext context, Func<Task<int>> action, string command)
{
    bool dockerActionRetries = AgentKnobs.DockerActionRetries.GetValue(context).AsBoolean();
    context.Output($"DockerActionRetries variable value: {dockerActionRetries}");

    // Total number of attempts (the first run included), same as before.
    const int maxAttempts = 3;
    TimeSpan retryDelay = TimeSpan.FromSeconds(10);
    int exitCode = 0;

    for (int attempt = 1; attempt <= maxAttempts; attempt++)
    {
        exitCode = await action();

        // Stop on success, when retries are disabled, or when no attempt is left.
        // Checking the last attempt here avoids announcing a retry and sleeping
        // for the full back-off when nothing will be retried afterwards.
        if (exitCode == 0 || !dockerActionRetries || attempt == maxAttempts)
        {
            break;
        }

        // TotalSeconds renders "10"; interpolating the TimeSpan itself would render "00:00:10".
        context.Warning($"{command} failed with exit code {exitCode}, back off {retryDelay.TotalSeconds} seconds before retry.");

        // Honor job cancellation instead of blocking for the whole back-off.
        await Task.Delay(retryDelay, context.CancellationToken);
    }

    return exitCode;
}

/// <summary>
/// Runs a docker action that returns its output lines and, when the
/// DockerActionRetries knob is enabled, retries it after a fixed back-off if it
/// throws <see cref="ProcessExitCodeException"/> or produces no output.
/// </summary>
/// <param name="context">Execution context used to read the knob, write output and warnings, and observe cancellation while backing off.</param>
/// <param name="action">Delegate that executes the docker command and yields its output lines.</param>
/// <param name="command">Display name of the command, used only in the warning message.</param>
/// <returns>
/// The output of the first attempt that produced any lines; otherwise the
/// (null or empty) output of the last attempt that was made.
/// </returns>
/// <exception cref="ProcessExitCodeException">
/// Rethrown from <paramref name="action"/> when retries are disabled or the last attempt fails.
/// </exception>
private static async Task<List<string>> ExecuteDockerCommandAsyncWithRetries(IExecutionContext context, Func<Task<List<string>>> action, string command)
{
    bool dockerActionRetries = AgentKnobs.DockerActionRetries.GetValue(context).AsBoolean();
    context.Output($"DockerActionRetries variable value: {dockerActionRetries}");

    // One initial attempt plus up to maxRetries retries, same as before.
    const int maxRetries = 3;
    TimeSpan retryDelay = TimeSpan.FromSeconds(10);
    List<string> output = new List<string>();

    for (int retryCount = 0; retryCount <= maxRetries; retryCount++)
    {
        // No further attempt follows when the knob is off or the retry budget is spent.
        bool isLastAttempt = !dockerActionRetries || retryCount == maxRetries;

        try
        {
            output = await action();
        }
        catch (ProcessExitCodeException)
        {
            if (isLastAttempt)
            {
                throw;
            }
        }

        if (output != null && output.Count != 0)
        {
            break;
        }

        // Previously an empty result was retried immediately, without warning or
        // delay, and even when the knob was disabled. Now an empty result is
        // returned as-is when no retry is allowed, and is retried with the same
        // back-off as a failed command otherwise.
        if (isLastAttempt)
        {
            break;
        }

        // TotalSeconds renders "10"; interpolating the TimeSpan itself would render "00:00:10".
        context.Warning($"{command} failed, back off {retryDelay.TotalSeconds} seconds before retry.");

        // Honor job cancellation instead of blocking for the whole back-off.
        await Task.Delay(retryDelay, context.CancellationToken);
    }

    return output;
}
}
}
33 changes: 10 additions & 23 deletions src/Agent.Worker/ContainerOperationProvider.cs
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,12 @@ private async Task PullContainerAsync(IExecutionContext executionContext, Contai
ArgUtil.NotNullOrEmpty(username, nameof(username));
ArgUtil.NotNullOrEmpty(password, nameof(password));

int loginExitCode = await _dockerManger.DockerLogin(executionContext, registryServer, username, password);
int loginExitCode = await _dockerManger.DockerLogin(
executionContext,
registryServer,
username,
password);

if (loginExitCode != 0)
{
throw new InvalidOperationException($"Docker login fail with exit code {loginExitCode}");
Expand All @@ -338,29 +343,11 @@ private async Task PullContainerAsync(IExecutionContext executionContext, Contai
}
}

// Pull down docker image with retry up to 3 times
int retryCount = 0;
int pullExitCode = 0;
while (retryCount < 3)
{
pullExitCode = await _dockerManger.DockerPull(executionContext, container.ContainerImage);
if (pullExitCode == 0)
{
break;
}
else
{
retryCount++;
if (retryCount < 3)
{
var backOff = BackoffTimerHelper.GetRandomBackoff(TimeSpan.FromSeconds(1), TimeSpan.FromSeconds(10));
executionContext.Warning($"Docker pull failed with exit code {pullExitCode}, back off {backOff.TotalSeconds} seconds before retry.");
await Task.Delay(backOff);
}
}
}
int pullExitCode = await _dockerManger.DockerPull(
executionContext,
container.ContainerImage);

if (retryCount == 3 && pullExitCode != 0)
if (pullExitCode != 0)
{
throw new InvalidOperationException($"Docker pull failed with exit code {pullExitCode}");
}
Expand Down