/// <summary>Maximum number of times the pool's allocation state is checked before giving up.</summary>
private const int MaxSteadyStateChecks = 9;

/// <summary>Delay between two consecutive allocation-state checks.</summary>
private static readonly TimeSpan SteadyStatePollInterval = TimeSpan.FromMinutes(1);

/// <summary>
/// Fetches the pool identified by <c>batchMetadata.PoolId</c>, waits for it to report the
/// <c>Steady</c> allocation state and, when auto-scale is disabled and the current dedicated
/// node count differs from <c>batchMetadata.QtyDedicatedNodes</c>, calls <c>ResizeAsync</c>
/// with that target and a 15 minute resize timeout.
/// </summary>
/// <param name="batchClient">Client whose <c>PoolOperations</c> is used to fetch the pool.</param>
/// <param name="batchMetadata">Supplies the pool ID and the target dedicated-node count.</param>
/// <returns>
/// The pool ID, either after <c>ResizeAsync</c> has completed or when the pool already has the
/// target number of dedicated nodes.
/// </returns>
/// <exception cref="InvalidProgramException">The pool lookup yielded <c>null</c>.</exception>
/// <exception cref="InvalidOperationException">
/// The pool needs a resize but has auto-scale enabled, or it did not report the <c>Steady</c>
/// state within the allowed number of checks.
/// </exception>
private async Task<string> CreatePoolIWithPackageIfNoExistAsync(BatchClient batchClient, BatchMetadata batchMetadata)
{
    // NOTE(review): GetPoolAsync presumably throws (rather than returning null) for an unknown
    // pool ID, which would make this fallback unreachable - confirm against the SDK docs. The
    // exception type is kept as-is so existing callers are not affected.
    Microsoft.Azure.Batch.CloudPool? pool = await batchClient.PoolOperations.GetPoolAsync(batchMetadata.PoolId)
        ?? throw new InvalidProgramException($"Pool ID {batchMetadata.PoolId} not exist");

    // Wait first, exactly like the original condition order did: the node count is only
    // compared once the pool has settled (or the wait has been given up).
    bool isSteady = await WaitForPoolSteadyStateAsync(pool, batchMetadata.PoolId);
    bool needsResize = pool.CurrentDedicatedComputeNodes != batchMetadata.QtyDedicatedNodes;

    if (isSteady && needsResize && pool.AutoScaleEnabled == false)
    {
        _logger.LogWarning("Pool ID {PoolId} exist, now resize pool ... please wait", batchMetadata.PoolId);
        await pool.ResizeAsync(batchMetadata.QtyDedicatedNodes, null, TimeSpan.FromMinutes(15));

        // Guard against a pool that has no application package references at all.
        if (pool.ApplicationPackageReferences is not null)
        {
            foreach (var packageReference in pool.ApplicationPackageReferences)
            {
                _logger.LogWarning("Pool App version deployed {packageVersion}", packageReference.Version);
            }
        }

        return pool.Id;
    }

    if (!needsResize)
    {
        // The template now carries a placeholder; previously the node count was silently dropped.
        _logger.LogWarning("Pool is already resized to {QtyDedicatedNodes}", batchMetadata.QtyDedicatedNodes);
        return pool.Id;
    }

    if (pool.AutoScaleEnabled == true)
    {
        throw new InvalidOperationException($"Pool ID {batchMetadata.PoolId} has AutoScaleEnabled == true, please set false");
    }

    throw new InvalidOperationException($"Pool ID {batchMetadata.PoolId} is null or pool stateisn't {Microsoft.Azure.Batch.Common.AllocationState.Steady}");
}

/// <summary>
/// Synchronous wait for the pool to report the <c>Steady</c> allocation state. Checks up to
/// <see cref="MaxSteadyStateChecks"/> times, sleeping <see cref="SteadyStatePollInterval"/>
/// and refreshing the pool between checks.
/// </summary>
/// <param name="pool">Pool to observe; a <c>null</c> pool is reported as not steady.</param>
/// <param name="poolId">Pool ID used in log messages.</param>
/// <returns><c>true</c> once the pool is steady; <c>false</c> when the checks are exhausted.</returns>
private bool PoolHasSteadyState(Microsoft.Azure.Batch.CloudPool pool, string poolId)
{
    if (pool is null)
    {
        // Nothing to poll: waiting cannot change the outcome.
        return false;
    }

    for (var check = 1; check <= MaxSteadyStateChecks; check++)
    {
        if (IsPoolSteady(pool))
        {
            return true;
        }

        if (check == MaxSteadyStateChecks)
        {
            break; // do not sleep after the final check
        }

        _logger.LogWarning("Pool ID {PoolId} is in resizing state, ... please wait 1 mninute", poolId);
        Thread.Sleep(SteadyStatePollInterval);

        // Re-read the pool from the service; without this the loop would keep
        // evaluating the state captured when the object was first fetched.
        pool.Refresh();
    }

    LogSteadyStateTimeout(poolId);
    return false;
}

/// <summary>
/// Asynchronous counterpart of <see cref="PoolHasSteadyState"/>: same checks and limits, but
/// waits with <c>Task.Delay</c> and refreshes with <c>RefreshAsync</c> so no thread is blocked.
/// </summary>
/// <param name="pool">Pool to observe.</param>
/// <param name="poolId">Pool ID used in log messages.</param>
/// <returns><c>true</c> once the pool is steady; <c>false</c> when the checks are exhausted.</returns>
private async Task<bool> WaitForPoolSteadyStateAsync(Microsoft.Azure.Batch.CloudPool pool, string poolId)
{
    for (var check = 1; check <= MaxSteadyStateChecks; check++)
    {
        if (IsPoolSteady(pool))
        {
            return true;
        }

        if (check == MaxSteadyStateChecks)
        {
            break; // do not wait after the final check
        }

        _logger.LogWarning("Pool ID {PoolId} is in resizing state, ... please wait 1 mninute", poolId);
        await Task.Delay(SteadyStatePollInterval);

        // Pull the current state from the service before the next check.
        await pool.RefreshAsync();
    }

    LogSteadyStateTimeout(poolId);
    return false;
}

/// <summary>Returns <c>true</c> when <paramref name="pool"/> is non-null and its allocation state is <c>Steady</c>.</summary>
private static bool IsPoolSteady(Microsoft.Azure.Batch.CloudPool? pool) =>
    pool?.AllocationState == Microsoft.Azure.Batch.Common.AllocationState.Steady;

/// <summary>Logs that the pool did not reach the <c>Steady</c> state within the allowed number of checks.</summary>
private void LogSteadyStateTimeout(string poolId) =>
    _logger.LogError(
        "Pool ID {PoolId}, maximum amount of retry ({NumRetry}), pool state isn't {ExpectedState}",
        poolId,
        MaxSteadyStateChecks,
        Microsoft.Azure.Batch.Common.AllocationState.Steady);
How can I troubleshoot a Linux pool that gets stuck while resizing? The pool is created with zero nodes, and when it is resized from the .NET SDK, on the third execution the pool keeps resizing indefinitely and "Stop Resizing" doesn't work. Two images are attached for reference.