Getting 503 service unavaialble while trying to get 90000 files from one sharepoint site along with metadata and export the same into CSV format

Anonymous
2023-07-05T12:26:03.03+00:00

Facing issue while trying to get the metadata of all of the documents from one SharePoint Site and export it into CSV using SharePoint PowerShell. I have approximately 100000 of documents in one site.

I am getting the error: Error Generating CSV. Please contact with Administrator Exception calling "ExecuteQuery" with "0" argument(s): "The remote server returned an error: (503) Server Unavailable."

Below is my code where i can get only 1000 items but while trying to get approx 5000 also i am getting 503:

#Load SharePoint CSOM Assemblies
Add-Type -Path "C:\Program Files\SharePoint Online Management Shell\Microsoft.Online.SharePoint.PowerShell\Microsoft.SharePoint.Client.dll" 
Add-Type -Path "C:\Program Files\SharePoint Online Management Shell\Microsoft.Online.SharePoint.PowerShell\Microsoft.SharePoint.Client.Runtime.dll" 
Add-Type -Path "C:\Program Files\SharePoint Online Management Shell\Microsoft.Online.SharePoint.PowerShell\Microsoft.Online.SharePoint.Client.Tenant.dll" 
    
#Function to Generate Report on all documents in a SharePoint Online Site Collection
Function Get-SPODocumentInventory($SiteURL)
{
    Try {
        #Setup the context
        $Ctx = New-Object Microsoft.SharePoint.Client.ClientContext($SiteURL)
        $Ctx.Credentials = $Credentials
    
        #Get the web from given URL and its subsites
        $Web = $Ctx.web
        $Ctx.Load($Web)
        $Ctx.Load($Web.Lists)
        $Ctx.Load($web.Webs)
        $Ctx.executeQuery()
  
        #Arry to Skip System Lists and Libraries
        $SystemLists =@("Converted Forms", "Master Page Gallery", "Customized Reports", "Form Templates", "List Template Gallery", "Theme Gallery",
               "Reporting Templates", "Solution Gallery", "Style Library", "Web Part Gallery","Site Assets", "wfpub", "Site Pages", "Images")
      
        Write-host -f Yellow "Processing Site: $SiteURL"
  
        #Filter Document Libraries to Scan
        $Lists = $Web.Lists | Where {$_.BaseType -eq "DocumentLibrary" -and $_.Hidden -eq $false -and $SystemLists -notcontains $_.Title -and $_.ItemCount -gt 0}
        #Loop through each document library
        Foreach ($List in $Lists)
        {
            #Define CAML Query to Get List Items in batches
            $Query = New-Object Microsoft.SharePoint.Client.CamlQuery
            $Query.ViewXml ="
                <View Scope='RecursiveAll'>
                   <Query>
                      <OrderBy><FieldRef Name='ID' Ascending='TRUE'/></OrderBy>
                   </Query>
                   <RowLimit Paged='TRUE'>$BatchSize</RowLimit>
                </View>"
  
            Write-host | Add-Member NoteProperty -f Cyan "`t Processing Document Library: '$($List.Title)' with $($List.ItemCount) Item(s)"
  
            Do {
                #Get List items
                $ListItems = $List.GetItems($Query)
                $Ctx.Load($ListItems)
                $Ctx.ExecuteQuery()
 
                #Filter Files
                $Files = $ListItems | Where { $_.FileSystemObjectType -eq "File"}
 
                #Iterate through each file and get data
                $DocumentInventory = @()
                Foreach($Item in $Files)
                {
                    $File = $Item.File
                    $Ctx.Load($File)
                    $Ctx.ExecuteQuery()
                    $LastFolderName = ($File.ServerRelativeURL) -replace "/","\"  
                    $fileLeafRef = $item["FileLeafRef"]
                    $fileExtension = [System.IO.Path]::GetExtension($fileLeafRef)

                    $DocumentData = New-Object PSObject
                    $DocumentData | Add-Member NoteProperty SiteName($Web.Title)
                    $DocumentData | Add-Member NoteProperty SiteURL($SiteURL)
                    $DocumentData | Add-Member NoteProperty DocLibraryName($List.Title)
                    $DocumentData | Add-Member NoteProperty TopLevelFolderName($Folder.Name)
                    $DocumentData | Add-Member NoteProperty LastFolderName($LastFolderName)
                    $DocumentData | Add-Member NoteProperty FileName($File.Name)
                    $DocumentData | Add-Member NoteProperty FileURL($File.ServerRelativeUrl)
                    $DocumentData | Add-Member NoteProperty FileID($Item.ID)                    
                    $DocumentData | Add-Member NoteProperty FileType($fileExtension)  
                    $DocumentData | Add-Member NoteProperty ModifiedBy($Item["Editor"].Email)
                    $DocumentData | Add-Member NoteProperty LastModifiedOn($File.TimeLastModified)     
                    $DocumentData | Add-Member NoteProperty CreatedBy($Item["Author"].Email)
                    $DocumentData | Add-Member NoteProperty CreatedOn($File.TimeCreated)
                    #$DocumentData | Add-Member NoteProperty RetentionLabelName($File.RetentionLabel)                   
                    #$DocumentData | Add-Member NoteProperty SiteOwner($SiteCollURL.Email)
                    $DocumentData | Add-Member NoteProperty GUID($List.ID)                 
                        
                    #Add the result to an Array
                    $DocumentInventory += $DocumentData
                }
                #Export the result to CSV file
                $DocumentInventory | Export-CSV $ReportOutput -UseCulture -NoTypeInformation -Append
                $Query.ListItemCollectionPosition = $ListItems.ListItemCollectionPosition
                #Introduce a delay between batches
                Start-Sleep -Seconds 5  # Adjust the delay as needed
            } While($Query.ListItemCollectionPosition -ne $null)
        }
           
        #Iterate through each subsite of the current web and call the function recursively
        ForEach ($Subweb in $Web.Webs)
        {
            #Call the function recursively to process all subsites underneaththe current web
            Get-SPODocumentInventory($Subweb.url)
        }
    }
    Catch {
        write-host -f Red "Error Generating CSV. Please contact with Administrator" $_.Exception.Message
    }
}
   
#Config Parameters
$SiteCollURL="https://wvbss.sharepoint.com/sites/SayantanMainSite"
$ReportOutput="C:\temp\BubEx1.csv"
$BatchSize = 50
 
#Setup Credentials to connect
$Cred= Get-Credential
$Credentials = New-Object Microsoft.SharePoint.Client.SharePointOnlineCredentials($Cred.Username, $Cred.Password)
   
#Delete the Output Report, if exists
if (Test-Path $ReportOutput) { Remove-Item $ReportOutput }
   
#Call the function
Get-SPODocumentInventory $SiteCollURL
Microsoft 365 and Office | SharePoint | Development
Microsoft 365 and Office | SharePoint | For business | Windows
Windows for business | Windows Server | User experience | PowerShell
0 comments No comments
{count} votes

2 answers

Sort by: Most helpful
  1. Anonymous
    2023-07-06T06:59:24.7966667+00:00

    Hi @Sayantan Ganguly

    A 503 error means that the server was unable to process your request. This could be due to things like high server load or access rate limiting.

    In order to solve this problem, you can consider the following methods:

    1. Increase the delay between each request:
    # add delay in loop
    Start-Sleep -Seconds 5 # Adjust the delay time as needed
    
    1. Reduce the batch size
    $BatchSize = 25 # adjust batch size as needed
    
    1. Batch document library: divide the document library into smaller parts for processing
    # Add conditions in the loop to limit the number of documents processed
    $Files = $ListItems | Where-Object { $_.FileSystemObjectType -eq "File" } | Select-Object -First 1000
    

    Be aware that, depending on the limitations of SharePoint Online, you may experience performance challenges with large document libraries and metadata operations for large numbers of documents.


    If the answer is helpful, please click "Accept Answer" and kindly upvote it. If you have extra questions about this answer, please click "Comment".

    Note: Please follow the steps in our documentation to enable e-mail notifications if you want to receive the related email notification for this thread.

    Best Regards

    Cheng Feng


  2. Anonymous
    2023-07-12T18:45:09.1266667+00:00
    # Import SharePoint Online Management Shell module
    Import-Module -Name Microsoft.Online.SharePoint.PowerShell -DisableNameChecking
    
    # Function to Generate Report on all documents in a SharePoint Online Site Collection
    Function Get-SPODocumentInventory($SiteURL)
    {
        Try {
            # Setup the context
            $Ctx = New-Object Microsoft.SharePoint.Client.ClientContext($SiteURL)
            $Ctx.Credentials = $Credentials
    
            # Get the web from the given URL and its subsites
            $Web = $Ctx.Web
            $Ctx.Load($Web)
            $Ctx.ExecuteQuery()
    
            # Array to Skip System Lists and Libraries
            $SystemLists = @(
                "Converted Forms", "Master Page Gallery", "Customized Reports", "Form Templates", "List Template Gallery",
                "Theme Gallery", "Reporting Templates", "Solution Gallery", "Style Library", "Web Part Gallery",
                "Site Assets", "wfpub", "Site Pages", "Images"
            )
    
            Write-Host -ForegroundColor Yellow "Processing Site: $SiteURL"
    
            # Function to process a document library
            function Process-DocumentLibrary($List)
            {
                if ($List.BaseType -eq "DocumentLibrary" -and !$List.Hidden -and $SystemLists -notcontains $List.Title -and $List.ItemCount -gt 0) {
                    Write-Host -ForegroundColor Cyan "`tProcessing Document Library: $($List.Title)" -NoNewline
                    Write-Host " ($($List.ItemCount) Item(s))"
    
                    $Query = New-Object Microsoft.SharePoint.Client.CamlQuery
                    $Query.ViewXml = "<View Scope='RecursiveAll'>
                                          <Query>
                                             <OrderBy><FieldRef Name='ID' Ascending='TRUE'/></OrderBy>
                                          </Query>
                                          <RowLimit Paged='TRUE'>$BatchSize</RowLimit>
                                      </View>"
    
                    $totalItemCount = $List.ItemCount
                    $processedItemCount = 0
    
                    do {
                        $ListItems = $List.GetItems($Query)
                        $Ctx.Load($ListItems)
                        Retry-ExecuteQuery -Context $Ctx
    
                        $Files = $ListItems | Where-Object { $_.FileSystemObjectType -eq "File" }
    
                        $DocumentInventory = foreach ($Item in $Files) {
                            $Ctx.Load($Item.File)
                            $Ctx.Load($Item.File.Author)
                            $Ctx.Load($Item.File.ModifiedBy)
                            Retry-ExecuteQuery -Context $Ctx
    
                            $File = $Item.File
    
                            [PSCustomObject]@{
                                SiteURL = $SiteURL
                                DocLibraryName = $List.Title
                                FileName = $File.Name
                                FileURL = $File.ServerRelativeUrl
                                CreatedBy = $File.Author.Email
                                CreatedOn = $File.TimeCreated
                                ModifiedBy = $File.ModifiedBy.Email
                                LastModifiedOn = $File.TimeLastModified
                                SizeKB = [math]::Round($File.Length / 1KB, 2)
                            }
                        }
    
                        # Export the result to CSV file
                        $DocumentInventory | Export-Csv -Path $ReportOutput -NoTypeInformation -Append
    
                        $processedItemCount += $Files.Count
    
                        $queryPosition = $ListItems.ListItemCollectionPosition
                        if ($queryPosition -ne $null) {
                            $Query.ListItemCollectionPosition = $queryPosition
                        }
                    } while ($queryPosition -ne $null)
                }
            }
    
            # Filter Document Libraries to Scan
            $Lists = $Web.Lists
            $Ctx.Load($Lists)
            Retry-ExecuteQuery -Context $Ctx
    
            # Loop through each document library
            foreach ($List in $Lists) {
                Process-DocumentLibrary $List
            }
    
            # Function to process subsites recursively
            function Process-SubWebs($Web)
            {
                $subWebs = $Web.Webs
                $Ctx.Load($subWebs)
                Retry-ExecuteQuery -Context $Ctx
    
                foreach ($subWeb in $subWebs) {
                    # Call the function recursively to process all subsites underneath the current web
                    Get-SPODocumentInventory $subWeb.Url
                    Process-SubWebs $subWeb
                }
            }
    
            # Process subsites recursively
            Process-SubWebs $Web
        }
        Catch {
            Write-Host -ForegroundColor Red "Error Generating Document Inventory! $($_.Exception.Message)"
        }
    }
    
    # Retry logic for ExecuteQuery
    function Retry-ExecuteQuery {
        param (
            [Microsoft.SharePoint.Client.ClientContext]$Context,
            [int]$MaxRetries = 3,
            [int]$DelaySeconds = 10
        )
    
        $retryCount = 0
        while ($retryCount -lt $MaxRetries) {
            try {
                $Context.ExecuteQuery()
                return
            }
            catch {
                $retryCount++
                Write-Host -ForegroundColor Red "Retry attempt $($retryCount): $($_.Exception.Message)"
                if ($retryCount -lt $MaxRetries) {
                    $httpException = $_.Exception.InnerException | where { $_.GetType().Name -eq 'Microsoft.SharePoint.Client.ServerException' }
                    if ($httpException -and $httpException.Message.StartsWith('The HTTP response has headers exceeding the limit')) {
                        Write-Host -ForegroundColor Yellow "Throttling limit reached. Retrying after $DelaySeconds seconds..."
                        Start-Sleep -Seconds $DelaySeconds
                    }
                    else {
                        Write-Host -ForegroundColor Yellow "Retrying after $DelaySeconds seconds..."
                        Start-Sleep -Seconds $DelaySeconds
                    }
                }
                else {
                    throw $_.Exception
                }
            }
        }
    }
    
    # Config Parameters
    $SiteCollURL = "https://wvbss.sharepoint.com/sites/SayantanMainSite"
    $ReportOutput = "C:\temp\DocInventory.csv"
    $BatchSize = 500 # Adjust the batch size based on performance
    
    # Setup Credentials to connect
    $Cred = Get-Credential
    $Credentials = New-Object Microsoft.SharePoint.Client.SharePointOnlineCredentials($Cred.UserName, $Cred.Password)
    
    # Delete the Output Report if it exists
    if (Test-Path $ReportOutput) { Remove-Item $ReportOutput }
    
    # Call the function
    Get-SPODocumentInventory $SiteCollURL
    
    0 comments No comments

Your answer

Answers can be marked as Accepted Answers by the question author, which helps users to know the answer solved the author's problem.