如何:查询目录树中的一个或多个最大的文件 (LINQ)

更新:2007 年 11 月

此示例演示与文件大小(以字节为单位)相关的五种查询:

  • 如何检索最大文件的大小(以字节为单位)。

  • 如何检索最小文件的大小(以字节为单位)。

  • 如何从指定的根文件夹下的一个或多个文件夹检索 FileInfo 对象最大或最小文件。

  • 如何检索一个序列,如 10 个最大文件。

  • 如何按文件大小(以字节为单位)将文件分组,并忽略小于指定大小的文件。

示例

下面的示例包含五种不同的查询,这些查询演示如何根据文件大小(以字节为单位)查询和分组文件。可以轻松地修改这些示例,以使查询基于 FileInfo 对象的某个其他属性。

Module QueryBySize
    Sub Main()

        ' Change the drive\path if necessary
        Dim root As String = "C:\Program Files\Microsoft Visual Studio 9.0"

        Dim fileList = GetFiles(root)

        ' Return the size of the largest file
        Dim maxSize = Aggregate aFile In fileList Into Max(GetFileLength(aFile))

        'Dim maxSize = fileLengths.Max
        Console.WriteLine("The length of the largest file under {0} is {1}", _
                          root, maxSize)

        ' Return the FileInfo object of the largest file
        ' by sorting and selecting from the beginning of the list
        Dim filesByLengDesc = From file In fileList _
                              Let filelength = GetFileLength(file) _
                              Where filelength > 0 _
                              Order By filelength Descending _
                              Select file

        Dim longestFile = filesByLengDesc.First

        Console.WriteLine("The largest file under {0} is {1} with a length of {2} bytes", _
                          root, longestFile.FullName, longestFile.Length)

        Dim smallestFile = filesByLengDesc.Last

        Console.WriteLine("The smallest file under {0} is {1} with a length of {2} bytes", _
                                root, smallestFile.FullName, smallestFile.Length)

        ' Return the FileInfos for the 10 largest files
        ' Based on a previous query, but nothing is executed
        ' until the For Each statement below.
        Dim tenLargest = From file In filesByLengDesc Take 10

        Console.WriteLine("The 10 largest files under {0} are:", root)

        For Each fi As System.IO.FileInfo In tenLargest
            Console.WriteLine("{0}: {1} bytes", fi.FullName, fi.Length)
        Next

        ' Group files according to their size,
        ' leaving out the ones under 200K
        Dim sizeGroups = From file As System.IO.FileInfo In GetFiles(root) _
                         Where file.Length > 0 _
                         Let groupLength = file.Length / 100000 _
                         Group file By groupLength Into fileGroup = Group _
                         Where groupLength >= 2 _
                         Order By groupLength Descending

        For Each group In sizeGroups
            Console.WriteLine(group.groupLength + "00000")

            For Each item As System.IO.FileInfo In group.fileGroup
                Console.WriteLine("   {0}: {1}", item.Name, item.Length)
            Next
        Next

        ' Keep the console window open in debug mode
        Console.WriteLine("Press any key to exit.")
        Console.ReadKey()

    End Sub

    ' This method is used to catch the possible exception
    ' that can be raised when accessing the FileInfo.Length property.
    ' In this particular case, it is safe to ignore the exception.
    Function GetFileLength(ByVal fi As System.IO.FileInfo) As Long
        Dim retval As Long
        Try
            retval = fi.Length
        Catch ex As FileNotFoundException
            ' If a file is no longer present,
            ' just return zero bytes. 
            retval = 0
        End Try

        Return retval
    End Function

    ' Function to retrieve a list of files. Note that this is a copy
    ' of the file information.
    Function GetFiles(ByVal root As String) As System.Collections.Generic.IEnumerable(Of System.IO.FileInfo)
        Return From file In My.Computer.FileSystem.GetFiles _
                  (root, FileIO.SearchOption.SearchAllSubDirectories, "*.*") _
               Select New System.IO.FileInfo(file)
    End Function
End Module
class QueryBySize
{
    static void Main(string[] args)
    {
        QueryFilesBySize();
        Console.WriteLine("Press any key to exit");
        Console.ReadKey();
    }

    private static void QueryFilesBySize()
    {
        string startFolder = @"c:\program files\Microsoft Visual Studio 9.0\";

        // Take a snapshot of the file system. 
        // fileList is an IEnumerable<System.IO.FileInfo>
        var fileList = GetFiles(startFolder);

        //Return the size of the largest file
        long maxSize =
            (from file in fileList
             let len = GetFileLength(file)
             select len)
             .Max();

        Console.WriteLine("The length of the largest file under {0} is {1}",
            startFolder, maxSize);

        // Return the FileInfo object for the largest file
        // by sorting and selecting from beginning of list
        System.IO.FileInfo longestFile = 
            (from file in fileList
            let len = GetFileLength(file)
            where len > 0
            orderby len descending
            select file)
            .First();

        Console.WriteLine("The largest file under {0} is {1} with a length of {2} bytes",
                            startFolder, longestFile.FullName, longestFile.Length);

        //Return the FileInfo of the smallest file
        System.IO.FileInfo smallestFile = 
            (from file in fileList
            let len = GetFileLength(file)
            where len > 0
            orderby len ascending
            select file).First();

        Console.WriteLine("The smallest file under {0} is {1} with a length of {2} bytes",
                            startFolder, smallestFile.FullName, smallestFile.Length);

        //Return the FileInfos for the 10 largest files
        // queryTenLargest is an IEnumerable<System.IO.FileInfo>
        var queryTenLargest = 
            (from file in fileList
            let len = GetFileLength(file)
            orderby len descending
            select file).Take(10);

        Console.WriteLine("The 10 largest files under {0} are:", startFolder);

        foreach (var v in queryTenLargest)
        {
            Console.WriteLine("{0}: {1} bytes", v.FullName, v.Length);
        }


        // Group the files according to their size, leaving out
        // files that are less than 200000 bytes. 
        var querySizeGroups = 
            from file in fileList
            let len = GetFileLength(file)
            where len > 0
            group file by (len / 100000) into fileGroup
            where fileGroup.Key >= 2
            orderby fileGroup.Key descending
            select fileGroup;


        foreach (var filegroup in querySizeGroups)
        {
            Console.WriteLine(filegroup.Key.ToString() + "00000");
            foreach (var item in filegroup)
            {
                Console.WriteLine("\t{0}: {1}", item.Name, item.Length);
            }
        }

        // Keep the console window open in debug mode.
        Console.WriteLine("Press any key to exit.");
        Console.ReadKey();
    }

    // This method is used to swallow the possible exception
    // that can be raised when accessing the FileInfo.Length property.
    // In this particular case, it is safe to swallow the exception.
    static long GetFileLength(System.IO.FileInfo fi)
    {
        long retval;
        try
        {
            retval = fi.Length;
        }
        catch (System.IO.FileNotFoundException)
        {
            // If a file is no longer present,
            // just add zero bytes to the total.
            retval = 0;
        }
        return retval;
    }

    // This method assumes that the application has discovery 
    // permissions for all folders under the specified path.
    static IEnumerable<System.IO.FileInfo> GetFiles(string path)
    {
        if (!System.IO.Directory.Exists(path))
            throw new System.IO.DirectoryNotFoundException();

        string[] fileNames = null;
        List<System.IO.FileInfo> files = new List<System.IO.FileInfo>();

        fileNames = System.IO.Directory.GetFiles(path, "*.*", System.IO.SearchOption.AllDirectories);
        foreach (string name in fileNames)
        {
            files.Add(new System.IO.FileInfo(name));
        }
        return files;
    }
}

若要返回一个或多个完整的 FileInfo 对象,查询必须首先检查数据源中的每个对象,然后按这些对象的 Length 属性的值排序它们。然后查询可以返回具有最大长度的单个对象或序列。使用 First 可返回列表中的第一个元素。使用 Take<TSource> 可返回前 n 个元素。指定降序排序顺序可将最小的元素放在列表的开头。

查询调用单独的方法获取文件大小(以字节为单位),以便消除在以下情况下可能引发的异常:自调用 GetFiles 创建 FileInfo 对象起的时间段内在另一个线程上删除了文件。虽然已创建了 FileInfo 对象,但仍会发生异常,因为 FileInfo 对象将尝试使用第一次访问其 Length 属性时的最新大小(以字节为单位)刷新此属性。通过将此操作放在查询外的 try-catch 块中,我们遵循了避免在查询中执行可能引起副作用的操作的规则。通常,在抑制异常时必须格外谨慎,确保没有将应用程序置于未知状态。

编译代码

  • 创建一个面向 .NET Framework 3.5 版的 Visual Studio 项目。默认情况下,该项目具有对 System.Core.dll 的引用以及针对 System.Linq 命名空间的 using 指令 (C#) 或 Imports 语句 (Visual Basic)。

  • 将此代码复制到您的项目。

  • 按 F5 编译并运行程序。

  • 按任意键退出控制台窗口。

可靠编程

若要对多种类型的文档和文件的内容执行大量查询操作,请考虑使用 Windows Desktop Search(Windows 桌面搜索)引擎。

请参见

概念

LINQ to Objects

LINQ 和文件目录