共用方式為


如何:查詢目錄樹狀結構中的重複檔案 (LINQ)

有時候同名的檔案會存在於多個資料夾中。 例如,在 Visual Studio 安裝資料夾下,就有幾個資料夾都有 readme.htm 檔案。 這個範例顯示如何在指定的根資料夾下查詢這類重複檔名。 第二個範例則顯示如何查詢大小和建立時間也相符的檔案。

範例

''' <summary>
''' Console sample that uses LINQ grouping to find files that occur more than
''' once under a root folder, and pages the results to the console.
''' </summary>
Module QueryDuplicateFileNames

    ''' <summary>
    ''' Entry point. Runs the name-only duplicate query against a fixed folder.
    ''' </summary>
    Public Sub Main()

        Dim path As String = "C:\Program Files\Microsoft Visual Studio 9.0\Common7"
        QueryDuplicates1(path)
        ' Uncomment to run this query instead 
        ' QueryDuplicates2(path) 

    End Sub

    ''' <summary>
    ''' Groups every file under <paramref name="root"/> (all subdirectories) by
    ''' file name and pages the groups that contain two or more files.
    ''' </summary>
    ''' <param name="root">Folder whose tree is searched.</param>
    Sub QueryDuplicates1(ByVal root As String)
        Dim dir As New System.IO.DirectoryInfo(root)
        Dim duplicates = From aFile In dir.GetFiles("*.*", System.IO.SearchOption.AllDirectories) _
                         Order By aFile.Name _
                         Group aFile By aFile.Name Into newGroup = Group _
                         Where newGroup.Count() >= 2 _
                         Select newGroup

        ' The root prefix is trimmed from every printed path to keep lines short. 
        Dim trimLength = root.Length
        PageOutput(duplicates, trimLength)

    End Sub

    ''' <summary>
    ''' Same as <see cref="QueryDuplicates1"/>, but groups on a composite key of
    ''' name, creation time and length, so only files matching on all three
    ''' properties end up in the same group.
    ''' </summary>
    ''' <param name="root">Folder whose tree is searched.</param>
    Sub QueryDuplicates2(ByVal root As String)

        Dim dir As New System.IO.DirectoryInfo(root)

        Dim duplicates = From aFile In dir.GetFiles("*.*", System.IO.SearchOption.AllDirectories) _
                         Order By aFile.Name _
                         Group aFile By aFile.Name, aFile.CreationTime, aFile.Length Into newGroup = Group _
                         Where newGroup.Count() >= 2 _
                         Select newGroup

        ' The root prefix is trimmed from every printed path to keep lines short. 
        Dim trimLength = root.Length
        PageOutput(duplicates, trimLength)

    End Sub

    ''' <summary>
    ''' Pages console display for large query results. No more than one group
    ''' is shown per page. Each element of <paramref name="groupQuery"/> is
    ''' iterated as a sequence of FileInfo objects.
    ''' </summary>
    ''' <param name="groupQuery">Sequence of groups; each group is a sequence of FileInfo.</param>
    ''' <param name="charsToSkip">Number of leading characters removed from each full path before printing.</param>
    Sub PageOutput(ByVal groupQuery, ByVal charsToSkip)

        ' Leave 3 console rows free (prompt line, input cursor, one spare). 
        ' Clamp to at least 1 so that a very short window still advances through 
        ' the group instead of re-showing an empty page forever. 
        Dim numLines As Integer = Math.Max(1, Console.WindowHeight - 3)

        For Each fg As IEnumerable(Of System.IO.FileInfo) In groupQuery
            ' Every group starts at the top of a fresh page. 
            Dim currentLine As Integer = 0

            Do While (currentLine < fg.Count())
                Console.Clear()

                ' Get the next page of results for this group. 
                Dim resultPage = From file In fg _
                                 Skip currentLine Take numLines

                ' Execute the query. Trim the paths in the output. 
                For Each line In resultPage
                    Console.WriteLine(vbTab & line.FullName.Substring(charsToSkip))
                Next

                ' Advance the current position 
                currentLine = numLines + currentLine

                ' Give the user a chance to break out of the loop 
                Console.WriteLine("Press any key for next page or the 'End' key to exit.")
                Dim key As ConsoleKey = Console.ReadKey().Key
                If key = ConsoleKey.End Then
                    ' Exit For leaves the enclosing For Each over the groups, 
                    ' which ends paging altogether. 
                    Exit For
                End If
            Loop
        Next
    End Sub
End Module
/// <summary>
/// Console sample that uses LINQ grouping to find files that occur more than
/// once under a folder tree, and pages the results to the console.
/// </summary>
class QueryDuplicateFileNames
{
    /// <summary>
    /// Entry point. Runs the name-only duplicate query, then waits for a key press.
    /// </summary>
    static void Main(string[] args)
    {
        // Uncomment QueryDuplicates2 to run that query.
        QueryDuplicates();
        // QueryDuplicates2(); 

        // Keep the console window open in debug mode.
        Console.WriteLine("Press any key to exit.");
        Console.ReadKey();
    }

    /// <summary>
    /// Groups every file under the start folder (all subdirectories) by file name
    /// and pages the groups that contain more than one file.
    /// </summary>
    static void QueryDuplicates()
    {
        // Change the root drive or folder if necessary 
        string startFolder = @"c:\program files\Microsoft Visual Studio 9.0\";

        // Take a snapshot of the file system.
        System.IO.DirectoryInfo dir = new System.IO.DirectoryInfo(startFolder);

        // This method assumes that the application has discovery permissions 
        // for all folders under the specified path.
        IEnumerable<System.IO.FileInfo> fileList = dir.GetFiles("*.*", System.IO.SearchOption.AllDirectories);

        // used in WriteLine to keep the lines shorter 
        int charsToSkip = startFolder.Length;

        // var can be used for convenience with groups. 
        var queryDupNames =
            from file in fileList
            group file.FullName.Substring(charsToSkip) by file.Name into fileGroup
            where fileGroup.Count() > 1
            select fileGroup;

        // Pass the query to a method that will 
        // output one page at a time.
        PageOutput<string, string>(queryDupNames);
    }

    /// <summary>
    /// A group key that can be passed to a separate method.
    /// Equals and GetHashCode define equality over all three properties;
    /// ToString provides a friendly name for Key.ToString().
    /// </summary>
    class PortableKey
    {
        public string Name { get; set; }
        public DateTime CreationTime { get; set; }
        public long Length { get; set; }

        /// <summary>
        /// Returns true only when <paramref name="obj"/> is a PortableKey with the
        /// same Name, CreationTime and Length. Null and other types compare unequal.
        /// </summary>
        public override bool Equals(object obj)
        {
            // "as" yields null for both a null argument and a foreign type, so
            // neither case can throw.
            PortableKey other = obj as PortableKey;
            if (other == null)
            {
                return false;
            }

            return other.CreationTime == this.CreationTime &&
                   other.Length == this.Length &&
                   other.Name == this.Name;
        }

        /// <summary>
        /// Combines the hash codes of the same three properties that Equals compares.
        /// </summary>
        public override int GetHashCode()
        {
            // Overflow is expected and harmless when mixing hash codes.
            unchecked
            {
                int hash = 17;
                hash = (hash * 31) + this.CreationTime.GetHashCode();
                hash = (hash * 31) + this.Length.GetHashCode();
                hash = (hash * 31) + (this.Name == null ? 0 : this.Name.GetHashCode());
                return hash;
            }
        }

        /// <summary>
        /// Returns "Name Length CreationTime" for display as the group header.
        /// </summary>
        public override string ToString()
        {
            return String.Format("{0} {1} {2}", this.Name, this.Length, this.CreationTime);
        }
    }

    /// <summary>
    /// Groups every file under the start folder by a compound key of name,
    /// creation time and length, and pages the groups that contain more than one file.
    /// </summary>
    static void QueryDuplicates2()
    {
        // Change the root drive or folder if necessary. 
        string startFolder = @"c:\program files\Microsoft Visual Studio 9.0\Common7";

        // Make the lines shorter for the console display 
        int charsToSkip = startFolder.Length;

        // Take a snapshot of the file system.
        System.IO.DirectoryInfo dir = new System.IO.DirectoryInfo(startFolder);
        IEnumerable<System.IO.FileInfo> fileList = dir.GetFiles("*.*", System.IO.SearchOption.AllDirectories);

        // Note the use of a compound key. Files that match 
        // all three properties belong to the same group. 
        // A named type is used to enable the query to be 
        // passed to another method. Anonymous types can also be used 
        // for composite keys but cannot be passed across method boundaries. 
        var queryDupFiles =
            from file in fileList
            group file.FullName.Substring(charsToSkip) by 
                new PortableKey { Name = file.Name, CreationTime = file.CreationTime, Length = file.Length } into fileGroup
            where fileGroup.Count() > 1
            select fileGroup;

        // The query is deferred; it runs once, when PageOutput enumerates it.
        PageOutput<PortableKey, string>(queryDupFiles);
    }


    /// <summary>
    /// A generic method to page the output of the QueryDuplicates methods.
    /// The type of the group must be specified explicitly because "var" cannot
    /// be used in method signatures. No more than one group is shown per page.
    /// </summary>
    /// <typeparam name="K">Type of the group key; its ToString() is printed as the header.</typeparam>
    /// <typeparam name="V">Type of the group elements; each is printed on its own line.</typeparam>
    /// <param name="groupByExtList">The groups to display.</param>
    private static void PageOutput<K, V>(IEnumerable<System.Linq.IGrouping<K, V>> groupByExtList)
    {
        // Flag to break out of paging loop. 
        bool goAgain = true;

        // "3" = 1 line for the header + 1 for "Press any key" + 1 for input cursor.
        // Clamp to at least 1 so a very short window still advances through the
        // group instead of looping on an empty page.
        int numLines = Math.Max(1, Console.WindowHeight - 3);

        // Iterate through the outer collection of groups. 
        foreach (var filegroup in groupByExtList)
        {
            // Each group starts at the top of a page. 
            int currentLine = 0;

            // A null key is shown the same way as an empty one.
            string keyText = filegroup.Key == null ? String.Empty : filegroup.Key.ToString();
            int groupSize = filegroup.Count();

            // Output only as many lines of the current group as will fit in the window. 
            do
            {
                Console.Clear();
                Console.WriteLine("Filename = {0}", keyText == String.Empty ? "[none]" : keyText);

                // Get 'numLines' number of items starting at number 'currentLine'. 
                var resultPage = filegroup.Skip(currentLine).Take(numLines);

                // Execute the resultPage query 
                foreach (var fileName in resultPage)
                {
                    Console.WriteLine("\t{0}", fileName);
                }

                // Increment the line counter.
                currentLine += numLines;

                // Give the user a chance to escape.
                Console.WriteLine("Press any key to continue or the 'End' key to break...");
                ConsoleKey key = Console.ReadKey().Key;
                if (key == ConsoleKey.End)
                {
                    goAgain = false;
                    break;
                }
            } while (currentLine < groupSize);

            if (goAgain == false)
            {
                break;
            }
        }
    }
}

第一個查詢會使用簡單的索引鍵來判斷是否相符,這樣會尋找所有名稱相同,但是內容可能不同的檔案。 第二個查詢則會使用複合索引鍵來比對 FileInfo 物件的三個屬性。 這個查詢比較可能找到同名而且內容類似或相同的檔案。

編譯程式碼

  • 建立以 .NET Framework 3.5 版為目標的 Visual Studio 專案。 專案預設會含 System.Core.dll 的參考,以及 System.Linq 命名空間 (Namespace) 的 using 指示詞 (C#) 或 Imported 命名空間 (Visual Basic)。 請在 C# 專案中,加入 System.IO 命名空間的 using 指示詞。

  • 請將這段程式碼複製到您的專案。

  • 按 F5 編譯和執行程式。

  • 按任何鍵離開主控台視窗。

穩固程式設計

如需對多種類型的文件和檔案內容執行大量查詢作業,可考慮使用 Windows 桌面搜尋引擎。

請參閱

概念

LINQ to Objects

LINQ 和檔案目錄