方法 : ディレクトリ ツリーで重複するファイルを問い合わせる (LINQ)
更新 : 2007 年 11 月
同じ名前のファイルが複数のフォルダに存在することがあります。たとえば、Visual Studio インストール フォルダでは、いくつかのフォルダに readme.htm ファイルが含まれています。次の例では、指定したルート フォルダの下で、このような重複したファイル名を問い合わせる方法を示します。2 番目の例では、サイズと作成時刻も一致するファイルを問い合わせる方法を示します。
使用例
Module QueryDuplicateFileNames
Public Sub Main()
Dim path As String = "C:\Program Files\Microsoft Visual Studio 9.0\Common7"
'QueryDuplicates1(path)
' Uncomment to run this query instead
QueryDuplicates2(path)
End Sub
Sub QueryDuplicates1(ByVal root As String)
Dim duplicates = From aFile In GetFiles(root) _
Order By aFile.Name _
Group aFile By aFile.Name Into newGroup = Group _
Where newGroup.Count() >= 2 _
Select newGroup
' Page the display so that the results can be read.
Dim trimLength = root.Length
PageOutput(duplicates, trimLength)
End Sub
Sub QueryDuplicates2(ByVal root As String)
' This time a composite key is used. This sub finds all files
' that have been copied into multiple subfolders.
Dim duplicates = From aFile In GetFiles(root) _
Order By aFile.Name _
Group aFile By aFile.Name, aFile.CreationTime, aFile.Length Into newGroup = Group _
Where newGroup.Count() >= 2 _
Select newGroup
' Page the display so that the results can be read.
Dim trimLength = root.Length
PageOutput(duplicates, trimLength)
End Sub
' Pages console diplay for large query results. No more than one group per page.
' This sub specifically works with group queries of FileInfo objects
' but can be modified for any type.
Sub PageOutput(ByVal groupQuery, ByVal charsToSkip)
' "3" = 1 line for extension key + 1 for "Press any key" + 1 for input cursor.
Dim numLines As Integer = Console.WindowHeight - 3
' Flag to indicate whether there are more results to diplay
Dim goAgain As Boolean = True
For Each fg As IEnumerable(Of System.IO.FileInfo) In groupQuery
' Start a new extension at the top of a page.
Dim currentLine As Integer = 0
Do While (currentLine < fg.Count())
Console.Clear()
' Get the next page of results
' No more than one filename per page
Dim resultPage = From file In fg _
Skip currentLine Take numLines
' Execute the query. Trim the paths in the output.
For Each line In resultPage
Console.WriteLine(vbTab & line.FullName.Substring(charsToSkip))
Next
' Advance the current position
currentLine = numLines + currentLine
' Give the user a chance to break out of the loop
Console.WriteLine("Press any key for next page or the 'End' key to exit.")
Dim key As ConsoleKey = Console.ReadKey().Key
If key = ConsoleKey.End Then
goAgain = False
Exit For
End If
Loop
Next
End Sub
' Function to retrieve a list of files. Note that this is a copy
' of the file information.
Function GetFiles(ByVal root As String) As System.Collections.Generic.IEnumerable(Of System.IO.FileInfo)
Return From file In My.Computer.FileSystem.GetFiles _
(root, FileIO.SearchOption.SearchAllSubDirectories, "*.*") _
Select New System.IO.FileInfo(file)
End Function
End Module
class QueryDuplicateFileNames
{
static void Main(string[] args)
{
// Uncomment QueryDuplicates2 to run that query.
QueryDuplicates();
// QueryDuplicates2();
// Keep the console window open in debug mode.
Console.WriteLine("Press any key to exit.");
Console.ReadKey();
}
static void QueryDuplicates()
{
// Change the root drive or folder if necessary
string startFolder = @"c:\program files\Microsoft Visual Studio 9.0\";
// Take a snapshot of the file system.
IEnumerable<System.IO.FileInfo> fileList = GetFiles(startFolder);
// used in WriteLine to keep the lines shorter
int charsToSkip = startFolder.Length;
// var can be used for convenience with groups.
var queryDupNames =
from file in fileList
group file.FullName.Substring(charsToSkip) by file.Name into fileGroup
where fileGroup.Count() > 1
select fileGroup;
// Pass the query to a method that will
// output one page at a time.
PageOutput<string,string>(queryDupNames);
}
// A Group key that can be passed to a separate method.
// Override Equals and GetHashCode to define equality for the key.
// Override ToString to provide a friendly name for Key.ToString()
class PortableKey
{
public string Name { get; set; }
public DateTime CreationTime { get; set; }
public long Length {get; set;}
public override bool Equals(object obj)
{
PortableKey other = (PortableKey)obj;
return other.CreationTime == this.CreationTime &&
other.Length == this.Length &&
other.Name == this.Name;
}
public override int GetHashCode()
{
string str = String.Format("{0}{1}{2}", this.CreationTime, this.Length, this.Name);
return str.GetHashCode();
}
public override string ToString()
{
return String.Format("{0} {1} {2}", this.Name, this.Length, this.CreationTime);
}
}
static void QueryDuplicates2()
{
// Change the root drive or folder if necessary.
string startFolder = @"c:\program files\Microsoft Visual Studio 9.0\Common7";
// Make the the lines shorter for the console display
int charsToSkip = startFolder.Length;
// Take a snapshot of the file system.
IEnumerable<System.IO.FileInfo> fileList = GetFiles(startFolder);
// Note the use of a compound key. Files that match
// all three properties belong to the same group.
// A named type is used to enable the query to be
// passed to another method. Anonymous types can also be used
// for composite keys but cannot be passed across method boundaries
//
var queryDupFiles =
from file in fileList
group file.FullName.Substring(charsToSkip) by
new PortableKey{ Name=file.Name, CreationTime=file.CreationTime, Length=file.Length } into fileGroup
where fileGroup.Count() > 1
select fileGroup;
var list = queryDupFiles.ToList();
int i = queryDupFiles.Count();
PageOutput<PortableKey, string>(queryDupFiles);
}
// A generic method to page the output of the QueryDuplications methods
// Here the type of the group must be specified explicitly. "var" cannot
// be used in method signatures. This method does not display more than one
// group per page.
private static void PageOutput<K,V>(IEnumerable<System.Linq.IGrouping<K, V>> groupByExtList)
{
// Flag to break out of paging loop.
bool goAgain = true;
// "3" = 1 line for extension + 1 for "Press any key" + 1 for input cursor.
int numLines = Console.WindowHeight - 3;
// Iterate through the outer collection of groups.
foreach (var filegroup in groupByExtList)
{
// Start a new extension at the top of a page.
int currentLine = 0;
// Output only as many lines of the current group as will fit in the window.
do
{
Console.Clear();
Console.WriteLine("Filename = {0}", filegroup.Key.ToString() == String.Empty ? "[none]" : filegroup.Key.ToString());
// Get 'numLines' number of items starting at number 'currentLine'.
var resultPage = filegroup.Skip(currentLine).Take(numLines);
//Execute the resultPage query
foreach (var fileName in resultPage)
{
Console.WriteLine("\t{0}", fileName);
}
// Increment the line counter.
currentLine += numLines;
// Give the user a chance to escape.
Console.WriteLine("Press any key to continue or the 'End' key to break...");
ConsoleKey key = Console.ReadKey().Key;
if (key == ConsoleKey.End)
{
goAgain = false;
break;
}
} while (currentLine < filegroup.Count());
if (goAgain == false)
break;
}
}
// This method assumes that the application has discovery
// permissions for all folders under the specified path.
static IEnumerable<System.IO.FileInfo> GetFiles(string path)
{
if (!System.IO.Directory.Exists(path))
throw new System.IO.DirectoryNotFoundException();
string[] fileNames = null;
List<System.IO.FileInfo> files = new List<System.IO.FileInfo>();
fileNames = System.IO.Directory.GetFiles(path, "*.*", System.IO.SearchOption.AllDirectories);
foreach (string name in fileNames)
{
files.Add(new System.IO.FileInfo(name));
}
return files;
}
}
最初のクエリでは、単純なキーを使用して一致を判断します。これにより、同じ名前を持つファイルが検索されますが、各ファイルの内容が一致するとは限りません。2 番目のクエリでは、複合キーを使用して、FileInfo オブジェクトの 3 つのプロパティと照合します。このクエリでは、同じ名前を持ち、内容も類似または一致するファイルが検索される可能性が高くなります。
コードのコンパイル方法
.NET Framework Version 3.5 を対象とする Visual Studio プロジェクトを作成します。プロジェクトには、System.Core.dll への参照と、System.Linq 名前空間に対する using ディレクティブ (C#) または Imports ステートメント (Visual Basic) が既定で含まれます。C# プロジェクトでは、System.IO 名前空間に対する using ディレクティブを追加します。
このコードをプロジェクト内にコピーします。
F5 キーを押して、プログラムをコンパイルおよび実行します。
任意のキーを押して、コンソール ウィンドウを終了します。
堅牢性の高いプログラム
複数の種類のドキュメントやファイルの内容を対象に、集中的にクエリ操作を実行する場合は、Windows デスクトップ サーチ エンジンを使用することを検討してください。