如何:将 LINQ 查询与正则表达式合并在一起
此示例演示如何使用 Regex 类创建正则表达式以便在文本字符串中进行更复杂的匹配。 使用 LINQ 查询可以方便地对您要用正则表达式搜索的文件进行准确筛选,以及对结果进行加工。
示例
' Console sample that combines a LINQ query with a regular expression:
' it lists every .htm file under a start folder whose text matches the
' pattern, together with the matched strings found in that file.
Class LinqRegExVB

    ' Builds the file list, runs the regular expression over each .htm
    ' file and writes the matches to the console.
    Shared Sub Main()

        ' Root folder to query, along with all subfolders.
        ' Modify this path as necessary so that it accesses your Visual Studio folder.
        Dim startFolder As String = "C:\program files\Microsoft Visual Studio 9.0\"
        ' One of the following paths may be more appropriate on your computer.
        'Dim startFolder As String = "C:\program files (x86)\Microsoft Visual Studio 9.0\"
        'Dim startFolder As String = "C:\program files\Microsoft Visual Studio 10.0\"
        'Dim startFolder As String = "C:\program files (x86)\Microsoft Visual Studio 10.0\"

        ' Take a snapshot of the file system.
        Dim fileList As IEnumerable(Of System.IO.FileInfo) = GetFiles(startFolder)

        ' Create a regular expression to find all things "Visual".
        Dim searchTerm As System.Text.RegularExpressions.Regex =
            New System.Text.RegularExpressions.Regex("Visual (Basic|C#|C\+\+|J#|SourceSafe|Studio)")

        ' Search the contents of each .htm file.
        ' Remove the first Where clause to find even more matches!
        ' The extension is compared without regard to case so that files
        ' named ".HTM" or ".Htm" are not skipped.
        ' This query produces a list of files where a match
        ' was found, and a list of the matches in that file.
        ' Note: Explicit typing of "match" in the nested From clause.
        ' This is required because MatchCollection is not a
        ' generic IEnumerable collection.
        Dim queryMatchingFiles = From afile In fileList
                                 Where String.Equals(afile.Extension, ".htm", StringComparison.OrdinalIgnoreCase)
                                 Let fileText = System.IO.File.ReadAllText(afile.FullName)
                                 Let matches = searchTerm.Matches(fileText)
                                 Where (matches.Count > 0)
                                 Select Name = afile.FullName,
                                        Matches = From match As System.Text.RegularExpressions.Match In matches
                                                  Select match.Value

        ' Execute the query.
        Console.WriteLine("The term " & searchTerm.ToString() & " was found in:")

        For Each fileMatches In queryMatchingFiles
            ' Trim the path a bit, then write
            ' the file name in which a match was found.
            ' (Length - 1 keeps the backslash that ends startFolder.)
            Dim s = fileMatches.Name.Substring(startFolder.Length - 1)
            Console.WriteLine(s)

            ' For this file, write out all the matching strings
            For Each match In fileMatches.Matches
                Console.WriteLine(" " & match)
            Next
        Next

        ' Keep the console window open in debug mode
        Console.WriteLine("Press any key to exit")
        Console.ReadKey()
    End Sub

    ' Function to retrieve a list of files. Every file under root,
    ' including all subfolders, is wrapped in a new FileInfo object,
    ' so the result is a copy of the file information.
    Shared Function GetFiles(ByVal root As String) As IEnumerable(Of System.IO.FileInfo)
        Return From file In My.Computer.FileSystem.GetFiles(
                   root, FileIO.SearchOption.SearchAllSubDirectories, "*.*")
               Select New System.IO.FileInfo(file)
    End Function
End Class
// Console sample that combines a LINQ query with a regular expression:
// it lists every .htm file under a start folder whose text matches the
// pattern, together with the matched strings found in that file.
class QueryWithRegEx
{
    // Builds the file list, runs the regular expression over each .htm
    // file and writes the matches to the console.
    public static void Main()
    {
        // Modify this path as necessary so that it accesses your version of Visual Studio.
        string startFolder = @"c:\program files\Microsoft Visual Studio 9.0\";
        // One of the following paths may be more appropriate on your computer.
        //string startFolder = @"c:\program files (x86)\Microsoft Visual Studio 9.0\";
        //string startFolder = @"c:\program files\Microsoft Visual Studio 10.0\";
        //string startFolder = @"c:\program files (x86)\Microsoft Visual Studio 10.0\";

        // Take a snapshot of the file system.
        IEnumerable<System.IO.FileInfo> fileList = GetFiles(startFolder);

        // Create the regular expression to find all things "Visual".
        System.Text.RegularExpressions.Regex searchTerm =
            new System.Text.RegularExpressions.Regex(@"Visual (Basic|C#|C\+\+|J#|SourceSafe|Studio)");

        // Search the contents of each .htm file.
        // Remove the first where clause to find even more matchedValues!
        // The extension is compared without regard to case so that files
        // named ".HTM" or ".Htm" are not skipped.
        // This query produces a list of files where a match
        // was found, and a list of the matchedValues in that file.
        // Note: Explicit typing of "match" in the nested from clause.
        // This is required because MatchCollection is not a
        // generic IEnumerable collection.
        var queryMatchingFiles =
            from file in fileList
            where string.Equals(file.Extension, ".htm", StringComparison.OrdinalIgnoreCase)
            let fileText = System.IO.File.ReadAllText(file.FullName)
            let matches = searchTerm.Matches(fileText)
            where matches.Count > 0
            select new
            {
                name = file.FullName,
                matchedValues = from System.Text.RegularExpressions.Match match in matches
                                select match.Value
            };

        // Execute the query.
        Console.WriteLine("The term \"{0}\" was found in:", searchTerm.ToString());

        foreach (var v in queryMatchingFiles)
        {
            // Trim the path a bit, then write
            // the file name in which a match was found.
            // (Length - 1 keeps the backslash that ends startFolder.)
            string s = v.name.Substring(startFolder.Length - 1);
            Console.WriteLine(s);

            // For this file, write out all the matching strings
            foreach (var v2 in v.matchedValues)
            {
                Console.WriteLine(" " + v2);
            }
        }

        // Keep the console window open in debug mode
        Console.WriteLine("Press any key to exit");
        Console.ReadKey();
    }

    // Returns a FileInfo for every file under the specified path,
    // including all subfolders. The list is fully populated before it
    // is returned.
    // Throws DirectoryNotFoundException when the path does not exist.
    // This method assumes that the application has discovery
    // permissions for all folders under the specified path.
    static IEnumerable<System.IO.FileInfo> GetFiles(string path)
    {
        if (!System.IO.Directory.Exists(path))
        {
            // Include the path so the caller can see which folder to correct.
            throw new System.IO.DirectoryNotFoundException(
                "The folder to search does not exist: " + path);
        }

        string[] fileNames =
            System.IO.Directory.GetFiles(path, "*.*", System.IO.SearchOption.AllDirectories);

        // The number of files is known up front, so size the list once.
        List<System.IO.FileInfo> files = new List<System.IO.FileInfo>(fileNames.Length);
        foreach (string name in fileNames)
        {
            files.Add(new System.IO.FileInfo(name));
        }

        return files;
    }
}
请注意,您还可以查询由 Regex 搜索返回的 MatchCollection 对象。 在此示例中,结果中仅生成每个匹配项的值。 但也可使用 LINQ 对该集合执行各种筛选、排序和分组操作。 由于 MatchCollection 是非泛型 IEnumerable 集合,因此必须显式声明查询中的范围变量的类型。
编译代码
创建一个面向 .NET Framework 3.5 版的 Visual Studio 项目。 默认情况下,该项目具有对 System.Core.dll 的引用以及针对 System.Linq 命名空间的 using 指令 (C#) 或 Imports 语句 (Visual Basic)。 在 C# 项目中,添加 System.IO 命名空间的 using 指令。
将此代码复制到您的项目。
按 F5 编译并运行程序。
按任意键退出控制台窗口。