StringInfo 類別
定義
重要
部分資訊涉及發行前產品,在發行之前可能會有大幅修改。 Microsoft 對此處提供的資訊,不做任何明確或隱含的瑕疵擔保。
提供將字串分隔為文字項目並逐一查看那些文字項目的功能。
public ref class StringInfo
public class StringInfo
[System.Serializable]
public class StringInfo
[System.Serializable]
[System.Runtime.InteropServices.ComVisible(true)]
public class StringInfo
type StringInfo = class
[<System.Serializable>]
type StringInfo = class
[<System.Serializable>]
[<System.Runtime.InteropServices.ComVisible(true)>]
type StringInfo = class
Public Class StringInfo
- 繼承
-
StringInfo
- 屬性
範例
這個範例會使用 GetTextElementEnumerator 類別的 StringInfo 和 ParseCombiningCharacters 方法來操作包含 Surrogate 和合併字元的字串。
using namespace System;
using namespace System::Text;
using namespace System::Globalization;
// Show how to enumerate each real character (honoring surrogates)
// in a string.
void EnumTextElements(String^ combiningChars)
{
// This StringBuilder holds the output results.
StringBuilder^ sb = gcnew StringBuilder();
// Use the enumerator returned from GetTextElementEnumerator
// method to examine each real character.
TextElementEnumerator^ charEnum =
StringInfo::GetTextElementEnumerator(combiningChars);
while (charEnum->MoveNext())
{
sb->AppendFormat("Character at index {0} is '{1}'{2}",
charEnum->ElementIndex, charEnum->GetTextElement(),
Environment::NewLine);
}
// Show the results.
Console::WriteLine("Result of GetTextElementEnumerator:");
Console::WriteLine(sb);
}
// Show how to discover the index of each real character
// (honoring surrogates) in a string.
void EnumTextElementIndexes(String^ combiningChars)
{
// This StringBuilder holds the output results.
StringBuilder^ sb = gcnew StringBuilder();
// Use the ParseCombiningCharacters method to
// get the index of each real character in the string.
array <int>^ textElemIndex =
StringInfo::ParseCombiningCharacters(combiningChars);
// Iterate through each real character showing the character
// and the index where it was found.
for (int i = 0; i < textElemIndex->Length; i++)
{
sb->AppendFormat("Character {0} starts at index {1}{2}",
i, textElemIndex[i], Environment::NewLine);
}
// Show the results.
Console::WriteLine("Result of ParseCombiningCharacters:");
Console::WriteLine(sb);
}
int main()
{
// The string below contains combining characters.
String^ combiningChars = L"a\u0304\u0308bc\u0327";
// Show each 'character' in the string.
EnumTextElements(combiningChars);
// Show the index in the string where each 'character' starts.
EnumTextElementIndexes(combiningChars);
};
// This code produces the following output.
//
// Result of GetTextElementEnumerator:
// Character at index 0 is 'a-"'
// Character at index 3 is 'b'
// Character at index 4 is 'c,'
//
// Result of ParseCombiningCharacters:
// Character 0 starts at index 0
// Character 1 starts at index 3
// Character 2 starts at index 4
using System;
using System.Text;
using System.Globalization;
public sealed class App {
static void Main() {
// The string below contains combining characters.
String s = "a\u0304\u0308bc\u0327";
// Show each 'character' in the string.
EnumTextElements(s);
// Show the index in the string where each 'character' starts.
EnumTextElementIndexes(s);
}
// Show how to enumerate each real character (honoring surrogates) in a string.
static void EnumTextElements(String s) {
// This StringBuilder holds the output results.
StringBuilder sb = new StringBuilder();
// Use the enumerator returned from GetTextElementEnumerator
// method to examine each real character.
TextElementEnumerator charEnum = StringInfo.GetTextElementEnumerator(s);
while (charEnum.MoveNext()) {
sb.AppendFormat(
"Character at index {0} is '{1}'{2}",
charEnum.ElementIndex, charEnum.GetTextElement(),
Environment.NewLine);
}
// Show the results.
Console.WriteLine("Result of GetTextElementEnumerator:");
Console.WriteLine(sb);
}
// Show how to discover the index of each real character (honoring surrogates) in a string.
static void EnumTextElementIndexes(String s) {
// This StringBuilder holds the output results.
StringBuilder sb = new StringBuilder();
// Use the ParseCombiningCharacters method to
// get the index of each real character in the string.
Int32[] textElemIndex = StringInfo.ParseCombiningCharacters(s);
// Iterate through each real character showing the character and the index where it was found.
for (Int32 i = 0; i < textElemIndex.Length; i++) {
sb.AppendFormat(
"Character {0} starts at index {1}{2}",
i, textElemIndex[i], Environment.NewLine);
}
// Show the results.
Console.WriteLine("Result of ParseCombiningCharacters:");
Console.WriteLine(sb);
}
}
// This code produces the following output:
//
// Result of GetTextElementEnumerator:
// Character at index 0 is 'ā̈'
// Character at index 3 is 'b'
// Character at index 4 is 'ç'
//
// Result of ParseCombiningCharacters:
// Character 0 starts at index 0
// Character 1 starts at index 3
// Character 2 starts at index 4
Imports System.Text
Imports System.Globalization
Public Module Example
Public Sub Main()
' The string below contains combining characters.
Dim s As String = "a" + ChrW(&h0304) + ChrW(&h0308) + "bc" + ChrW(&h0327)
' Show each 'character' in the string.
EnumTextElements(s)
' Show the index in the string where each 'character' starts.
EnumTextElementIndexes(s)
End Sub
' Show how to enumerate each real character (honoring surrogates) in a string.
Sub EnumTextElements(s As String)
' This StringBuilder holds the output results.
Dim sb As New StringBuilder()
' Use the enumerator returned from GetTextElementEnumerator
' method to examine each real character.
Dim charEnum As TextElementEnumerator = StringInfo.GetTextElementEnumerator(s)
Do While charEnum.MoveNext()
sb.AppendFormat("Character at index {0} is '{1}'{2}",
charEnum.ElementIndex,
charEnum.GetTextElement(),
Environment.NewLine)
Loop
' Show the results.
Console.WriteLine("Result of GetTextElementEnumerator:")
Console.WriteLine(sb)
End Sub
' Show how to discover the index of each real character (honoring surrogates) in a string.
Sub EnumTextElementIndexes(s As String)
' This StringBuilder holds the output results.
Dim sb As New StringBuilder()
' Use the ParseCombiningCharacters method to
' get the index of each real character in the string.
Dim textElemIndex() As Integer = StringInfo.ParseCombiningCharacters(s)
' Iterate through each real character showing the character and the index where it was found.
For i As Int32 = 0 To textElemIndex.Length - 1
sb.AppendFormat("Character {0} starts at index {1}{2}",
i, textElemIndex(i), Environment.NewLine)
Next
' Show the results.
Console.WriteLine("Result of ParseCombiningCharacters:")
Console.WriteLine(sb)
End Sub
End Module
' The example displays the following output:
'
' Result of GetTextElementEnumerator:
' Character at index 0 is 'ā̈'
' Character at index 3 is 'b'
' Character at index 4 is 'ç'
'
' Result of ParseCombiningCharacters:
' Character 0 starts at index 0
' Character 1 starts at index 3
' Character 2 starts at index 4
備註
.NET 會將文字元素定義為顯示為單一字元的文字單位,也就是 grapheme。 文字專案可以是基底字元、Surrogate 字組或合併字元序列。 Unicode 標準會將 Surrogate 字組定義為單一抽象字元的自動程式代碼字元表示法,此字元是由兩個程式碼單位的序列所組成,其中配對的第一個單位是高 Surrogate,而第二個則是低 Surrogate。 Unicode 標準會將結合字元序列定義為基底字元和一或多個結合字元的組合。 Surrogate 字組可以代表基底字元或結合字元。
類別 StringInfo 可讓您使用字串做為一系列文字元素,而不是個別 Char 物件。
若要具現化 StringInfo 代表指定字串的 物件,您可以執行下列其中一項動作:
呼叫建 StringInfo(String) 構函式,並傳遞物件要表示為自變數的 StringInfo 字串。
呼叫預設StringInfo()建構函式,並將物件要表示的String字串StringInfo指派給 屬性。
您可以透過兩種方式,在字串中使用個別的文字元素:
藉由列舉每個文字專案。 若要這樣做,您可以呼叫 GetTextElementEnumerator 方法,然後在傳TextElementEnumerator回的對象上重複呼叫 MoveNext 方法,直到方法傳
false
回 為止。呼叫 ParseCombiningCharacters 方法來擷取陣列,其中包含每個文字專案的起始索引。 接著,您可以將這些索引傳遞至 SubstringByTextElements 方法,以擷取個別的文字元素。
下列範例說明在字串中使用文字元素的兩種方式。 它會建立兩個字串:
strCombining
,這是包含三個具有多個 Char 物件的文字元素的阿拉伯字元字串。 第一個文字元素是基底字元 ARABIC LETTER ALEF (U+0627) ,後面接著下方的阿拉伯文 HAMZA (U+0655) 和 ARABIC KASRA (U+0650) 。 第二個文字元素是阿拉伯文字母 HEH (U+0647) ,後面接著阿拉伯文 FATHA (U+064E) 。 第三個文字元素是阿拉伯文字母 BEH (U+0628) ,後面接著阿拉伯文 DAMMATAN (U+064C) 。strSurrogates
,這是包含三個代理字組的字串:希臘文 ACROPHONIC FIVE TALENTS (U+10148) 來自增補多語系平面的 U+20026,以及私人用戶區域的 U+F1001。 每個字元的 UTF-16 編碼方式是 Surrogate 字組,由高 Surrogate 後面接著低 Surrogate 所組成。
每個字串都會由 ParseCombiningCharacters 方法剖析一次,然後再由 GetTextElementEnumerator 方法剖析。 這兩種方法都正確地剖析兩個字串中的文字元素,並顯示剖析作業的結果。
using System;
using System.Globalization;
public class Example
{
public static void Main()
{
// The Unicode code points specify Arabic base characters and
// combining character sequences.
string strCombining = "\u0627\u0655\u0650\u064A\u0647\u064E" +
"\u0627\u0628\u064C";
// The Unicode code points specify private surrogate pairs.
string strSurrogates = Char.ConvertFromUtf32(0x10148) +
Char.ConvertFromUtf32(0x20026) + "a" +
Char.ConvertFromUtf32(0xF1001);
EnumerateTextElements(strCombining);
EnumerateTextElements(strSurrogates);
}
public static void EnumerateTextElements(string str)
{
// Get the Enumerator.
TextElementEnumerator teEnum = null;
// Parse the string using the ParseCombiningCharacters method.
Console.WriteLine("\nParsing with ParseCombiningCharacters:");
int[] teIndices = StringInfo.ParseCombiningCharacters(str);
for (int i = 0; i < teIndices.Length; i++) {
if (i < teIndices.Length - 1)
Console.WriteLine("Text Element {0} ({1}..{2})= {3}", i,
teIndices[i], teIndices[i + 1] - 1,
ShowHexValues(str.Substring(teIndices[i], teIndices[i + 1] -
teIndices[i])));
else
Console.WriteLine("Text Element {0} ({1}..{2})= {3}", i,
teIndices[i], str.Length - 1,
ShowHexValues(str.Substring(teIndices[i])));
}
Console.WriteLine();
// Parse the string with the GetTextElementEnumerator method.
Console.WriteLine("Parsing with TextElementEnumerator:");
teEnum = StringInfo.GetTextElementEnumerator(str);
int teCount = - 1;
while (teEnum.MoveNext()) {
// Displays the current element.
// Both GetTextElement() and Current retrieve the current
// text element. The latter returns it as an Object.
teCount++;
Console.WriteLine("Text Element {0} ({1}..{2})= {3}", teCount,
teEnum.ElementIndex, teEnum.ElementIndex +
teEnum.GetTextElement().Length - 1, ShowHexValues((string)(teEnum.Current)));
}
}
private static string ShowHexValues(string s)
{
string hexString = "";
foreach (var ch in s)
hexString += $"{(ushort)ch:X4} ";
return hexString;
}
}
// The example displays the following output:
// Parsing with ParseCombiningCharacters:
// Text Element 0 (0..2)= 0627 0655 0650
// Text Element 1 (3..3)= 064A
// Text Element 2 (4..5)= 0647 064E
// Text Element 3 (6..6)= 0627
// Text Element 4 (7..8)= 0628 064C
//
// Parsing with TextElementEnumerator:
// Text Element 0 (0..2)= 0627 0655 0650
// Text Element 1 (3..3)= 064A
// Text Element 2 (4..5)= 0647 064E
// Text Element 3 (6..6)= 0627
// Text Element 4 (7..8)= 0628 064C
//
// Parsing with ParseCombiningCharacters:
// Text Element 0 (0..1)= D800 DD48
// Text Element 1 (2..3)= D840 DC26
// Text Element 2 (4..4)= 0061
// Text Element 3 (5..6)= DB84 DC01
//
// Parsing with TextElementEnumerator:
// Text Element 0 (0..1)= D800 DD48
// Text Element 1 (2..3)= D840 DC26
// Text Element 2 (4..4)= 0061
// Text Element 3 (5..6)= DB84 DC01
Imports System.Globalization
Public Module Example
Public Sub Main()
' The Unicode code points specify Arabic base characters and
' combining character sequences.
Dim strCombining As String = ChrW(&H627) & ChrW(&h0655) + ChrW(&H650) &
ChrW(&H64A) & ChrW(&H647) & ChrW(&H64E) & ChrW(&H627) &
ChrW(&H628) & ChrW(&H64C)
' The Unicode code points specify private surrogate pairs.
Dim strSurrogates As String = Char.ConvertFromUtf32(&h10148) +
Char.ConvertFromUtf32(&h20026) + "a" +
Char.ConvertFromUtf32(&hF1001)
EnumerateTextElements(strCombining)
EnumerateTextElements(strSurrogates)
End Sub
Public Sub EnumerateTextElements(str As String)
' Get the Enumerator.
Dim teEnum As TextElementEnumerator = Nothing
' Parse the string using the ParseCombiningCharacters method.
Console.WriteLine()
Console.WriteLine("Parsing with ParseCombiningCharacters:")
Dim teIndices As Integer() = StringInfo.ParseCombiningCharacters(str)
For i As Integer = 0 To teIndices.Length - 1
If i < teIndices.Length - 1 Then
Console.WriteLine("Text Element {0} ({1}..{2})= {3}", i,
TEIndices(i), TEIndices((i + 1)) - 1,
ShowHexValues(str.Substring(TEIndices(i), TEIndices((i + 1)) -
teIndices(i))))
Else
Console.WriteLine("Text Element {0} ({1}..{2})= {3}", i,
teIndices(i), str.Length - 1,
ShowHexValues(str.Substring(teIndices(i))))
End If
Next
Console.WriteLine()
' Parse the string with the GetTextElementEnumerator method.
Console.WriteLine("Parsing with TextElementEnumerator:")
teEnum = StringInfo.GetTextElementEnumerator(str)
Dim TECount As Integer = - 1
While teEnum.MoveNext()
' Prints the current element.
' Both GetTextElement() and Current retrieve the current
' text element. The latter returns it as an Object.
TECount += 1
Console.WriteLine("Text Element {0} ({1}..{2})= {3}", teCount,
teEnum.ElementIndex, teEnum.ElementIndex +
teEnum.GetTextElement().Length - 1, ShowHexValues(CStr(teEnum.Current)))
End While
End Sub
Private Function ShowHexValues(s As String) As String
Dim hexString As String = ""
For Each ch In s
hexString += String.Format("{0:X4} ", Convert.ToUInt16(ch))
Next
Return hexString
End Function
End Module
' The example displays the following output:
' Parsing with ParseCombiningCharacters:
' Text Element 0 (0..2)= 0627 0655 0650
' Text Element 1 (3..3)= 064A
' Text Element 2 (4..5)= 0647 064E
' Text Element 3 (6..6)= 0627
' Text Element 4 (7..8)= 0628 064C
'
' Parsing with TextElementEnumerator:
' Text Element 0 (0..2)= 0627 0655 0650
' Text Element 1 (3..3)= 064A
' Text Element 2 (4..5)= 0647 064E
' Text Element 3 (6..6)= 0627
' Text Element 4 (7..8)= 0628 064C
'
' Parsing with ParseCombiningCharacters:
' Text Element 0 (0..1)= D800 DD48
' Text Element 1 (2..3)= D840 DC26
' Text Element 2 (4..4)= 0061
' Text Element 3 (5..6)= DB84 DC01
'
' Parsing with TextElementEnumerator:
' Text Element 0 (0..1)= D800 DD48
' Text Element 1 (2..3)= D840 DC26
' Text Element 2 (4..4)= 0061
' Text Element 3 (5..6)= DB84 DC01
給呼叫者的注意事項
在內部,類別的 StringInfo 方法會呼叫 類別的 CharUnicodeInfo 方法來判斷字元類別。 從 .NET Framework 4.6.2 開始,字元分類是以 Unicode Standard 8.0.0 版為基礎。 針對 .NET Framework 4 到 .NET Framework 4.6.1,它是以 Unicode Standard 6.3.0 版為基礎。 在 .NET Core 中,它是以 Unicode Standard 8.0.0 版為基礎。
建構函式
StringInfo() |
初始化 StringInfo 類別的新執行個體。 |
StringInfo(String) |
將 StringInfo 類別的新執行個體初始化為指定的字串。 |
屬性
LengthInTextElements |
取得目前 StringInfo 物件中的文字元素數目。 |
String |
取得或設定目前 StringInfo 物件的值。 |