방법: CSV 텍스트 파일의 열 값 계산(LINQ)
업데이트: 2007년 11월
이 예제에서는 .csv 파일의 열에서 Sum, Average, Min 및 Max와 같은 집계 계산을 수행하는 방법을 보여 줍니다. 이 예제에서 설명하는 원리를 다른 형식의 구조화된 텍스트에 적용할 수 있습니다.
소스 파일을 만들려면
다음 줄을 scores.csv라는 파일에 복사하여 솔루션 폴더에 저장합니다. 첫 번째 열은 학생 ID를 나타내고 그 다음 열들은 4회의 시험 성적을 나타낸다고 가정합니다.
111, 97, 92, 81, 60
112, 75, 84, 91, 39
113, 88, 94, 65, 91
114, 97, 89, 85, 82
115, 35, 72, 91, 70
116, 99, 86, 90, 94
117, 93, 92, 80, 87
118, 92, 90, 83, 78
119, 68, 79, 88, 92
120, 99, 82, 81, 79
121, 96, 85, 91, 60
122, 94, 92, 91, 91
예제
''' <summary>
''' Computes aggregate statistics (average, maximum, minimum) over the
''' score columns of a comma-separated text file.
''' </summary>
Class SumColumns

    ''' <summary>
    ''' Reads scores.csv, prints the statistics for one column and then for
    ''' every score column, and waits for a key press.
    ''' </summary>
    Public Shared Sub Main()
        Dim lines As String() = System.IO.File.ReadAllLines("../../../scores.csv")

        ' Zero-based position of the exam among the score columns
        ' (3 selects Exam #4). This value could be passed in at runtime.
        Dim exam = 3

        ' Spreadsheet format:
        ' Student ID    Exam#1  Exam#2  Exam#3  Exam#4
        ' 111,          97,     92,     81,     60

        ' One is added to skip over the first column,
        ' which holds the student ID.
        SumColumn(lines, exam + 1)
        Console.WriteLine()
        MultiColumns(lines)

        ' Keep the console window open in debug mode.
        Console.WriteLine("Press any key to exit...")
        Console.ReadKey()
    End Sub

    ''' <summary>
    ''' Prints the average, highest and lowest value of a single column.
    ''' </summary>
    ''' <param name="lines">The rows of the file; blank rows are ignored.</param>
    ''' <param name="col">
    ''' Zero-based index of the field to aggregate. It is also printed as the
    ''' exam number, because field 0 holds the student ID.
    ''' </param>
    Shared Sub SumColumn(ByVal lines As IEnumerable(Of String), ByVal col As Integer)
        Console.WriteLine("Single Column Query:")

        ' Skip blank rows, split each remaining row into fields and
        ' convert the requested field to an Integer.
        Dim columnQuery = From line In lines _
                          Where line IsNot Nothing AndAlso line.Trim().Length > 0 _
                          Let x = line.Split(","c) _
                          Select Convert.ToInt32(x(col))

        ' Execute once and cache, so the three aggregates below
        ' do not each re-parse the text.
        Dim results = columnQuery.ToList()

        ' Average, Min and Max throw on an empty sequence, so stop here.
        If results.Count = 0 Then
            Console.WriteLine("No scores found.")
            Return
        End If

        Dim avgScore = Aggregate score In results Into Average(score)
        Dim minScore = Aggregate score In results Into Min(score)
        Dim maxScore = Aggregate score In results Into Max(score)

        Console.WriteLine("Exam #{0}: Average:{1:##.##} High Score:{2} Low Score:{3}", _
                          col, avgScore, maxScore, minScore)
    End Sub

    ''' <summary>
    ''' Prints the average, highest and lowest value of every score column.
    ''' </summary>
    ''' <param name="lines">The rows of the file; blank rows are ignored.</param>
    ''' <remarks>
    ''' The number of score columns is taken from the first non-blank row;
    ''' every other row must have at least that many fields.
    ''' </remarks>
    Shared Sub MultiColumns(ByVal lines As IEnumerable(Of String))
        Console.WriteLine("Multi Column Query:")

        ' For each non-blank row: split into fields, skip the "Student ID"
        ' field and convert the rest to Integers. ToArray materializes each
        ' row so the conversion runs exactly once per field; without it the
        ' inner sequences stay lazy and are re-parsed on every access.
        Dim results = (From line In lines _
                       Where line IsNot Nothing AndAlso line.Trim().Length > 0 _
                       Select line.Split(","c) _
                                  .Skip(1) _
                                  .Select(Function(field) Convert.ToInt32(field)) _
                                  .ToArray()).ToList()

        ' Nothing to aggregate; results(0) below would throw.
        If results.Count = 0 Then
            Console.WriteLine("No scores found.")
            Return
        End If

        ' Find out how many score columns we have.
        Dim columnCount = results(0).Length

        ' One pass per score column.
        For j As Integer = 0 To columnCount - 1
            ' Copy the loop variable so the lambda captures a per-iteration value.
            Dim column = j

            ' Materialize the column once; it feeds three aggregates.
            Dim scores = results.Select(Function(row) row(column)).ToList()

            Dim avgScore = Aggregate score In scores Into Average(score)
            Dim minScore = Aggregate score In scores Into Min(score)
            Dim maxScore = Aggregate score In scores Into Max(score)

            ' Add 1 to column numbers because exams in this course start with #1
            Console.WriteLine("Exam #{0} Average: {1:##.##} High Score: {2} Low Score: {3}", _
                              column + 1, avgScore, maxScore, minScore)
        Next
    End Sub
End Class
' Output:
' Single Column Query:
' Exam #4: Average:76.92 High Score:94 Low Score:39
' Multi Column Query:
' Exam #1 Average: 86.08 High Score: 99 Low Score: 35
' Exam #2 Average: 86.42 High Score: 94 Low Score: 72
' Exam #3 Average: 84.75 High Score: 91 Low Score: 65
' Exam #4 Average: 76.92 High Score: 94 Low Score: 39
/// <summary>
/// Computes aggregate statistics (average, maximum, minimum) over the
/// score columns of a comma-separated text file.
/// </summary>
class SumColumns
{
    /// <summary>
    /// Reads scores.csv, prints the statistics for one column and then for
    /// every score column, and waits for a key press.
    /// </summary>
    static void Main(string[] args)
    {
        string[] lines = System.IO.File.ReadAllLines(@"../../../scores.csv");

        // Zero-based position of the exam among the score columns
        // (3 selects Exam #4).
        int exam = 3;

        // Spreadsheet format:
        // Student ID    Exam#1  Exam#2  Exam#3  Exam#4
        // 111,          97,     92,     81,     60

        // One is added to skip over the first column,
        // which holds the student ID.
        SingleColumn(lines, exam + 1);
        Console.WriteLine();
        MultiColumns(lines);

        Console.WriteLine("Press any key to exit");
        Console.ReadKey();
    }

    /// <summary>
    /// Prints the average, highest and lowest value of a single column.
    /// </summary>
    /// <param name="strs">The rows of the file; blank rows are ignored.</param>
    /// <param name="examNum">
    /// Zero-based index of the field to aggregate. It is also printed as the
    /// exam number, because field 0 holds the student ID.
    /// </param>
    static void SingleColumn(IEnumerable<string> strs, int examNum)
    {
        Console.WriteLine("Single Column Query:");

        // Skip blank rows, split each remaining row into fields and
        // convert the requested field to an int.
        var columnQuery =
            from line in strs
            where line != null && line.Trim().Length > 0
            let x = line.Split(',')
            select Convert.ToInt32(x[examNum]);

        // Execute once and cache, so the three aggregates below
        // do not each re-parse the text.
        var results = columnQuery.ToList();

        // Average, Max and Min throw on an empty sequence, so stop here.
        if (results.Count == 0)
        {
            Console.WriteLine("No scores found.");
            return;
        }

        double average = results.Average();
        int max = results.Max();
        int min = results.Min();

        Console.WriteLine("Exam #{0}: Average:{1:##.##} High Score:{2} Low Score:{3}",
                          examNum, average, max, min);
    }

    /// <summary>
    /// Prints the average, highest and lowest value of every score column.
    /// </summary>
    /// <param name="strs">The rows of the file; blank rows are ignored.</param>
    /// <remarks>
    /// The number of score columns is taken from the first non-blank row;
    /// every other row must have at least that many fields.
    /// </remarks>
    static void MultiColumns(IEnumerable<string> strs)
    {
        Console.WriteLine("Multi Column Query:");

        // For each non-blank row: split into fields, skip the "Student ID"
        // field and convert the rest to ints. ToArray materializes each row
        // so the conversion runs exactly once per field; without it the
        // inner sequences stay lazy and are re-parsed on every access.
        List<int[]> results =
            (from line in strs
             where line != null && line.Trim().Length > 0
             select line.Split(',')
                        .Skip(1)
                        .Select(field => Convert.ToInt32(field))
                        .ToArray())
            .ToList();

        // Nothing to aggregate; results[0] below would throw.
        if (results.Count == 0)
        {
            Console.WriteLine("No scores found.");
            return;
        }

        // Find out how many score columns we have.
        int columnCount = results[0].Length;

        // One pass per score column.
        for (int column = 0; column < columnCount; column++)
        {
            // Copy the loop variable so the lambda captures a per-iteration value.
            int index = column;

            // Materialize the column once; it feeds three aggregates.
            List<int> scores = results.Select(row => row[index]).ToList();

            double average = scores.Average();
            int max = scores.Max();
            int min = scores.Min();

            // 1 is added to column because exam numbers begin with 1.
            Console.WriteLine("Exam #{0} Average: {1:##.##} High Score: {2} Low Score: {3}",
                              column + 1, average, max, min);
        }
    }
}
/* Output:
Single Column Query:
Exam #4: Average:76.92 High Score:94 Low Score:39
Multi Column Query:
Exam #1 Average: 86.08 High Score: 99 Low Score: 35
Exam #2 Average: 86.42 High Score: 94 Low Score: 72
Exam #3 Average: 84.75 High Score: 91 Low Score: 65
Exam #4 Average: 76.92 High Score: 94 Low Score: 39
*/
파일이 탭으로 구분되어 있는 경우에는 Split 메서드의 인수를 '\t'로 바꿉니다.
코드 컴파일
.NET Framework 버전 3.5를 대상으로 하는 Visual Studio 프로젝트를 만듭니다. 기본적으로 프로젝트에는 System.Core.dll에 대한 참조 및 System.Linq 네임스페이스에 대한 using 지시문(C#) 또는 Imports 문(Visual Basic)이 있습니다.
프로젝트에 이 코드를 복사합니다.
F5 키를 눌러 프로그램을 컴파일하고 실행합니다.
아무 키나 눌러 콘솔 창을 닫습니다.