在现代软件开发中,硬件资源的利用越来越受到重视,尤其是CPU核心的利用。本文将介绍如何使用.NET的Task Parallel Library (TPL)来提高计算性能,并通过一个简单的报告计算示例,展示顺序计算和并行计算的性能对比。
本项目的目标是利用TPL来加速一个简单报告的计算性能。示例中使用了LINQ的group by操作,然后使用TPL进行并行计算。代码首先执行一次典型的顺序计算,并报告所花费的时间;紧接着,以并行方式运行相同的计算,并展示结果。
由于本项目面向中级开发者,因此不会详细介绍代码的某些方面。读者应具备对LINQ、谓词以及TPL的较好理解。
项目使用了一个抽象类。这样做是为了避免重复编写计时和计算代码:否则同样的代码至少要写两遍,如果足够大胆、还想尝试第三种方案,就要写三遍。派生类需要重写的方法是StartCalculations,其余代码负责计时,以便比较各种方法的耗时。
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
namespace ParallelClass
{
/// <summary>
/// Base class for the report-calculation strategies being compared.
/// It owns the timing logic; derived classes supply the actual work by
/// overriding <see cref="StartCalculations"/> and a display <see cref="Name"/>.
/// </summary>
public abstract class ReportCalculations
{
    // Duration of the most recent Begin() run, in milliseconds.
    private long _elapsedTime;

    /// <summary>
    /// Wall-clock duration of the most recent <see cref="Begin"/> call, in
    /// milliseconds. Zero until <see cref="Begin"/> has completed once.
    /// </summary>
    public long Elapsed_ms
    {
        get { return _elapsedTime; }
    }

    /// <summary>
    /// Runs <see cref="StartCalculations"/> over the supplied groups and records
    /// how long it took in <see cref="Elapsed_ms"/>.
    /// </summary>
    /// <param name="companyGroups">
    /// Records grouped by company and, within each company, by transaction code.
    /// </param>
    public void Begin(IEnumerable<IGrouping<int, IGrouping<int, CompanyInfo>>> companyGroups)
    {
        var sw = Stopwatch.StartNew();
        StartCalculations(companyGroups);
        sw.Stop();

        // Only updated when the calculation finishes without throwing.
        _elapsedTime = sw.ElapsedMilliseconds;
    }

    /// <summary>Display name of the strategy, used when reporting timings.</summary>
    public virtual string Name
    {
        get { return "Generic Report Class"; }
    }

    /// <summary>
    /// Performs the calculation being timed. The base implementation does
    /// nothing; derived classes override it.
    /// </summary>
    /// <param name="companyGroups">
    /// Records grouped by company and, within each company, by transaction code.
    /// </param>
    public virtual void StartCalculations(IEnumerable<IGrouping<int, IGrouping<int, CompanyInfo>>> companyGroups)
    {
    }
}
}
其余代码相当直接。Process类接受一个int参数,定义了要在数据库中初始化的交易行数。示例中的数据库只是一个CompanyInfo类的集合。在抽象类中,你会注意到它传递了一个有趣的IEnumerable对象。这是下面代码中GetGrouping方法设计的嵌套LINQ查询的结果。GetGrouping方法先按CompanyID、再按TransactionCode对对象进行分组,以便更容易地通过TPL处理多个计算。
PopulateCompanyTransactions方法随机生成所有将被分组和计算的交易。在这个示例中,有两个类派生自抽象类ReportCalculations,它们是MySeq和MyTPL。MySeq使用典型的顺序循环,逐个计算每个交易组;MyTPL则在可能的情况下利用所有CPU核心计算总和。
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
namespace ParallelClass
{
/// <summary>
/// A single transaction record. The same shape is also used to hold the
/// computed total for one company / transaction-code pair.
/// </summary>
public class CompanyInfo
{
    /// <summary>Identifier of the company the record belongs to.</summary>
    public int CompanyId
    {
        get;
        set;
    }

    /// <summary>Code identifying the kind of transaction.</summary>
    public int TransactionCode
    {
        get;
        set;
    }

    /// <summary>Amount of the transaction, or the summed amount for a total row.</summary>
    public decimal Amount
    {
        get;
        set;
    }
}
/// <summary>
/// Drives the comparison: generates random transactions, groups them, then
/// runs every calculation strategy over the same grouped data and prints the
/// time each one took.
/// </summary>
public class Process
{
    /// <summary>
    /// Generates <paramref name="recordsToProcess"/> random records, runs the
    /// sequential and TPL calculations over them, writes "name : milliseconds"
    /// for each to the console, and then waits for a line of console input.
    /// </summary>
    /// <param name="recordsToProcess">Number of random transaction records to generate.</param>
    public Process(int recordsToProcess)
    {
        var rec = PopulateCompanyTransactions(recordsToProcess);
        var grouping = GetGrouping(rec);
        var calcClasses = new List<ReportCalculations> { new MySeq(), new MyTPL() };
        foreach (var calc in calcClasses)
        {
            calc.Begin(grouping);
            Console.WriteLine("{0} : {1}", calc.Name, calc.Elapsed_ms);
        }
        Console.ReadLine();
    }

    // Group records by Company then by Transaction.
    // The query is materialized before it is returned: a deferred query would
    // be re-executed on every enumeration, so each timed Begin() call would
    // also pay for the whole (sequential) grouping pass and the comparison
    // between strategies would be skewed.
    private static IEnumerable<IGrouping<int, IGrouping<int, CompanyInfo>>> GetGrouping(IEnumerable<CompanyInfo> companyInfos)
    {
        var query = from company in companyInfos
                    group company by company.CompanyId into companyGroup
                    from transactionGroup in (from company in companyGroup
                                              group company by company.TransactionCode)
                    group transactionGroup by companyGroup.Key;
        return query.ToList();
    }

    // Populate record values with random data
    private static List<CompanyInfo> PopulateCompanyTransactions(int totalRecords)
    {
        var rnd = new Random();

        // Presize to avoid repeated reallocation for large record counts;
        // a non-positive count still yields an empty list, as before.
        var companyInfo = new List<CompanyInfo>(Math.Max(totalRecords, 0));
        for (int count = 0; count < totalRecords; count++)
        {
            companyInfo.Add(new CompanyInfo
            {
                Amount = (decimal)(rnd.Next(-50, 1000) * rnd.NextDouble()),
                CompanyId = rnd.Next(0, 100),
                TransactionCode = rnd.Next(100, 120)
            });
        }
        return companyInfo;
    }
}
/// <summary>
/// Sequential strategy: walks every company group and every transaction group
/// on the calling thread and stores one total row per transaction group.
/// </summary>
public class MySeq : ReportCalculations
{
    // One summary row per (company, transaction code) pair processed so far.
    private readonly List<CompanyInfo> _totals = new List<CompanyInfo>();

    /// <summary>Display name of this strategy.</summary>
    public override string Name
    {
        get { return "Sequential"; }
    }

    /// <summary>
    /// Sums the amounts of each transaction group, one group after another,
    /// and appends a total row for each group to the internal list.
    /// </summary>
    /// <param name="companyGroups">
    /// Records grouped by company and, within each company, by transaction code.
    /// </param>
    public override void StartCalculations(IEnumerable<IGrouping<int, IGrouping<int, CompanyInfo>>> companyGroups)
    {
        foreach (var companyGroup in companyGroups)
        {
            int companyId = companyGroup.Key;
            foreach (var transactionGroup in companyGroup)
            {
                var summary = new CompanyInfo();
                summary.Amount = transactionGroup.Sum(entry => entry.Amount);
                summary.CompanyId = companyId;
                summary.TransactionCode = transactionGroup.Key;
                _totals.Add(summary);
            }
        }
    }
}
/// <summary>
/// Parallel strategy: for each company, the transaction groups are processed
/// concurrently with the Task Parallel Library, and the amounts inside each
/// group are summed with a partition-local parallel loop.
/// </summary>
public class MyTPL : ReportCalculations
{
    // One summary row per (company, transaction code) pair processed so far.
    // List<T> is not safe for concurrent writers, so every Add goes through
    // _totalsLock. Rows are appended in completion order, not input order.
    private readonly List<CompanyInfo> _totals = new List<CompanyInfo>();
    private readonly object _totalsLock = new object();

    /// <summary>Display name of this strategy.</summary>
    public override string Name
    {
        get { return "TPL"; }
    }

    /// <summary>
    /// Walks the company groups in order and processes the transaction groups
    /// of each company in parallel.
    /// </summary>
    /// <param name="companyGroups">
    /// Records grouped by company and, within each company, by transaction code.
    /// </param>
    public override void StartCalculations(IEnumerable<IGrouping<int, IGrouping<int, CompanyInfo>>> companyGroups)
    {
        foreach (var firstGroup in companyGroups)
        {
            int companyId = firstGroup.Key;
            Parallel.ForEach(firstGroup, transactionGroup => Calculate(transactionGroup, companyId));
        }
    }

    // TPL Parallel method.
    // Sums the amounts of one transaction group and records the total.
    // Each worker accumulates into its own partition-local subtotal, and the
    // subtotals are merged under a lock: "total += x" on a shared decimal from
    // several threads is a non-atomic read-modify-write and loses updates.
    private void Calculate(IGrouping<int, CompanyInfo> grouping, int companyID)
    {
        decimal total = 0;
        var totalLock = new object();

        Parallel.ForEach<CompanyInfo, decimal>(
            grouping,
            () => 0m,
            (item, loopState, subtotal) => subtotal + item.Amount,
            subtotal =>
            {
                lock (totalLock)
                {
                    total += subtotal;
                }
            });

        // Parallel.ForEach has returned, so every subtotal has been merged.
        var summary = new CompanyInfo
        {
            Amount = total,
            CompanyId = companyID,
            TransactionCode = grouping.Key
        };

        // Calculate itself runs concurrently for different transaction groups.
        lock (_totalsLock)
        {
            _totals.Add(summary);
        }
    }
}
}
最初,并没有期望两种计算方式的结果会有太大差异。当时的假设是:当底层的线程处理程序进行检查时,上一个线程已经执行完毕——在交易规模有限的情况下,情况似乎确实如此。经验表明,当交易组包含足够多的行,以至于线程在处理程序检查时仍然处于活动状态时,性能会有巨大的提升。在这个示例中,这种提升从大约100万笔交易开始出现。在使用1000万笔交易时,性能提升了23%-25%;但在交易数低于100万时,并行计算的结果反而要慢得多。