[C#.NET][LINQ] List 與 Dictionary 查詢效能比較

  • 15510
  • 0
  • LINQ
  • 2021-07-18

[C#.NET][LINQ] LINQ查詢效能比較

相信有拜讀黑大的 當心LINQ搜尋的效能陷阱 文章的人,對於Linq的效能上的陷阱,就應該要更加的小心才是。

我把黑大的Code搬到可以執行單元測試的類別裡,我想測試 Single跟First的差異,想試看看能否為Linq平反一下

public class LinqPerformanceCompare
{
	private static readonly int MAX_NO = 100;
	private int _time = 500;

	public int Time
	{
		get { return _time; }
		set { _time = value; }
	}

	private List<Boo> _pool = new List<Boo>();

	public List<Boo> Pool
	{
		get { return _pool; }
		set { _pool = value; }
	}

	private List<Boo> _shuffled;

	public List<Boo> Shuffled
	{
		get { return _shuffled; }
		set { _shuffled = value; }
	}

	private Dictionary<string, Boo> _dict;

	public Dictionary<string, Boo> Dict
	{
		get { return _dict; }
		set { _dict = value; }
	}

	private Dictionary<string, string> _strDict;

	public Dictionary<string, string> StrDict
	{
		get { return _strDict; }
		set { _strDict = value; }
	}

	private List<Boo> _toFill = new List<Boo>();

	public List<Boo> ToFill
	{
		get { return _toFill; }
		set { _toFill = value; }
	}

	public LinqPerformanceCompare()
	{
		//使用相口同亂數種子確保每次執行之測試資料相同

		Random rnd = new Random(9527); //交給你了,9527
		//建立大量物件集合
		for (int i = 0; i < MAX_NO; i++)
		{
			for (int j = 0; j < rnd.Next(500, 1000); j++)
			{
				this.Pool.Add(new Boo()
				{
					No = i,
					SubNo = j,
					Code = "C" + rnd.Next(1000).ToString("000")
				});
			}
		}

		//打亂排序
		this.Shuffled = this.Pool.OrderBy(o => rnd.Next()).ToList();

		//建立Dictionary
		this.Dict = this.Pool.ToDictionary(
			o => string.Format("{0}\t{1}", o.No, o.SubNo),
			o => o);

		//建立字串Dictionary
		this.StrDict = this.Pool.ToDictionary(
			o => string.Format("{0}\t{1}", o.No, o.SubNo),
			o => o.Code);

		//產生TIMES個待查對象
		for (int i = 0; i < this.Time; i++)
		{
			Boo sample = this.Pool[rnd.Next(_pool.Count)];
			this.ToFill.Add(new Boo()
			{
				No = sample.No,
				SubNo = sample.SubNo
			});
		}
	}
{
    private static readonly int MAX_NO = 100;
    private int _time = 500;

    public int Time
    {
        get { return _time; }
        set { _time = value; }
    }

    private List<Boo> _pool = new List<Boo>();

    public List<Boo> Pool
    {
        get { return _pool; }
        set { _pool = value; }
    }

    private List<Boo> _shuffled;

    public List<Boo> Shuffled
    {
        get { return _shuffled; }
        set { _shuffled = value; }
    }

    private Dictionary<string, Boo> _dict;

    public Dictionary<string, Boo> Dict
    {
        get { return _dict; }
        set { _dict = value; }
    }

    private Dictionary<string, string> _strDict;

    public Dictionary<string, string> StrDict
    {
        get { return _strDict; }
        set { _strDict = value; }
    }

    private List<Boo> _toFill = new List<Boo>();

    public List<Boo> ToFill
    {
        get { return _toFill; }
        set { _toFill = value; }
    }

    public LinqPerformanceCompare()
    {
        //使用相口同亂數種子確保每次執行之測試資料相同

        Random rnd = new Random(9527); //交給你了,9527
        //建立大量物件集合
        for (int i = 0; i < MAX_NO; i++)
        {
            for (int j = 0; j < rnd.Next(500, 1000); j++)
            {
                this.Pool.Add(new Boo()
                {
                    No = i,
                    SubNo = j,
                    Code = "C" + rnd.Next(1000).ToString("000")
                });
            }
        }

        //打亂排序
        this.Shuffled = this.Pool.OrderBy(o => rnd.Next()).ToList();

        //建立Dictionary
        this.Dict = this.Pool.ToDictionary(
            o => string.Format("{0}\t{1}", o.No, o.SubNo),
            o => o);

        //建立字串Dictionary
        this.StrDict = this.Pool.ToDictionary(
            o => string.Format("{0}\t{1}", o.No, o.SubNo),
            o => o.Code);

        //產生TIMES個待查對象
        for (int i = 0; i < this.Time; i++)
        {
            Boo sample = this.Pool[rnd.Next(_pool.Count)];
            this.ToFill.Add(new Boo()
            {
                No = sample.No,
                SubNo = sample.SubNo
            });
        }
    }

    public string GetCode1(int n, int sn)
    {
        return this.Pool.Single(o => o.No == n && o.SubNo == sn).Code;
    }

    public string GetCode2(int n, int sn)
    {
        return this.Shuffled.Single(o => o.No == n && o.SubNo == sn).Code;
    }

    public string GetCode3(int n, int sn)
    {
        return this.Dict[string.Format("{0}\t{1}", n, sn)].Code;
    }

    public string GetCode4(int n, int sn)
    {
        return this.StrDict[string.Format("{0}\t{1}", n, sn)];
    }
}

 

 單元測試,為了觀察每次查詢的時間,我利用驅動測試來增加測試次數並觀察每一次的測試結果 

public void GetCode04Test()
{
    var count = TestContext.DataRow[0].ToString();
    for (int j = 0; j < _target.Time; j++)
    {
        var boo = _target.ToFill[j];
        boo.Code = _target.GetCode5(boo.No, boo.SubNo);
    }

    //挑選三個抽樣檢查執行結果是否一致
    var expected = _target.ToFill;
    var actual = new string[] { "C183", "C436", "C560" };
    Assert.AreEqual(expected[1].Code, actual[0]);
    Assert.AreEqual(expected[10].Code, actual[1]);
    Assert.AreEqual(expected[50].Code, actual[2]);
}

 

測試文件長這樣:

image

 

先測試搬過來的結果跟黑大的比較一下,証明類別沒有寫錯,很顯然結果就是跟黑大講的一樣,Linq跟Dictionary查詢效能差很多。

SNAGHTML5ad3b77

 

 

再新加了兩個調用First的方法

public string GetCode5(int n, int sn)
{
	return this.Pool.First(o => o.No == n && o.SubNo == sn).Code;
}

public string GetCode6(int n, int sn)
{
	return this.Shuffled.First(o => o.No == n && o.SubNo == sn).Code;
}

 

大夥兒再比一次,First 執行出來的結果比 Single 看起來好多了,但跟 Dictionary 比還是差很多

SNAGHTML5adc262

 

用PLINQ亂入一下

public string GetCode7(int n, int sn)
{
	return this.Pool.AsParallel().Single(o => o.No == n && o.SubNo == sn).Code;
}

public string GetCode8(int n, int sn)
{
	return this.Shuffled.AsParallel().Single(o => o.No == n && o.SubNo == sn).Code;
}

public string GetCode9(int n, int sn)
{
	return this.Pool.AsParallel().First(o => o.No == n && o.SubNo == sn).Code;
}

public string GetCode10(int n, int sn)
{
	return this.Shuffled.AsParallel().First(o => o.No == n && o.SubNo == sn).Code;
}

 

  平行運算在我電腦看來也沒太大差異。認識 PLINQ 中的加速 

SNAGHTML5cf1478

 

把Time提升到1000再比一次,仍是相差無幾

SNAGHTML5ccb059

 

結論仍是引用黑大講的:"依賴LINQ的Where查詢在大量資料中反覆查詢,很容易形成效能瓶頸。面對類似需求,可透過ToDictionary()簡單轉換成Dictionary,即可獲得可觀的效能提升。"


範例下載:

LinqPerformance

若有謬誤,煩請告知,新手發帖請多包涵


Microsoft MVP Award 2010~2017 C# 第四季
Microsoft MVP Award 2018~2022 .NET

Image result for microsoft+mvp+logo