
After switching a personal solution from .NET 6 to .NET 7, the time to read a large amount of data went from 18 s to about 4 min 30 s.

Before starting development, I wrote a test function that lets me check that the critical path has no big bottlenecks.

// Requires: using System.Diagnostics; using System.Security.Cryptography;
private void SpeedTest()
{
    int nbdata = 6000000;
    List<int> list = new(nbdata);
    var rnd = RandomNumberGenerator.Create();
    for (int i = 0; i < nbdata; i++)
    {
        var rnddata = new byte[sizeof(int)];
        rnd.GetBytes(rnddata);
        list.Add(BitConverter.ToInt32(rnddata));
    }
    int[] arr = list.ToArray();

    //Begin test
    int chk = 0;
    Stopwatch watch = Stopwatch.StartNew();
    for (int rpt = 0; rpt < 100; rpt++)
    {
        int len = list.Count;
        for (int i = 0; i < len; i++)
        {
            chk += list[i];
        }
    }
    watch.Stop();
    SpeedText.Text += string.Format("List/for Count out: {0}ms ({1})", watch.ElapsedMilliseconds, chk) + Environment.NewLine;

    chk = 0;
    watch = Stopwatch.StartNew();
    for (int rpt = 0; rpt < 100; rpt++)
    {
        for (int i = 0; i < list.Count; i++)
        {
            chk += list[i];
        }
    }
    watch.Stop();
    SpeedText.Text += string.Format("List/for Count in: {0}ms ({1})", watch.ElapsedMilliseconds, chk) + Environment.NewLine;

    chk = 0;
    watch = Stopwatch.StartNew();
    for (int rpt = 0; rpt < 100; rpt++)
    {
        int len = arr.Length;
        for (int i = 0; i < len; i++)
        {
            chk += arr[i];
        }
    }
    watch.Stop();
    SpeedText.Text += string.Format("Array/for Count out: {0}ms ({1})", watch.ElapsedMilliseconds, chk) + Environment.NewLine;

    chk = 0;
    watch = Stopwatch.StartNew();
    for (int rpt = 0; rpt < 100; rpt++)
    {
        for (int i = 0; i < arr.Length; i++)
        {
            chk += arr[i];
        }
    }
    watch.Stop();
    SpeedText.Text += string.Format("Array/for Count in: {0}ms ({1})", watch.ElapsedMilliseconds, chk) + Environment.NewLine;

    chk = 0;
    watch = Stopwatch.StartNew();
    for (int rpt = 0; rpt < 100; rpt++)
    {
        int k = list.Count;
        for (int j = 0; j < k; j++)
        {
            chk += list[j];
        }
    }
    watch.Stop();
    SpeedText.Text += string.Format("List/for: {0}ms ({1})", watch.ElapsedMilliseconds, chk) + Environment.NewLine;

    chk = 0;
    watch = Stopwatch.StartNew();
    for (int rpt = 0; rpt < 100; rpt++)
    {
        foreach (int i in list)
        {
            chk += i;
        }
    }
    watch.Stop();
    SpeedText.Text += string.Format("List/foreach: {0}ms ({1})", watch.ElapsedMilliseconds, chk) + Environment.NewLine;

    chk = 0;
    watch = Stopwatch.StartNew();
    for (int rpt = 0; rpt < 100; rpt++)
    {
        list.ForEach(i => chk += i);
    }
    watch.Stop();
    SpeedText.Text += string.Format("List/foreach function: {0}ms ({1})", watch.ElapsedMilliseconds, chk) + Environment.NewLine;

    chk = 0;
    watch = Stopwatch.StartNew();
    for (int rpt = 0; rpt < 100; rpt++)
    {
        int k = arr.Length;
        for (int j = 0; j < k; j++)
        {
            chk += arr[j];
        }
    }
    watch.Stop();
    SpeedText.Text += string.Format("Array/for: {0}ms ({1})", watch.ElapsedMilliseconds, chk) + Environment.NewLine;

    chk = 0;
    watch = Stopwatch.StartNew();
    for (int rpt = 0; rpt < 100; rpt++)
    {
        foreach (int i in arr)
        {
            chk += i;
        }
    }
    watch.Stop();
    SpeedText.Text += string.Format("Array/foreach: {0}ms ({1})", watch.ElapsedMilliseconds, chk) + Environment.NewLine;
}

Result .NET 6:

List/for Count out: 1442ms (398007896)
List/for Count in: 1446ms (398007896)
Array/for Count out: 1256ms (398007896)
Array/for Count in: 1254ms (398007896)
List/for: 1435ms (398007896)
List/foreach: 1258ms (398007896)
List/foreach function: 1452ms (398007896) <=
Array/for: 1255ms (398007896)
Array/foreach: 1254ms (398007896)

Result .NET 7:

List/for Count out: 1483ms (272044760)
List/for Count in: 1489ms (272044760)
Array/for Count out: 1255ms (272044760)
Array/for Count in: 1263ms (272044760)
List/for: 1482ms (272044760)
List/foreach: 1873ms (272044760)
List/foreach function: 7997ms (272044760) <=
Array/for: 1254ms (272044760)
Array/foreach: 1255ms (272044760)

The code behind this issue:

list.ForEach(i => chk += i);
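
For comparison, the same sum written with a plain foreach (the "List/foreach" case in my benchmark above, which degrades much less) would be:

foreach (int i in list)
{
    chk += i;
}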

Is this a problem in .NET 7?

Do I have any hope of finding a solution without having to change all the calls to this function?

I use many other functions that perform better in .NET 7 than in .NET 6, so I would like to stay on this version.

What do you recommend?

Thanks.

EDIT:

I used ForEach in a few places to make the code easier to read. Initially, in .NET 6, the time lost was acceptable. I use tuples with data read from large files.

Example:

listValue.ForEach(x => process((new col(x.name, position++, startId++, x.refState, x.refPosition, x.refTable, x.withoutRef, x.deleted, x.resetData), option)));

foreach ((string name, uint refState, uint refPosition, uint refTable, bool withoutRef, bool deleted, bool resetData) x in listValue)
{
    process((new col(x.name, position++, startId++, x.refState, x.refPosition, x.refTable, x.withoutRef, x.deleted, x.resetData), option));
}

My software is far from being finished and I use public data files to test it:

  • xlsx file with 1,000,000 rows by 14 columns
  • csv file with 10,000,000 rows by 14 columns

I made some changes in my code while switching from .NET 6 to .NET 7 and saw the times increase dramatically on my first test in .NET 7.
So I went back to my original benchmark code to check for changes before revising my entire code.

I think the processing time and code of this benchmark are suitable for making a decision in my case. I just want to see how long the user will have to wait, so I put myself in the same situation as the user. A benchmark with a List of 5,000 elements is not relevant: I work with large Lists, and performance can be affected by their size.

This basic test reveals a big difference between .NET 6 and .NET 7 with the same code (see also: Performance of Arrays vs. Lists).

Here the issue is not how the measurement is made but the result.
No library with potentially different versions is used that could influence the result.

I tested on Windows 10 with a Ryzen 1700 and 16 GB of RAM.

EDIT2:

Project to test:
https://github.com/gandf/TestPerfForEach

Clean and build the project, then run it outside Visual Studio.

Result .NET 6:

Test with 6000000 NbData
List/foreach: 1254ms (2107749308)
List/foreach function: 1295ms (2107749308)
Test with 6000000 NbData
List/foreach: 1259ms (1107007452)
List/foreach function: 1255ms (1107007452)
Test with 6000000 NbData
List/foreach: 1253ms (745733412)
List/foreach function: 1256ms (745733412)
Test with 6000000 NbData
List/foreach: 1253ms (-280872836)
List/foreach function: 1259ms (-280872836)

Result .NET 7:

Test with 6000000 NbData
List/foreach: 1866ms (-998431744)
List/foreach function: 8347ms (-998431744)
Test with 6000000 NbData
List/foreach: 1753ms (715062008)
List/foreach function: 1368ms (715062008)
Test with 6000000 NbData
List/foreach: 1754ms (667927108)
List/foreach function: 1335ms (667927108)
Test with 6000000 NbData
List/foreach: 1749ms (310491380)
List/foreach function: 1366ms (310491380)

Same conditions, with the tests run several times:

  1. .NET 6 is faster.
  2. The issue with list.ForEach appears only on the first run; after that, it is quicker than foreach.

2 Answers


  1. Chosen as BEST ANSWER

    I found the source of this issue. A year ago, I read this: https://devblogs.microsoft.com/dotnet/performance-improvements-in-net-6/

    I had left two options set in my environment variables:

    • DOTNET_ReadyToRun=0
    • DOTNET_TieredPGO=1

    With these parameters, I had noticed a very slight degradation of the first call in .NET 6 and an improvement in the other calls, so I kept them because the impact was negligible.

    So there is a case where the first call takes 6x longer in .NET 7 with these options.
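
    For anyone who wants to check for these leftover variables, here is a minimal C# sketch (an assumption on my part: that the variables were set at user scope on Windows):

    // Inspect and remove the two variables at user scope.
    // Assumption: they were set as user-level environment variables on Windows.
    string[] names = { "DOTNET_ReadyToRun", "DOTNET_TieredPGO" };
    foreach (string name in names)
    {
        string? value = Environment.GetEnvironmentVariable(name, EnvironmentVariableTarget.User);
        Console.WriteLine($"{name} = {value ?? "<not set>"}");
        Environment.SetEnvironmentVariable(name, null, EnvironmentVariableTarget.User); // null removes the variable
    }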

    I just deleted them. Results after reboot:

    .NET 6

    Test with 6000000 NbData
    List/foreach: 1263ms (-1425648688)
    List/foreach function: 1312ms (-1425648688)
    Test with 6000000 NbData
    List/foreach: 1253ms (-1169873892)
    List/foreach function: 1256ms (-1169873892)
    Test with 6000000 NbData
    List/foreach: 1257ms (1528933740)
    List/foreach function: 1256ms (1528933740)
    Test with 6000000 NbData
    List/foreach: 1254ms (-1327641484)
    List/foreach function: 1254ms (-1327641484)
    

    .NET 7

    Test with 6000000 NbData
    List/foreach: 1470ms (991593448)
    List/foreach function: 1411ms (991593448)
    Test with 6000000 NbData
    List/foreach: 1465ms (751941656)
    List/foreach function: 1434ms (751941656)
    Test with 6000000 NbData
    List/foreach: 1470ms (-17227852)
    List/foreach function: 1435ms (-17227852)
    Test with 6000000 NbData
    List/foreach: 1469ms (1422420324)
    List/foreach function: 1437ms (1422420324)
    

    It's fixed.


  2. Using BenchmarkDotNet, I tried re-creating your scenario and then ran it against both .NET 6 and .NET 7.

    I used smaller numbers because the benchmarking tool can take a minute.

    Here’s the code I used:

    using BenchmarkDotNet.Attributes;
    using BenchmarkDotNet.Order;
    using System.Security.Cryptography;
    
    namespace Experiments
    {
        [MemoryDiagnoser]
        [Orderer(SummaryOrderPolicy.FastestToSlowest)]
        [RankColumn]
        //[SimpleJob(BenchmarkDotNet.Jobs.RuntimeMoniker.Net70)]
        public class ForEachBenchmark
        {
            [Params(100, 1_000)]
            public int N;
    
            [Params(5_000)]
            public int NbData;
    
            private int[] arr = Array.Empty<int>();
            private List<int> list = new List<int>();
    
            [GlobalSetup]
            public void Setup()
            {
                arr = new int[NbData];
    
                var rnd = RandomNumberGenerator.Create();
    
                for (int i = 0; i < NbData; i++)
                {
                    var rnddata = new byte[sizeof(int)];
                    rnd.GetBytes(rnddata);
                    arr[i] = BitConverter.ToInt32(rnddata);
                }
    
                list = new List<int>(arr[..N]);
            }
    
            [Benchmark]
            public void ForLoop()
            {
                int chk = 0;
                for (int rpt = 0; rpt < N; rpt++)
                {
                    chk += arr[rpt];
                }
            }
    
            [Benchmark]
            public void ForEachLoop()
            {
                int chk = 0;
                foreach (var rpt in arr[..N])
                {
                    chk += rpt;
                }
            }
    
            [Benchmark]
            public void ListForEachLoop()
            {
                int chk = 0;
                list.ForEach(l => chk += l);
            }
        }
    }
    
    

    Here is the Program.cs on my console app:

    using BenchmarkDotNet.Running;
    
    BenchmarkRunner.Run<ForEachBenchmark>();
    
    

    Here are my results:

    .NET 6

    Method          | N    | NbData | Mean        | Error     | StdDev     | Rank | Gen0   | Allocated
    --------------- | ---- | ------ | ----------- | --------- | ---------- | ---- | ------ | ---------
    ForLoop         | 100  | 5000   | 57.02 ns    | 0.583 ns  | 0.517 ns   | 1    | -      | -
    ForEachLoop     | 100  | 5000   | 118.96 ns   | 2.404 ns  | 3.290 ns   | 2    | 0.1013 | 424 B
    ListForEachLoop | 100  | 5000   | 275.77 ns   | 5.468 ns  | 7.300 ns   | 3    | 0.0210 | 88 B
    ForLoop         | 1000 | 5000   | 611.56 ns   | 9.434 ns  | 9.266 ns   | 4    | -      | -
    ForEachLoop     | 1000 | 5000   | 1,235.28 ns | 30.499 ns | 88.968 ns  | 5    | 0.9613 | 4024 B
    ListForEachLoop | 1000 | 5000   | 2,478.17 ns | 88.920 ns | 249.342 ns | 6    | 0.0191 | 88 B

    .NET 7

    Method          | N    | NbData | Mean        | Error     | StdDev     | Median      | Rank | Gen0   | Allocated
    --------------- | ---- | ------ | ----------- | --------- | ---------- | ----------- | ---- | ------ | ---------
    ForLoop         | 100  | 5000   | 55.41 ns    | 0.907 ns  | 1.080 ns   | 55.22 ns    | 1    | -      | -
    ForEachLoop     | 100  | 5000   | 90.06 ns    | 2.250 ns  | 6.455 ns   | 86.91 ns    | 2    | 0.1013 | 424 B
    ListForEachLoop | 100  | 5000   | 310.84 ns   | 6.278 ns  | 15.399 ns  | 305.42 ns   | 3    | 0.0210 | 88 B
    ForLoop         | 1000 | 5000   | 510.95 ns   | 10.273 ns | 17.720 ns  | 511.14 ns   | 4    | -      | -
    ForEachLoop     | 1000 | 5000   | 792.89 ns   | 27.420 ns | 80.849 ns  | 789.39 ns   | 5    | 0.9613 | 4024 B
    ListForEachLoop | 1000 | 5000   | 2,527.76 ns | 58.979 ns | 168.271 ns | 2,498.65 ns | 6    | 0.0191 | 88 B

    To your point, List.ForEach does appear to have slowed down a bit between the two versions.
    Those numbers are in NANOseconds, so the change is pretty small (~50 ns).
    The other numbers all seem to have improved between versions, and memory allocation held steady.
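
    One caveat about my code above: the chk locals are never consumed, so the JIT is in principle free to optimize parts of the loops away. A safer variant (a sketch, not what was measured above) returns the value so that BenchmarkDotNet consumes it:

    [Benchmark]
    public int ForLoopReturning() // hypothetical variant, not part of the results above
    {
        int chk = 0;
        for (int rpt = 0; rpt < N; rpt++)
        {
            chk += arr[rpt];
        }
        return chk; // returning the sum keeps the loop from being eliminated
    }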
