且构网

分享程序员开发的那些事...
且构网 - 分享程序员编程开发的那些事

为什么在这种情况下使用 AsParallel() 比 foreach 慢?

更新时间:2022-10-14 23:05:23

为了进行并行计算,你必须有多个处理器或内核,否则你只是在线程池中排队等待 CPU 的任务.IE.单核机器上的 AsParallel 是顺序的,加上线程池和线程上下文切换的开销.即使在双核机器上,您也可能无法获得两个核心,因为许多其他东西都在同一台机器上运行.

实际上 .AsParallel() 只有当您有长时间运行的带有阻塞操作 (I/O) 的任务时才有用,操作系统可以挂起阻塞线程并让另一个线程运行.

I am extracting data from excel that is in this format

 product1   | unnamedcol2 | product2  | unnamedcol4 | product3  | unnamedcol6 |
-------------------------------------------------------------------------------
 @1foo      |        1.10 | @1foo     |         0.3 | @1foo     |         0.3
 @2foo      |        1.00 | @2foo     |           2 | @2foo     |
 @3foo      |        1.52 | @3foo     |        2.53 | @3foo     |
 @4foo      |        1.47 |           |             | @4foo     |        1.31
 @5foo      |        1.49 |           |             | @5foo     |        1.31

The file uses all 255 fields. Using dapper-dot-net i get the data through this code

IEnumerable<IDictionary<string, object>> excelDataRaw =
                conn.Query(string.Format("select * from {0}", table)).Cast<IDictionary<string, object>>();

I pass this data to these test methods. The data is returned as an IEnumerable of IDictionaries where each key is a product and each value is an IDictionary where each key is a value from the product column and the corresponding value is a value from unnamedcol that is to the right of the product column.

var excelDataRefined = new List<IDictionary<string, IDictionary<string, decimal>>>();
excelDataRefined.Add(new Dictionary<string, IDictionary<string, decimal>>());
excelDataRefined[0].Add( "product", new Dictionary<string, decimal>());
excelDataRefined[0]["product"].Add("@1foo", 1.1m);

The methods:

private static Dictionary<string, IDictionary<string, decimal>> Benchmark_foreach(IEnumerable<IDictionary<string, object>> excelDataRaw)
{
    Console.WriteLine("1. Using foreach");
    var watch = new Stopwatch();
    watch.Start();

    List<string> headers = excelDataRaw.Select(dictionary => dictionary.Keys).First().ToList();
    bool isEven = false;
    List<string> products = headers.Where(h => isEven = !isEven).ToList();
    var dates = new List<IEnumerable<object>>();
    var prices = new List<IEnumerable<object>>();

    foreach (string field in headers)
    {
        string product1 = field;
        if (headers.IndexOf(field) % 2 == 0)
        {
            dates.Add(
                excelDataRaw.AsParallel().AsOrdered().Select(col => col[product1]).Where(row => row != null));
        }

        if (headers.IndexOf(field) % 2 == 1)
        {
            prices.Add(
                excelDataRaw.AsParallel().AsOrdered().Select(col => col[product1] ?? 0m).Take(dates.Last().Count()));
        }
    }

    watch.Stop();
    Console.WriteLine("Rearange the data in: {0}s", watch.Elapsed.TotalSeconds);
    watch.Restart();

    var excelDataRefined = new Dictionary<string, IDictionary<string, decimal>>();
    foreach (IEnumerable<object> datelist in dates)
    {
        decimal num;
        IEnumerable<object> datelist1 = datelist;
        IEnumerable<object> pricelist =
            prices[dates.IndexOf(datelist1)].Select(value => value ?? 0m).Where(
                content => decimal.TryParse(content.ToString(), out num));
        Dictionary<string, decimal> dict =
            datelist1.Zip(pricelist, (k, v) => new { k, v }).ToDictionary(
                x => (string)x.k, x => decimal.Parse(x.v.ToString()));

        if (!excelDataRefined.ContainsKey(products[dates.IndexOf(datelist1)]))
        {
            excelDataRefined.Add(products[dates.IndexOf(datelist1)], dict);
        }
    }

    watch.Stop();
    Console.WriteLine("Zipped the data in: {0}s", watch.Elapsed.TotalSeconds);

    return excelDataRefined;
}

private static Dictionary<string, IDictionary<string, decimal>> Benchmark_AsParallel(IEnumerable<IDictionary<string, object>> excelDataRaw)
{
    Console.WriteLine("2. Using AsParallel().AsOrdered().ForAll");
    var watch = new Stopwatch();
    watch.Start();

    List<string> headers = excelDataRaw.Select(dictionary => dictionary.Keys).First().ToList();
    bool isEven = false;
    List<string> products = headers.Where(h => isEven = !isEven).ToList();
    var dates = new List<IEnumerable<object>>();
    var prices = new List<IEnumerable<object>>();

    headers.AsParallel().AsOrdered().ForAll(
        field =>
        dates.Add(
            excelDataRaw.AsParallel().AsOrdered().TakeWhile(x => headers.IndexOf(field) % 2 == 0).Select(
                col => col[field]).Where(row => row != null).ToList()));
    headers.AsParallel().AsOrdered().ForAll(
        field =>
        prices.Add(
            excelDataRaw.AsParallel().AsOrdered().TakeWhile(x => headers.IndexOf(field) % 2 == 1).Select(
                col => col[field] ?? 0m).Take(256).ToList()));
    dates.RemoveAll(x => x.Count() == 0);
    prices.RemoveAll(x => x.Count() == 0);

    watch.Stop();
    Console.WriteLine("Rearange the data in: {0}s", watch.Elapsed.TotalSeconds);
    watch.Restart();

    var excelDataRefined = new Dictionary<string, IDictionary<string, decimal>>();
    foreach (IEnumerable<object> datelist in dates)
    {
        decimal num;
        IEnumerable<object> datelist1 = datelist;
        IEnumerable<object> pricelist =
            prices[dates.IndexOf(datelist1)].Select(value => value ?? 0m).Where(
                content => decimal.TryParse(content.ToString(), out num));
        Dictionary<string, decimal> dict =
            datelist1.Zip(pricelist, (k, v) => new { k, v }).ToDictionary(
                x => (string)x.k, x => decimal.Parse(x.v.ToString()));

        if (!excelDataRefined.ContainsKey(products[dates.IndexOf(datelist1)]))
        {
            excelDataRefined.Add(products[dates.IndexOf(datelist1)], dict);
        }
    }

    watch.Stop();
    Console.WriteLine("Zipped the data in: {0}s", watch.Elapsed.TotalSeconds);

    return excelDataRefined;
}

private static Dictionary<string, IDictionary<string, decimal>> Benchmark_ForEach(IEnumerable<IDictionary<string, object>> excelDataRaw)
{
    Console.WriteLine("3. Using ForEach");
    var watch = new Stopwatch();
    watch.Start();

    List<string> headers = excelDataRaw.Select(dictionary => dictionary.Keys).First().ToList();
    bool isEven = false;
    List<string> products = headers.Where(h => isEven = !isEven).ToList();
    var dates = new List<IEnumerable<object>>();
    var prices = new List<IEnumerable<object>>();

    headers.ForEach(
        field =>
        dates.Add(
            excelDataRaw.TakeWhile(x => headers.IndexOf(field) % 2 == 0).Select(col => col[field]).Where(
                row => row != null).ToList()));
    headers.ForEach(
        field =>
        prices.Add(
            excelDataRaw.TakeWhile(x => headers.IndexOf(field) % 2 == 1).Select(col => col[field] ?? 0m).
            Take(256).ToList()));
    dates.RemoveAll(x => x.Count() == 0);
    prices.RemoveAll(x => x.Count() == 0);

    watch.Stop();
    Console.WriteLine("Rearange the data in: {0}s", watch.Elapsed.TotalSeconds);
    watch.Restart();

    var excelDataRefined = new Dictionary<string, IDictionary<string, decimal>>();
    foreach (IEnumerable<object> datelist in dates)
    {
        decimal num;
        IEnumerable<object> datelist1 = datelist;
        IEnumerable<object> pricelist =
            prices[dates.IndexOf(datelist1)].Select(value => value ?? 0m).Where(
                content => decimal.TryParse(content.ToString(), out num));
        Dictionary<string, decimal> dict =
            datelist1.Zip(pricelist, (k, v) => new { k, v }).ToDictionary(
                x => (string)x.k, x => decimal.Parse(x.v.ToString()));

        if (!excelDataRefined.ContainsKey(products[dates.IndexOf(datelist1)]))
        {
            excelDataRefined.Add(products[dates.IndexOf(datelist1)], dict);
        }
    }

    watch.Stop();
    Console.WriteLine("Zipped the data in: {0}s", watch.Elapsed.TotalSeconds);

    return excelDataRefined;
}

  • Benchmark_foreach needs app. 3,5s to rearrange and 3s to zip the data.
  • Benchmark_AsParallel needs app. 12s to rearrange and 0,005s to zip the data.
  • Benchmark_ForEach needs app. 16s to rearrange and 0,005s to zip the data.

Why does it behave like this? I expected AsParallel to be the fastest because it executes in parallel instead of sequential. Ho do i optimize this?

In order for parallel computation to happen you have to have multiple processors or cores, otherwise you are just queueing up tasks in the threadpool waiting for the CPU. I.e. AsParallel on a single core machine is sequential plus the overhead of threadpool and thread context switch. Even on a two core machine, you may not get both cores, since lots of other things are running on the same machine.

Really .AsParallel() only becomes useful if you have long running tasks with blocking operations (I/O) where the OS can suspend the blocking thread and let another one run.