JuliaData · nalimilan · Jun 12, 2022 · Mar 27, 2022 · Mar 27, 2022 · Apr 8, 2022
diff --git a/NEWS.md b/NEWS.md
@@ -36,6 +36,9 @@
 * Add `allcombinations` function that returns a data frame created
   from all combinations of the passed vectors
   ([#3031](https://github.com/JuliaData/DataFrames.jl/pull/3031))
+* New `multithreaded` argument allows disabling multithreading in
+  `combine`, `select`, `select!`, `transform`, `transform!`, `subset` and `subset!`
+  ([#3030](https://github.com/JuliaData/DataFrames.jl/pull/3030))
 
 ## Previously announced breaking changes
 

diff --git a/docs/src/lib/functions.md b/docs/src/lib/functions.md
@@ -4,10 +4,13 @@ CurrentModule = DataFrames
 
 # Functions
 
-## Multi-threading support
+## Multithreading support
 
-Selected operations in DataFrames.jl automatically use multiple threads when available.
-It is task-based and implemented using the `@spawn` macro from Julia Base.
+By default, selected operations in DataFrames.jl automatically use multiple threads
+when available. It is task-based and implemented using the `@spawn` macro from Julia Base.
+Functions that take user-defined functions and may run it in parallel
+accept a `multithreaded` keyword argument which allows disabling multithreading
+when the provided function requires serial execution or is not thread-safe.
 
 This is a list of operations that currently make use of multi-threading:
 - `DataFrame` constructor with `copycols=true`; also recursively all functions

diff --git a/docs/src/lib/internals.md b/docs/src/lib/internals.md
@@ -16,6 +16,8 @@ getmaxwidths
 ourshow
 ourstrwidth
 @spawn_for_chunks
+@spawn_or_run_task
+@spawn_or_run
 default_table_transformation
 isreadonly
 ```
diff --git a/src/abstractdataframe/reshape.jl b/src/abstractdataframe/reshape.jl
@@ -200,15 +200,15 @@ end
     unstack(df::AbstractDataFrame, rowkeys, colkey, value;
             renamecols::Function=identity, allowmissing::Bool=false,
             allowduplicates::Bool=false, valuestransform=nothing,
-            fill=missing)
+            fill=missing, multithreaded::Bool=true)
     unstack(df::AbstractDataFrame, colkey, value;
             renamecols::Function=identity, allowmissing::Bool=false,
             allowduplicates::Bool=false, valuestransform=nothing,
-            fill=missing)
+            fill=missing, multithreaded::Bool=true)
     unstack(df::AbstractDataFrame;
             renamecols::Function=identity, allowmissing::Bool=false,
             allowduplicates::Bool=false, valuestransform=nothing,
-            fill=missing)
+            fill=missing, multithreaded::Bool=true)
 
 Unstack data frame `df`, i.e. convert it from long to wide format.
 
@@ -244,6 +244,10 @@ Row and column keys will be ordered in the order of their first appearance.
   default is `missing`. If the `value` column is a `CategoricalVector` and
   `fill` is not `missing` then in order to keep unstacked value columns also
   `CategoricalVector` the `fill` must be passed as `CategoricalValue`
+  - `multithreaded`: whether `valuestransform` may be run in separate tasks which
+  can execute in parallel (possibly being applied to multiple groups at the same time).
+  Whether or not tasks are actually spawned and their number are determined automatically.
+  Set to `false` if `valuestransform` requires serial execution or is not thread-safe.
 
 # Examples
 
@@ -396,7 +400,8 @@ julia> unstack(df, :cols, :values, valuestransform=sum)
 function unstack(df::AbstractDataFrame, rowkeys, colkey::ColumnIndex,
                  values::ColumnIndex; renamecols::Function=identity,
                  allowmissing::Bool=false, allowduplicates::Bool=false,
-                 valuestransform=nothing, fill=missing)
+                 valuestransform=nothing, fill=missing,
+                 multithreaded::Bool=true)
     # first make sure that rowkeys are unique and
     # normalize all selectors as a strings
     # if some of the selectors are wrong we will get an early error here
@@ -428,7 +433,8 @@ function unstack(df::AbstractDataFrame, rowkeys, colkey::ColumnIndex,
             # Ref that will get unwrapped by combine
             agg_fun = Ref∘valuestransform
         end
-        df_op = combine(gdf, values => agg_fun => values_out)
+        df_op = combine(gdf, values => agg_fun => values_out,
+                        multithreaded=multithreaded)
 
         group_rows = find_group_row(gdf)
         if !issorted(group_rows)
@@ -452,19 +458,23 @@ end
 function unstack(df::AbstractDataFrame, colkey::ColumnIndex, values::ColumnIndex;
                  renamecols::Function=identity,
                  allowmissing::Bool=false, allowduplicates::Bool=false,
-                 valuestransform=nothing, fill=missing)
+                 valuestransform=nothing, fill=missing,
+                 multithreaded::Bool=true)
     colkey_int = index(df)[colkey]
     value_int = index(df)[values]
     return unstack(df, Not(colkey_int, value_int), colkey_int, value_int,
             renamecols=renamecols, allowmissing=allowmissing,
-            allowduplicates=allowduplicates, valuestransform=valuestransform, fill=fill)
+            allowduplicates=allowduplicates, valuestransform=valuestransform,
+            fill=fill, multithreaded=multithreaded)
 end
 
 unstack(df::AbstractDataFrame; renamecols::Function=identity,
         allowmissing::Bool=false, allowduplicates::Bool=false,
-        valuestransform=nothing, fill=missing) =
+        valuestransform=nothing, fill=missing,
+        multithreaded::Bool=true) =
     unstack(df, :variable, :value, renamecols=renamecols, allowmissing=allowmissing,
-            allowduplicates=allowduplicates, valuestransform=valuestransform, fill=fill)
+            allowduplicates=allowduplicates, valuestransform=valuestransform,
+            fill=fill, multithreaded=multithreaded)
 
 # we take into account the fact that idx, starts and ends are computed lazily
 # so we rather directly reference the gdf.groups