diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5715081c5c3ee7ec8b495bc53b7d7772e272ddc8..ecbc8b745f36195b957f6654ab89b466714b1f0e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,3 +3,4 @@
 * added `write_json_to_file`
 * added `read_json_from_file`
 * added `write_json_to_file_if_different`
+* added `satella.distutils`
diff --git a/docs/distutils.rst b/docs/distutils.rst
new file mode 100644
index 0000000000000000000000000000000000000000..552c4b9d5a5d8e73b5db34cc421c825603606dc2
--- /dev/null
+++ b/docs/distutils.rst
@@ -0,0 +1,5 @@
+Distutils extensions
+====================
+
+.. autofunction:: satella.distutils.monkey_patch_parallel_compilation
+
diff --git a/docs/index.rst b/docs/index.rst
index 7a13933cb0d8dd74e3500d4710d4380590bec94c..a7eaa31a73716c897f97c2722df5b4f56d493a7d 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -38,6 +38,7 @@ Visit the project's page at GitHub_!
            processes
            cassandra
            opentracing
+           distutils
 
 
 Indices and tables
diff --git a/satella/__init__.py b/satella/__init__.py
index 702d0b12c8b6dbd385e2b9a9af9777a7f59d52b0..16d5df042c3cda7808e278181222b589f56eab59 100644
--- a/satella/__init__.py
+++ b/satella/__init__.py
@@ -1 +1 @@
-__version__ = '2.14.23_a2'
+__version__ = '2.14.23_a3'
diff --git a/satella/distutils.py b/satella/distutils.py
new file mode 100644
index 0000000000000000000000000000000000000000..0a048b9623947cf3490a220df1e1d6e149fd5a54
--- /dev/null
+++ b/satella/distutils.py
@@ -0,0 +1,43 @@
+import typing as tp
+import multiprocessing
+
+__all__ = ['monkey_patch_parallel_compilation']
+
+
+def monkey_patch_parallel_compilation(cores: tp.Optional[int] = None) -> None:
+    """
+    Monkey-patch distutils so that the sources of an extension are compiled in parallel,
+    even if you have a single extension built from multiple .c files.
+
+    Invoke in your setup.py file. This replaces distutils.ccompiler.CCompiler.compile,
+    so every compilation started afterwards in this process uses the parallel version.
+
+    :param cores: number of parallel compilations (size of the thread pool).
+        Leave at default (None) to use multiprocessing.cpu_count().
+    """
+    import distutils.ccompiler
+    from multiprocessing.pool import ThreadPool
+
+    if cores is None:
+        cores = multiprocessing.cpu_count()
+
+    def parallelCCompile(self, sources, output_dir=None, macros=None, include_dirs=None, debug=0,
+                         extra_preargs=None, extra_postargs=None, depends=None):
+        # those lines are copied from distutils.ccompiler.CCompiler directly
+        macros, objects, extra_postargs, pp_opts, build = self._setup_compile(
+            output_dir, macros, include_dirs, sources, depends, extra_postargs)
+        cc_args = self._get_cc_args(pp_opts, debug, extra_preargs)
+
+        def _single_compile(obj):
+            # objects that have no entry in build are skipped
+            if obj in build:
+                src, ext = build[obj]
+                self._compile(obj, src, ext, cc_args, extra_postargs, pp_opts)
+
+        # imap is evaluated on-demand, so list() is what drives the compilations;
+        # the with-block shuts the pool down afterwards instead of leaking its threads
+        with ThreadPool(cores) as pool:
+            list(pool.imap(_single_compile, objects))
+        return objects
+
+    distutils.ccompiler.CCompiler.compile = parallelCCompile