diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2dda6132bb1219870a26b0b197ec4b52c055d439..7f07370323e46b4577e0822ab4feee42f52596d4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
 # v1.3.1
 
-* _TBA_
+* Cython build step ("cythonize") is now parallelized by default: `build()` accepts an `nthreads` argument that defaults to the number of CPUs
 
 # v1.3
 
diff --git a/snakehouse/__init__.py b/snakehouse/__init__.py
index 22a747f2be629545c2770e432d07263356597189..32f790990b09014edfb07005db9534e8e8698c10 100644
--- a/snakehouse/__init__.py
+++ b/snakehouse/__init__.py
@@ -2,4 +2,4 @@ from .build import build
 from .multibuild import Multibuild
 from .faster_builds import monkey_patch_parallel_compilation
 
-__version__ = '1.3.1a1'
+__version__ = '1.3.1'
diff --git a/snakehouse/build.py b/snakehouse/build.py
index 6d333fb651d3d16a8e4cd9ed6aa8e5767a0fe1c9..d8b4333cf6c712829b97ab82c7fa04bb2bc2bc3b 100644
--- a/snakehouse/build.py
+++ b/snakehouse/build.py
@@ -1,4 +1,6 @@
 import logging
+import multiprocessing
+import sys
 import typing as tp
 from Cython.Build import cythonize
 from setuptools import Extension
@@ -9,7 +11,10 @@ MultiBuildType = tp.Union[Multibuild, Exception]
 logger = logging.getLogger(__name__)
 
 
-def build(extensions: tp.List[MultiBuildType], *args, **kwargs):
+def build(extensions: tp.List[MultiBuildType], *args, nthreads=None, **kwargs):
+    if nthreads is None:
+        nthreads = multiprocessing.cpu_count()
+    kwargs['nthreads'] = nthreads
     returns = []
     multi_builds = []
     for multi_build in extensions: