ylib2to3/pgen2/grammar: Fix Grammar.dump for parallel use with Grammar.load (#1243)
Previously, with bad timing, another process could end up reading a
half-written pickle cache file, and thus fail like this:
> Traceback (most recent call last):
> File "[..]/bin/yapf", line 5, in <module>
> from yapf import run_main
> File "[..]/lib/python3.11/site-packages/yapf/__init__.py", line 41, in <module>
> from yapf.yapflib import yapf_api
> File "[..]/lib/python3.11/site-packages/yapf/yapflib/yapf_api.py", line 38, in <module>
> from yapf.pyparser import pyparser
> File "[..]/lib/python3.11/site-packages/yapf/pyparser/pyparser.py", line 44, in <module>
> from yapf.yapflib import format_token
> File "[..]/lib/python3.11/site-packages/yapf/yapflib/format_token.py", line 23, in <module>
> from yapf.pytree import pytree_utils
> File "[..]/lib/python3.11/site-packages/yapf/pytree/pytree_utils.py", line 30, in <module>
> from yapf_third_party._ylib2to3 import pygram
> File "[..]/lib/python3.11/site-packages/yapf_third_party/_ylib2to3/pygram.py", line 29, in <module>
> python_grammar = driver.load_grammar(_GRAMMAR_FILE)
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> File "[..]/lib/python3.11/site-packages/yapf_third_party/_ylib2to3/pgen2/driver.py", line 252, in load_grammar
> g.load(gp)
> File "[..]/lib/python3.11/site-packages/yapf_third_party/_ylib2to3/pgen2/grammar.py", line 95, in load
> d = pickle.load(f)
> ^^^^^^^^^^^^^^
> EOFError: Ran out of input
diff --git a/third_party/yapf_third_party/_ylib2to3/pgen2/grammar.py b/third_party/yapf_third_party/_ylib2to3/pgen2/grammar.py
index 0840c3c..3825ce7 100644
--- a/third_party/yapf_third_party/_ylib2to3/pgen2/grammar.py
+++ b/third_party/yapf_third_party/_ylib2to3/pgen2/grammar.py
@@ -12,7 +12,9 @@
"""
# Python imports
+import os
import pickle
+import tempfile
# Local imports
from . import token
@@ -86,8 +88,39 @@
def dump(self, filename):
"""Dump the grammar tables to a pickle file."""
- with open(filename, 'wb') as f:
- pickle.dump(self.__dict__, f, pickle.HIGHEST_PROTOCOL)
+ # NOTE:
+ # - We're writing a tempfile first so that there is no chance
+ # for someone to read a half-written file from this very spot
+ # while we are not yet done writing.
+ # - We're using ``os.rename`` to be sure not to copy data around (which
+ # would get us back to square one with a reading-half-written file
+ # race condition).
+ # - We're making the tempfile go to the same directory as the eventual
+ # target ``filename`` so that there is no chance of failing from
+ # cross-file-system renames in ``os.rename``.
+ # - We're using the same prefix and suffix for the tempfile so if we
+ # ever have to leave a tempfile around for failure of deletion,
+ # it will have a reasonable filename extension and its name will help
+ # explain its nature.
+ tempfile_dir = os.path.dirname(filename)
+ tempfile_prefix, tempfile_suffix = os.path.splitext(filename)
+ with tempfile.NamedTemporaryFile(
+ mode='wb',
+ suffix=tempfile_suffix,
+ prefix=tempfile_prefix,
+ dir=tempfile_dir,
+ delete=False) as f:
+ pickle.dump(self.__dict__, f.file, pickle.HIGHEST_PROTOCOL)
+ try:
+ os.rename(f.name, filename)
+ except OSError:
+ # This makes sure that we do not leave the tempfile around
+ # unless we have to...
+ try:
+ os.remove(f.name)
+ except OSError:
+ pass
+ raise
def load(self, filename):
"""Load the grammar tables from a pickle file."""