From 60d8e8fbecaa78b6430c7a755b50e1b891a45ceb Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Fri, 6 Sep 2019 23:37:02 +0300 Subject: [PATCH] erepo: fix environment before running git Fixes #2471 --- dvc/external_repo.py | 53 +++++++---------------------------------- dvc/repo/__init__.py | 14 ++++++++++- dvc/scm/__init__.py | 6 ++--- dvc/scm/base.py | 24 +++++++++++++++---- dvc/scm/git/__init__.py | 32 +++++++++++++++++++++++-- 5 files changed, 73 insertions(+), 56 deletions(-) diff --git a/dvc/external_repo.py b/dvc/external_repo.py index cb5f38620a..3b055c920f 100644 --- a/dvc/external_repo.py +++ b/dvc/external_repo.py @@ -9,68 +9,31 @@ from dvc.config import Config from dvc.cache import CacheConfig -from dvc.exceptions import DvcException from dvc.utils import remove logger = logging.getLogger(__name__) -class ExternalRepoError(DvcException): - pass - - -class CloneError(ExternalRepoError): - def __init__(self, url, path, cause): - super(CloneError, self).__init__( - "Failed to clone repo '{}' to '{}'".format(url, path), cause=cause - ) - - -class RevError(ExternalRepoError): - def __init__(self, url, rev, cause): - super(RevError, self).__init__( - "Failed to access revision '{}' for repo '{}'".format(rev, url), - cause=cause, - ) - - -def _clone(cache_dir=None, **kwargs): +def _clone(cache_dir=None, url=None, rev=None, rev_lock=None): from dvc.repo import Repo _path = tempfile.mkdtemp("dvc-repo") - _clone_git_repo(_path, **kwargs) - - if cache_dir: - repo = Repo(_path) - cache_config = CacheConfig(repo.config) - cache_config.set_dir(cache_dir, level=Config.LEVEL_LOCAL) - repo.scm.close() - - return Repo(_path) - - -def _clone_git_repo(to_path, url=None, rev=None, rev_lock=None): - import git + repo = Repo.clone(url, _path, rev=(rev_lock or rev)) try: - repo = git.Repo.clone_from(url, to_path, no_single_branch=True) - except git.exc.GitCommandError as exc: - raise CloneError(url, to_path, exc) - try: - revision = rev_lock or rev - if revision: - try: - repo.git.checkout(revision) - except git.exc.GitCommandError as exc: - raise RevError(url, revision, exc) + if cache_dir: + cache_config = CacheConfig(repo.config) + cache_config.set_dir(cache_dir, level=Config.LEVEL_LOCAL) finally: repo.close() + return Repo(_path) + def _remove(repo): - repo.scm.close() + repo.close() if os.name == "nt": # git.exe may hang for a while not permitting to remove temp dir diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index 2f743a6ff9..ab7d35a792 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -66,7 +66,7 @@ def __init__(self, root_dir=None): self.config = Config(self.dvc_dir) - self.scm = SCM(self.root_dir, repo=self) + self.scm = SCM(self.root_dir) self.tree = WorkingTree(self.root_dir) @@ -471,3 +471,15 @@ def _open(self, path, remote=None, mode="r", encoding=None): @cached_property def dvcignore(self): return DvcIgnoreFilter(self.root_dir) + + def close(self): + self.scm.close() + + @staticmethod + def clone(url, to_path, rev=None): + from dvc.scm.git import Git + + git = Git.clone(url, to_path, rev=rev) + git.close() + + return Repo(to_path) diff --git a/dvc/scm/__init__.py b/dvc/scm/__init__.py index 5a6347fb3d..a1d823e773 100644 --- a/dvc/scm/__init__.py +++ b/dvc/scm/__init__.py @@ -12,7 +12,7 @@ class NoSCM(Base): pass -def SCM(root_dir, repo=None): # pylint: disable=invalid-name +def SCM(root_dir): # pylint: disable=invalid-name """Returns SCM instance that corresponds to a repo at the specified path. @@ -24,6 +24,6 @@ def SCM(root_dir, repo=None): # pylint: disable=invalid-name dvc.scm.base.Base: SCM instance. """ if Git.is_repo(root_dir) or Git.is_submodule(root_dir): - return Git(root_dir, repo=repo) + return Git(root_dir) - return NoSCM(root_dir, repo=repo) + return NoSCM(root_dir) diff --git a/dvc/scm/base.py b/dvc/scm/base.py index e7cf61fa5d..a1b2bd4fa7 100644 --- a/dvc/scm/base.py +++ b/dvc/scm/base.py @@ -11,28 +11,42 @@ class SCMError(DvcException): """Base class for source control management errors.""" -class FileNotInRepoError(DvcException): +class FileNotInRepoError(SCMError): """Thrown when trying to find .gitignore for a file that is not in a scm repository. """ -class FileNotInCommitError(DvcException): +class FileNotInCommitError(SCMError): """Thrown when trying to find a file/directory that is not in the specified commit in the repository. """ -class FileNotInTargetSubdirError(DvcException): +class FileNotInTargetSubdirError(SCMError): """Thrown when trying to place .gitignore for a file that not in the file subdirectory.""" +class CloneError(SCMError): + def __init__(self, url, path, cause): + super(CloneError, self).__init__( + "Failed to clone repo '{}' to '{}'".format(url, path), cause=cause + ) + + +class RevError(SCMError): + def __init__(self, url, rev, cause): + super(RevError, self).__init__( + "Failed to access revision '{}' for repo '{}'".format(rev, url), + cause=cause, + ) + + class Base(object): """Base class for source control management driver implementations.""" - def __init__(self, root_dir=os.curdir, repo=None): - self.repo = repo + def __init__(self, root_dir=os.curdir): self.root_dir = os.path.realpath(root_dir) def __repr__(self): diff --git a/dvc/scm/git/__init__.py b/dvc/scm/git/__init__.py index 631f84744f..bf30a2359a 100644 --- a/dvc/scm/git/__init__.py +++ b/dvc/scm/git/__init__.py @@ -12,6 +12,8 @@ SCMError, FileNotInRepoError, FileNotInTargetSubdirError, + CloneError, + RevError, ) from dvc.scm.git.tree import GitTree @@ -32,11 +34,11 @@ class Git(Base): GITIGNORE = ".gitignore" GIT_DIR = ".git" - def __init__(self, root_dir=os.curdir, repo=None): + def __init__(self, root_dir=os.curdir): """Git class constructor. Requires `Repo` class from `git` module (from gitpython package). """ - super(Git, self).__init__(root_dir, repo=repo) + super(Git, self).__init__(root_dir) import git from git.exc import InvalidGitRepositoryError @@ -56,6 +58,32 @@ def __init__(self, root_dir=os.curdir, repo=None): self.ignored_paths = [] self.files_to_track = set() + @staticmethod + def clone(url, to_path, rev=None): + import git + + try: + tmp_repo = git.Repo.clone_from( + url, + to_path, + env=fix_env(None), # needed before we can fix it in __init__ + no_single_branch=True, + ) + tmp_repo.close() + except git.exc.GitCommandError as exc: + raise CloneError(url, to_path, exc) + + # NOTE: using our wrapper to make sure that env is fixed in __init__ + repo = Git(to_path) + + if rev: + try: + repo.checkout(rev) + except git.exc.GitCommandError as exc: + raise RevError(url, rev, exc) + + return repo + @staticmethod def is_repo(root_dir): return os.path.isdir(Git._get_git_dir(root_dir))