From ffc5270a6fa106fecad1a6a9f1520ca8f075c6b7 Mon Sep 17 00:00:00 2001 From: Raja R Harinath Date: Sat, 10 Jul 2010 16:20:04 +0530 Subject: Reduce size of fast-import marks file by not persisting file-level marks Use two allocators for marks, one persistent commit counter that starts at 0 and counts up commits, and a transitional counter, for files, that counts down from maxMark, and is reset on each SVN revision. Note that the marks file will still have marks for some, but not all, files. The number of such marks is limited by the size of the SVN revision that affects the most files. For instance, this changed the size of one marks file from 19M to 3.2M. fast-import issues: We currently set maxMark = (1<<20)-1. Anything larger seems to trigger a bug in the sparse array dumping routine in git-fast-import in certain versions of git. --- src/repository.cpp | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) (limited to 'src/repository.cpp') diff --git a/src/repository.cpp b/src/repository.cpp index bd2931a..18deab5 100644 --- a/src/repository.cpp +++ b/src/repository.cpp @@ -25,6 +25,8 @@ static const int maxSimultaneousProcesses = 100; +static const int maxMark = (1 << 20) - 1; // some versions of git-fast-import are buggy for larger values of maxMark + class ProcessCache: QLinkedList { public: @@ -52,7 +54,7 @@ public: static ProcessCache processCache; Repository::Repository(const Rules::Repository &rule) - : name(rule.name), commitCount(0), outstandingTransactions(0), lastmark(0), processHasStarted(false) + : name(rule.name), commitCount(0), outstandingTransactions(0), last_commit_mark(0), next_file_mark(maxMark), processHasStarted(false) { foreach (Rules::Repository::Branch branchRule, rule.branches) { Branch branch; @@ -136,8 +138,8 @@ int Repository::setupIncremental(int resume_from) last_revnum = revnum; - if (lastmark < mark) - lastmark = mark; + if (last_commit_mark < mark) + last_commit_mark = mark; Branch &br = 
branches[branch]; if (!br.created || !mark || !br.marks.last()) @@ -303,6 +305,12 @@ Repository::Transaction *Repository::newTransaction(const QString &branch, const return txn; } +void Repository::forgetTransaction(Transaction *) +{ + if (!--outstandingTransactions) + next_file_mark = maxMark; +} + void Repository::createAnnotatedTag(const QString &ref, const QString &svnprefix, int revnum, const QByteArray &author, uint dt, @@ -396,7 +404,7 @@ void Repository::startFastImport() Repository::Transaction::~Transaction() { - --repository->outstandingTransactions; + repository->forgetTransaction(this); } void Repository::Transaction::setAuthor(const QByteArray &a) @@ -457,7 +465,10 @@ void Repository::Transaction::deleteFile(const QString &path) QIODevice *Repository::Transaction::addFile(const QString &path, int mode, qint64 length) { - int mark = ++repository->lastmark; + int mark = repository->next_file_mark--; + + // in case the two mark allocations meet, we might as well just abort + Q_ASSERT(mark > repository->last_commit_mark + 1); if (modifiedFiles.capacity() == 0) modifiedFiles.reserve(2048); @@ -484,7 +495,13 @@ void Repository::Transaction::commit() { processCache.touch(repository); - int mark = ++repository->lastmark; + // We might be tempted to use the SVN revision number as the fast-import commit mark. + // However, a single SVN revision can modify multiple branches, and thus lead to multiple + // commits in the same repo. So, we need to maintain a separate commit mark counter. + int mark = ++repository->last_commit_mark; + + // in case the two mark allocations meet, we might as well just abort + Q_ASSERT(mark < repository->next_file_mark - 1); // create the commit message QByteArray message = log; -- cgit v1.2.1