#!/usr/bin/env python
-# Copyright (c) 2007 Rocco Rutte <pdmef@gmx.net>
+# Copyright (c) 2007, 2008 Rocco Rutte <pdmef@gmx.net> and others.
# License: MIT <http://www.opensource.org/licenses/mit-license.php>
-from mercurial import repo,hg,cmdutil,util,ui,revlog,node
-from hg2git import setup_repo,fixup_user,get_branch,get_changeset,load_cache,save_cache,get_git_sha1
-from tempfile import mkstemp
+from mercurial import node
+from hg2git import setup_repo,fixup_user,get_branch,get_changeset
+from hg2git import load_cache,save_cache,get_git_sha1,set_default_branch,set_origin_name
from optparse import OptionParser
import re
import sys
import os
+if sys.platform == "win32":
+ # On Windows, sys.stdout is initially opened in text mode, which means that
+ # when a LF (\n) character is written to sys.stdout, it will be converted
+ # into CRLF (\r\n). That makes git blow up, so use this platform-specific
+ # code to change the mode of sys.stdout to binary.
+ import msvcrt
+ msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
+
# silly regex to catch Signed-off-by lines in log message
sob_re=re.compile('^Signed-[Oo]ff-[Bb]y: (.+)$')
# insert 'checkpoint' command after this many commits or none at all if 0
# write a progress message each time this many file contents have been written
cfg_export_boundary=1000
-def gitmode(x):
- return x and '100755' or '100644'
+def gitmode(flags):
+ return 'l' in flags and '120000' or 'x' in flags and '100755' or '100644'
def wr(msg=''):
- if msg == None:
- msg = ''
- print msg
+ if msg:
+ sys.stdout.write(msg)
+ sys.stdout.write('\n')
#map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n'))
def checkpoint(count):
wr()
return count
-def get_parent_mark(parent,marks):
- """Get the mark for some parent.
- If we saw it in the current session, return :%d syntax and
- otherwise the SHA1 from the cache."""
- return marks.get(str(parent),':%d' % (parent+1))
+def revnum_to_revref(rev, old_marks):
+ """Convert an hg revnum to a git-fast-import rev reference (an SHA1
+ or a mark)"""
+ return old_marks.get(rev) or ':%d' % (rev+1)
def file_mismatch(f1,f2):
"""See if two revisions of a file are not equal."""
count=0
max=len(files)
for file in files:
+ # Skip .hgtags files. They only get us in trouble.
+ if file == ".hgtags":
+ sys.stderr.write('Skip %s\n' % (file))
+ continue
d=ctx.filectx(file).data()
- wr('M %s inline %s' % (gitmode(manifest.execf(file)),file))
+ wr('M %s inline %s' % (gitmode(manifest.flags(file)),file))
wr('data %d' % len(d)) # had some trouble with size()
wr(d)
count+=1
if max>cfg_export_boundary:
sys.stderr.write('Exported %d/%d files\n' % (count,max))
-def is_merge(parents):
- c=0
- for parent in parents:
- if parent>=0:
- c+=1
- return c>1
-
def sanitize_name(name,what="branch"):
"""Sanitize input roughly according to git-check-ref-format(1)"""
n=name
p=re.compile('([[ ~^:?*]|\.\.)')
n=p.sub('_', n)
- if n[-1] == '/': n=n[:-1]+'_'
+ if n[-1] in ('/', '.'): n=n[:-1]+'_'
n='/'.join(map(dot,n.split('/')))
p=re.compile('_+')
n=p.sub('_', n)
sys.stderr.write('Warning: sanitized %s [%s] to [%s]\n' % (what,name,n))
return n
-def export_commit(ui,repo,revision,marks,heads,last,max,count,authors,sob,brmap):
+def export_commit(ui,repo,revision,old_marks,max,count,authors,sob,brmap):
def get_branchname(name):
if brmap.has_key(name):
return brmap[name]
return n
(revnode,_,user,(time,timezone),files,desc,branch,_)=get_changeset(ui,repo,revision,authors)
- parents=repo.changelog.parentrevs(revision)
branch=get_branchname(branch)
+ parents = [p for p in repo.changelog.parentrevs(revision) if p >= 0]
+
+ if len(parents)==0 and revision != 0:
+ wr('reset refs/heads/%s' % branch)
+
wr('commit refs/heads/%s' % branch)
wr('mark :%d' % (revision+1))
if sob:
wr(desc)
wr()
- pidx1, pidx2 = 0, 1
- if parents[0] < parents[1]:
- pidx1, pidx2 = 1, 0
-
- src=heads.get(branch,'')
- link=''
- if src!='':
- # if we have a cached head, this is an incremental import: initialize it
- # and kill reference so we won't init it again
- wr('from %s' % src)
- heads[branch]=''
- sys.stderr.write('%s: Initializing to parent [%s]\n' %
- (branch,src))
- link=src # avoid making a merge commit for incremental import
- elif link=='' and not heads.has_key(branch) and revision>0:
- # newly created branch and not the first one: connect to parent
- tmp=get_parent_mark(parents[0],marks)
- wr('from %s' % tmp)
- sys.stderr.write('%s: Link new branch to parent [%s]\n' %
- (branch,tmp))
- link=tmp # avoid making a merge commit for branch fork
- elif last.get(branch,revision) != parents[pidx1] and parents[pidx1] > 0 and revision > 0:
- pm=get_parent_mark(parents[pidx1],marks)
- sys.stderr.write('%s: Placing commit [r%d] in branch [%s] on top of [r%d]\n' %
- (branch,revision,branch,parents[pidx1]));
- wr('from %s' % pm)
-
- if parents[pidx2] > 0:
- pm=get_parent_mark(parents[pidx2],marks)
- sys.stderr.write('%s: Merging with parent [%s] from [r%d]\n' %
- (branch,pm,parents[pidx2]))
- wr('merge %s' % pm)
-
- last[branch]=revision
- heads[branch]=''
- # we need this later to write out tags
- marks[str(revision)]=':%d'%(revision+1)
+
+ # Sort the parents by their changeset node ids (hashes) rather than by
+ # local revision numbers, so that we always get the same resulting git
+ # repo, no matter how the revisions were numbered.
+ parents.sort(key=repo.changelog.node, reverse=True)
ctx=repo.changectx(str(revision))
man=ctx.manifest()
added,changed,removed,type=[],[],[],''
- if revision==0:
+ if len(parents) == 0:
# revision without parents (e.g. the very first one): feed in full manifest
added=man.keys()
added.sort()
type='full'
- elif is_merge(parents):
- # later merge revision: feed in changed manifest
- # for many files comparing checksums is expensive so only do it for
- # merges where we really need it due to hg's revlog logic
- added,changed,removed=get_filechanges(repo,revision,parents,man)
- type='thorough delta'
else:
- # later non-merge revision: feed in changed manifest
- # if we have exactly one parent, just take the changes from the
- # manifest without expensively comparing checksums
- f=repo.status(repo.lookup(parents[0]),revnode)[:3]
- added,changed,removed=f[1],f[0],f[2]
- type='simple delta'
+ wr('from %s' % revnum_to_revref(parents[0], old_marks))
+ if len(parents) == 1:
+ # later non-merge revision: feed in changed manifest
+ # if we have exactly one parent, just take the changes from the
+ # manifest without expensively comparing checksums
+ f=repo.status(repo.lookup(parents[0]),revnode)[:3]
+ added,changed,removed=f[1],f[0],f[2]
+ type='simple delta'
+ else: # a merge with two parents
+ wr('merge %s' % revnum_to_revref(parents[1], old_marks))
+ # later merge revision: feed in changed manifest
+ # for many files comparing checksums is expensive so only do it for
+ # merges where we really need it due to hg's revlog logic
+ added,changed,removed=get_filechanges(repo,revision,parents,man)
+ type='thorough delta'
sys.stderr.write('%s: Exporting %s revision %d/%d with %d/%d/%d added/changed/removed files\n' %
(branch,type,revision+1,max,len(added),len(changed),len(removed)))
return checkpoint(count)
-def export_tags(ui,repo,marks_cache,start,end,count,authors):
+def export_tags(ui,repo,old_marks,mapping_cache,count,authors):
l=repo.tagslist()
for tag,node in l:
tag=sanitize_name(tag,"tag")
# ignore latest revision
if tag=='tip': continue
- rev=repo.changelog.rev(node)
- # ignore those tags not in our import range
- if rev<start or rev>=end: continue
+ # ignore tags pointing to nodes missing from the mapping cache (i.e. 'in the future')
+ if node.encode('hex_codec') not in mapping_cache:
+ sys.stderr.write('Tag %s refers to unseen node %s\n' % (tag, node.encode('hex_codec')))
+ continue
+
+ rev=int(mapping_cache[node.encode('hex_codec')])
- ref=get_parent_mark(rev,marks_cache)
+ ref=revnum_to_revref(rev, old_marks)
if ref==None:
sys.stderr.write('Failed to find reference for creating tag'
' %s at r%d\n' % (tag,rev))
sys.stderr.write('Loaded %d authors\n' % l)
return cache
+def branchtip(repo, heads):
+ '''return the last head in heads that does not close its branch (falling back to heads[-1] if every head does)'''
+ tip = heads[-1]
+ for h in reversed(heads):
+ if not repo[h].closesbranch():
+ tip = h
+ break
+ return tip
+
def verify_heads(ui,repo,cache,force):
- branches=repo.branchtags()
+ branches={}
+ for bn, heads in repo.branchmap().iteritems():
+ branches[bn] = branchtip(repo, heads)
l=[(-repo.changelog.rev(n), n, t) for t, n in branches.items()]
l.sort()
b=get_branch(b)
sha1=get_git_sha1(b)
c=cache.get(b)
- if sha1!=None and c!=None:
- sys.stderr.write('Verifying branch [%s]\n' % b)
if sha1!=c:
sys.stderr.write('Error: Branch [%s] modified outside hg-fast-export:'
'\n%s (repo) != %s (cache)\n' % (b,sha1,c))
return True
-def mangle_mark(mark):
- return str(int(mark)-1)
-
-def hg2git(repourl,m,marksfile,headsfile,tipfile,authors={},sob=False,force=False):
+def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile,authors={},sob=False,force=False):
_max=int(m)
- marks_cache=load_cache(marksfile,mangle_mark)
+ old_marks=load_cache(marksfile,lambda s: int(s)-1)
+ mapping_cache=load_cache(mappingfile)
heads_cache=load_cache(headsfile)
state_cache=load_cache(tipfile)
if not verify_heads(ui,repo,heads_cache,force):
return 1
- tip=repo.changelog.count()
+ try:
+ tip=repo.changelog.count()
+ except AttributeError:
+ tip=len(repo)
min=int(state_cache.get('tip',0))
max=_max
if _max<0 or max>tip:
max=tip
+ for rev in range(0,max):
+ (revnode,_,_,_,_,_,_,_)=get_changeset(ui,repo,rev,authors)
+ mapping_cache[revnode.encode('hex_codec')] = str(rev)
+
+
c=0
- last={}
brmap={}
for rev in range(min,max):
- c=export_commit(ui,repo,rev,marks_cache,heads_cache,last,max,c,authors,sob,brmap)
-
- c=export_tags(ui,repo,marks_cache,min,max,c,authors)
-
- sys.stderr.write('Issued %d commands\n' % c)
+ c=export_commit(ui,repo,rev,old_marks,max,c,authors,sob,brmap)
state_cache['tip']=max
state_cache['repo']=repourl
save_cache(tipfile,state_cache)
+ save_cache(mappingfile,mapping_cache)
+
+ c=export_tags(ui,repo,old_marks,mapping_cache,c,authors)
+
+ sys.stderr.write('Issued %d commands\n' % c)
return 0
parser.add_option("-m","--max",type="int",dest="max",
help="Maximum hg revision to import")
+ parser.add_option("--mapping",dest="mappingfile",
+ help="File to read last run's hg-to-git SHA1 mapping")
parser.add_option("--marks",dest="marksfile",
help="File to read git-fast-import's marks from")
parser.add_option("--heads",dest="headsfile",
help="Read authormap from AUTHORFILE")
parser.add_option("-f","--force",action="store_true",dest="force",
default=False,help="Ignore validation errors by force")
+ parser.add_option("-M","--default-branch",dest="default_branch",
+ help="Set the default branch")
+ parser.add_option("-o","--origin",dest="origin_name",
+ help="use <name> as namespace to track upstream")
(options,args)=parser.parse_args()
if options.max!=None: m=options.max
if options.marksfile==None: bail(parser,'--marks')
+ if options.mappingfile==None: bail(parser,'--mapping')
if options.headsfile==None: bail(parser,'--heads')
if options.statusfile==None: bail(parser,'--status')
if options.repourl==None: bail(parser,'--repo')
if options.authorfile!=None:
a=load_authors(options.authorfile)
- sys.exit(hg2git(options.repourl,m,options.marksfile,options.headsfile,
+ if options.default_branch!=None:
+ set_default_branch(options.default_branch)
+
+ if options.origin_name!=None:
+ set_origin_name(options.origin_name)
+
+ sys.exit(hg2git(options.repourl,m,options.marksfile,options.mappingfile,options.headsfile,
options.statusfile,authors=a,sob=options.sob,force=options.force))