# Copyright (c) 2007, 2008 Rocco Rutte <pdmef@gmx.net> and others.
# License: MIT <http://www.opensource.org/licenses/mit-license.php>
-from mercurial import repo,hg,cmdutil,util,ui,revlog,node
+from mercurial import node
from hg2git import setup_repo,fixup_user,get_branch,get_changeset
from hg2git import load_cache,save_cache,get_git_sha1,set_default_branch,set_origin_name
-from tempfile import mkstemp
from optparse import OptionParser
import re
import sys
import os
+if sys.platform == "win32":
+ # On Windows, sys.stdout is initially opened in text mode, which means that
+ # when a LF (\n) character is written to sys.stdout, it will be converted
+ # into CRLF (\r\n). That makes git blow up, so use this platform-specific
+ # code to change the mode of sys.stdout to binary.
+ import msvcrt
+ msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
+
# silly regex to catch Signed-off-by lines in log message
sob_re=re.compile('^Signed-[Oo]ff-[Bb]y: (.+)$')
# insert 'checkpoint' command after this many commits or none at all if 0
def gitmode(flags):
return 'l' in flags and '120000' or 'x' in flags and '100755' or '100644'
+def wr_no_nl(msg=''):
+    """Write msg to stdout exactly as given, without appending a newline.
+
+    A falsy msg (empty string or None) results in no write at all.
+    """
+    if not msg:
+        return
+    sys.stdout.write(msg)
+
def wr(msg=''):
- if msg == None:
- msg = ''
- print msg
+ wr_no_nl(msg)
+ sys.stdout.write('\n')
#map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n'))
def checkpoint(count):
wr()
return count
-def get_parent_mark(parent,marks):
- """Get the mark for some parent.
- If we saw it in the current session, return :%d syntax and
- otherwise the SHA1 from the cache."""
- return marks.get(str(parent),':%d' % (parent+1))
+def revnum_to_revref(rev, old_marks):
+    """Translate an hg revision number into a git-fast-import revision
+    reference.
+
+    If old_marks holds a truthy entry for rev, that entry is returned
+    (presumably a SHA1 recorded by an earlier run -- confirm against the
+    marks loader); otherwise the mark ':<rev+1>' is returned.
+    """
+    known = old_marks.get(rev)
+    if known:
+        return known
+    # marks are offset by one from the hg revision number
+    return ':%d' % (rev+1)
def file_mismatch(f1,f2):
"""See if two revisions of a file are not equal."""
if right==None:
# we have the file but our parent hasn't: add to left set
l.append(left)
- elif match(dleft[left],right):
+ elif match(dleft[left],right) or gitmode(dleft.flags(left))!=gitmode(dright.flags(left)):
# we have it but checksums mismatch: add to center set
c.append(left)
for right in dright.keys():
return r
return committer
-def export_file_contents(ctx,manifest,files):
+def export_file_contents(ctx,manifest,files,hgtags,encoding=''):
count=0
max=len(files)
for file in files:
# Skip .hgtags files. They only get us in trouble.
- if file == ".hgtags":
+ if not hgtags and file == ".hgtags":
sys.stderr.write('Skip %s\n' % (file))
continue
d=ctx.filectx(file).data()
- wr('M %s inline %s' % (gitmode(manifest.flags(file)),file))
+ if encoding:
+ filename=file.decode(encoding).encode('utf8')
+ else:
+ filename=file
+ wr('M %s inline %s' % (gitmode(manifest.flags(file)),
+ strip_leading_slash(filename)))
wr('data %d' % len(d)) # had some trouble with size()
wr(d)
count+=1
if max>cfg_export_boundary:
sys.stderr.write('Exported %d/%d files\n' % (count,max))
-def is_merge(parents):
- c=0
- for parent in parents:
- if parent>=0:
- c+=1
- return c>1
-
def sanitize_name(name,what="branch"):
"""Sanitize input roughly according to git-check-ref-format(1)"""
return name
n=name
- p=re.compile('([[ ~^:?*]|\.\.)')
+ p=re.compile('([[ ~^:?\\\\*]|\.\.)')
n=p.sub('_', n)
- if n[-1] == '/': n=n[:-1]+'_'
+ if n[-1] in ('/', '.'): n=n[:-1]+'_'
n='/'.join(map(dot,n.split('/')))
p=re.compile('_+')
n=p.sub('_', n)
sys.stderr.write('Warning: sanitized %s [%s] to [%s]\n' % (what,name,n))
return n
-def export_commit(ui,repo,revision,marks,mapping,heads,last,max,count,authors,sob,brmap):
+def strip_leading_slash(filename):
+    """Return filename with one leading '/' removed, if it has one.
+
+    Only a single leading slash is stripped; any further slashes are kept.
+    An empty filename is returned unchanged.
+    """
+    # startswith() is safe on an empty string, whereas indexing
+    # filename[0] would raise IndexError.
+    if filename.startswith('/'):
+        return filename[1:]
+    return filename
+
+def export_commit(ui,repo,revision,old_marks,max,count,authors,
+ branchesmap,sob,brmap,hgtags,notes,encoding='',fn_encoding=''):
def get_branchname(name):
if brmap.has_key(name):
return brmap[name]
- n=sanitize_name(name)
+ n=sanitize_name(branchesmap.get(name,name))
brmap[name]=n
return n
- (revnode,_,user,(time,timezone),files,desc,branch,_)=get_changeset(ui,repo,revision,authors)
- parents=repo.changelog.parentrevs(revision)
+ (revnode,_,user,(time,timezone),files,desc,branch,_)=get_changeset(ui,repo,revision,authors,encoding)
branch=get_branchname(branch)
+ parents = [p for p in repo.changelog.parentrevs(revision) if p >= 0]
+
+ if len(parents)==0 and revision != 0:
+ wr('reset refs/heads/%s' % branch)
+
wr('commit refs/heads/%s' % branch)
wr('mark :%d' % (revision+1))
if sob:
wr(desc)
wr()
- pidx1, pidx2 = 0, 1
- if parents[0] < parents[1]:
- pidx1, pidx2 = 1, 0
-
- src=heads.get(branch,'')
- link=''
- if src!='':
- # if we have a cached head, this is an incremental import: initialize it
- # and kill reference so we won't init it again
- wr('from %s' % src)
- heads[branch]=''
- sys.stderr.write('%s: Initializing to parent [%s]\n' %
- (branch,src))
- link=src # avoid making a merge commit for incremental import
- elif link=='' and not heads.has_key(branch) and revision>0:
- # newly created branch and not the first one: connect to parent
- tmp=get_parent_mark(parents[0],marks)
- wr('from %s' % tmp)
- sys.stderr.write('%s: Link new branch to parent [%s]\n' %
- (branch,tmp))
- link=tmp # avoid making a merge commit for branch fork
- elif last.get(branch,revision) != parents[pidx1] and parents[pidx1] > 0 and revision > 0:
- pm=get_parent_mark(parents[pidx1],marks)
- sys.stderr.write('%s: Placing commit [r%d] in branch [%s] on top of [r%d]\n' %
- (branch,revision,branch,parents[pidx1]));
- wr('from %s' % pm)
-
- if parents[pidx2] > 0:
- pm=get_parent_mark(parents[pidx2],marks)
- sys.stderr.write('%s: Merging with parent [%s] from [r%d]\n' %
- (branch,pm,parents[pidx2]))
- wr('merge %s' % pm)
-
- last[branch]=revision
- heads[branch]=''
- # we need this later to write out tags
- marks[str(revision)]=':%d'%(revision+1)
-
ctx=repo.changectx(str(revision))
man=ctx.manifest()
added,changed,removed,type=[],[],[],''
- if revision==0:
+ if len(parents) == 0:
- # first revision: feed in full manifest
+ # root revision (no parents): feed in full manifest
added=man.keys()
added.sort()
type='full'
- elif is_merge(parents):
- # later merge revision: feed in changed manifest
- # for many files comparing checksums is expensive so only do it for
- # merges where we really need it due to hg's revlog logic
- added,changed,removed=get_filechanges(repo,revision,parents,man)
- type='thorough delta'
else:
- # later non-merge revision: feed in changed manifest
- # if we have exactly one parent, just take the changes from the
- # manifest without expensively comparing checksums
- f=repo.status(repo.lookup(parents[0]),revnode)[:3]
- added,changed,removed=f[1],f[0],f[2]
- type='simple delta'
+ wr('from %s' % revnum_to_revref(parents[0], old_marks))
+ if len(parents) == 1:
+ # later non-merge revision: feed in changed manifest
+ # if we have exactly one parent, just take the changes from the
+ # manifest without expensively comparing checksums
+ f=repo.status(repo.lookup(parents[0]),revnode)[:3]
+ added,changed,removed=f[1],f[0],f[2]
+ type='simple delta'
+ else: # a merge with two parents
+ wr('merge %s' % revnum_to_revref(parents[1], old_marks))
+ # later merge revision: feed in changed manifest
+ # for many files comparing checksums is expensive so only do it for
+ # merges where we really need it due to hg's revlog logic
+ added,changed,removed=get_filechanges(repo,revision,parents,man)
+ type='thorough delta'
sys.stderr.write('%s: Exporting %s revision %d/%d with %d/%d/%d added/changed/removed files\n' %
(branch,type,revision+1,max,len(added),len(changed),len(removed)))
+ if fn_encoding:
+ removed=[r.decode(fn_encoding).encode('utf8') for r in removed]
+
+ removed=[strip_leading_slash(x) for x in removed]
+
map(lambda r: wr('D %s' % r),removed)
- export_file_contents(ctx,man,added)
- export_file_contents(ctx,man,changed)
+ export_file_contents(ctx,man,added,hgtags,fn_encoding)
+ export_file_contents(ctx,man,changed,hgtags,fn_encoding)
wr()
- return checkpoint(count)
+ count=checkpoint(count)
+ count=generate_note(user,time,timezone,revision,ctx,count,notes)
+ return count
-def export_tags(ui,repo,marks_cache,mapping_cache,count,authors):
+def generate_note(user,time,timezone,revision,ctx,count,notes):
+    """Emit a commit on refs/notes/hg that attaches the hg hash of ctx
+    as a note to the mark ':<revision+1>'.
+
+    When notes is falsy nothing is written and count is returned as is;
+    otherwise the result of checkpoint(count) is returned.
+    """
+    if notes:
+        wr('commit refs/notes/hg')
+        wr('committer %s %d %s' % (user,time,timezone))
+        # the notes commit itself carries an empty message
+        wr('data 0')
+        wr('N inline :%d' % (revision+1))
+        hexsha=ctx.hex()
+        wr('data %d' % len(hexsha))
+        # payload first, then a separate terminating newline
+        wr_no_nl(hexsha)
+        wr()
+        count=checkpoint(count)
+    return count
+
+def export_tags(ui,repo,old_marks,mapping_cache,count,authors,tagsmap):
l=repo.tagslist()
for tag,node in l:
- tag=sanitize_name(tag,"tag")
+ # Remap the tag name through tagsmap, then sanitize it
+ tag=sanitize_name(tagsmap.get(tag,tag),"tag")
# ignore latest revision
if tag=='tip': continue
# ignore tags to nodes that are missing (ie, 'in the future')
rev=int(mapping_cache[node.encode('hex_codec')])
- ref=marks_cache.get(str(rev),':%d' % (rev))
+ ref=revnum_to_revref(rev, old_marks)
if ref==None:
sys.stderr.write('Failed to find reference for creating tag'
' %s at r%d\n' % (tag,rev))
count=checkpoint(count)
return count
-def load_authors(filename):
+def load_mapping(name, filename):
cache={}
if not os.path.exists(filename):
return cache
f=open(filename,'r')
l=0
+ a=0
lre=re.compile('^([^=]+)[ ]*=[ ]*(.+)$')
for line in f.readlines():
l+=1
+ line=line.strip()
+ if line=='' or line[0]=='#':
+ continue
m=lre.match(line)
if m==None:
sys.stderr.write('Invalid file format in [%s], line %d\n' % (filename,l))
continue
# put key:value in cache, key without ^:
cache[m.group(1).strip()]=m.group(2).strip()
+ a+=1
f.close()
- sys.stderr.write('Loaded %d authors\n' % l)
+ sys.stderr.write('Loaded %d %s\n' % (a, name))
return cache
+def branchtip(repo, heads):
+    '''Return the tipmost head in heads that is not closed.
+
+    heads is scanned from last to first and the first head whose
+    changelog entry lacks 'close' in field 5 is returned (presumably the
+    changeset's extra data -- confirm against Mercurial's changelog.read).
+    If every head is closed, the last head is returned.
+    '''
+    for head in reversed(heads):
+        if 'close' not in repo.changelog.read(head)[5]:
+            return head
+    return heads[-1]
+
def verify_heads(ui,repo,cache,force):
- branches=repo.branchtags()
+ branches={}
+ for bn, heads in repo.branchmap().iteritems():
+ branches[bn] = branchtip(repo, heads)
l=[(-repo.changelog.rev(n), n, t) for t, n in branches.items()]
l.sort()
b=get_branch(b)
sha1=get_git_sha1(b)
c=cache.get(b)
- if sha1!=None and c!=None:
- sys.stderr.write('Verifying branch [%s]\n' % b)
if sha1!=c:
sys.stderr.write('Error: Branch [%s] modified outside hg-fast-export:'
'\n%s (repo) != %s (cache)\n' % (b,sha1,c))
return True
-def mangle_mark(mark):
- return str(int(mark)-1)
-
-def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile,authors={},sob=False,force=False):
+def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile,
+ authors={},branchesmap={},tagsmap={},
+ sob=False,force=False,hgtags=False,notes=False,encoding='',fn_encoding=''):
_max=int(m)
- marks_cache=load_cache(marksfile,mangle_mark)
+ old_marks=load_cache(marksfile,lambda s: int(s)-1)
mapping_cache=load_cache(mappingfile)
heads_cache=load_cache(headsfile)
state_cache=load_cache(tipfile)
c=0
- last={}
brmap={}
for rev in range(min,max):
- c=export_commit(ui,repo,rev,marks_cache,mapping_cache,heads_cache,last,max,c,authors,sob,brmap)
+ c=export_commit(ui,repo,rev,old_marks,max,c,authors,branchesmap,
+ sob,brmap,hgtags,notes,encoding,fn_encoding)
state_cache['tip']=max
state_cache['repo']=repourl
save_cache(tipfile,state_cache)
save_cache(mappingfile,mapping_cache)
- c=export_tags(ui,repo,marks_cache,mapping_cache,c,authors)
+ c=export_tags(ui,repo,old_marks,mapping_cache,c,authors,tagsmap)
sys.stderr.write('Issued %d commands\n' % c)
help="URL of repo to import")
parser.add_option("-s",action="store_true",dest="sob",
default=False,help="Enable parsing Signed-off-by lines")
+ parser.add_option("--hgtags",action="store_true",dest="hgtags",
+ default=False,help="Enable exporting .hgtags files")
parser.add_option("-A","--authors",dest="authorfile",
help="Read authormap from AUTHORFILE")
+ parser.add_option("-B","--branches",dest="branchesfile",
+ help="Read branch map from BRANCHESFILE")
+ parser.add_option("-T","--tags",dest="tagsfile",
+ help="Read tags map from TAGSFILE")
parser.add_option("-f","--force",action="store_true",dest="force",
default=False,help="Ignore validation errors by force")
parser.add_option("-M","--default-branch",dest="default_branch",
help="Set the default branch")
parser.add_option("-o","--origin",dest="origin_name",
help="use <name> as namespace to track upstream")
+ parser.add_option("--hg-hash",action="store_true",dest="notes",
+ default=False,help="Annotate commits with the hg hash as git notes in the hg namespace")
+ parser.add_option("-e",dest="encoding",
+ help="Assume commit and author strings retrieved from Mercurial are encoded in <encoding>")
+ parser.add_option("--fe",dest="fn_encoding",
+ help="Assume file names from Mercurial are encoded in <filename_encoding>")
(options,args)=parser.parse_args()
a={}
if options.authorfile!=None:
- a=load_authors(options.authorfile)
+ a=load_mapping('authors', options.authorfile)
+
+ b={}
+ if options.branchesfile!=None:
+ b=load_mapping('branches', options.branchesfile)
+
+ t={}
+ if options.tagsfile!=None:
+ t=load_mapping('tags', options.tagsfile)
if options.default_branch!=None:
set_default_branch(options.default_branch)
if options.origin_name!=None:
set_origin_name(options.origin_name)
- sys.exit(hg2git(options.repourl,m,options.marksfile,options.mappingfile,options.headsfile,
- options.statusfile,authors=a,sob=options.sob,force=options.force))
+ encoding=''
+ if options.encoding!=None:
+ encoding=options.encoding
+
+ fn_encoding=encoding
+ if options.fn_encoding!=None:
+ fn_encoding=options.fn_encoding
+
+ sys.exit(hg2git(options.repourl,m,options.marksfile,options.mappingfile,
+ options.headsfile, options.statusfile,
+ authors=a,branchesmap=b,tagsmap=t,
+ sob=options.sob,force=options.force,hgtags=options.hgtags,
+ notes=options.notes,encoding=encoding,fn_encoding=fn_encoding))