# Copyright (c) 2007, 2008 Rocco Rutte <pdmef@gmx.net> and others.
# License: MIT <http://www.opensource.org/licenses/mit-license.php>
-from mercurial import repo,hg,cmdutil,util,ui,revlog,node
+from mercurial import node
from hg2git import setup_repo,fixup_user,get_branch,get_changeset
from hg2git import load_cache,save_cache,get_git_sha1,set_default_branch,set_origin_name
-from tempfile import mkstemp
from optparse import OptionParser
import re
import sys
import os
+if sys.platform == "win32":
+ # On Windows, sys.stdout is initially opened in text mode, which means that
+ # when a LF (\n) character is written to sys.stdout, it will be converted
+ # into CRLF (\r\n). That makes git blow up, so use this platform-specific
+ # code to change the mode of sys.stdout to binary.
+ # msvcrt is imported only inside this platform check; the switch happens
+ # at module level, i.e. when this file is loaded.
+ import msvcrt
+ msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
+
# silly regex to catch Signed-off-by lines in log message
sob_re=re.compile('^Signed-[Oo]ff-[Bb]y: (.+)$')
# insert 'checkpoint' command after this many commits or none at all if 0
def gitmode(flags):
+ # Pick the file mode string for a flags value: '120000' when 'l' is in
+ # flags, otherwise '100755' when 'x' is in flags, otherwise '100644'.
+ # The and/or chain behaves like a conditional expression only because
+ # every mode string is non-empty (truthy).
return 'l' in flags and '120000' or 'x' in flags and '100755' or '100644'
-def wr(msg=''):
+def wr_no_nl(msg=''):
+ # Write msg to stdout exactly as given, with no newline appended.
+ # An empty msg writes nothing at all.
if msg:
sys.stdout.write(msg)
+
+def wr(msg=''):
+ # Write msg (when non-empty) to stdout followed by a single '\n'.
+ # Calling wr() with no argument therefore emits just a newline.
+ wr_no_nl(msg)
sys.stdout.write('\n')
#map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n'))
if right==None:
# we have the file but our parent hasn't: add to left set
l.append(left)
- elif match(dleft[left],right):
+ elif match(dleft[left],right) or gitmode(dleft.flags(left))!=gitmode(dright.flags(left)):
# we have it but checksums mismatch: add to center set
c.append(left)
for right in dright.keys():
return r
return committer
-def export_file_contents(ctx,manifest,files):
+def export_file_contents(ctx,manifest,files,hgtags,encoding=''):
count=0
max=len(files)
for file in files:
# Skip .hgtags files. They only get us in trouble.
- if file == ".hgtags":
+ if not hgtags and file == ".hgtags":
sys.stderr.write('Skip %s\n' % (file))
continue
d=ctx.filectx(file).data()
- wr('M %s inline %s' % (gitmode(manifest.flags(file)),file))
+ if encoding:
+ filename=file.decode(encoding).encode('utf8')
+ else:
+ filename=file
+ wr('M %s inline %s' % (gitmode(manifest.flags(file)),
+ strip_leading_slash(filename)))
wr('data %d' % len(d)) # had some trouble with size()
wr(d)
count+=1
return name
n=name
- p=re.compile('([[ ~^:?*]|\.\.)')
+ p=re.compile('([[ ~^:?\\\\*]|\.\.)')
n=p.sub('_', n)
if n[-1] in ('/', '.'): n=n[:-1]+'_'
n='/'.join(map(dot,n.split('/')))
sys.stderr.write('Warning: sanitized %s [%s] to [%s]\n' % (what,name,n))
return n
-def export_commit(ui,repo,revision,old_marks,max,count,authors,sob,brmap):
+def strip_leading_slash(filename):
+  '''return filename without its first character if that character is "/"
+
+  Only a single leading slash is removed; any other filename is returned
+  unchanged. An empty filename is returned as is.'''
+  # Slice instead of indexing: filename[0] raises IndexError on an empty
+  # string, while filename[:1] simply yields ''.
+  if filename[:1] == '/':
+    return filename[1:]
+  return filename
+
+def export_commit(ui,repo,revision,old_marks,max,count,authors,
+ branchesmap,sob,brmap,hgtags,notes,encoding='',fn_encoding=''):
def get_branchname(name):
if brmap.has_key(name):
return brmap[name]
- n=sanitize_name(name)
+ n=sanitize_name(branchesmap.get(name,name))
brmap[name]=n
return n
- (revnode,_,user,(time,timezone),files,desc,branch,_)=get_changeset(ui,repo,revision,authors)
+ (revnode,_,user,(time,timezone),files,desc,branch,_)=get_changeset(ui,repo,revision,authors,encoding)
branch=get_branchname(branch)
wr(desc)
wr()
-
- # Sort the parents based on revision ids so that we always get the
- # same resulting git repo, no matter how the revisions were
- # numbered.
- parents.sort(key=repo.changelog.node, reverse=True)
-
ctx=repo.changectx(str(revision))
man=ctx.manifest()
added,changed,removed,type=[],[],[],''
sys.stderr.write('%s: Exporting %s revision %d/%d with %d/%d/%d added/changed/removed files\n' %
(branch,type,revision+1,max,len(added),len(changed),len(removed)))
+ if fn_encoding:
+ removed=[r.decode(fn_encoding).encode('utf8') for r in removed]
+
+ removed=[strip_leading_slash(x) for x in removed]
+
map(lambda r: wr('D %s' % r),removed)
- export_file_contents(ctx,man,added)
- export_file_contents(ctx,man,changed)
+ export_file_contents(ctx,man,added,hgtags,fn_encoding)
+ export_file_contents(ctx,man,changed,hgtags,fn_encoding)
wr()
- return checkpoint(count)
+ count=checkpoint(count)
+ count=generate_note(user,time,timezone,revision,ctx,count,notes)
+ return count
-def export_tags(ui,repo,old_marks,mapping_cache,count,authors):
+def generate_note(user,time,timezone,revision,ctx,count,notes):
+  '''write a commit on refs/notes/hg that carries ctx.hex() as a note
+
+  Does nothing and returns count unchanged when notes is false. Otherwise
+  the note commit is written to stdout through wr/wr_no_nl: a committer
+  line built from user, time and timezone, an empty commit message
+  ("data 0"), and an inline note for mark :revision+1 whose payload is
+  ctx.hex(). Returns the value of checkpoint(count).'''
+  if notes:
+    wr('commit refs/notes/hg')
+    wr('committer %s %d %s' % (user,time,timezone))
+    wr('data 0')
+    wr('N inline :%d' % (revision+1))
+    node_hex=ctx.hex()
+    wr('data %d' % (len(node_hex)))
+    # the payload must be exactly len(node_hex) bytes, so it is written
+    # without a newline; the separating newline is emitted on its own
+    wr_no_nl(node_hex)
+    wr()
+    count=checkpoint(count)
+  return count
+
+def export_tags(ui,repo,old_marks,mapping_cache,count,authors,tagsmap):
l=repo.tagslist()
for tag,node in l:
- tag=sanitize_name(tag,"tag")
+ # Remap the tag name
+ tag=sanitize_name(tagsmap.get(tag,tag),"tag")
# ignore latest revision
if tag=='tip': continue
# ignore tags to nodes that are missing (ie, 'in the future')
count=checkpoint(count)
return count
-def load_authors(filename):
+def load_mapping(name, filename):
+ # Read a 'key = value' mapping file into a dict and return it.
+ # Returns an empty dict when filename does not exist. name is used only
+ # in the 'Loaded ...' summary written to stderr.
+ # NOTE(review): f is closed only on the normal path; if the loop raises,
+ # the handle is left open (a try/finally around the loop would fix it).
cache={}
if not os.path.exists(filename):
return cache
f=open(filename,'r')
+ # l counts physical lines (used in the error message below);
+ # a counts the entries actually stored in cache.
l=0
+ a=0
lre=re.compile('^([^=]+)[ ]*=[ ]*(.+)$')
for line in f.readlines():
l+=1
+ line=line.strip()
+ # blank lines and lines starting with '#' are skipped without a warning
+ if line=='' or line[0]=='#':
+ continue
m=lre.match(line)
if m==None:
sys.stderr.write('Invalid file format in [%s], line %d\n' % (filename,l))
continue
# put key:value in cache, key without ^:
+ # a key that appears more than once keeps the value from its last line
cache[m.group(1).strip()]=m.group(2).strip()
+ a+=1
f.close()
- sys.stderr.write('Loaded %d authors\n' % l)
+ sys.stderr.write('Loaded %d %s\n' % (a, name))
return cache
+def branchtip(repo, heads):
+  '''return the tipmost branch head in heads
+
+  heads is scanned from its last element backwards and the first head
+  whose changelog entry has no 'close' key at index 5 is returned. If
+  every head has that key, the last element of heads is returned.'''
+  # evaluated up front, as before, so an empty heads fails immediately
+  fallback = heads[-1]
+  for head in reversed(heads):
+    # index 5 is presumably the changeset's "extra" dict -- confirm
+    # against Mercurial's changelog.read()
+    if 'close' not in repo.changelog.read(head)[5]:
+      return head
+  return fallback
+
def verify_heads(ui,repo,cache,force):
- branches=repo.branchtags()
+ branches={}
+ for bn, heads in repo.branchmap().iteritems():
+ branches[bn] = branchtip(repo, heads)
l=[(-repo.changelog.rev(n), n, t) for t, n in branches.items()]
l.sort()
return True
-def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile,authors={},sob=False,force=False):
+def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile,
+ authors={},branchesmap={},tagsmap={},
+ sob=False,force=False,hgtags=False,notes=False,encoding='',fn_encoding=''):
_max=int(m)
old_marks=load_cache(marksfile,lambda s: int(s)-1)
c=0
brmap={}
for rev in range(min,max):
- c=export_commit(ui,repo,rev,old_marks,max,c,authors,sob,brmap)
+ c=export_commit(ui,repo,rev,old_marks,max,c,authors,branchesmap,
+ sob,brmap,hgtags,notes,encoding,fn_encoding)
state_cache['tip']=max
state_cache['repo']=repourl
save_cache(tipfile,state_cache)
save_cache(mappingfile,mapping_cache)
- c=export_tags(ui,repo,old_marks,mapping_cache,c,authors)
+ c=export_tags(ui,repo,old_marks,mapping_cache,c,authors,tagsmap)
sys.stderr.write('Issued %d commands\n' % c)
help="URL of repo to import")
parser.add_option("-s",action="store_true",dest="sob",
default=False,help="Enable parsing Signed-off-by lines")
+ parser.add_option("--hgtags",action="store_true",dest="hgtags",
+ default=False,help="Enable exporting .hgtags files")
parser.add_option("-A","--authors",dest="authorfile",
help="Read authormap from AUTHORFILE")
+ parser.add_option("-B","--branches",dest="branchesfile",
+ help="Read branch map from BRANCHESFILE")
+ parser.add_option("-T","--tags",dest="tagsfile",
+ help="Read tags map from TAGSFILE")
parser.add_option("-f","--force",action="store_true",dest="force",
default=False,help="Ignore validation errors by force")
parser.add_option("-M","--default-branch",dest="default_branch",
help="Set the default branch")
parser.add_option("-o","--origin",dest="origin_name",
help="use <name> as namespace to track upstream")
+ parser.add_option("--hg-hash",action="store_true",dest="notes",
+ default=False,help="Annotate commits with the hg hash as git notes in the hg namespace")
+ parser.add_option("-e",dest="encoding",
+ help="Assume commit and author strings retrieved from Mercurial are encoded in <encoding>")
+ parser.add_option("--fe",dest="fn_encoding",
+ help="Assume file names from Mercurial are encoded in <filename_encoding>")
(options,args)=parser.parse_args()
a={}
if options.authorfile!=None:
- a=load_authors(options.authorfile)
+ a=load_mapping('authors', options.authorfile)
+
+ b={}
+ if options.branchesfile!=None:
+ b=load_mapping('branches', options.branchesfile)
+
+ t={}
+ if options.tagsfile!=None:
+ t=load_mapping('tags', options.tagsfile)
if options.default_branch!=None:
set_default_branch(options.default_branch)
if options.origin_name!=None:
set_origin_name(options.origin_name)
- sys.exit(hg2git(options.repourl,m,options.marksfile,options.mappingfile,options.headsfile,
- options.statusfile,authors=a,sob=options.sob,force=options.force))
+ encoding=''
+ if options.encoding!=None:
+ encoding=options.encoding
+
+ fn_encoding=encoding
+ if options.fn_encoding!=None:
+ fn_encoding=options.fn_encoding
+
+ sys.exit(hg2git(options.repourl,m,options.marksfile,options.mappingfile,
+ options.headsfile, options.statusfile,
+ authors=a,branchesmap=b,tagsmap=t,
+ sob=options.sob,force=options.force,hgtags=options.hgtags,
+ notes=options.notes,encoding=encoding,fn_encoding=fn_encoding))