3 # Copyright (c) 2007, 2008 Rocco Rutte <pdmef@gmx.net> and others.
4 # License: MIT <http://www.opensource.org/licenses/mit-license.php>
6 from mercurial import node
7 from hg2git import setup_repo,fixup_user,get_branch,get_changeset
8 from hg2git import load_cache,save_cache,get_git_sha1,set_default_branch,set_origin_name
9 from optparse import OptionParser
14 if sys.platform == "win32":
15 # On Windows, sys.stdout is initially opened in text mode, which means that
16 # when a LF (\n) character is written to sys.stdout, it will be converted
17 # into CRLF (\r\n). That makes git blow up, so use this platform-specific
18 # code to change the mode of sys.stdout to binary.
20 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
# Matches a "Signed-off-by: <user>" line of a log message; group 1 is the
# text following the prefix. The "O" and "B" may be upper or lower case.
sob_re = re.compile(r'^Signed-[Oo]ff-[Bb]y: (.+)$')
# Insert a 'checkpoint' command after this many commits, or none at all if 0.
cfg_checkpoint_count = 0
# Write a progress message every this many file contents written.
cfg_export_boundary = 1000
30 return 'l' in flags and '120000' or 'x' in flags and '100755' or '100644'
38 sys.stdout.write('\n')
39 #map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n'))
41 def checkpoint(count):
43 if cfg_checkpoint_count>0 and count%cfg_checkpoint_count==0:
44 sys.stderr.write("Checkpoint after %d commits\n" % count)
def revnum_to_revref(rev, old_marks):
    """Convert an hg revnum to a git-fast-import rev reference (an SHA1
    or a mark).

    Returns the entry stored for ``rev`` in ``old_marks`` when a truthy one
    exists; otherwise returns the mark reference ``':<rev+1>'``.
    """
    known = old_marks.get(rev)
    if known:
        return known
    # Commits are written with 'mark :<revision+1>', so a revision without a
    # recorded entry is referenced through that same mark number.
    return ':%d' % (rev + 1)
def file_mismatch(f1, f2):
    """See if two revisions of a file are not equal.

    Both arguments are passed through ``node.hex()`` and the resulting
    values are compared; returns True when they differ.
    """
    return node.hex(f1) != node.hex(f2)
58 def split_dict(dleft,dright,l=[],c=[],r=[],match=file_mismatch):
59 """Loop over our repository and find all changed and missing files."""
60 for left in dleft.keys():
61 right=dright.get(left,None)
63 # we have the file but our parent hasn't: add to left set
65 elif match(dleft[left],right) or gitmode(dleft.flags(left))!=gitmode(dright.flags(left)):
66 # we have it but checksums mismatch: add to center set
68 for right in dright.keys():
69 left=dleft.get(right,None)
71 # if parent has file but we don't: add to right set
73 # change is already handled when comparing child against parent
76 def get_filechanges(repo,revision,parents,mleft):
77 """Given some repository and revision, find all changed/deleted files."""
81 mright=repo.changectx(p).manifest()
82 l,c,r=split_dict(mleft,mright,l,c,r)
88 def get_author(logmessage,committer,authors):
89 """As git distinguishes between author and committer of a patch, try to
90 extract author by detecting Signed-off-by lines.
92 This walks from the end of the log message towards the top skipping
93 empty lines. Upon the first non-empty line, it walks all Signed-off-by
94 lines upwards to find the first one. For that (if found), it extracts
95 authorship information the usual way (authors table, cleaning, etc.)
97 If no Signed-off-by line is found, this defaults to the committer.
99 This may sound stupid (and it somehow is), but in log messages we
100 accidentally may have lines in the middle starting with
101 "Signed-off-by: foo" and thus matching our detection regex. Prevent
104 loglines=logmessage.split('\n')
106 # from tail walk to top skipping empty lines
109 if len(loglines[i].strip())==0: continue
112 # walk further upwards to find first sob line, store in 'first'
115 m=sob_re.match(loglines[i])
119 # if the last non-empty line matches our Signed-Off-by regex: extract username
121 r=fixup_user(first.group(1),authors)
125 def export_file_contents(ctx,manifest,files,hgtags,encoding=''):
129 # Skip .hgtags files. They only get us in trouble.
130 if not hgtags and file == ".hgtags":
131 sys.stderr.write('Skip %s\n' % (file))
133 d=ctx.filectx(file).data()
135 filename=file.decode(encoding).encode('utf8')
138 wr('M %s inline %s' % (gitmode(manifest.flags(file)),filename))
139 wr('data %d' % len(d)) # had some trouble with size()
142 if count%cfg_export_boundary==0:
143 sys.stderr.write('Exported %d/%d files\n' % (count,max))
144 if max>cfg_export_boundary:
145 sys.stderr.write('Exported %d/%d files\n' % (count,max))
147 def sanitize_name(name,what="branch"):
148 """Sanitize input roughly according to git-check-ref-format(1)"""
151 if name[0] == '.': return '_'+name[1:]
155 p=re.compile('([[ ~^:?\\\\*]|\.\.)')
157 if n[-1] in ('/', '.'): n=n[:-1]+'_'
158 n='/'.join(map(dot,n.split('/')))
163 sys.stderr.write('Warning: sanitized %s [%s] to [%s]\n' % (what,name,n))
166 def export_commit(ui,repo,revision,old_marks,max,count,authors,sob,brmap,hgtags,notes,encoding=''):
167 def get_branchname(name):
168 if brmap.has_key(name):
170 n=sanitize_name(name)
174 (revnode,_,user,(time,timezone),files,desc,branch,_)=get_changeset(ui,repo,revision,authors,encoding)
176 branch=get_branchname(branch)
178 parents = [p for p in repo.changelog.parentrevs(revision) if p >= 0]
180 if len(parents)==0 and revision != 0:
181 wr('reset refs/heads/%s' % branch)
183 wr('commit refs/heads/%s' % branch)
184 wr('mark :%d' % (revision+1))
186 wr('author %s %d %s' % (get_author(desc,user,authors),time,timezone))
187 wr('committer %s %d %s' % (user,time,timezone))
188 wr('data %d' % (len(desc)+1)) # wtf?
192 ctx=repo.changectx(str(revision))
194 added,changed,removed,type=[],[],[],''
196 if len(parents) == 0:
197 # first revision: feed in full manifest
202 wr('from %s' % revnum_to_revref(parents[0], old_marks))
203 if len(parents) == 1:
204 # later non-merge revision: feed in changed manifest
205 # if we have exactly one parent, just take the changes from the
206 # manifest without expensively comparing checksums
207 f=repo.status(repo.lookup(parents[0]),revnode)[:3]
208 added,changed,removed=f[1],f[0],f[2]
210 else: # a merge with two parents
211 wr('merge %s' % revnum_to_revref(parents[1], old_marks))
212 # later merge revision: feed in changed manifest
213 # for many files comparing checksums is expensive so only do it for
214 # merges where we really need it due to hg's revlog logic
215 added,changed,removed=get_filechanges(repo,revision,parents,man)
216 type='thorough delta'
218 sys.stderr.write('%s: Exporting %s revision %d/%d with %d/%d/%d added/changed/removed files\n' %
219 (branch,type,revision+1,max,len(added),len(changed),len(removed)))
222 removed=[r.decode(encoding).encode('utf8') for r in removed]
224 map(lambda r: wr('D %s' % r),removed)
225 export_file_contents(ctx,man,added,hgtags,encoding)
226 export_file_contents(ctx,man,changed,hgtags,encoding)
229 count=checkpoint(count)
230 count=generate_note(user,time,timezone,revision,ctx,count,notes)
233 def generate_note(user,time,timezone,revision,ctx,count,notes):
236 wr('commit refs/notes/hg')
237 wr('committer %s %d %s' % (user,time,timezone))
239 wr('N inline :%d' % (revision+1))
241 wr('data %d' % (len(hg_hash)))
244 return checkpoint(count)
246 def export_tags(ui,repo,old_marks,mapping_cache,count,authors):
249 tag=sanitize_name(tag,"tag")
250 # ignore latest revision
251 if tag=='tip': continue
252 # ignore tags to nodes that are missing (ie, 'in the future')
253 if node.encode('hex_codec') not in mapping_cache:
254 sys.stderr.write('Tag %s refers to unseen node %s\n' % (tag, node.encode('hex_codec')))
257 rev=int(mapping_cache[node.encode('hex_codec')])
259 ref=revnum_to_revref(rev, old_marks)
261 sys.stderr.write('Failed to find reference for creating tag'
262 ' %s at r%d\n' % (tag,rev))
264 sys.stderr.write('Exporting tag [%s] at [hg r%d] [git %s]\n' % (tag,rev,ref))
265 wr('reset refs/tags/%s' % tag)
268 count=checkpoint(count)
271 def load_authors(filename):
273 if not os.path.exists(filename):
278 lre=re.compile('^([^=]+)[ ]*=[ ]*(.+)$')
279 for line in f.readlines():
282 if line=='' or line[0]=='#':
286 sys.stderr.write('Invalid file format in [%s], line %d\n' % (filename,l))
288 # put key:value in cache, key without ^:
289 cache[m.group(1).strip()]=m.group(2).strip()
292 sys.stderr.write('Loaded %d authors\n' % a)
295 def branchtip(repo, heads):
296 '''return the tipmost branch head in heads'''
298 for h in reversed(heads):
299 if 'close' not in repo.changelog.read(h)[5]:
304 def verify_heads(ui,repo,cache,force):
306 for bn, heads in repo.branchmap().iteritems():
307 branches[bn] = branchtip(repo, heads)
308 l=[(-repo.changelog.rev(n), n, t) for t, n in branches.items()]
311 # get list of hg's branches to verify, don't take all git has
317 sys.stderr.write('Error: Branch [%s] modified outside hg-fast-export:'
318 '\n%s (repo) != %s (cache)\n' % (b,sha1,c))
319 if not force: return False
321 # verify that branch has exactly one head
323 for h in repo.heads():
324 (_,_,_,_,_,_,branch,_)=get_changeset(ui,repo,h)
325 if t.get(branch,False):
326 sys.stderr.write('Error: repository has at least one unnamed head: hg r%s\n' %
327 repo.changelog.rev(h))
328 if not force: return False
333 def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile,authors={},sob=False,force=False,hgtags=False,notes=False,encoding=''):
336 old_marks=load_cache(marksfile,lambda s: int(s)-1)
337 mapping_cache=load_cache(mappingfile)
338 heads_cache=load_cache(headsfile)
339 state_cache=load_cache(tipfile)
341 ui,repo=setup_repo(repourl)
343 if not verify_heads(ui,repo,heads_cache,force):
347 tip=repo.changelog.count()
348 except AttributeError:
351 min=int(state_cache.get('tip',0))
353 if _max<0 or max>tip:
356 for rev in range(0,max):
357 (revnode,_,_,_,_,_,_,_)=get_changeset(ui,repo,rev,authors)
358 mapping_cache[revnode.encode('hex_codec')] = str(rev)
363 for rev in range(min,max):
364 c=export_commit(ui,repo,rev,old_marks,max,c,authors,sob,brmap,hgtags,notes,encoding)
366 state_cache['tip']=max
367 state_cache['repo']=repourl
368 save_cache(tipfile,state_cache)
369 save_cache(mappingfile,mapping_cache)
371 c=export_tags(ui,repo,old_marks,mapping_cache,c,authors)
373 sys.stderr.write('Issued %d commands\n' % c)
377 if __name__=='__main__':
378 def bail(parser,opt):
379 sys.stderr.write('Error: No %s option given\n' % opt)
383 parser=OptionParser()
385 parser.add_option("-m","--max",type="int",dest="max",
386 help="Maximum hg revision to import")
387 parser.add_option("--mapping",dest="mappingfile",
388 help="File to read last run's hg-to-git SHA1 mapping")
389 parser.add_option("--marks",dest="marksfile",
390 help="File to read git-fast-import's marks from")
391 parser.add_option("--heads",dest="headsfile",
392 help="File to read last run's git heads from")
393 parser.add_option("--status",dest="statusfile",
394 help="File to read status from")
395 parser.add_option("-r","--repo",dest="repourl",
396 help="URL of repo to import")
397 parser.add_option("-s",action="store_true",dest="sob",
398 default=False,help="Enable parsing Signed-off-by lines")
399 parser.add_option("--hgtags",action="store_true",dest="hgtags",
400 default=False,help="Enable exporting .hgtags files")
401 parser.add_option("-A","--authors",dest="authorfile",
402 help="Read authormap from AUTHORFILE")
403 parser.add_option("-f","--force",action="store_true",dest="force",
404 default=False,help="Ignore validation errors by force")
405 parser.add_option("-M","--default-branch",dest="default_branch",
406 help="Set the default branch")
407 parser.add_option("-o","--origin",dest="origin_name",
408 help="use <name> as namespace to track upstream")
409 parser.add_option("--hg-hash",action="store_true",dest="notes",
410 default=False,help="Annotate commits with the hg hash as git notes in the hg namespace")
411 parser.add_option("-e",dest="encoding",
412 help="Assume commit and author strings retrieved from Mercurial are encoded in <encoding>")
414 (options,args)=parser.parse_args()
417 if options.max!=None: m=options.max
419 if options.marksfile==None: bail(parser,'--marks')
420 if options.mappingfile==None: bail(parser,'--mapping')
421 if options.headsfile==None: bail(parser,'--heads')
422 if options.statusfile==None: bail(parser,'--status')
423 if options.repourl==None: bail(parser,'--repo')
426 if options.authorfile!=None:
427 a=load_authors(options.authorfile)
429 if options.default_branch!=None:
430 set_default_branch(options.default_branch)
432 if options.origin_name!=None:
433 set_origin_name(options.origin_name)
436 if options.encoding!=None:
437 encoding=options.encoding
439 sys.exit(hg2git(options.repourl,m,options.marksfile,options.mappingfile,
440 options.headsfile, options.statusfile,authors=a,
441 sob=options.sob,force=options.force,hgtags=options.hgtags,
442 notes=options.notes,encoding=encoding))