3 # Copyright (c) 2007, 2008 Rocco Rutte <pdmef@gmx.net> and others.
4 # License: MIT <http://www.opensource.org/licenses/mit-license.php>
6 from mercurial import node
7 from hg2git import setup_repo,fixup_user,get_branch,get_changeset
8 from hg2git import load_cache,save_cache,get_git_sha1,set_default_branch,set_origin_name
9 from optparse import OptionParser
if sys.platform == "win32":
    # On Windows, sys.stdout is initially opened in text mode, which means
    # that when a LF (\n) character is written to sys.stdout, it will be
    # converted into CRLF (\r\n). That makes git blow up, so use this
    # platform-specific code to change the mode of sys.stdout to binary.
    #
    # msvcrt only exists on Windows, so it is imported inside the guard;
    # importing it here also guarantees the name is bound before use.
    import msvcrt
    msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
# Pattern recognising a "Signed-off-by: <who>" line of a log message;
# group 1 captures everything after the colon and the space.
sob_re = re.compile('^Signed-[Oo]ff-[Bb]y: (.+)$')

# Emit a 'checkpoint' command every this many commits; 0 disables it.
cfg_checkpoint_count = 0

# Print a progress message every this many exported file contents.
cfg_export_boundary = 1000
30 return 'l' in flags and '120000' or 'x' in flags and '100755' or '100644'
35 sys.stdout.write('\n')
36 #map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n'))
38 def checkpoint(count):
40 if cfg_checkpoint_count>0 and count%cfg_checkpoint_count==0:
41 sys.stderr.write("Checkpoint after %d commits\n" % count)
def revnum_to_revref(rev, old_marks):
    """Translate an hg revision number into a git-fast-import reference.

    When old_marks holds a non-empty entry for rev (presumably the git
    SHA1 recorded by an earlier run), that entry is returned. Otherwise
    the result is the mark ':<rev+1>', matching the 'mark :%d' numbering
    (revision+1) that export_commit writes.
    """
    known = old_marks.get(rev)
    if known:
        return known
    return ':%d' % (rev + 1)
def file_mismatch(f1, f2):
    """Return True when two file revision ids differ.

    Both ids are converted with node.hex() and the resulting hex forms
    are compared.
    """
    hex_left = node.hex(f1)
    hex_right = node.hex(f2)
    return hex_left != hex_right
def split_dict(dleft,dright,l=None,c=None,r=None,match=file_mismatch):
    """Loop over two manifests and find all added, changed and missing files.

    dleft is our manifest and dright is the parent's. Both must support
    keys(), get(), item access and flags(name).

    Files present only in dleft are appended to l. Files present in both
    whose contents differ (according to match) or whose git file mode
    differs are appended to c. Files present only in dright are appended
    to r.

    l, c and r may be passed in to accumulate results over several calls
    (they are extended in place); when omitted, fresh lists are created
    for every call. Returns the tuple (l, c, r).
    """
    # List defaults in the signature would be created once and shared by
    # every call, silently accumulating entries between unrelated calls.
    l = [] if l is None else l
    c = [] if c is None else c
    r = [] if r is None else r

    for left in dleft.keys():
        right = dright.get(left, None)
        if right is None:
            # we have the file but our parent hasn't: add to left set
            l.append(left)
        elif match(dleft[left], right) or gitmode(dleft.flags(left)) != gitmode(dright.flags(left)):
            # we have it but checksums (or file modes) mismatch: add to center set
            c.append(left)

    for right in dright.keys():
        if dleft.get(right, None) is None:
            # parent has the file but we don't: add to right set
            r.append(right)
        # a change is already handled when comparing child against parent

    return l, c, r
73 def get_filechanges(repo,revision,parents,mleft):
74 """Given some repository and revision, find all changed/deleted files."""
78 mright=repo.changectx(p).manifest()
79 l,c,r=split_dict(mleft,mright,l,c,r)
85 def get_author(logmessage,committer,authors):
86 """As git distincts between author and committer of a patch, try to
87 extract author by detecting Signed-off-by lines.
89 This walks from the end of the log message towards the top skipping
90 empty lines. Upon the first non-empty line, it walks all Signed-off-by
91 lines upwards to find the first one. For that (if found), it extracts
92 authorship information the usual way (authors table, cleaning, etc.)
94 If no Signed-off-by line is found, this defaults to the committer.
96 This may sound stupid (and it somehow is), but in log messages we
may accidentally have lines in the middle starting with
98 "Signed-off-by: foo" and thus matching our detection regex. Prevent
101 loglines=logmessage.split('\n')
103 # from tail walk to top skipping empty lines
106 if len(loglines[i].strip())==0: continue
109 # walk further upwards to find first sob line, store in 'first'
112 m=sob_re.match(loglines[i])
116 # if the last non-empty line matches our Signed-Off-by regex: extract username
118 r=fixup_user(first.group(1),authors)
122 def export_file_contents(ctx,manifest,files,hgtags):
126 # Skip .hgtags files. They only get us in trouble.
127 if not hgtags and file == ".hgtags":
128 sys.stderr.write('Skip %s\n' % (file))
130 d=ctx.filectx(file).data()
131 wr('M %s inline %s' % (gitmode(manifest.flags(file)),file))
132 wr('data %d' % len(d)) # had some trouble with size()
135 if count%cfg_export_boundary==0:
136 sys.stderr.write('Exported %d/%d files\n' % (count,max))
137 if max>cfg_export_boundary:
138 sys.stderr.write('Exported %d/%d files\n' % (count,max))
140 def sanitize_name(name,what="branch"):
141 """Sanitize input roughly according to git-check-ref-format(1)"""
144 if name[0] == '.': return '_'+name[1:]
148 p=re.compile('([[ ~^:?*]|\.\.)')
150 if n[-1] in ('/', '.'): n=n[:-1]+'_'
151 n='/'.join(map(dot,n.split('/')))
156 sys.stderr.write('Warning: sanitized %s [%s] to [%s]\n' % (what,name,n))
159 def export_commit(ui,repo,revision,old_marks,max,count,authors,sob,brmap,hgtags):
160 def get_branchname(name):
161 if brmap.has_key(name):
163 n=sanitize_name(name)
167 (revnode,_,user,(time,timezone),files,desc,branch,_)=get_changeset(ui,repo,revision,authors)
169 branch=get_branchname(branch)
171 parents = [p for p in repo.changelog.parentrevs(revision) if p >= 0]
173 if len(parents)==0 and revision != 0:
174 wr('reset refs/heads/%s' % branch)
176 wr('commit refs/heads/%s' % branch)
177 wr('mark :%d' % (revision+1))
179 wr('author %s %d %s' % (get_author(desc,user,authors),time,timezone))
180 wr('committer %s %d %s' % (user,time,timezone))
181 wr('data %d' % (len(desc)+1)) # wtf?
186 # Sort the parents based on revision ids so that we always get the
187 # same resulting git repo, no matter how the revisions were
189 parents.sort(key=repo.changelog.node, reverse=True)
191 ctx=repo.changectx(str(revision))
193 added,changed,removed,type=[],[],[],''
195 if len(parents) == 0:
196 # first revision: feed in full manifest
201 wr('from %s' % revnum_to_revref(parents[0], old_marks))
202 if len(parents) == 1:
203 # later non-merge revision: feed in changed manifest
204 # if we have exactly one parent, just take the changes from the
205 # manifest without expensively comparing checksums
206 f=repo.status(repo.lookup(parents[0]),revnode)[:3]
207 added,changed,removed=f[1],f[0],f[2]
209 else: # a merge with two parents
210 wr('merge %s' % revnum_to_revref(parents[1], old_marks))
211 # later merge revision: feed in changed manifest
212 # for many files comparing checksums is expensive so only do it for
213 # merges where we really need it due to hg's revlog logic
214 added,changed,removed=get_filechanges(repo,revision,parents,man)
215 type='thorough delta'
217 sys.stderr.write('%s: Exporting %s revision %d/%d with %d/%d/%d added/changed/removed files\n' %
218 (branch,type,revision+1,max,len(added),len(changed),len(removed)))
220 map(lambda r: wr('D %s' % r),removed)
221 export_file_contents(ctx,man,added,hgtags)
222 export_file_contents(ctx,man,changed,hgtags)
225 return checkpoint(count)
227 def export_tags(ui,repo,old_marks,mapping_cache,count,authors):
230 tag=sanitize_name(tag,"tag")
231 # ignore latest revision
232 if tag=='tip': continue
233 # ignore tags to nodes that are missing (ie, 'in the future')
234 if node.encode('hex_codec') not in mapping_cache:
235 sys.stderr.write('Tag %s refers to unseen node %s\n' % (tag, node.encode('hex_codec')))
238 rev=int(mapping_cache[node.encode('hex_codec')])
240 ref=revnum_to_revref(rev, old_marks)
242 sys.stderr.write('Failed to find reference for creating tag'
243 ' %s at r%d\n' % (tag,rev))
245 sys.stderr.write('Exporting tag [%s] at [hg r%d] [git %s]\n' % (tag,rev,ref))
246 wr('reset refs/tags/%s' % tag)
249 count=checkpoint(count)
252 def load_authors(filename):
254 if not os.path.exists(filename):
258 lre=re.compile('^([^=]+)[ ]*=[ ]*(.+)$')
259 for line in f.readlines():
263 sys.stderr.write('Invalid file format in [%s], line %d\n' % (filename,l))
265 # put key:value in cache, key without ^:
266 cache[m.group(1).strip()]=m.group(2).strip()
268 sys.stderr.write('Loaded %d authors\n' % l)
271 def branchtip(repo, heads):
272 '''return the tipmost branch head in heads'''
274 for h in reversed(heads):
275 if 'close' not in repo.changelog.read(h)[5]:
280 def verify_heads(ui,repo,cache,force):
282 for bn, heads in repo.branchmap().iteritems():
283 branches[bn] = branchtip(repo, heads)
284 l=[(-repo.changelog.rev(n), n, t) for t, n in branches.items()]
287 # get list of hg's branches to verify, don't take all git has
293 sys.stderr.write('Error: Branch [%s] modified outside hg-fast-export:'
294 '\n%s (repo) != %s (cache)\n' % (b,sha1,c))
295 if not force: return False
297 # verify that branch has exactly one head
299 for h in repo.heads():
300 (_,_,_,_,_,_,branch,_)=get_changeset(ui,repo,h)
301 if t.get(branch,False):
302 sys.stderr.write('Error: repository has at least one unnamed head: hg r%s\n' %
303 repo.changelog.rev(h))
304 if not force: return False
309 def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile,authors={},sob=False,force=False,hgtags=False):
312 old_marks=load_cache(marksfile,lambda s: int(s)-1)
313 mapping_cache=load_cache(mappingfile)
314 heads_cache=load_cache(headsfile)
315 state_cache=load_cache(tipfile)
317 ui,repo=setup_repo(repourl)
319 if not verify_heads(ui,repo,heads_cache,force):
323 tip=repo.changelog.count()
324 except AttributeError:
327 min=int(state_cache.get('tip',0))
329 if _max<0 or max>tip:
332 for rev in range(0,max):
333 (revnode,_,_,_,_,_,_,_)=get_changeset(ui,repo,rev,authors)
334 mapping_cache[revnode.encode('hex_codec')] = str(rev)
339 for rev in range(min,max):
340 c=export_commit(ui,repo,rev,old_marks,max,c,authors,sob,brmap,hgtags)
342 state_cache['tip']=max
343 state_cache['repo']=repourl
344 save_cache(tipfile,state_cache)
345 save_cache(mappingfile,mapping_cache)
347 c=export_tags(ui,repo,old_marks,mapping_cache,c,authors)
349 sys.stderr.write('Issued %d commands\n' % c)
353 if __name__=='__main__':
354 def bail(parser,opt):
355 sys.stderr.write('Error: No %s option given\n' % opt)
359 parser=OptionParser()
361 parser.add_option("-m","--max",type="int",dest="max",
362 help="Maximum hg revision to import")
363 parser.add_option("--mapping",dest="mappingfile",
364 help="File to read last run's hg-to-git SHA1 mapping")
365 parser.add_option("--marks",dest="marksfile",
366 help="File to read git-fast-import's marks from")
367 parser.add_option("--heads",dest="headsfile",
368 help="File to read last run's git heads from")
369 parser.add_option("--status",dest="statusfile",
370 help="File to read status from")
371 parser.add_option("-r","--repo",dest="repourl",
372 help="URL of repo to import")
373 parser.add_option("-s",action="store_true",dest="sob",
374 default=False,help="Enable parsing Signed-off-by lines")
375 parser.add_option("--hgtags",action="store_true",dest="hgtags",
376 default=False,help="Enable exporting .hgtags files")
377 parser.add_option("-A","--authors",dest="authorfile",
378 help="Read authormap from AUTHORFILE")
379 parser.add_option("-f","--force",action="store_true",dest="force",
380 default=False,help="Ignore validation errors by force")
381 parser.add_option("-M","--default-branch",dest="default_branch",
382 help="Set the default branch")
383 parser.add_option("-o","--origin",dest="origin_name",
384 help="use <name> as namespace to track upstream")
386 (options,args)=parser.parse_args()
389 if options.max!=None: m=options.max
391 if options.marksfile==None: bail(parser,'--marks')
392 if options.mappingfile==None: bail(parser,'--mapping')
393 if options.headsfile==None: bail(parser,'--heads')
394 if options.statusfile==None: bail(parser,'--status')
395 if options.repourl==None: bail(parser,'--repo')
398 if options.authorfile!=None:
399 a=load_authors(options.authorfile)
401 if options.default_branch!=None:
402 set_default_branch(options.default_branch)
404 if options.origin_name!=None:
405 set_origin_name(options.origin_name)
407 sys.exit(hg2git(options.repourl,m,options.marksfile,options.mappingfile,options.headsfile,
408 options.statusfile,authors=a,sob=options.sob,force=options.force,hgtags=options.hgtags))