3 # Copyright (c) 2007, 2008 Rocco Rutte <pdmef@gmx.net> and others.
4 # License: MIT <http://www.opensource.org/licenses/mit-license.php>
6 from mercurial import repo,hg,cmdutil,util,ui,revlog,node
7 from hg2git import setup_repo,fixup_user,get_branch,get_changeset
8 from hg2git import load_cache,save_cache,get_git_sha1,set_default_branch,set_origin_name
9 from tempfile import mkstemp
10 from optparse import OptionParser
# Pattern that recognises a "Signed-off-by:" line in a log message; group 1
# captures everything that follows the colon and the single space.
sob_re = re.compile(r'^Signed-[Oo]ff-[Bb]y: (.+)$')

# Emit a 'checkpoint' command after every this many commits; 0 disables it.
cfg_checkpoint_count = 0

# Print a progress message each time this many file contents were written.
cfg_export_boundary = 1000
23 return 'l' in flags and '120000' or 'x' in flags and '100755' or '100644'
29 #map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n'))
31 def checkpoint(count):
33 if cfg_checkpoint_count>0 and count%cfg_checkpoint_count==0:
34 sys.stderr.write("Checkpoint after %d commits\n" % count)
def get_parent_mark(parent, marks):
    """Return the reference to use for the parent revision `parent`.

    `marks` is looked up by the string form of `parent`. When an entry
    exists its value is returned unchanged; otherwise the result is the
    ':%d' mark syntax built from `parent + 1`.
    """
    # Build the fallback first so it is always evaluated, exactly like a
    # default argument handed to dict.get().
    fallback = ':%d' % (parent + 1)
    key = str(parent)
    return marks.get(key, fallback)
def file_mismatch(f1, f2):
    """Tell whether two file revisions differ.

    Both arguments are converted with node.hex() and the resulting values
    are compared; returns True when they are not equal.
    """
    hex_left = node.hex(f1)
    hex_right = node.hex(f2)
    return hex_left != hex_right
49 def split_dict(dleft,dright,l=[],c=[],r=[],match=file_mismatch):
50 """Loop over our repository and find all changed and missing files."""
51 for left in dleft.keys():
52 right=dright.get(left,None)
54 # we have the file but our parent hasn't: add to left set
56 elif match(dleft[left],right):
57 # we have it but checksums mismatch: add to center set
59 for right in dright.keys():
60 left=dleft.get(right,None)
62 # if parent has file but we don't: add to right set
64 # change is already handled when comparing child against parent
67 def get_filechanges(repo,revision,parents,mleft):
68 """Given some repository and revision, find all changed/deleted files."""
72 mright=repo.changectx(p).manifest()
73 l,c,r=split_dict(mleft,mright,l,c,r)
79 def get_author(logmessage,committer,authors):
80 """As git distinguishes between author and committer of a patch, try to
81 extract author by detecting Signed-off-by lines.
83 This walks from the end of the log message towards the top skipping
84 empty lines. Upon the first non-empty line, it walks all Signed-off-by
85 lines upwards to find the first one. For that (if found), it extracts
86 authorship information the usual way (authors table, cleaning, etc.)
88 If no Signed-off-by line is found, this defaults to the committer.
90 This may sound stupid (and it somehow is), but in log messages we
91 accidentally may have lines in the middle starting with
92 "Signed-off-by: foo" and thus matching our detection regex. Prevent
95 loglines=logmessage.split('\n')
97 # from tail walk to top skipping empty lines
100 if len(loglines[i].strip())==0: continue
103 # walk further upwards to find first sob line, store in 'first'
106 m=sob_re.match(loglines[i])
110 # if the last non-empty line matches our Signed-Off-by regex: extract username
112 r=fixup_user(first.group(1),authors)
116 def export_file_contents(ctx,manifest,files):
120 # Skip .hgtags files. They only get us in trouble.
121 if file == ".hgtags":
122 sys.stderr.write('Skip %s\n' % (file))
124 d=ctx.filectx(file).data()
125 wr('M %s inline %s' % (gitmode(manifest.flags(file)),file))
126 wr('data %d' % len(d)) # had some trouble with size()
129 if count%cfg_export_boundary==0:
130 sys.stderr.write('Exported %d/%d files\n' % (count,max))
131 if max>cfg_export_boundary:
132 sys.stderr.write('Exported %d/%d files\n' % (count,max))
134 def is_merge(parents):
136 for parent in parents:
141 def sanitize_name(name,what="branch"):
142 """Sanitize input roughly according to git-check-ref-format(1)"""
145 if name[0] == '.': return '_'+name[1:]
149 p=re.compile('([[ ~^:?*]|\.\.)')
151 if n[-1] == '/': n=n[:-1]+'_'
152 n='/'.join(map(dot,n.split('/')))
157 sys.stderr.write('Warning: sanitized %s [%s] to [%s]\n' % (what,name,n))
160 def export_commit(ui,repo,revision,marks,mapping,heads,last,max,count,authors,sob,brmap):
161 def get_branchname(name):
162 if brmap.has_key(name):
164 n=sanitize_name(name)
168 (revnode,_,user,(time,timezone),files,desc,branch,_)=get_changeset(ui,repo,revision,authors)
169 parents=repo.changelog.parentrevs(revision)
171 branch=get_branchname(branch)
173 wr('commit refs/heads/%s' % branch)
174 wr('mark :%d' % (revision+1))
176 wr('author %s %d %s' % (get_author(desc,user,authors),time,timezone))
177 wr('committer %s %d %s' % (user,time,timezone))
178 wr('data %d' % (len(desc)+1)) # wtf?
183 if parents[0] < parents[1]:
187 if revision==0: full_rev=True
189 src=heads.get(branch,'')
192 # if we have a cached head, this is an incremental import: initialize it
193 # and kill reference so we won't init it again
196 sys.stderr.write('%s: Initializing to parent [%s]\n' %
198 link=src # avoid making a merge commit for incremental import
199 elif link=='' and not heads.has_key(branch) and revision>0:
201 # newly created branch with parent: connect to parent
202 tmp=get_parent_mark(parents[0],marks)
204 sys.stderr.write('%s: Link new branch to parent [%s]\n' %
206 link=tmp # avoid making a merge commit for branch fork
208 # newly created branch without parent: feed full revision
210 elif last.get(branch,revision) != parents[pidx1] and parents[pidx1] > 0 and revision > 0:
211 pm=get_parent_mark(parents[pidx1],marks)
212 sys.stderr.write('%s: Placing commit [r%d] in branch [%s] on top of [r%d]\n' %
213 (branch,revision,branch,parents[pidx1]));
216 if parents[pidx2] > 0:
217 pm=get_parent_mark(parents[pidx2],marks)
218 sys.stderr.write('%s: Merging with parent [%s] from [r%d]\n' %
219 (branch,pm,parents[pidx2]))
222 last[branch]=revision
224 # we need this later to write out tags
225 marks[str(revision)]=':%d'%(revision+1)
227 ctx=repo.changectx(str(revision))
229 added,changed,removed,type=[],[],[],''
232 # first revision: feed in full manifest
236 elif is_merge(parents):
237 # later merge revision: feed in changed manifest
238 # for many files comparing checksums is expensive so only do it for
239 # merges where we really need it due to hg's revlog logic
240 added,changed,removed=get_filechanges(repo,revision,parents,man)
241 type='thorough delta'
243 # later non-merge revision: feed in changed manifest
244 # if we have exactly one parent, just take the changes from the
245 # manifest without expensively comparing checksums
246 f=repo.status(repo.lookup(parents[0]),revnode)[:3]
247 added,changed,removed=f[1],f[0],f[2]
250 sys.stderr.write('%s: Exporting %s revision %d/%d with %d/%d/%d added/changed/removed files\n' %
251 (branch,type,revision+1,max,len(added),len(changed),len(removed)))
253 map(lambda r: wr('D %s' % r),removed)
254 export_file_contents(ctx,man,added)
255 export_file_contents(ctx,man,changed)
258 return checkpoint(count)
260 def export_tags(ui,repo,marks_cache,mapping_cache,count,authors):
263 tag=sanitize_name(tag,"tag")
264 # ignore latest revision
265 if tag=='tip': continue
266 # ignore tags to nodes that are missing (ie, 'in the future')
267 if node.encode('hex_codec') not in mapping_cache:
268 sys.stderr.write('Tag %s refers to unseen node %s\n' % (tag, node.encode('hex_codec')))
271 rev=int(mapping_cache[node.encode('hex_codec')])
273 ref=marks_cache.get(str(rev),':%d' % (rev))
275 sys.stderr.write('Failed to find reference for creating tag'
276 ' %s at r%d\n' % (tag,rev))
278 sys.stderr.write('Exporting tag [%s] at [hg r%d] [git %s]\n' % (tag,rev,ref))
279 wr('reset refs/tags/%s' % tag)
282 count=checkpoint(count)
285 def load_authors(filename):
287 if not os.path.exists(filename):
291 lre=re.compile('^([^=]+)[ ]*=[ ]*(.+)$')
292 for line in f.readlines():
296 sys.stderr.write('Invalid file format in [%s], line %d\n' % (filename,l))
298 # put key:value in cache, key without ^:
299 cache[m.group(1).strip()]=m.group(2).strip()
301 sys.stderr.write('Loaded %d authors\n' % l)
304 def verify_heads(ui,repo,cache,force):
305 branches=repo.branchtags()
306 l=[(-repo.changelog.rev(n), n, t) for t, n in branches.items()]
309 # get list of hg's branches to verify, don't take all git has
314 if sha1!=None and c!=None:
315 sys.stderr.write('Verifying branch [%s]\n' % b)
317 sys.stderr.write('Error: Branch [%s] modified outside hg-fast-export:'
318 '\n%s (repo) != %s (cache)\n' % (b,sha1,c))
319 if not force: return False
321 # verify that branch has exactly one head
323 for h in repo.heads():
324 (_,_,_,_,_,_,branch,_)=get_changeset(ui,repo,h)
325 if t.get(branch,False):
326 sys.stderr.write('Error: repository has at least one unnamed head: hg r%s\n' %
327 repo.changelog.rev(h))
328 if not force: return False
def mangle_mark(mark):
    """Return `mark` decremented by one, as a string.

    `mark` may be anything int() accepts (for example a numeric string).
    Marks are written elsewhere in this file as revision + 1, so this
    presumably maps a mark back to its hg revision number -- confirm
    against load_cache(), which receives this function as its transform.
    """
    number = int(mark)
    return str(number - 1)
336 def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile,authors={},sob=False,force=False):
339 marks_cache=load_cache(marksfile,mangle_mark)
340 mapping_cache=load_cache(mappingfile)
341 heads_cache=load_cache(headsfile)
342 state_cache=load_cache(tipfile)
344 ui,repo=setup_repo(repourl)
346 if not verify_heads(ui,repo,heads_cache,force):
350 tip=repo.changelog.count()
351 except AttributeError:
354 min=int(state_cache.get('tip',0))
356 if _max<0 or max>tip:
359 for rev in range(0,max):
360 (revnode,_,_,_,_,_,_,_)=get_changeset(ui,repo,rev,authors)
361 mapping_cache[revnode.encode('hex_codec')] = str(rev)
367 for rev in range(min,max):
368 c=export_commit(ui,repo,rev,marks_cache,mapping_cache,heads_cache,last,max,c,authors,sob,brmap)
370 state_cache['tip']=max
371 state_cache['repo']=repourl
372 save_cache(tipfile,state_cache)
373 save_cache(mappingfile,mapping_cache)
375 c=export_tags(ui,repo,marks_cache,mapping_cache,c,authors)
377 sys.stderr.write('Issued %d commands\n' % c)
381 if __name__=='__main__':
382 def bail(parser,opt):
383 sys.stderr.write('Error: No %s option given\n' % opt)
387 parser=OptionParser()
389 parser.add_option("-m","--max",type="int",dest="max",
390 help="Maximum hg revision to import")
391 parser.add_option("--mapping",dest="mappingfile",
392 help="File to read last run's hg-to-git SHA1 mapping")
393 parser.add_option("--marks",dest="marksfile",
394 help="File to read git-fast-import's marks from")
395 parser.add_option("--heads",dest="headsfile",
396 help="File to read last run's git heads from")
397 parser.add_option("--status",dest="statusfile",
398 help="File to read status from")
399 parser.add_option("-r","--repo",dest="repourl",
400 help="URL of repo to import")
401 parser.add_option("-s",action="store_true",dest="sob",
402 default=False,help="Enable parsing Signed-off-by lines")
403 parser.add_option("-A","--authors",dest="authorfile",
404 help="Read authormap from AUTHORFILE")
405 parser.add_option("-f","--force",action="store_true",dest="force",
406 default=False,help="Ignore validation errors by force")
407 parser.add_option("-M","--default-branch",dest="default_branch",
408 help="Set the default branch")
409 parser.add_option("-o","--origin",dest="origin_name",
410 help="use <name> as namespace to track upstream")
412 (options,args)=parser.parse_args()
415 if options.max!=None: m=options.max
417 if options.marksfile==None: bail(parser,'--marks')
418 if options.mappingfile==None: bail(parser,'--mapping')
419 if options.headsfile==None: bail(parser,'--heads')
420 if options.statusfile==None: bail(parser,'--status')
421 if options.repourl==None: bail(parser,'--repo')
424 if options.authorfile!=None:
425 a=load_authors(options.authorfile)
427 if options.default_branch!=None:
428 set_default_branch(options.default_branch)
430 if options.origin_name!=None:
431 set_origin_name(options.origin_name)
433 sys.exit(hg2git(options.repourl,m,options.marksfile,options.mappingfile,options.headsfile,
434 options.statusfile,authors=a,sob=options.sob,force=options.force))