From: Chris Lee
Date: Sun, 14 Jan 2007 13:16:29 +0000 (-0800)
Subject: Adding initial rev of my svn-fast-export app, in Python and C.
X-Git-Url: http://crossforests.com/gitweb?a=commitdiff_plain;h=0f33b8fe9e74cff03480c117b60f8c57d35b0fef;p=python%2Ffast-export.git

Adding initial rev of my svn-fast-export app, in Python and C.
---

0f33b8fe9e74cff03480c117b60f8c57d35b0fef
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..6a84cd7
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,5 @@
+SVN = /usr/local/svn
+CFLAGS = -I/usr/include/apr-1.0 -I${SVN}/include/subversion-1 -pipe -g3 -std=c99
+LDFLAGS = -L${SVN}/lib -lsvn_repos-1
+
+svn-fast-export: svn-fast-export.c
diff --git a/svn-fast-export.c b/svn-fast-export.c
new file mode 100644
index 0000000..ce4d4d0
--- /dev/null
+++ b/svn-fast-export.c
@@ -0,0 +1,205 @@
+/*
+ * svn-fast-export.c
+ * ----------
+ * Walk through each revision of a local Subversion repository and export it
+ * in a stream that git-fast-import can consume.
+ *
+ * Author: Chris Lee
+ * License: MIT
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifndef PATH_MAX
+#define PATH_MAX 4096
+#endif
+
+#include <apr_general.h>
+#include <apr_hash.h>
+#include <apr_tables.h>
+
+#include <svn_error.h>
+#include <svn_fs.h>
+#include <svn_io.h>
+#include <svn_pools.h>
+#include <svn_repos.h>
+#include <svn_string.h>
+#include <svn_types.h>
+
+/*
+ * Route every failed Subversion call through SVN_INT_ERR, which reports the
+ * error on stderr and makes the enclosing function return EXIT_FAILURE.
+ */
+#undef SVN_ERR
+#define SVN_ERR(expr) SVN_INT_ERR(expr)
+#define apr_sane_push(arr, contents) *(char **)apr_array_push(arr) = contents
+
+#define TRUNK "/trunk/"
+
+/*
+ * Write the contents of FULL_PATH under ROOT to stdout as a fast-import
+ * "data" section.  Returns 0 on success and non-zero on failure.
+ */
+int dump_blob(svn_fs_root_t *root, char *full_path, apr_pool_t *pool)
+{
+    svn_filesize_t stream_length;
+    svn_stream_t *stream;
+    apr_size_t len;
+    char buf[4096];
+
+    SVN_ERR(svn_fs_file_length(&stream_length, root, full_path, pool));
+    SVN_ERR(svn_fs_file_contents(&stream, root, full_path, pool));
+
+    fprintf(stdout, "data %" SVN_FILESIZE_T_FMT "\n", stream_length);
+
+    /*
+     * buf holds raw bytes, not a NUL-terminated string: write exactly the
+     * number of bytes the stream returned so binary content survives.
+     */
+    do {
+        len = sizeof(buf);
+        SVN_ERR(svn_stream_read(stream, buf, &len));
+        if (len && fwrite(buf, 1, len, stdout) != len) {
+            return EXIT_FAILURE;
+        }
+    } while (len);
+
+    fprintf(stdout, "\n");
+
+    return 0;
+}
+
+/*
+ * Emit the fast-import commands for revision REV of FS on stdout.  Only
+ * files below TRUNK are exported; a revision that touches none is skipped.
+ * Returns 0 on success and non-zero if a Subversion call fails.
+ */
+int export_revision(svn_revnum_t rev, svn_repos_t *repo, svn_fs_t *fs, apr_pool_t *pool)
+{
+    apr_array_header_t *file_changes;
+    apr_hash_t *changes, *props;
+    apr_hash_index_t *i;
+    apr_pool_t *revpool;
+
+    svn_fs_path_change_t *change;
+    svn_fs_root_t *root_obj;
+    svn_boolean_t is_dir;
+
+    char *path;
+    unsigned int mark;
+    const void *key;
+    void *val;
+
+    fprintf(stderr, "Committing revision %li... ", rev);
+
+    SVN_ERR(svn_fs_revision_root(&root_obj, fs, rev, pool));
+    SVN_ERR(svn_fs_paths_changed(&changes, root_obj, pool));
+    SVN_ERR(svn_fs_revision_proplist(&props, fs, rev, pool));
+
+    revpool = svn_pool_create(pool);
+
+    file_changes = apr_array_make(pool, apr_hash_count(changes), sizeof(char *));
+    mark = 1;
+    for (i = apr_hash_first(pool, changes); i; i = apr_hash_next(i)) {
+        svn_pool_clear(revpool);
+        apr_hash_this(i, &key, NULL, &val);
+        path = (char *)key;
+        change = (svn_fs_path_change_t *)val;
+
+        SVN_ERR(svn_fs_is_dir(&is_dir, root_obj, path, revpool));
+
+        if (is_dir || strncmp(TRUNK, path, strlen(TRUNK))) {
+            continue;
+        }
+
+        if (change->change_kind == svn_fs_path_change_delete) {
+            fprintf(stderr, " delete\n");
+            apr_sane_push(file_changes, (char *)svn_string_createf(pool, "D %s", path + strlen(TRUNK))->data);
+            fprintf(stderr, " done\n");
+        } else {
+            apr_sane_push(file_changes, (char *)svn_string_createf(pool, "M 644 :%u %s", mark, path + strlen(TRUNK))->data);
+            fprintf(stdout, "blob\nmark :%u\n", mark++);
+            // dump_blob(root_obj, (char *)path, revpool);
+        }
+    }
+
+    if (file_changes->nelts == 0) {
+        fprintf(stderr, "skipping.\n");
+        svn_pool_destroy(revpool);
+        return 0;
+    }
+
+    fprintf(stdout, "commit refs/heads/master\n");
+    /* Paths may contain '%', so never use them as a printf format. */
+    fputs(apr_array_pstrcat(pool, file_changes, '\n'), stdout);
+    fprintf(stdout, "\n\n");
+
+    svn_pool_destroy(revpool);
+
+    fprintf(stderr, "done!\n");
+
+    return 0;
+}
+
+/*
+ * Open the repository at REPOS_PATH and export every revision from 1 up to
+ * the youngest.  Returns 0 on success and non-zero on the first failure.
+ */
+int crawl_revisions(char *repos_path)
+{
+    apr_pool_t *pool, *subpool;
+    svn_repos_t *repos;
+    svn_revnum_t youngest_rev, min_rev, max_rev, rev;
+    svn_fs_t *fs;
+    int status = 0;
+
+    pool = svn_pool_create(NULL);
+
+    /* The FS library must be initialized before any FS object is created. */
+    SVN_ERR(svn_fs_initialize(pool));
+    SVN_ERR(svn_repos_open(&repos, repos_path, pool));
+
+    fs = svn_repos_fs(repos);
+
+    SVN_ERR(svn_fs_youngest_rev(&youngest_rev, fs, pool));
+
+    min_rev = 1;
+    max_rev = youngest_rev;
+
+    subpool = svn_pool_create(pool);
+    for (rev = min_rev; rev <= max_rev; rev++) {
+        svn_pool_clear(subpool);
+        status = export_revision(rev, repos, fs, subpool);
+        if (status != 0) {
+            break;
+        }
+    }
+
+    svn_pool_destroy(pool);
+
+    return status;
+}
+
+/* Entry point: export the repository named by the single argument. */
+int main(int argc, char *argv[])
+{
+    int status;
+
+    if (argc != 2) {
+        fprintf(stderr, "usage: %s REPOS_PATH\n", argv[0]);
+        return -1;
+    }
+
+    if (apr_initialize() != APR_SUCCESS) {
+        fprintf(stderr, "You lose at apr_initialize().\n");
+        return -1;
+    }
+
+    status = crawl_revisions(argv[1]);
+
+    apr_terminate();
+
+    return status;
+}
diff --git a/svn-fast-export.py b/svn-fast-export.py
new file mode 100755
index 0000000..176973b
--- /dev/null
+++ b/svn-fast-export.py
@@ -0,0 +1,182 @@
+#!/usr/bin/python
+#
+# svn-fast-export.py
+# ----------
+# Walk through each revision of a local Subversion repository and export it
+# in a stream that git-fast-import can consume.
+#
+# Author: Chris Lee
+# License: MIT
+
+trunk_path = '/trunk/'
+branches_path = '/branches/'
+tags_path = '/tags/'
+
+first_rev = 1
+final_rev = 0
+
+import gc, sys, os.path
+from optparse import OptionParser
+from time import sleep, mktime, localtime, strftime, strptime
+from svn.fs import svn_fs_dir_entries, svn_fs_file_length, svn_fs_file_contents, svn_fs_is_dir, svn_fs_revision_root, svn_fs_youngest_rev, svn_fs_revision_proplist, svn_fs_revision_prop, svn_fs_paths_changed
+from svn.core import svn_pool_create, svn_pool_clear, svn_pool_destroy, svn_stream_read, svn_stream_close, run_app
+from svn.repos import svn_repos_open, svn_repos_fs
+
+# Short codes indexed by the change_kind of a changed path: modify, add,
+# delete, replace, reset.
+ct_short = ['M', 'A', 'D', 'R', 'X']
+
+def dump_file_blob(root, full_path, pool):
+    """Write the contents of full_path under root to stdout as a
+    fast-import 'data' section."""
+    # Use an iteration subpool.
+    subpool = svn_pool_create(pool)
+
+    # Clear the iteration subpool.
+    svn_pool_clear(subpool)
+
+    # print full_path
+    stream_length = svn_fs_file_length(root, full_path, subpool)
+    stream = svn_fs_file_contents(root, full_path, subpool)
+    contents = svn_stream_read(stream, int(stream_length))
+    sys.stdout.write("data %s\n" % stream_length)
+    sys.stdout.write(contents)
+    sys.stdout.write("\n")
+
+    # Destroy the iteration subpool.
+    svn_pool_destroy(subpool)
+
+
+def export_revision(rev, repo, fs, pool):
+    """Emit the fast-import commands for revision rev of fs on stdout.
+
+    Only files below trunk_path are exported; a revision that touches none
+    of them is skipped.  Progress is reported on stderr.
+    """
+    sys.stderr.write("Committing revision %s... " % rev)
+
+    revpool = svn_pool_create(pool)
+    svn_pool_clear(revpool)
+
+    # Open a root object representing this revision.
+    root = svn_fs_revision_root(fs, rev, revpool)
+
+    # And the list of what changed in this revision.
+    changes = svn_fs_paths_changed(root, revpool)
+
+    i = 1
+    marks = {}
+    file_changes = []
+
+    for path, change_type in changes.iteritems():
+        c_t = ct_short[change_type.change_kind]
+        if svn_fs_is_dir(root, path, revpool):
+            continue
+
+        if not path.startswith(trunk_path):
+            # We don't handle branches. Or tags. Yet.
+            continue
+
+        # Strip only the leading trunk prefix; str.replace() would also
+        # remove any later occurrence of it inside the path.
+        rel_path = path[len(trunk_path):]
+        if c_t == 'D':
+            file_changes.append("D %s" % rel_path)
+        else:
+            sys.stdout.write("blob\nmark :%s\n" % i)
+            marks[i] = rel_path
+            # dump_file_blob(root, path, revpool)
+            file_changes.append("M 644 :%s %s" % (i, marks[i]))
+            i += 1
+
+    # Get the commit author and message
+    props = svn_fs_revision_proplist(fs, rev, revpool)
+
+    # Build the committer identity from the revision's author property.
+    if 'svn:author' in props:
+        author = "%s <%s@localhost>" % (props['svn:author'], props['svn:author'])
+    else:
+        author = 'nobody <nobody@localhost>'
+
+    if not file_changes:
+        svn_pool_destroy(revpool)
+        sys.stderr.write("skipping.\n")
+        return
+
+    svndate = props['svn:date'][0:-8]
+    commit_time = mktime(strptime(svndate, '%Y-%m-%dT%H:%M:%S'))
+    sys.stdout.write("commit refs/heads/master\n")
+    #sys.stdout.write("committer %s %s -0000\n" % (author, int(commit_time)))
+    #sys.stdout.write("data %s\n" % len(props['svn:log']))
+    #sys.stdout.write(props['svn:log'])
+    #sys.stdout.write("\n")
+    sys.stdout.write('\n'.join(file_changes))
+    sys.stdout.write("\n\n")
+
+    svn_pool_destroy(revpool)
+    sys.stderr.write("done!\n")
+
+    #if rev % 1000 == 0:
+    #    sys.stderr.write("gc: %s objects\n" % len(gc.get_objects()))
+    #    sleep(5)
+
+
+def crawl_revisions(pool, repos_path):
+    """Open the repository at REPOS_PATH and export its revisions in order.
+
+    Revisions 1 through final_rev are exported; a final_rev of 0 selects
+    the youngest revision in the repository.
+    """
+    global final_rev
+
+    # Open the repository at REPOS_PATH, and get a reference to its
+    # versioning filesystem.
+    repos_obj = svn_repos_open(repos_path, pool)
+    fs_obj = svn_repos_fs(repos_obj)
+
+    # Query the current youngest revision.
+    youngest_rev = svn_fs_youngest_rev(fs_obj, pool)
+
+
+    first_rev = 1
+    if final_rev == 0:
+        final_rev = youngest_rev
+    for rev in xrange(first_rev, final_rev + 1):
+        export_revision(rev, repos_obj, fs_obj, pool)
+
+
+if __name__ == '__main__':
+    usage = '%prog [options] REPOS_PATH'
+    parser = OptionParser()
+    parser.set_usage(usage)
+    parser.add_option('-f', '--final-rev', help='Final revision to import',
+                      dest='final_rev', metavar='FINAL_REV', type='int')
+    parser.add_option('-t', '--trunk-path', help='Path in repo to /trunk',
+                      dest='trunk_path', metavar='TRUNK_PATH')
+    parser.add_option('-b', '--branches-path', help='Path in repo to /branches',
+                      dest='branches_path', metavar='BRANCHES_PATH')
+    parser.add_option('-T', '--tags-path', help='Path in repo to /tags',
+                      dest='tags_path', metavar='TAGS_PATH')
+    (options, args) = parser.parse_args()
+
+    if options.trunk_path is not None:
+        trunk_path = options.trunk_path
+    if options.branches_path is not None:
+        branches_path = options.branches_path
+    if options.tags_path is not None:
+        tags_path = options.tags_path
+    if options.final_rev is not None:
+        final_rev = options.final_rev
+
+    if len(args) != 1:
+        parser.print_help()
+        sys.exit(2)
+
+    # Canonicalize (enough for Subversion, at least) the repository path.
+    repos_path = os.path.normpath(args[0])
+    if repos_path == '.':
+        repos_path = ''
+
+    # Call the app-wrapper, which takes care of APR initialization/shutdown
+    # and the creation and cleanup of our top-level memory pool.
+    run_app(crawl_revisions, repos_path)