#!/usr/bin/env python
"""
fs2svn.py
by Andrew Shearer
Version 1.0. 2005-06-23.
http://www.shearersoftware.com/software/developers/fs2svn/
Email: ashearerw at shearersoftware dot com

Converts a structured set of directories, each containing one version of a
project (or just the changed files in that version), to a Subversion
repository. Depends on cvs2svn.

main() function (command parsing) based heavily on cvs2svn's.

Sample command lines:

This command line generates a dumpfile suitable for svnadmin load, fills in
MIME types from your Apache mime.types file, and suppresses native line ending
conversion. (Apache mime.types location is correct for Mac OS X, at least.)

python fs2svn.py --dumpfile=../svndumpfile.txt --dump-only --username=$USER --svnadmin=/usr/local/bin/svnadmin --no-default-eol --keywords-off --mime-types=/etc/httpd/mime.types --exclude="[.].*" --exclude="[.]DS_Store" --exclude="_vti_cnf" --ignore-deletes-in="(.*?) *delta" --ignore-deletes-in="(.*?) *part" --ignore-deletes-in="from +(.*)" --shorthand-folders=shorthand-folders.txt ../folder-with-many-revision-subfolders

Making a dumpfile is often useful (you may want to perform further processing
before importing it into a repository--beware of using a text editor, though,
because some, including BBEdit, aren't binary-safe and will silently normalize
line endings, destroying binary files.)

You can skip straight to the repository, though:

python fs2svn.py -s ../myrepository --fs-type=fsfs --username=$USER ... (continue as before)

Input Folder Structure
(names of the revision folders don't actually matter, except where they match
one of the --ignore-deletes-in patterns, marking them as "delta" folders)

2005-01-01              <== Full revision folder (no "delta")
    wwwroot             <== 'Shorthand' folder; name in SVN looked up in shorthandFolders dict below
        file1.html
        file2.html
    programs-intra      <== 'Shorthand' folder
        ...
2005-01-02delta         <== Delta revision folder ("delta"); only changed files need to be included (i.e. deletes are ignored)
    file1.html          <== No 'shorthand' folder name, so all contents are assumed to be in www (empty shorthand folder with config below)

If there's a shorthand folder config present, then there's another layer of
structure inside the revision folders. For each revision folder, either
1. All the children are shorthand folders, or
2. None of them are, and the default shorthand folder will be assumed as a parent

Sample ShorthandFolderMapper file (set with --shorthand-folders=)

# Format is dir-name-in-filesystem:repository path (under trunk)
#
# If there's no colon, the name and path are taken to be the same.
# If the first part is empty (line starts with a colon), the second part
# specifies a default repository path. If revision dir's subfolders don't
# match the other shorthand folders, it's assumed that all its contents were meant to be
# put under this default parent.
#
# Example: ":www" means that revision folders containing no shorthand subfolders
# will have their contents placed in /trunk/www/.
# "mssql:db" means that revision folders containing a direct "mssql" child
# will have that subfolder's contents placed in /trunk/db/.
# "programs-shared" means that programs-shared is recognized as a shorthand
# folder but the name is unchanged in the repository.
#
:www
wwwroot:www
www
programs-shared
mssql:db
db:db
wwwroot dev:www
wwwroot myproject:www
myproject:www

Copyright (c) 2005 Andrew Shearer.
License: MIT License (except for main() function; see below).

==
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
==

The command-line parsing code in main() is derived from cvs2svn. Accordingly,
here is cvs2svn's license. It applies only to the main() function. All other
code is governed by the Apache License.

===
This license applies to all portions of cvs2svn which are not
externally-maintained libraries (e.g. rcsparse). Such libraries have their own
licenses; we recommend you read them, as their terms may differ from the terms
below.

This is version 1 of this license. It is also available online at
http://subversion.tigris.org/license-1.html. If newer versions of this license
are posted there (the same URL, but with the version number incremented:
.../license-2.html, .../license-3.html, and so on), you may use a newer
version instead, at your option.

====================================================================
Copyright (c) 2000-2004 CollabNet. All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.

3. The end-user documentation included with the redistribution, if any, must
include the following acknowledgment: "This product includes software
developed by CollabNet (http://www.Collab.Net/)." Alternately, this
acknowledgment may appear in the software itself, if and wherever such
third-party acknowledgments normally appear.

4. The hosted project names must not be used to endorse or promote products
derived from this software without prior written permission. For written
permission, please contact info@collab.net.

5. Products derived from this software may not use the "Tigris" name nor may
"Tigris" appear in their names without prior written permission of CollabNet.

THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL COLLABNET
OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
====================================================================

This software consists of voluntary contributions made by many individuals on
behalf of CollabNet.
==
End of CollabNet license.
"""

import getopt
import os
import re
import StringIO
import sys

VERSION = "0.5"

import cvs2svn


class Fs2SvnError(Exception):
    """Raised when the input folder structure cannot be converted.

    Replaces the string exceptions the script used to raise; raising a plain
    string is itself a TypeError on Python 2.6 and later, which hid the
    intended message.
    """


# Here's where we fake out cvs2svn so it doesn't try to read from CVS.
# gCVSPathsToRealPaths, FakePipe, get_co_pipe are involved.

# Maps the fake "<repository path>,v" names handed to cvs2svn back to the real
# filesystem paths they were generated from (populated by fileChanged).
gCVSPathsToRealPaths = {}


class FakePipe:
    """Stand-in for the pipe object cvs2svn expects from get_co_pipe.

    Exposes `fromchild` (the stream to read file content from), an empty
    `childerr` stream and a `wait()` that always reports success.
    NOTE(review): presumably this mirrors the members cvs2svn reads from a
    real child-process pipe -- confirm against the cvs2svn version in use.
    """

    def __init__(self, stream):
        self.fromchild = stream
        self.childerr = StringIO.StringIO()

    def wait(self):
        """Return 0, i.e. pretend the (non-existent) child exited cleanly."""
        return 0


def get_co_pipe(c_rev, extra_arguments = ''):
    """Replacement for cvs2svn.get_co_pipe that reads straight from disk.

    cvs2svn's own version would call out to 'cvs' or 'co'.  The real-to-CVS
    path transformation isn't generally invertible, so the real path is looked
    up in gCVSPathsToRealPaths using c_rev.fname (the fake path with its
    trailing ",v").

    `extra_arguments` is accepted for signature compatibility and ignored.

    Returns a ('pipe', FakePipe) tuple wrapping the file opened in binary mode.
    Raises KeyError if the path was never registered by fileChanged, and
    Fs2SvnError if the registered path is a directory.
    """
    path = gCVSPathsToRealPaths[c_rev.fname]
    if os.path.isdir(path):
        raise Fs2SvnError(
            "Error: tried to get file content from a directory: %s" % path)
    return ('pipe', FakePipe(open(path, 'rb')))


cvs2svn.get_co_pipe = get_co_pipe

### Initialize placeholder databases, to placate cvs2svn's innards
# (creates/truncates the temp file so that it exists and is empty)
open(cvs2svn.temp(cvs2svn.SYMBOL_OPENINGS_CLOSINGS_SORTED), 'w').close()


class ShorthandFolderMapper:
    """Maps first-level folder names to repository paths under trunk.

    The mapping is read from a config file with one "fs-name:repos-path"
    entry per line (see the module docstring for a sample).  Blank lines and
    lines starting with '#' are skipped; a line without a colon maps the name
    to itself; an empty name (line starting with a colon) defines the default
    repository path.
    """

    def __init__(self, configFilename = None):
        """Load the mapping from configFilename; no file means an empty map."""
        self.name_to_repos_path = {}
        if not configFilename:
            return
        config = open(configFilename, 'rU')
        try:
            lines = config.readlines()
        finally:
            # the original left the handle for the garbage collector to close
            config.close()
        for line in lines:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            if ':' in line:
                fs_name, repos_path = line.split(':', 1)
            else:
                # no colon; fs_name and repos_path are the same
                fs_name = repos_path = line
            self.name_to_repos_path[fs_name.lower()] = repos_path

    def getReposPathForName(self, name):
        """
        Given a raw shorthand first-level directory name, such as "wwwroot",
        return the corresponding repository subpath inside trunk, such as
        "www", or None if the name is not a known shorthand folder.
        Names are case-insensitive.
        """
        return self.name_to_repos_path.get(name.lower())


def isExcluded(filename):
    """Return True if filename matches any of the --exclude patterns."""
    for excludeRE in cvs2svn.Ctx().excludes:
        if excludeRE.match(filename):
            return True
    return False


def ignoreDeletesIn(foldername):
    """
    Returns transformed folder name if deletes should be ignored inside it
    (meaning that it only stores deltas) or None if deletes should be
    respected.

    For example, depending on settings, "from wwwroot", "2005-01-01delta",
    and "wwwroot" might be returned as "wwwroot", "2005-01-01", and None,
    respectively.

    The transformed name is the first capture group of the first matching
    --ignore-deletes-in pattern.  When that pattern has no group, or its first
    group did not take part in the match or matched the empty string, the
    folder name is returned unchanged, so a matching folder never yields a
    false value (the original `match.groups(1)` test could hand back None or
    '' for a folder that did match).
    """
    for deltaFolderRE in cvs2svn.Ctx().ignore_deletes_in:
        match = deltaFolderRE.match(foldername)
        if match:
            if match.lastindex and match.group(1):
                return match.group(1)
            return foldername
    return None


def handleRevisionFolder(svn_commit, state, realpath, repospath, ignore_deletes):
    """Add the changes found in one revision folder to svn_commit.

    state is the tree state ({'type': 'd', 'items': {...}}) left by the
    previous revision; the updated state is returned.  When a shorthand folder
    mapper is configured, the folder's children must be either all shorthand
    folders or none (in which case everything goes under the default root
    folder, if one is configured); a mixture raises Fs2SvnError.
    """
    filenames = os.listdir(realpath)
    sys.stderr.write("Reading Folder: %s\n" % os.path.basename(realpath))

    # None means 'unknown so far'; can be set later to True or False
    prependDefaultRootFolder = None
    shorthandFolders = cvs2svn.Ctx().shorthand_folder_mapper
    defaultRootFolder = None
    if shorthandFolders:
        # figure out whether we need to use our shorthand folders or the
        # default root folder
        defaultRootFolder = shorthandFolders.getReposPathForName('')
        for filename in filenames:
            if isExcluded(filename):
                continue
            isShorthand = False
            if os.path.isdir(os.path.join(realpath, filename)):
                filename = ignoreDeletesIn(filename) or filename
                if shorthandFolders.getReposPathForName(filename):
                    isShorthand = True
            if isShorthand:
                if prependDefaultRootFolder is True:
                    raise Fs2SvnError("Error: a shorthand folder was found mixed with other content at %s." % realpath)
                prependDefaultRootFolder = False
            else:
                # plain files and unrecognized folders both count as
                # non-shorthand content
                if prependDefaultRootFolder is False:
                    raise Fs2SvnError("Error: a non-shorthand folder was found mixed with shorthand folders at %s." % realpath)
                prependDefaultRootFolder = True

    if prependDefaultRootFolder and defaultRootFolder:
        # include contents in a synthetic default folder
        substate = state['items'].get(defaultRootFolder, None)
        state['items'][defaultRootFolder] = checkFolder(
            svn_commit, substate, realpath,
            os.path.join(repospath, defaultRootFolder), ignore_deletes)
    elif not shorthandFolders:
        # include contents raw, without shorthand folders or a default folder
        return checkFolder(svn_commit, state, realpath, repospath, ignore_deletes)
    else:
        # contents must be all shorthand folders
        for filename in filenames:
            if isExcluded(filename):
                continue
            subrealpath = os.path.join(realpath, filename)
            if os.path.isfile(subrealpath):
                raise Fs2SvnError("Error: cannot determine correct parent for file %s -- please organize its parent folder correctly" % subrealpath)
            sub_ignore_deletes = ignore_deletes
            nameInRepos = ignoreDeletesIn(filename)
            if nameInRepos:
                sub_ignore_deletes = True
            else:
                nameInRepos = filename
            nameInRepos = shorthandFolders.getReposPathForName(nameInRepos) or nameInRepos
            subrepospath = os.path.join(repospath, nameInRepos)
            substate = state['items'].get(nameInRepos)
            state['items'][nameInRepos] = checkFolder(
                svn_commit, substate, subrealpath, subrepospath,
                sub_ignore_deletes)
    return state


def checkFolder(svn_commit, prev_state, realpath, repospath, ignore_deletes):
    """Recursively diff the folder at realpath against prev_state.

    Adds, changes and (unless ignore_deletes is true) deletions are appended
    to svn_commit via fileChanged/folderDeleted.  A file counts as changed
    when its modification time differs from the one recorded in prev_state.

    Returns the new state dict for the folder, or None when the folder itself
    was found to be deleted.
    """
    verbose = cvs2svn.Ctx().verbose
    if verbose > 1:
        sys.stderr.write(" Entering %s\n" % repospath)
    new_state = {'type': 'd', 'items': {}}
    if not os.path.isdir(realpath):
        if prev_state is not None and not ignore_deletes:
            sys.stderr.write(" *** Folder deleted (flagged in checkFolder): %s (not found at %s)\n" % (repospath, realpath))
            new_state = folderDeleted(svn_commit, realpath, repospath)
    else:
        if prev_state is None:
            new_state = folderAdded(svn_commit, realpath, repospath)
            prev_state = {'type': 'd', 'items': {}}
        prev_items = prev_state['items']
        new_items = new_state['items']

        for filename in os.listdir(realpath):
            if isExcluded(filename):
                continue
            realsubpath = os.path.join(realpath, filename)
            if os.path.isdir(realsubpath):
                # NOTE(review): a subfolder does not inherit this folder's
                # ignore_deletes flag; only the 'from ' prefix turns it on.
                # Behavior kept as in the original -- confirm it is intended
                # for subfolders of delta revision folders.
                if filename.startswith('from '):
                    # 'from xxx' folders contain just the changed subset of xxx
                    sub_ignore_deletes = True
                    filename = filename[5:]
                else:
                    sub_ignore_deletes = False
                new_items[filename] = checkFolder(
                    svn_commit, prev_items.get(filename), realsubpath,
                    os.path.join(repospath, filename), sub_ignore_deletes)
                continue

            sub_prev_state = prev_items.get(filename, None)
            if sub_prev_state is None:
                op = cvs2svn.OP_ADD
                sub_prev_state = {}
            elif os.stat(realsubpath).st_mtime == sub_prev_state.get('timestamp'):
                op = None  # file modtime unchanged; signal to ignore it
            else:
                op = cvs2svn.OP_CHANGE

            if op is not None:
                new_items[filename] = fileChanged(
                    svn_commit, realsubpath,
                    os.path.join(repospath, filename), op, sub_prev_state)
            else:
                # no change to file modtime; carry the old state forward
                new_items[filename] = sub_prev_state

        # handle files & folders newly deleted
        for filename in prev_items:
            old_item = prev_items[filename]
            if new_items.get(filename) is not None or old_item is None:
                continue
            if ignore_deletes:
                new_items[filename] = old_item
                continue
            subrealpath = os.path.join(realpath, filename)
            subrepospath = os.path.join(repospath, filename)
            if old_item['type'] == 'd':
                if verbose:
                    sys.stderr.write(" *** Folder deleted: %s (not found at %s)\n" % (subrepospath, subrealpath))
                new_items[filename] = folderDeleted(
                    svn_commit, subrealpath, subrepospath)
            else:
                if verbose:
                    sys.stderr.write(" *** File deleted: %s (not found at %s)\n" % (subrepospath, subrealpath))
                new_items[filename] = fileChanged(
                    svn_commit, subrealpath, subrepospath,
                    cvs2svn.OP_DELETE, old_item)

    if verbose > 1:
        sys.stderr.write(" Leaving %s\n" % repospath)
    return new_state


def folderAdded(svn_commit, realpath, repospath):
    """Return the initial (empty) state for a newly seen folder.

    Nothing is added to svn_commit here; the arguments are unused.
    """
    return {'type': 'd', 'items': {}}


def folderDeleted(svn_commit, realpath, repospath):
    """Record deletion of the folder at repospath; returns None (no state)."""
    fileChanged(svn_commit, realpath, repospath, cvs2svn.OP_DELETE, {})
    return None


def fileChanged(svn_commit, realpath, repospath, op, prev_state):
    """Add one add/change/delete revision for repospath to svn_commit.

    op is one of cvs2svn.OP_ADD, OP_CHANGE or OP_DELETE.  prev_state is the
    item's previous state dict (its 'timestamp', if any, is passed on as the
    previous timestamp).  The real path is registered in gCVSPathsToRealPaths
    so get_co_pipe can find the content later.

    Returns the item's new state dict, or None for a delete.
    Raises Fs2SvnError if realpath is not a regular file for an add/change.
    """
    if op == cvs2svn.OP_DELETE:
        timestamp = 0
        file_size = 0
    else:
        if not os.path.isfile(realpath):
            raise Fs2SvnError("*** Error: file expected; %s is not a file" % realpath)
        stat = os.stat(realpath)
        timestamp = stat.st_mtime
        file_size = stat.st_size
        if cvs2svn.Ctx().verbose:
            if op == cvs2svn.OP_ADD:
                sys.stderr.write(" * File added: %s (from %s)\n" % (repospath, realpath))
            else:
                sys.stderr.write(" Changing %s\n" % repospath)
    digest = ''

    prev_timestamp = prev_state.get('timestamp', 0)
    prev_rev = rev = ''
    next_rev = None
    deltatext_code = 'N'
    # cvs2svn is handed a fake RCS-style name; remember what it stands for
    fname = repospath + ',v'
    gCVSPathsToRealPaths[fname] = realpath
    mode = branch_name = None
    tags = branches = None

    c_rev = cvs2svn.CVSRevision(cvs2svn.Ctx(), timestamp, digest,
                                prev_timestamp, op, prev_rev, rev, next_rev,
                                None, None,  # file_in_attic, file_executable
                                file_size, deltatext_code, fname, mode,
                                branch_name, tags, branches)
    svn_commit.add_revision(c_rev)

    if op == cvs2svn.OP_DELETE:
        return None
    return {'timestamp': timestamp, 'type': 'f'}


def run(realpath, repospath = ''):
    """Convert every non-excluded revision folder directly inside realpath.

    Revision folders are processed in sorted name order so that the commit
    sequence does not depend on the arbitrary order os.listdir returns.
    """
    foldernames = os.listdir(realpath)
    foldernames.sort()
    folderList = []
    for foldername in foldernames:
        fullpath = os.path.join(realpath, foldername)
        if os.path.isdir(fullpath) and not isExcluded(foldername):
            ignore_deletes = ignoreDeletesIn(foldername) is not None
            folderList.append((fullpath, ignore_deletes))
    handleRevisionFolderList(folderList, repospath)


def handleRevisionFolderList(realpath_list, repospath = ''):
    """Commit each revision folder, in list order, as one Subversion revision.

    realpath_list is a list of (path, ignore_deletes) tuples.
    repospath is the parent path in the repository, typically the empty string.
    Each commit uses the folder's base name as its log message and
    ctx.username as its author.
    """
    svncounter = 2  # Repository initialization is 1.
    repos = cvs2svn.SVNRepositoryMirror()
    state = {'items': {}, 'type': 'd'}
    ctx = cvs2svn.Ctx()

    if not ctx.dry_run:
        if ctx.target:
            repos.add_delegate(cvs2svn.RepositoryDelegate())
        else:
            repos.add_delegate(cvs2svn.DumpfileDelegate())
    repos.add_delegate(cvs2svn.StdoutDelegate(svncounter))

    for realpath, ignore_deletes in realpath_list:
        revision_name = os.path.basename(realpath)
        svn_commit = cvs2svn.SVNCommit("commit " + revision_name)
        svn_commit.set_author(ctx.username)
        svn_commit.set_log_msg(revision_name)
        state = handleRevisionFolder(svn_commit, state, realpath, repospath,
                                     ignore_deletes)
        repos.commit(svn_commit)
        svncounter += 1

    repos.finish()


def main():
    """Parse the command line into cvs2svn's Ctx and run the conversion."""
    # This main() function is mostly copied from cvs2svn; there didn't seem
    # to be a way to call the original to get its argument parsing without
    # also requiring argv[0] to point to a CVS repository.
    ctx = cvs2svn.Ctx()
    ctx.verbose = 0
    ctx.shorthand_file = None
    ctx.shorthand_folder_mapper = None
    ctx.ignore_deletes_in = []
    # the original referenced a bare, undefined `error_prefix` below
    error_prefix = cvs2svn.error_prefix

    try:
        opts, args = getopt.getopt(sys.argv[1:], 's:hv',
                                   ["help", "create", "trunk=", "username=",
                                    "existing-svnrepos", "branches=", "tags=",
                                    "encoding=", "force-branch=",
                                    "force-tag=", "exclude=", "use-cvs",
                                    "mime-types=", "eol-from-mime-type",
                                    "no-default-eol", "trunk-only",
                                    "no-prune", "dry-run", "dump-only",
                                    "dumpfile=", "tmpdir=", "svnadmin=",
                                    "skip-cleanup",  # "cvs-revnums",
                                    "bdb-txn-nosync", "fs-type=", "version",
                                    "profile", "keywords-off",
                                    "symbol-transform=", "verbose",
                                    "shorthand-folders=",
                                    "ignore-deletes-in="])
    except getopt.GetoptError:
        # sys.exc_info() keeps this line valid on every Python version
        e = sys.exc_info()[1]
        sys.stderr.write(error_prefix + ': ' + str(e) + '\n\n')
        cvs2svn.usage()
        sys.exit(1)

    for opt, value in opts:
        if opt == '--version':
            sys.stdout.write('%s version %s\n'
                             % (os.path.basename(sys.argv[0]), VERSION))
            sys.exit(0)
        elif (opt == '--help') or (opt == '-h'):
            ctx.print_help = 1
        elif opt == '-s':
            ctx.target = value
        elif opt == '--existing-svnrepos':
            ctx.existing_svnrepos = 1
        elif opt == '--dumpfile':
            ctx.dumpfile = value
        elif opt == '--tmpdir':
            ctx.tmpdir = value
        elif opt == '--use-cvs':
            ctx.use_cvs = 1
        elif opt == '--svnadmin':
            ctx.svnadmin = value
        elif opt == '--trunk-only':
            ctx.trunk_only = 1
        elif opt == '--trunk':
            if not value:
                sys.exit("%s: cannot pass an empty path to %s."
                         % (error_prefix, opt))
            ctx.trunk_base = value
        elif opt == '--branches':
            if not value:
                sys.exit("%s: cannot pass an empty path to %s."
                         % (error_prefix, opt))
            ctx.branches_base = value
        elif opt == '--tags':
            if not value:
                sys.exit("%s: cannot pass an empty path to %s."
                         % (error_prefix, opt))
            ctx.tags_base = value
        elif opt == '--no-prune':
            ctx.prune = None
        elif opt == '--dump-only':
            ctx.dump_only = 1
        elif opt == '--dry-run':
            ctx.dry_run = 1
        elif opt == '--encoding':
            ctx.encoding = value
        elif opt == '--force-branch':
            ctx.forced_branches.append(value)
        elif opt == '--force-tag':
            ctx.forced_tags.append(value)
        elif opt == '--exclude':
            try:
                ctx.excludes.append(re.compile('^' + value + '$'))
            except re.error:
                sys.exit(error_prefix + ": '%s' is not a valid regexp.\n" % (value))
        elif opt == '--mime-types':
            ctx.mime_types_file = value
        elif opt == '--eol-from-mime-type':
            ctx.eol_from_mime_type = 1
        elif opt == '--no-default-eol':
            ctx.no_default_eol = 1
        elif opt == '--keywords-off':
            ctx.keywords_off = 1
        elif opt == '--username':
            ctx.username = value
        elif opt == '--bdb-txn-nosync':
            ctx.bdb_txn_nosync = 1
        elif opt == '--fs-type':
            ctx.fs_type = value
        elif opt == '--symbol-transform':
            ctx.symbol_transforms.append(value.split(":"))
        elif opt == '--verbose' or opt == '-v':
            ctx.verbose += 1
        elif opt == '--shorthand-folders':
            ctx.shorthand_file = value
        elif opt == '--ignore-deletes-in':
            try:
                ctx.ignore_deletes_in.append(re.compile('^' + value + '$'))
            except re.error:
                sys.exit(error_prefix + ": '%s' is not a valid regexp.\n" % (value))

    if ctx.print_help:
        cvs2svn.usage()
        sys.exit(0)

    # Without these two checks a missing argument surfaced as an IndexError
    # (no input directory) or a TypeError (no -s target to test on disk).
    if not args:
        sys.stderr.write(error_prefix
                         + ": must pass the directory containing the "
                         "revision folders.\n")
        cvs2svn.usage()
        sys.exit(1)

    if not ctx.target and not ctx.dump_only and not ctx.dry_run:
        sys.stderr.write(error_prefix
                         + ": must pass one of '-s' or '--dump-only'.\n")
        sys.exit(1)

    if ctx.existing_svnrepos and not (ctx.target
                                      and os.path.isdir(ctx.target)):
        sys.stderr.write(error_prefix
                         + ": the svn-repos-path '%s' is not an "
                         "existing directory.\n" % ctx.target)
        sys.exit(1)

    if not ctx.dump_only and not ctx.existing_svnrepos \
       and (not ctx.dry_run) and os.path.exists(ctx.target):
        sys.stderr.write(error_prefix
                         + ": the svn-repos-path '%s' exists.\nRemove it, or pass "
                         "'--existing-svnrepos'.\n" % ctx.target)
        sys.exit(1)

    if ctx.mime_types_file:
        ctx.mime_mapper = cvs2svn.MimeMapper()
        ctx.mime_mapper.set_mime_types_file(ctx.mime_types_file)

    if ctx.shorthand_file:
        ctx.shorthand_folder_mapper = ShorthandFolderMapper(ctx.shorthand_file)

    ctx.cvsroot = ''
    run(args[0], '')


if __name__ == '__main__':
    main()