The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package SVN::Dump::Replayer::Git;
BEGIN {
  $SVN::Dump::Replayer::Git::VERSION = '1.000';
}

{
	# TODO - Refactor into its own class?
	# It feels odd making an entire class for a data structure.
	package SVN::Dump::Replayer::Git::Author;
BEGIN {
  $SVN::Dump::Replayer::Git::Author::VERSION = '1.000';
}
	use Moose;
	has name  => ( is => 'ro', isa => 'Str', required => 1 );
	has email => ( is => 'ro', isa => 'Str', required => 1 );
	1;
}

{
	# TODO - Refactor into its own class?
	# It feels odd making an entire class for a data structure.
	package GitTag;
BEGIN {
  $GitTag::VERSION = '1.000';
}
	use Moose;
	has revision => ( is => 'ro', isa => 'SVN::Dump::Revision', required => 1 );
}

use Moose;
extends 'SVN::Dump::Replayer';
use Carp qw(croak cluck);
use File::Path qw(mkpath);

has authors_file    => ( is => 'ro', isa => 'Maybe[Str]' );
has authors => (
	is => 'rw',
	isa => 'HashRef[SVN::Dump::Replayer::Git::Author]',
);

has files_needing_add => (
	is => 'rw',
	isa => 'HashRef',
	default => sub { {} }
);

has directories_needing_add => (
	is => 'rw',
	isa => 'HashRef',
	default => sub { {} }
);

has needs_commit => ( is => 'rw', isa => 'Bool', default => 0 );

has revisions_between_gc => ( is => 'ro', isa => 'Int', default => 1000 );
has revisions_until_gc => ( is => 'rw', isa => 'Int', default => 1000 );

has tags => ( is => 'rw', isa => 'HashRef[GitTag]', default => sub { {} } );

has current_branch => ( is => 'rw', isa => 'Str',  default => 'master' );
has current_rw     => ( is => 'rw', isa => 'Bool', default => 1 );

###

after on_revision_done => sub {
	my ($self, $revision_id) = @_;
	my $final_revision = $self->arborist()->pending_revision();
	$self->git_commit($final_revision);

	# Changes are done.  Remember any copy sources that pull from this
	# revision.  For git, a copy source is a revision SHA1 and
	# branch-relative path.
	#
	# TODO - If the copy source is only used to "branch" or "tag"
	# something, then we can rename the branch or tag instead of saving
	# a copy here.
	#
	# TODO - Git can copy files & directories across branches.  Do we
	# even need the tarballs?

	$self->push_dir($self->replay_base());

	COPY: foreach my $copy_source_obj (
		$self->arborist()->get_copy_sources_for_revision($revision_id)
	) {
		my $cps_kind = $copy_source_obj->kind();
		my $cps_path = $copy_source_obj->src_path();

		$self->log("CPY) saving $cps_kind $cps_path for later.");

		# Switch to the copy source branch.
		my $src_info_method = "get_" . $cps_kind . "_analysis_info";
		my $src_dir_info = $self->arborist()->$src_info_method(
			$revision_id, $cps_path
		);

		$self->set_branch(
			$final_revision,
			$src_dir_info->ent_type(),
			$src_dir_info->ent_name(),
		);

		my $relative_src_path = $src_dir_info->fix_path($cps_path);
		$relative_src_path = "." unless length $relative_src_path;

		# Get the copy depot information, based on absolute path/rev tuples.
		my ($copy_depot_id, $copy_depot_path) = $self->calculate_depot_info(
			$cps_path, $revision_id
		);

		# Tarball a directory.
		if ($cps_kind eq "dir") {
			$copy_depot_path .= ".tar.gz";
			$self->log(
				"CPY) Saving directory $relative_src_path in: $copy_depot_path"
			);
			$self->push_dir($relative_src_path);
			$self->do_or_die("tar", "czf", $copy_depot_path, ".");
			$self->pop_dir();
			next COPY;
		}

		$self->log("CPY) Saving file $relative_src_path in: $copy_depot_path");
		$self->copy_file_or_die($relative_src_path, $copy_depot_path);
		next COPY;
	}

	$self->pop_dir();
};

# Analysis is generic for Subversion.  Map entity names to Git
# specific ones.

before on_walk_begin => sub {
	my $self = shift;

	# Remove from consideration all copy sources that create entities.
	# Branch and tag creation doesn't really copy files.

	SOURCE: foreach my $source ($self->arborist()->get_all_copy_sources()) {

		# Only directories can be entities, so skip everything else.
		next SOURCE if $source->kind() ne "dir";

		COPY: foreach my $copy (
			$self->arborist()->get_all_copies_for_src($source)
		) {

			my $destination_dir_info = $self->arborist()->get_dir_analysis_info(
				$copy->dst_rev(),
				$copy->dst_path(),
			);

			next COPY unless (
				defined($destination_dir_info) and $destination_dir_info->is_entity()
			);

			$self->arborist()->ignore_copy($copy);
		}
	}
};

after on_walk_begin => sub {
	my $self = shift;

	# Set up authors mapping.
	if (defined $self->authors_file()) {
		# Initialize it.  Probably can use Moose to tell us it's been set.
		$self->authors({});

		open my $fh, "<", $self->authors_file() or confess $!;
		while (<$fh>) {
			my ($nick, $name, $email) = (/^\s*([^=]*?)\s*=\s*([^<]*?)\s*<(\S+?)>/);

			$name = $nick unless defined $name and length $name;

			$self->authors()->{$nick} = SVN::Dump::Replayer::Git::Author->new(
				name  => $name,
				email => $email,
			);
		}
	}

	$self->do_rmdir($self->replay_base()) if -e $self->replay_base();
	$self->do_mkdir($self->replay_base());

	$self->push_dir($self->replay_base());
	$self->do_or_die("git", "init", ($self->verbose() ? () : ("-q")));

	# Perform an initial commit so that the master branch is ready.
	# Needed in case the repository branches right away.
	# TODO - Detect when needed, and only use then.
	my $initial_file = "created_by_snerp_vortex.txt";
	open my $fh, ">", $initial_file or die $!;
	print $fh "This repository was created by Snerp Vortex.\n";
	close $fh;
	$self->do_or_die("git", "add", "-f", $initial_file);
	$self->needs_commit(1);

	$self->pop_dir();
};

sub on_branch_directory_creation {
	my ($self, $change, $revision) = @_;

	$self->push_dir($self->replay_base());

	# Current master branch.
	$self->set_branch($revision, $change->entity_type(), $change->entity_name());

	my $new_branch_name = $change->entity_name();

	# New branch is "master"?  But we already have that.
	# Switch over, rather than re-create (and fail).
	# TODO - I don't like this special case.  How to get rid of it?
	if ($new_branch_name eq "master") {
		$self->do_or_die("git", "checkout", "-q", $new_branch_name);
	}
	else {
		$self->do_or_die("git", "checkout", "-q", "-b", $new_branch_name);
	}

	$self->current_branch($new_branch_name);

	$self->pop_dir();
}

sub on_branch_directory_copy {
	my ($self, $change, $revision) = @_;

	# Branches must be created from containers.

	# TODO - Subversion supports "silly" things like branching and
	# tagging subdirectories within entities.
	# TODO - At the moment, the best we can do is tag or branch the
	# entire containing entity.
	# TODO - Consider identifying subdirectories that are treated like
	# sub-branches and mapping them to proper branches.  Then they can
	# be tagged as proper entities.

	$self->log(
		"GIT) creating branch from ", $change->src_path(),
		" to ", $change->path()
	);

	$self->push_dir($self->replay_base());
	$self->set_branch(
		$revision,
		$change->src_entity_type(),
		$change->src_entity_name()
	);

	my $new_branch_name = $change->entity_name();
	$self->do_or_die("git", "checkout", "-q", "-b", $new_branch_name);
	$self->current_branch($new_branch_name);
	$self->pop_dir();
	return;
}

sub on_directory_copy {
	my ($self, $change, $revision) = @_;
	$self->push_dir($self->replay_base());
	$self->set_branch(
		$revision,
		$change->entity_type(),
		$change->entity_name()
	);

	#my $dst_path = $self->arborist()->calculate_relative_path($change->path());
	my $dst_path = $change->rel_path();
	$self->do_directory_copy($change, $revision, $dst_path);
	$self->directories_needing_add()->{$dst_path} = 1;
	$self->pop_dir();
}

sub on_directory_creation {
	my ($self, $change, $revision) = @_;
	$self->push_dir($self->replay_base());
	$self->set_branch($revision, $change->entity_type(), $change->entity_name());
	$self->do_mkdir($change->rel_path());
	$self->pop_dir();
}

sub on_directory_deletion {
	my ($self, $change, $revision) = @_;

	# TODO - Doesn't need a commit if $rel_path is a directory that
	# contains no files.
	#   1. find $rel_path -type f
	#   2. If anything comes up, then we need a commit.
	#   3. Otherwise, we don't need one on account of this.

	# First try git rm, to remove from the repository.
	$self->push_dir($self->replay_base());
	$self->set_branch($revision, $change->entity_type(), $change->entity_name());

	my $rm_path = $change->rel_path();
	confess "can't remove nonexistent directory $rm_path" unless -e $rm_path;

	$self->git_env_setup($revision);

	$self->do_sans_die(
		"git", "rm", "-r", "--ignore-unmatch", "-f", "-q", "--",
		$rm_path,
	);

	# Second, try a plain filesystem remove in case the file hasn't yet
	# been staged.  Since git-rm may have removed any number of parent
	# directories for $rel_path, we only try to rmtree() if it still
	# exists.

	$self->do_rmdir($rm_path) if -e $rm_path;

	# Git cleans up directories; svn assumes they exist.
	$self->ensure_parent_dir_exists($rm_path);

	delete $self->directories_needing_add()->{$rm_path};
	$self->needs_commit(1);

	$self->pop_dir();
}

sub on_branch_directory_deletion {
	my ($self, $change, $revision) = @_;

	$self->push_dir($self->replay_base());

	my $branch_to_delete = $change->entity_name();

	# Get off the branch if we're deleting the one we're on.
	if ($branch_to_delete eq $self->current_branch()) {
		my $escape_dir_info = $self->arborist()->get_dir_analysis_info(
			$revision->id(),
			""
		);

		$self->set_branch(
			$revision,
			$escape_dir_info->ent_type(),
			$escape_dir_info->ent_name(),
		);
	}

	$self->git_env_setup($revision);
	$self->do_or_die("git", "branch", "-D", $change->entity_name());
	$self->pop_dir();
}

sub on_file_change {
	my ($self, $change, $revision) = @_;
	$self->push_dir($self->replay_base());
	$self->set_branch($revision, $change->entity_type(), $change->entity_name());
	my $rewrite_path = $change->rel_path();

	if ($self->rewrite_file($change, $rewrite_path)) {
		$self->files_needing_add()->{$rewrite_path} = 1;
	}
	$self->pop_dir();
}

sub on_file_copy {
	my ($self, $change, $revision) = @_;
	$self->push_dir($self->replay_base());
	$self->set_branch($revision, $change->entity_type(), $change->entity_name());

	my $dst_path = $change->rel_path();

	$self->do_file_copy($change, $revision);
	$self->files_needing_add()->{$dst_path} = 1;
	$self->pop_dir();
}

sub on_file_creation {
	my ($self, $change, $revision) = @_;
	$self->push_dir($self->replay_base());
	$self->set_branch($revision, $change->entity_type(), $change->entity_name());
	my $create_path = $change->rel_path();

	$self->write_new_file($change, $create_path);
	$self->files_needing_add()->{$create_path} = 1;
	$self->pop_dir();
}

sub on_file_deletion {
	my ($self, $change, $revision) = @_;

	$self->push_dir($self->replay_base());
	$self->set_branch($revision, $change->entity_type(), $change->entity_name());

	my $rm_path = $change->rel_path();
	confess "can't remove nonexistent file $rm_path" unless -e $rm_path;

	$self->git_env_setup($revision);

	$self->do_sans_die(
		"git", "rm", "-r", "--ignore-unmatch", "-f", "-q", "--",
		$rm_path,
	);

	# git-rm doesn't always remove the files right away.
	$self->do_rmdir($rm_path) if -e $rm_path;

	$self->ensure_parent_dir_exists($rm_path);
	$self->pop_dir();

	delete $self->files_needing_add()->{$rm_path};
	$self->needs_commit(1);
}

sub on_tag_directory_copy {
	my ($self, $change, $revision) = @_;

	$self->git_commit($revision);

	my $tag_name = $change->entity_name();

	$self->push_dir($self->replay_base());
	$self->set_branch(
		$revision,
		$change->src_entity_type(),
		$change->src_entity_name()
	);

	$self->git_env_setup($revision);

	$self->pipe_into_or_die($revision->message(), "git tag -a -F - $tag_name");

	$self->pop_dir();

	$self->log("TAG) setting tag $tag_name = $revision");
	$self->tags()->{$tag_name} = $revision;
}

sub on_tag_directory_creation {
	my ($self, $change, $revision) = @_;

	$self->git_commit($revision);

	my $tag_name = $change->entity_name();
	$self->push_dir($self->replay_base());
	$self->set_branch($revision, $change->entity_type(), $change->entity_name());

	$self->git_env_setup($revision);

	$self->pipe_into_or_die($revision->message(), "git tag -a -F - $tag_name");
	$self->pop_dir();

	$self->log("TAG) setting tag $tag_name = $revision");
	$self->tags()->{$tag_name} = $revision;
}

sub on_tag_directory_deletion {
	my ($self, $change, $revision) = @_;

	# Tag deletion is out of band.
	$self->push_dir($self->replay_base());
	$self->git_env_setup($revision);
	$self->do_or_die("git", "tag", "-d", $change->entity_name());
	$self->pop_dir();

	$self->log("TAG) deleting tag ", $change->entity_name());
	delete $self->tags()->{$change->entity_name()};
}

sub on_file_rename {
	my ($self, $change, $revision) = @_;
	$self->push_dir($self->replay_base());
	$self->set_branch($revision, $change->entity_type(), $change->entity_name());

	confess "target of file rename (", $change->rel_path(), ") exists" if (
		-e $change->rel_path()
	);

	$self->git_env_setup($revision);

	$self->do_sans_die(
		"git", "mv", $change->src_rel_path(), $change->rel_path()
	) or rename(
		$change->rel_src_path(), $change->rel_path()
	) or confess(
		"file rename from ", $change->rel_src_path(),
		" to ", $change->rel_path(),
		" failed: $!"
	);

	$self->ensure_parent_dir_exists($change->src_rel_path());
	$self->pop_dir();
	$self->needs_commit(1);
}

sub on_rename {
	my ($self, $change, $revision) = @_;
	$self->push_dir($self->replay_base());
	$self->set_branch($revision, $change->entity_type(), $change->entity_name());

	confess "target of rename (", $change->rel_path(), ") already exists" if (
		-e $change->rel_path()
	);

	$self->git_env_setup($revision);

	$self->do_sans_die(
		"git", "mv", $change->src_rel_path(), $change->rel_path()
	) or rename(
		$change->src_rel_path(), $change->rel_path()
	) or confess(
		"rename from ", $change->src_rel_path(),
		" to ", $change->rel_path(),
		" failed: $!"
	);

	$self->ensure_parent_dir_exists($change->src_rel_path());
	$self->pop_dir();
	$self->needs_commit(1);
}

sub on_directory_rename {
	my ($self, $change, $revision) = @_;
	$self->push_dir($self->replay_base());
	$self->set_branch($revision, $change->entity_type(), $change->entity_name());

	confess "target of dir rename (", $change->rel_path(), ") already exists" if (
		-e $change->rel_path()
	);

	$self->do_sans_die(
		"git", "mv", $change->src_rel_path(), $change->rel_path()
	) or rename(
		$change->src_rel_path(), $change->rel_path()
	) or confess(
		"directory rename from ", $change->src_rel_path(),
		" to ", $change->rel_path(),
		" failed: $!"
	);

	$self->ensure_parent_dir_exists($change->src_rel_path());
	$self->pop_dir();
	$self->needs_commit(1);
}

sub on_branch_rename {
	my ($self, $change, $revision) = @_;

	$self->push_dir($self->replay_base());
	$self->git_env_setup($revision);
	$self->do_or_die(
		"git", "branch", "-m",
		$change->src_entity_name(),
		$change->entity_name(),
	);
	$self->pop_dir();

	# Did we just rename the current branch?
	$self->current_branch($change->entity_name()) if (
		$change->src_entity_name() eq $self->current_branch()
	);
}

sub on_tag_rename {
	my ($self, $change, $revision) = @_;

	$self->push_dir($self->replay_base());

	my $old_tag_name = $change->src_entity_name();
	my $new_tag_name = $change->entity_name();

	# Find the change referenced by the old tag.
	my $old_tag_ref = $self->pipe_out_of_or_die(
		"git rev-parse -- $old_tag_name | tail -1"
	);
	confess "unreferenced tag $old_tag_name" unless (
		defined $old_tag_ref and length $old_tag_ref
	);
	chomp $old_tag_ref;

	# Get the old revision, so we can reuse its message.
	my $old_revision = delete $self->tags()->{$old_tag_name};
	$self->log("TAG) renaming from tag $old_tag_name = $old_revision");

	# Create the new tag with the old reference.
	$self->git_env_setup($old_revision);
	$self->pipe_into_or_die(
		$old_revision->message(),
		"git tag -a -F - $new_tag_name $old_tag_ref"
	);

	# Delete the old tag.
	$self->git_env_setup($revision);
	$self->do_or_die("git", "tag", "-d", $old_tag_name);

	$self->pop_dir();

	$self->tags()->{$new_tag_name} = $old_revision;
	$self->log("TAG) renaming to tag $new_tag_name = $old_revision");
}

### Git helpers.

sub git_commit {
	my ($self, $revision) = @_;

	unless ($self->current_rw()) {
		confess(
			"attempting a commit on read-only entity ",
			$self->current_branch()
		);
	}

	$self->push_dir($self->replay_base());

	# Every directory added is exploded into its constituent files.
	# Try to avoid "git-add --all".  It traverses the entire project
	# tree, which quickly gets expensive.

	if (scalar keys %{$self->directories_needing_add()}) {
		foreach my $dir (keys %{$self->directories_needing_add()}) {
			# TODO - Use File::Find when shell characters become an issue.
			foreach my $file (`find $dir -type f`) {
				chomp $file;
				$self->files_needing_add()->{$file} = 1;
			}
		}

		$self->directories_needing_add({});
		$self->needs_commit(1);
	}

	$self->git_env_setup($revision);

	my $needs_status = 1;
	if (scalar keys %{$self->files_needing_add()}) {
		# TODO - Break it up if the files list is too big.
		$self->do_or_die("git", "add", "-f", keys(%{$self->files_needing_add()}));
		$self->files_needing_add({});
		$self->needs_commit(1);
		$needs_status = 0;
	}

	unless ($self->needs_commit()) {
		$self->log("skipping git commit");
		$self->pop_dir();
		return;
	}

	my $git_commit_message_file = "/tmp/git-commit-$$.txt";

	my $message = $revision->message();
	$message = "(no message)" unless defined $message and length $message;

	open my $tmp, ">", $git_commit_message_file or confess $!;
	print $tmp $message or confess $!;
	close $tmp or confess $!;

	$self->git_env_setup($revision);

	# Some changes seem to alter no files.  We can detect whether a
	# commit is needed using git-status.  Otherwise, if we guess wrong,
	# git-commit will fail if there's nothing to commit.  We bother
	# checking git-commit because we do want to catch errors.

	# TODO - git-status is slow after a while.  Can we do something
	# smart to avoid it in all cases?
	if (
		!$needs_status or
		$self->do_sans_die("git status >/dev/null 2>/dev/null")
	) {
		$self->do_or_die(
			"git", "commit",
			($self->verbose() ? () : ("-q")),
			"--allow-empty", "-F", $git_commit_message_file
		);
	}

	unlink $git_commit_message_file;

	# Map between Subversion revisions and Git commits.
	chomp(my $git_id = qx(git rev-list -n 1 HEAD));
	$self->arborist()->map_revisions($revision->id(), $git_id);

	$self->needs_commit(0);
	$self->pop_dir();

	# Check for the need to GC.
	$self->revisions_until_gc( $self->revisions_until_gc() - 1 );
	if ($self->revisions_until_gc() < 1) {
		$self->do_git_gc();
		$self->revisions_until_gc( $self->revisions_between_gc() );
	}

	return;
}

sub do_git_gc {
	my $self = shift;
	$self->push_dir($self->replay_base());
	$self->do_or_die("git", "gc", ($self->verbose() ? () : ("--quiet")));
	$self->pop_dir();
}

### Helper methods.

#sub qualify_change_path {
#	my ($self, $change) = @_;
#	return $self->calculate_path($change->path());
#}

sub calculate_path {
	my ($self, $path) = @_;

	my $full_path = $self->replay_base() . "/" . $path;
	$full_path =~ s!//+!/!g;

	return $full_path;
}

sub git_env_setup {
	my ($self, $revision) = @_;

	confess "bad revision $revision" unless defined $revision and ref($revision);

	$ENV{GIT_COMMITTER_DATE} = $ENV{GIT_AUTHOR_DATE} = $revision->time();

	my $rev_author = $revision->author();

	my ($author_name, $author_email);
	if ($self->authors()) {
		my $git_author = $self->authors()->{$rev_author};
		unless (defined $git_author and length $git_author) {
			confess(
				"svn author '$rev_author' doesn't seem to be in your authors file"
			);
		}
		$author_name  = $git_author->name();
		$author_email = $git_author->email();
	}
	else {
		$author_name  = $rev_author;
		$author_email = "$rev_author\@example.com";
	}

	# TODO - Use the svn repository's GUID as the email host.
	$ENV{GIT_COMMITTER_NAME}  = $ENV{GIT_AUTHOR_NAME}  = $author_name;
	$ENV{GIT_COMMITTER_EMAIL} = $ENV{GIT_AUTHOR_EMAIL} = $author_email;
}

sub ensure_parent_dir_exists {
	my ($self, $path) = @_;
	$path =~ s!/*[^/]+/*$!!;
	return unless length $path and $path ne "/";
	return if -e $path;
	$self->log("mkpath $path");
	mkpath($path) or confess "mkpath failed: $!";
}

# Assumes that the cwd is already the replay repository.
sub set_branch {
	my ($self, $revision, $ent_type, $ent_name) = @_;

	if ($ent_name eq $self->current_branch()) {
		$self->log("GIT) already on branch $ent_name");
		return;
	}

	$self->git_commit($revision);

	if ($ent_type eq "branch") {
		$self->current_rw(1);
	}
	elsif ($ent_type eq "tag") {
		$self->current_rw(0);
	}
	else {
		confess "set_branch() inappropriately called for a $ent_type $ent_name";
	}

	$self->do_sans_die("git", "checkout", "-q", $ent_name);
	$self->current_branch($ent_name);

	# TODO - We also need to prune the paths within the entity.
	# Branches don't belong in /branch, for example.

	return;
}

# Already in the destination branch.
sub do_directory_copy {
	my ($self, $change, $revision, $branch_rel_path) = @_;

	confess "cp to $branch_rel_path failed: path exists" if -e $branch_rel_path;

	my ($copy_depot_descriptor, $copy_depot_path) = $self->get_copy_depot_info(
		$change
	);

	# Directory copy sources are tarballs.
	$copy_depot_path .= ".tar.gz";

	unless (-e $copy_depot_path) {
		confess "cp src $copy_depot_path ($copy_depot_descriptor) doesn't exist";
	}

	$self->do_mkdir($branch_rel_path);
	$self->push_dir($branch_rel_path);
	$self->do_or_die("tar", "xzf", $copy_depot_path);
	$self->pop_dir();

	$self->decrement_copy_source($change, $revision, $copy_depot_path);
}

sub do_file_copy {
	my ($self, $change, $revision) = @_;

	my $branch_rel_path = $change->rel_path();

	confess "cp to $branch_rel_path failed: path exists" if -e $branch_rel_path;

	my ($copy_depot_descriptor, $copy_depot_path) = $self->get_copy_depot_info(
		$change
	);

	unless (-e $copy_depot_path) {
		confess "cp src $copy_depot_path ($copy_depot_descriptor) doesn't exist";
	}

	# Weirdly, the copy source may not be authoritative.
	if (defined $change->content()) {
		$self->write_change_data($change, $branch_rel_path);
		$self->decrement_copy_source($change, $revision, $copy_depot_path);
		return;
	}

	# If content isn't provided, however, copy the file from the depot.
	$self->copy_file_or_die($copy_depot_path, $branch_rel_path);
	$self->decrement_copy_source($change, $revision, $copy_depot_path);
}

1;