The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
/*
 * Copyright (C) the libgit2 contributors. All rights reserved.
 *
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
 */

#include "common.h"

#include <zlib.h>
#include "git2/repository.h"
#include "git2/indexer.h"
#include "git2/sys/odb_backend.h"
#include "fileops.h"
#include "hash.h"
#include "odb.h"
#include "delta.h"
#include "sha1_lookup.h"
#include "mwindow.h"
#include "pack.h"

#include "git2/odb_backend.h"

/* re-freshen pack files no more than every 2 seconds */
#define FRESHEN_FREQUENCY 2

struct pack_backend {
	git_odb_backend parent;
	git_vector packs;
	struct git_pack_file *last_found;
	char *pack_folder;
};

struct pack_writepack {
	struct git_odb_writepack parent;
	git_indexer *indexer;
};

/**
 * The wonderful tale of a Packed Object lookup query
 * ===================================================
 *	A riveting and epic story of epicness and ASCII
 *			art, presented by yours truly,
 *				Sir Vicent of Marti
 *
 *
 *	Chapter 1: Once upon a time...
 *	Initialization of the Pack Backend
 *	--------------------------------------------------
 *
 *	# git_odb_backend_pack
 *	| Creates the pack backend structure, initializes the
 *	| callback pointers to our default read() and exist() methods,
 *	| and tries to preload all the known packfiles in the ODB.
 * |
 *	|-# packfile_load_all
 *	 | Tries to find the `pack` folder, if it exists. ODBs without
 *	 | a pack folder are ignored altogether. If there's a `pack` folder
 *	 | we run a `dirent` callback through every file in the pack folder
 *	 | to find our packfiles. The packfiles are then sorted according
 *	 | to a sorting callback.
 * 	 |
 *	 |-# packfile_load__cb
 *	 | | This callback is called from `dirent` with every single file
 *	 | | inside the pack folder. We find the packs by actually locating
 *	 | | their index (ends in ".idx"). From that index, we verify that
 *	 | | the corresponding packfile exists and is valid, and if so, we
 *	| | add it to the pack list.
 *	 | |
 *	 | |-# packfile_check
 *	 |		Make sure that there's a packfile to back this index, and store
 *	 |		some very basic information regarding the packfile itself,
 *	 |		such as the full path, the size, and the modification time.
 *	 |		We don't actually open the packfile to check for internal consistency.
 *	|
 *	|-# packfile_sort__cb
 *		Sort all the preloaded packs according to some specific criteria:
 *		we prioritize the "newer" packs because it's more likely they
 *		contain the objects we are looking for, and we prioritize local
 *		packs over remote ones.
 *
 *
 *
 *	Chapter 2: To be, or not to be...
 *	A standard packed `exist` query for an OID
 *	--------------------------------------------------
 *
 * # pack_backend__exists
 * | Check if the given SHA1 oid exists in any of the packs
 * | that have been loaded for our ODB.
 * |
 * |-# pack_entry_find
 *	| Iterate through all the packs that have been preloaded
 *	| (starting by the pack where the latest object was found)
 *	| to try to find the OID in one of them.
 *	|
 *	|-# pack_entry_find1
 *		| Check the index of an individual pack to see if the SHA1
 *		| OID can be found. If we can find the offset to that SHA1
 *		| inside of the index, that means the object is contained
 *		| inside of the packfile and we can stop searching.
 *		| Before returning, we verify that the packfile behing the
 *		| index we are searching still exists on disk.
 *		|
 *		|-# pack_entry_find_offset
 *		| | Mmap the actual index file to disk if it hasn't been opened
 *		| | yet, and run a binary search through it to find the OID.
 *		| | See <http://book.git-scm.com/7_the_packfile.html> for specifics
 *		| | on the Packfile Index format and how do we find entries in it.
 *		| |
 *		| |-# pack_index_open
 *		|	| Guess the name of the index based on the full path to the
 *		|	| packfile, open it and verify its contents. Only if the index
 *		|	| has not been opened already.
 *		|	|
 *		|	|-# pack_index_check
 *		|		Mmap the index file and do a quick run through the header
 *		|		to guess the index version (right now we support v1 and v2),
 *		|		and to verify that the size of the index makes sense.
 *		|
 *		|-# packfile_open
 *			See `packfile_open` in Chapter 3
 *
 *
 *
 *	Chapter 3: The neverending story...
 *	A standard packed `lookup` query for an OID
 *	--------------------------------------------------
 *	TODO
 *
 */


/***********************************************************
 *
 * FORWARD DECLARATIONS
 *
 ***********************************************************/

static int packfile_sort__cb(const void *a_, const void *b_);

static int packfile_load__cb(void *_data, git_buf *path);

static int pack_entry_find(struct git_pack_entry *e,
	struct pack_backend *backend, const git_oid *oid);

/* Can find the offset of an object given
 * a prefix of an identifier.
 * Sets GIT_EAMBIGUOUS if short oid is ambiguous.
 * This method assumes that len is between
 * GIT_OID_MINPREFIXLEN and GIT_OID_HEXSZ.
 */
static int pack_entry_find_prefix(
	struct git_pack_entry *e,
	struct pack_backend *backend,
	const git_oid *short_oid,
	size_t len);



/***********************************************************
 *
 * PACK WINDOW MANAGEMENT
 *
 ***********************************************************/

static int packfile_sort__cb(const void *a_, const void *b_)
{
	const struct git_pack_file *a = a_;
	const struct git_pack_file *b = b_;
	int st;

	/*
	 * Local packs tend to contain objects specific to our
	 * variant of the project than remote ones. In addition,
	 * remote ones could be on a network mounted filesystem.
	 * Favor local ones for these reasons.
	 */
	st = a->pack_local - b->pack_local;
	if (st)
		return -st;

	/*
	 * Younger packs tend to contain more recent objects,
	 * and more recent objects tend to get accessed more
	 * often.
	 */
	if (a->mtime < b->mtime)
		return 1;
	else if (a->mtime == b->mtime)
		return 0;

	return -1;
}


static int packfile_load__cb(void *data, git_buf *path)
{
	struct pack_backend *backend = data;
	struct git_pack_file *pack;
	const char *path_str = git_buf_cstr(path);
	size_t i, cmp_len = git_buf_len(path);
	int error;

	if (cmp_len <= strlen(".idx") || git__suffixcmp(path_str, ".idx") != 0)
		return 0; /* not an index */

	cmp_len -= strlen(".idx");

	for (i = 0; i < backend->packs.length; ++i) {
		struct git_pack_file *p = git_vector_get(&backend->packs, i);

		if (memcmp(p->pack_name, path_str, cmp_len) == 0)
			return 0;
	}

	error = git_mwindow_get_pack(&pack, path->ptr);

	/* ignore missing .pack file as git does */
	if (error == GIT_ENOTFOUND) {
		giterr_clear();
		return 0;
	}

	if (!error)
		error = git_vector_insert(&backend->packs, pack);

	return error;

}

static int pack_entry_find_inner(
	struct git_pack_entry *e,
	struct pack_backend *backend,
	const git_oid *oid,
	struct git_pack_file *last_found)
{
	size_t i;

	if (last_found &&
		git_pack_entry_find(e, last_found, oid, GIT_OID_HEXSZ) == 0)
		return 0;

	for (i = 0; i < backend->packs.length; ++i) {
		struct git_pack_file *p;

		p = git_vector_get(&backend->packs, i);
		if (p == last_found)
			continue;

		if (git_pack_entry_find(e, p, oid, GIT_OID_HEXSZ) == 0) {
			backend->last_found = p;
			return 0;
		}
	}

	return -1;
}

static int pack_entry_find(struct git_pack_entry *e, struct pack_backend *backend, const git_oid *oid)
{
	struct git_pack_file *last_found = backend->last_found;

	if (backend->last_found &&
		git_pack_entry_find(e, backend->last_found, oid, GIT_OID_HEXSZ) == 0)
		return 0;

	if (!pack_entry_find_inner(e, backend, oid, last_found))
		return 0;

	return git_odb__error_notfound(
		"failed to find pack entry", oid, GIT_OID_HEXSZ);
}

static int pack_entry_find_prefix(
	struct git_pack_entry *e,
	struct pack_backend *backend,
	const git_oid *short_oid,
	size_t len)
{
	int error;
	size_t i;
	git_oid found_full_oid = {{0}};
	bool found = false;
	struct git_pack_file *last_found = backend->last_found;

	if (last_found) {
		error = git_pack_entry_find(e, last_found, short_oid, len);
		if (error == GIT_EAMBIGUOUS)
			return error;
		if (!error) {
			git_oid_cpy(&found_full_oid, &e->sha1);
			found = true;
		}
	}

	for (i = 0; i < backend->packs.length; ++i) {
		struct git_pack_file *p;

		p = git_vector_get(&backend->packs, i);
		if (p == last_found)
			continue;

		error = git_pack_entry_find(e, p, short_oid, len);
		if (error == GIT_EAMBIGUOUS)
			return error;
		if (!error) {
			if (found && git_oid_cmp(&e->sha1, &found_full_oid))
				return git_odb__error_ambiguous("found multiple pack entries");
			git_oid_cpy(&found_full_oid, &e->sha1);
			found = true;
			backend->last_found = p;
		}
	}

	if (!found)
		return git_odb__error_notfound("no matching pack entry for prefix",
			short_oid, len);
	else
		return 0;
}


/***********************************************************
 *
 * PACKED BACKEND PUBLIC API
 *
 * Implement the git_odb_backend API calls
 *
 ***********************************************************/
static int pack_backend__refresh(git_odb_backend *backend_)
{
	int error;
	struct stat st;
	git_buf path = GIT_BUF_INIT;
	struct pack_backend *backend = (struct pack_backend *)backend_;

	if (backend->pack_folder == NULL)
		return 0;

	if (p_stat(backend->pack_folder, &st) < 0 || !S_ISDIR(st.st_mode))
		return git_odb__error_notfound("failed to refresh packfiles", NULL, 0);

	git_buf_sets(&path, backend->pack_folder);

	/* reload all packs */
	error = git_path_direach(&path, 0, packfile_load__cb, backend);

	git_buf_free(&path);
	git_vector_sort(&backend->packs);

	return error;
}

static int pack_backend__read_header(
	size_t *len_p, git_otype *type_p,
	struct git_odb_backend *backend, const git_oid *oid)
{
	struct git_pack_entry e;
	int error;

	assert(len_p && type_p && backend && oid);

	if ((error = pack_entry_find(&e, (struct pack_backend *)backend, oid)) < 0)
		return error;

	return git_packfile_resolve_header(len_p, type_p, e.p, e.offset);
}

static int pack_backend__freshen(
	git_odb_backend *backend, const git_oid *oid)
{
	struct git_pack_entry e;
	time_t now;
	int error;

	if ((error = pack_entry_find(&e, (struct pack_backend *)backend, oid)) < 0)
		return error;

	now = time(NULL);

	if (e.p->last_freshen > now - FRESHEN_FREQUENCY)
		return 0;

	if ((error = git_futils_touch(e.p->pack_name, &now)) < 0)
		return error;

	e.p->last_freshen = now;
	return 0;
}

static int pack_backend__read(
	void **buffer_p, size_t *len_p, git_otype *type_p,
	git_odb_backend *backend, const git_oid *oid)
{
	struct git_pack_entry e;
	git_rawobj raw = {NULL};
	int error;

	if ((error = pack_entry_find(&e, (struct pack_backend *)backend, oid)) < 0 ||
		(error = git_packfile_unpack(&raw, e.p, &e.offset)) < 0)
		return error;

	*buffer_p = raw.data;
	*len_p = raw.len;
	*type_p = raw.type;

	return 0;
}

static int pack_backend__read_prefix(
	git_oid *out_oid,
	void **buffer_p,
	size_t *len_p,
	git_otype *type_p,
	git_odb_backend *backend,
	const git_oid *short_oid,
	size_t len)
{
	int error = 0;

	if (len < GIT_OID_MINPREFIXLEN)
		error = git_odb__error_ambiguous("prefix length too short");

	else if (len >= GIT_OID_HEXSZ) {
		/* We can fall back to regular read method */
		error = pack_backend__read(buffer_p, len_p, type_p, backend, short_oid);
		if (!error)
			git_oid_cpy(out_oid, short_oid);
	} else {
		struct git_pack_entry e;
		git_rawobj raw = {NULL};

		if ((error = pack_entry_find_prefix(
				&e, (struct pack_backend *)backend, short_oid, len)) == 0 &&
			(error = git_packfile_unpack(&raw, e.p, &e.offset)) == 0)
		{
			*buffer_p = raw.data;
			*len_p = raw.len;
			*type_p = raw.type;
			git_oid_cpy(out_oid, &e.sha1);
		}
	}

	return error;
}

static int pack_backend__exists(git_odb_backend *backend, const git_oid *oid)
{
	struct git_pack_entry e;
	return pack_entry_find(&e, (struct pack_backend *)backend, oid) == 0;
}

static int pack_backend__exists_prefix(
	git_oid *out, git_odb_backend *backend, const git_oid *short_id, size_t len)
{
	int error;
	struct pack_backend *pb = (struct pack_backend *)backend;
	struct git_pack_entry e = {0};

	error = pack_entry_find_prefix(&e, pb, short_id, len);
	git_oid_cpy(out, &e.sha1);
	return error;
}

static int pack_backend__foreach(git_odb_backend *_backend, git_odb_foreach_cb cb, void *data)
{
	int error;
	struct git_pack_file *p;
	struct pack_backend *backend;
	unsigned int i;

	assert(_backend && cb);
	backend = (struct pack_backend *)_backend;

	/* Make sure we know about the packfiles */
	if ((error = pack_backend__refresh(_backend)) < 0)
		return error;

	git_vector_foreach(&backend->packs, i, p) {
		if ((error = git_pack_foreach_entry(p, cb, data)) < 0)
			return error;
	}

	return 0;
}

static int pack_backend__writepack_append(struct git_odb_writepack *_writepack, const void *data, size_t size, git_transfer_progress *stats)
{
	struct pack_writepack *writepack = (struct pack_writepack *)_writepack;

	assert(writepack);

	return git_indexer_append(writepack->indexer, data, size, stats);
}

static int pack_backend__writepack_commit(struct git_odb_writepack *_writepack, git_transfer_progress *stats)
{
	struct pack_writepack *writepack = (struct pack_writepack *)_writepack;

	assert(writepack);

	return git_indexer_commit(writepack->indexer, stats);
}

static void pack_backend__writepack_free(struct git_odb_writepack *_writepack)
{
	struct pack_writepack *writepack = (struct pack_writepack *)_writepack;

	assert(writepack);

	git_indexer_free(writepack->indexer);
	git__free(writepack);
}

static int pack_backend__writepack(struct git_odb_writepack **out,
	git_odb_backend *_backend,
        git_odb *odb,
	git_transfer_progress_cb progress_cb,
	void *progress_payload)
{
	struct pack_backend *backend;
	struct pack_writepack *writepack;

	assert(out && _backend);

	*out = NULL;

	backend = (struct pack_backend *)_backend;

	writepack = git__calloc(1, sizeof(struct pack_writepack));
	GITERR_CHECK_ALLOC(writepack);

	if (git_indexer_new(&writepack->indexer,
		backend->pack_folder, 0, odb, progress_cb, progress_payload) < 0) {
		git__free(writepack);
		return -1;
	}

	writepack->parent.backend = _backend;
	writepack->parent.append = pack_backend__writepack_append;
	writepack->parent.commit = pack_backend__writepack_commit;
	writepack->parent.free = pack_backend__writepack_free;

	*out = (git_odb_writepack *)writepack;

	return 0;
}

static void pack_backend__free(git_odb_backend *_backend)
{
	struct pack_backend *backend;
	size_t i;

	assert(_backend);

	backend = (struct pack_backend *)_backend;

	for (i = 0; i < backend->packs.length; ++i) {
		struct git_pack_file *p = git_vector_get(&backend->packs, i);
		git_mwindow_put_pack(p);
	}

	git_vector_free(&backend->packs);
	git__free(backend->pack_folder);
	git__free(backend);
}

static int pack_backend__alloc(struct pack_backend **out, size_t initial_size)
{
	struct pack_backend *backend = git__calloc(1, sizeof(struct pack_backend));
	GITERR_CHECK_ALLOC(backend);

	if (git_vector_init(&backend->packs, initial_size, packfile_sort__cb) < 0) {
		git__free(backend);
		return -1;
	}

	backend->parent.version = GIT_ODB_BACKEND_VERSION;

	backend->parent.read = &pack_backend__read;
	backend->parent.read_prefix = &pack_backend__read_prefix;
	backend->parent.read_header = &pack_backend__read_header;
	backend->parent.exists = &pack_backend__exists;
	backend->parent.exists_prefix = &pack_backend__exists_prefix;
	backend->parent.refresh = &pack_backend__refresh;
	backend->parent.foreach = &pack_backend__foreach;
	backend->parent.writepack = &pack_backend__writepack;
	backend->parent.freshen = &pack_backend__freshen;
	backend->parent.free = &pack_backend__free;

	*out = backend;
	return 0;
}

int git_odb_backend_one_pack(git_odb_backend **backend_out, const char *idx)
{
	struct pack_backend *backend = NULL;
	struct git_pack_file *packfile = NULL;

	if (pack_backend__alloc(&backend, 1) < 0)
		return -1;

	if (git_mwindow_get_pack(&packfile, idx) < 0 ||
		git_vector_insert(&backend->packs, packfile) < 0)
	{
		pack_backend__free((git_odb_backend *)backend);
		return -1;
	}

	*backend_out = (git_odb_backend *)backend;
	return 0;
}

int git_odb_backend_pack(git_odb_backend **backend_out, const char *objects_dir)
{
	int error = 0;
	struct pack_backend *backend = NULL;
	git_buf path = GIT_BUF_INIT;

	if (pack_backend__alloc(&backend, 8) < 0)
		return -1;

	if (!(error = git_buf_joinpath(&path, objects_dir, "pack")) &&
		git_path_isdir(git_buf_cstr(&path)))
	{
		backend->pack_folder = git_buf_detach(&path);
		error = pack_backend__refresh((git_odb_backend *)backend);
	}

	if (error < 0) {
		pack_backend__free((git_odb_backend *)backend);
		backend = NULL;
	}

	*backend_out = (git_odb_backend *)backend;

	git_buf_free(&path);

	return error;
}