| /* |
| * Copyright (C) the libgit2 contributors. All rights reserved. |
| * |
| * This file is part of libgit2, distributed under the GNU GPL v2 with |
| * a Linking Exception. For full terms see the included COPYING file. |
| */ |
| |
| #include "common.h" |
| |
| #include <zlib.h> |
| #include "git2/repository.h" |
| #include "git2/indexer.h" |
| #include "git2/sys/odb_backend.h" |
| #include "futils.h" |
| #include "hash.h" |
| #include "odb.h" |
| #include "delta.h" |
| #include "mwindow.h" |
| #include "pack.h" |
| |
| #include "git2/odb_backend.h" |
| |
| /* minimum number of seconds between two "freshen" touches of the same pack file */ |
| #define FRESHEN_FREQUENCY 2 |
| |
| struct pack_backend { |
| git_odb_backend parent; |
| git_vector packs; |
| struct git_pack_file *last_found; |
| char *pack_folder; |
| }; |
| |
| struct pack_writepack { |
| struct git_odb_writepack parent; |
| git_indexer *indexer; |
| }; |
| |
| /** |
| * The wonderful tale of a Packed Object lookup query |
| * =================================================== |
| * A riveting and epic story of epicness and ASCII |
| * art, presented by yours truly, |
| * Sir Vicent of Marti |
| * |
| * |
| * Chapter 1: Once upon a time... |
| * Initialization of the Pack Backend |
| * -------------------------------------------------- |
| * |
| * # git_odb_backend_pack |
| * | Creates the pack backend structure, initializes the |
| * | callback pointers to our default read() and exist() methods, |
| * | and tries to preload all the known packfiles in the ODB. |
| * | |
| * |-# packfile_load_all |
| * | Tries to find the `pack` folder, if it exists. ODBs without |
| * | a pack folder are ignored altogether. If there's a `pack` folder |
| * | we run a `dirent` callback through every file in the pack folder |
| * | to find our packfiles. The packfiles are then sorted according |
| * | to a sorting callback. |
| * | |
| * |-# packfile_load__cb |
| * | | This callback is called from `dirent` with every single file |
| * | | inside the pack folder. We find the packs by actually locating |
| * | | their index (ends in ".idx"). From that index, we verify that |
| * | | the corresponding packfile exists and is valid, and if so, we |
| * | | add it to the pack list. |
| * | | |
| * | |-# packfile_check |
| * | Make sure that there's a packfile to back this index, and store |
| * | some very basic information regarding the packfile itself, |
| * | such as the full path, the size, and the modification time. |
| * | We don't actually open the packfile to check for internal consistency. |
| * | |
| * |-# packfile_sort__cb |
| * Sort all the preloaded packs according to some specific criteria: |
| * we prioritize the "newer" packs because it's more likely they |
| * contain the objects we are looking for, and we prioritize local |
| * packs over remote ones. |
| * |
| * |
| * |
| * Chapter 2: To be, or not to be... |
| * A standard packed `exist` query for an OID |
| * -------------------------------------------------- |
| * |
| * # pack_backend__exists |
| * | Check if the given SHA1 oid exists in any of the packs |
| * | that have been loaded for our ODB. |
| * | |
| * |-# pack_entry_find |
| * | Iterate through all the packs that have been preloaded |
| * | (starting by the pack where the latest object was found) |
| * | to try to find the OID in one of them. |
| * | |
| * |-# pack_entry_find1 |
| * | Check the index of an individual pack to see if the SHA1 |
| * | OID can be found. If we can find the offset to that SHA1 |
| * | inside of the index, that means the object is contained |
| * | inside of the packfile and we can stop searching. |
| * | Before returning, we verify that the packfile behind the |
| * | index we are searching still exists on disk. |
| * | |
| * |-# pack_entry_find_offset |
| * | | Mmap the actual index file into memory if it hasn't been opened |
| * | | yet, and run a binary search through it to find the OID. |
| * | | See <http://book.git-scm.com/7_the_packfile.html> for specifics |
| * | | on the Packfile Index format and how we find entries in it. |
| * | | |
| * | |-# pack_index_open |
| * | | Guess the name of the index based on the full path to the |
| * | | packfile, open it and verify its contents. Only if the index |
| * | | has not been opened already. |
| * | | |
| * | |-# pack_index_check |
| * | Mmap the index file and do a quick run through the header |
| * | to guess the index version (right now we support v1 and v2), |
| * | and to verify that the size of the index makes sense. |
| * | |
| * |-# packfile_open |
| * See `packfile_open` in Chapter 3 |
| * |
| * |
| * |
| * Chapter 3: The neverending story... |
| * A standard packed `lookup` query for an OID |
| * -------------------------------------------------- |
| * TODO |
| * |
| */ |
| |
| |
| /*********************************************************** |
| * |
| * FORWARD DECLARATIONS |
| * |
| ***********************************************************/ |
| |
| static int packfile_sort__cb(const void *a_, const void *b_); |
| |
| static int packfile_load__cb(void *_data, git_buf *path); |
| |
| static int pack_entry_find(struct git_pack_entry *e, |
| struct pack_backend *backend, const git_oid *oid); |
| |
| /* Can find the offset of an object given |
| * a prefix of an identifier. |
| * Sets GIT_EAMBIGUOUS if short oid is ambiguous. |
| * This method assumes that len is between |
| * GIT_OID_MINPREFIXLEN and GIT_OID_HEXSZ. |
| */ |
| static int pack_entry_find_prefix( |
| struct git_pack_entry *e, |
| struct pack_backend *backend, |
| const git_oid *short_oid, |
| size_t len); |
| |
| |
| |
| /*********************************************************** |
| * |
| * PACK WINDOW MANAGEMENT |
| * |
| ***********************************************************/ |
| |
| static int packfile_sort__cb(const void *a_, const void *b_) |
| { |
| const struct git_pack_file *a = a_; |
| const struct git_pack_file *b = b_; |
| int st; |
| |
| /* |
| * Local packs tend to contain objects specific to our |
| * variant of the project than remote ones. In addition, |
| * remote ones could be on a network mounted filesystem. |
| * Favor local ones for these reasons. |
| */ |
| st = a->pack_local - b->pack_local; |
| if (st) |
| return -st; |
| |
| /* |
| * Younger packs tend to contain more recent objects, |
| * and more recent objects tend to get accessed more |
| * often. |
| */ |
| if (a->mtime < b->mtime) |
| return 1; |
| else if (a->mtime == b->mtime) |
| return 0; |
| |
| return -1; |
| } |
| |
| |
| static int packfile_load__cb(void *data, git_buf *path) |
| { |
| struct pack_backend *backend = data; |
| struct git_pack_file *pack; |
| const char *path_str = git_buf_cstr(path); |
| size_t i, cmp_len = git_buf_len(path); |
| int error; |
| |
| if (cmp_len <= strlen(".idx") || git__suffixcmp(path_str, ".idx") != 0) |
| return 0; /* not an index */ |
| |
| cmp_len -= strlen(".idx"); |
| |
| for (i = 0; i < backend->packs.length; ++i) { |
| struct git_pack_file *p = git_vector_get(&backend->packs, i); |
| |
| if (strncmp(p->pack_name, path_str, cmp_len) == 0) |
| return 0; |
| } |
| |
| error = git_mwindow_get_pack(&pack, path->ptr); |
| |
| /* ignore missing .pack file as git does */ |
| if (error == GIT_ENOTFOUND) { |
| git_error_clear(); |
| return 0; |
| } |
| |
| if (!error) |
| error = git_vector_insert(&backend->packs, pack); |
| |
| return error; |
| |
| } |
| |
| static int pack_entry_find_inner( |
| struct git_pack_entry *e, |
| struct pack_backend *backend, |
| const git_oid *oid, |
| struct git_pack_file *last_found) |
| { |
| size_t i; |
| |
| if (last_found && |
| git_pack_entry_find(e, last_found, oid, GIT_OID_HEXSZ) == 0) |
| return 0; |
| |
| for (i = 0; i < backend->packs.length; ++i) { |
| struct git_pack_file *p; |
| |
| p = git_vector_get(&backend->packs, i); |
| if (p == last_found) |
| continue; |
| |
| if (git_pack_entry_find(e, p, oid, GIT_OID_HEXSZ) == 0) { |
| backend->last_found = p; |
| return 0; |
| } |
| } |
| |
| return -1; |
| } |
| |
| static int pack_entry_find(struct git_pack_entry *e, struct pack_backend *backend, const git_oid *oid) |
| { |
| struct git_pack_file *last_found = backend->last_found; |
| |
| if (backend->last_found && |
| git_pack_entry_find(e, backend->last_found, oid, GIT_OID_HEXSZ) == 0) |
| return 0; |
| |
| if (!pack_entry_find_inner(e, backend, oid, last_found)) |
| return 0; |
| |
| return git_odb__error_notfound( |
| "failed to find pack entry", oid, GIT_OID_HEXSZ); |
| } |
| |
| static int pack_entry_find_prefix( |
| struct git_pack_entry *e, |
| struct pack_backend *backend, |
| const git_oid *short_oid, |
| size_t len) |
| { |
| int error; |
| size_t i; |
| git_oid found_full_oid = {{0}}; |
| bool found = false; |
| struct git_pack_file *last_found = backend->last_found; |
| |
| if (last_found) { |
| error = git_pack_entry_find(e, last_found, short_oid, len); |
| if (error == GIT_EAMBIGUOUS) |
| return error; |
| if (!error) { |
| git_oid_cpy(&found_full_oid, &e->sha1); |
| found = true; |
| } |
| } |
| |
| for (i = 0; i < backend->packs.length; ++i) { |
| struct git_pack_file *p; |
| |
| p = git_vector_get(&backend->packs, i); |
| if (p == last_found) |
| continue; |
| |
| error = git_pack_entry_find(e, p, short_oid, len); |
| if (error == GIT_EAMBIGUOUS) |
| return error; |
| if (!error) { |
| if (found && git_oid_cmp(&e->sha1, &found_full_oid)) |
| return git_odb__error_ambiguous("found multiple pack entries"); |
| git_oid_cpy(&found_full_oid, &e->sha1); |
| found = true; |
| backend->last_found = p; |
| } |
| } |
| |
| if (!found) |
| return git_odb__error_notfound("no matching pack entry for prefix", |
| short_oid, len); |
| else |
| return 0; |
| } |
| |
| |
| /*********************************************************** |
| * |
| * PACKED BACKEND PUBLIC API |
| * |
| * Implement the git_odb_backend API calls |
| * |
| ***********************************************************/ |
| static int pack_backend__refresh(git_odb_backend *backend_) |
| { |
| int error; |
| struct stat st; |
| git_buf path = GIT_BUF_INIT; |
| struct pack_backend *backend = (struct pack_backend *)backend_; |
| |
| if (backend->pack_folder == NULL) |
| return 0; |
| |
| if (p_stat(backend->pack_folder, &st) < 0 || !S_ISDIR(st.st_mode)) |
| return git_odb__error_notfound("failed to refresh packfiles", NULL, 0); |
| |
| git_buf_sets(&path, backend->pack_folder); |
| |
| /* reload all packs */ |
| error = git_path_direach(&path, 0, packfile_load__cb, backend); |
| |
| git_buf_dispose(&path); |
| git_vector_sort(&backend->packs); |
| |
| return error; |
| } |
| |
| static int pack_backend__read_header( |
| size_t *len_p, git_object_t *type_p, |
| struct git_odb_backend *backend, const git_oid *oid) |
| { |
| struct git_pack_entry e; |
| int error; |
| |
| assert(len_p && type_p && backend && oid); |
| |
| if ((error = pack_entry_find(&e, (struct pack_backend *)backend, oid)) < 0) |
| return error; |
| |
| return git_packfile_resolve_header(len_p, type_p, e.p, e.offset); |
| } |
| |
| static int pack_backend__freshen( |
| git_odb_backend *backend, const git_oid *oid) |
| { |
| struct git_pack_entry e; |
| time_t now; |
| int error; |
| |
| if ((error = pack_entry_find(&e, (struct pack_backend *)backend, oid)) < 0) |
| return error; |
| |
| now = time(NULL); |
| |
| if (e.p->last_freshen > now - FRESHEN_FREQUENCY) |
| return 0; |
| |
| if ((error = git_futils_touch(e.p->pack_name, &now)) < 0) |
| return error; |
| |
| e.p->last_freshen = now; |
| return 0; |
| } |
| |
| static int pack_backend__read( |
| void **buffer_p, size_t *len_p, git_object_t *type_p, |
| git_odb_backend *backend, const git_oid *oid) |
| { |
| struct git_pack_entry e; |
| git_rawobj raw = {NULL}; |
| int error; |
| |
| if ((error = pack_entry_find(&e, (struct pack_backend *)backend, oid)) < 0 || |
| (error = git_packfile_unpack(&raw, e.p, &e.offset)) < 0) |
| return error; |
| |
| *buffer_p = raw.data; |
| *len_p = raw.len; |
| *type_p = raw.type; |
| |
| return 0; |
| } |
| |
| static int pack_backend__read_prefix( |
| git_oid *out_oid, |
| void **buffer_p, |
| size_t *len_p, |
| git_object_t *type_p, |
| git_odb_backend *backend, |
| const git_oid *short_oid, |
| size_t len) |
| { |
| int error = 0; |
| |
| if (len < GIT_OID_MINPREFIXLEN) |
| error = git_odb__error_ambiguous("prefix length too short"); |
| |
| else if (len >= GIT_OID_HEXSZ) { |
| /* We can fall back to regular read method */ |
| error = pack_backend__read(buffer_p, len_p, type_p, backend, short_oid); |
| if (!error) |
| git_oid_cpy(out_oid, short_oid); |
| } else { |
| struct git_pack_entry e; |
| git_rawobj raw = {NULL}; |
| |
| if ((error = pack_entry_find_prefix( |
| &e, (struct pack_backend *)backend, short_oid, len)) == 0 && |
| (error = git_packfile_unpack(&raw, e.p, &e.offset)) == 0) |
| { |
| *buffer_p = raw.data; |
| *len_p = raw.len; |
| *type_p = raw.type; |
| git_oid_cpy(out_oid, &e.sha1); |
| } |
| } |
| |
| return error; |
| } |
| |
| static int pack_backend__exists(git_odb_backend *backend, const git_oid *oid) |
| { |
| struct git_pack_entry e; |
| return pack_entry_find(&e, (struct pack_backend *)backend, oid) == 0; |
| } |
| |
| static int pack_backend__exists_prefix( |
| git_oid *out, git_odb_backend *backend, const git_oid *short_id, size_t len) |
| { |
| int error; |
| struct pack_backend *pb = (struct pack_backend *)backend; |
| struct git_pack_entry e = {0}; |
| |
| error = pack_entry_find_prefix(&e, pb, short_id, len); |
| git_oid_cpy(out, &e.sha1); |
| return error; |
| } |
| |
| static int pack_backend__foreach(git_odb_backend *_backend, git_odb_foreach_cb cb, void *data) |
| { |
| int error; |
| struct git_pack_file *p; |
| struct pack_backend *backend; |
| unsigned int i; |
| |
| assert(_backend && cb); |
| backend = (struct pack_backend *)_backend; |
| |
| /* Make sure we know about the packfiles */ |
| if ((error = pack_backend__refresh(_backend)) < 0) |
| return error; |
| |
| git_vector_foreach(&backend->packs, i, p) { |
| if ((error = git_pack_foreach_entry(p, cb, data)) != 0) |
| return error; |
| } |
| |
| return 0; |
| } |
| |
| static int pack_backend__writepack_append(struct git_odb_writepack *_writepack, const void *data, size_t size, git_indexer_progress *stats) |
| { |
| struct pack_writepack *writepack = (struct pack_writepack *)_writepack; |
| |
| assert(writepack); |
| |
| return git_indexer_append(writepack->indexer, data, size, stats); |
| } |
| |
| static int pack_backend__writepack_commit(struct git_odb_writepack *_writepack, git_indexer_progress *stats) |
| { |
| struct pack_writepack *writepack = (struct pack_writepack *)_writepack; |
| |
| assert(writepack); |
| |
| return git_indexer_commit(writepack->indexer, stats); |
| } |
| |
| static void pack_backend__writepack_free(struct git_odb_writepack *_writepack) |
| { |
| struct pack_writepack *writepack = (struct pack_writepack *)_writepack; |
| |
| assert(writepack); |
| |
| git_indexer_free(writepack->indexer); |
| git__free(writepack); |
| } |
| |
| static int pack_backend__writepack(struct git_odb_writepack **out, |
| git_odb_backend *_backend, |
| git_odb *odb, |
| git_indexer_progress_cb progress_cb, |
| void *progress_payload) |
| { |
| git_indexer_options opts = GIT_INDEXER_OPTIONS_INIT; |
| struct pack_backend *backend; |
| struct pack_writepack *writepack; |
| |
| assert(out && _backend); |
| |
| *out = NULL; |
| |
| opts.progress_cb = progress_cb; |
| opts.progress_cb_payload = progress_payload; |
| |
| backend = (struct pack_backend *)_backend; |
| |
| writepack = git__calloc(1, sizeof(struct pack_writepack)); |
| GIT_ERROR_CHECK_ALLOC(writepack); |
| |
| if (git_indexer_new(&writepack->indexer, |
| backend->pack_folder, 0, odb, &opts) < 0) { |
| git__free(writepack); |
| return -1; |
| } |
| |
| writepack->parent.backend = _backend; |
| writepack->parent.append = pack_backend__writepack_append; |
| writepack->parent.commit = pack_backend__writepack_commit; |
| writepack->parent.free = pack_backend__writepack_free; |
| |
| *out = (git_odb_writepack *)writepack; |
| |
| return 0; |
| } |
| |
| static void pack_backend__free(git_odb_backend *_backend) |
| { |
| struct pack_backend *backend; |
| size_t i; |
| |
| assert(_backend); |
| |
| backend = (struct pack_backend *)_backend; |
| |
| for (i = 0; i < backend->packs.length; ++i) { |
| struct git_pack_file *p = git_vector_get(&backend->packs, i); |
| git_mwindow_put_pack(p); |
| } |
| |
| git_vector_free(&backend->packs); |
| git__free(backend->pack_folder); |
| git__free(backend); |
| } |
| |
| static int pack_backend__alloc(struct pack_backend **out, size_t initial_size) |
| { |
| struct pack_backend *backend = git__calloc(1, sizeof(struct pack_backend)); |
| GIT_ERROR_CHECK_ALLOC(backend); |
| |
| if (git_vector_init(&backend->packs, initial_size, packfile_sort__cb) < 0) { |
| git__free(backend); |
| return -1; |
| } |
| |
| backend->parent.version = GIT_ODB_BACKEND_VERSION; |
| |
| backend->parent.read = &pack_backend__read; |
| backend->parent.read_prefix = &pack_backend__read_prefix; |
| backend->parent.read_header = &pack_backend__read_header; |
| backend->parent.exists = &pack_backend__exists; |
| backend->parent.exists_prefix = &pack_backend__exists_prefix; |
| backend->parent.refresh = &pack_backend__refresh; |
| backend->parent.foreach = &pack_backend__foreach; |
| backend->parent.writepack = &pack_backend__writepack; |
| backend->parent.freshen = &pack_backend__freshen; |
| backend->parent.free = &pack_backend__free; |
| |
| *out = backend; |
| return 0; |
| } |
| |
| int git_odb_backend_one_pack(git_odb_backend **backend_out, const char *idx) |
| { |
| struct pack_backend *backend = NULL; |
| struct git_pack_file *packfile = NULL; |
| |
| if (pack_backend__alloc(&backend, 1) < 0) |
| return -1; |
| |
| if (git_mwindow_get_pack(&packfile, idx) < 0 || |
| git_vector_insert(&backend->packs, packfile) < 0) |
| { |
| pack_backend__free((git_odb_backend *)backend); |
| return -1; |
| } |
| |
| *backend_out = (git_odb_backend *)backend; |
| return 0; |
| } |
| |
| int git_odb_backend_pack(git_odb_backend **backend_out, const char *objects_dir) |
| { |
| int error = 0; |
| struct pack_backend *backend = NULL; |
| git_buf path = GIT_BUF_INIT; |
| |
| if (pack_backend__alloc(&backend, 8) < 0) |
| return -1; |
| |
| if (!(error = git_buf_joinpath(&path, objects_dir, "pack")) && |
| git_path_isdir(git_buf_cstr(&path))) |
| { |
| backend->pack_folder = git_buf_detach(&path); |
| error = pack_backend__refresh((git_odb_backend *)backend); |
| } |
| |
| if (error < 0) { |
| pack_backend__free((git_odb_backend *)backend); |
| backend = NULL; |
| } |
| |
| *backend_out = (git_odb_backend *)backend; |
| |
| git_buf_dispose(&path); |
| |
| return error; |
| } |