| /* |
| * Copyright (C) the libgit2 contributors. All rights reserved. |
| * |
| * This file is part of libgit2, distributed under the GNU GPL v2 with |
| * a Linking Exception. For full terms see the included COPYING file. |
| */ |
| |
| #include "indexer.h" |
| |
| #include "git2/indexer.h" |
| #include "git2/object.h" |
| |
| #include "commit.h" |
| #include "tree.h" |
| #include "tag.h" |
| #include "pack.h" |
| #include "mwindow.h" |
| #include "posix.h" |
| #include "pack.h" |
| #include "filebuf.h" |
| #include "oid.h" |
| #include "oidarray.h" |
| #include "oidmap.h" |
| #include "zstream.h" |
| #include "object.h" |
| |
/* Global mutex guarding shared mwindow state (defined elsewhere). */
extern git_mutex git__mwindow_mutex;

/* Upper bound on objects accepted from a single pack; non-const so it can
 * be lowered (e.g. by tests). */
size_t git_indexer__max_objects = UINT32_MAX;

/* Largest pack offset that fits the index's 31-bit "small offset" field;
 * anything bigger goes into the 64-bit large-offset table. */
#define UINT31_MAX (0x7FFFFFFF)
| |
/* One object record destined for the .idx file. */
struct entry {
	git_oid oid;          /* object id */
	uint32_t crc;         /* CRC32 of the packed bytes (stored in network order) */
	uint32_t offset;      /* pack offset, or UINT32_MAX when it needs 64 bits */
	uint64_t offset_long; /* full offset when 'offset' is UINT32_MAX */
};
| |
/*
 * Streaming pack indexer: data arrives via git_indexer_append() and the
 * final .idx is produced by git_indexer_commit().
 */
struct git_indexer {
	unsigned int parsed_header :1, /* pack header seen and validated */
		pack_committed :1,     /* packfile renamed into its final place */
		have_stream :1,        /* an object stream is currently open */
		have_delta :1,         /* the entry being read is a delta */
		do_fsync :1,           /* fsync data as it is written */
		do_verify :1;          /* verify object connectivity */
	struct git_pack_header hdr;    /* copy of the pack's header */
	struct git_pack_file *pack;    /* packfile being written and indexed */
	unsigned int mode;             /* file mode for the created files */
	off64_t off;                   /* current parse position in the pack */
	off64_t entry_start;           /* offset where the current entry began */
	git_object_t entry_type;       /* type of the current entry */
	git_buf entry_data;            /* inflated entry bytes (only when verifying) */
	git_packfile_stream stream;    /* stream for the object being inflated */
	size_t nr_objects;             /* object count announced in the header */
	git_vector objects;            /* struct entry *, sorted before writing */
	git_vector deltas;             /* struct delta_info * awaiting their base */
	unsigned int fanout[256];      /* cumulative first-byte counts for the index */
	git_hash_ctx hash_ctx;         /* hash of the object currently being read */
	git_oid hash;                  /* pack checksum; set at commit time */
	git_indexer_progress_cb progress_cb;
	void *progress_payload;
	char objbuf[8*1024];           /* scratch buffer for inflating object data */

	/* OIDs referenced from pack objects. Used for verification. */
	git_oidmap *expected_oids;

	/* Needed to look up objects which we want to inject to fix a thin pack */
	git_odb *odb;

	/* Fields for calculating the packfile trailer (hash of everything before it) */
	char inbuf[GIT_OID_RAWSZ];
	size_t inbuf_len;
	git_hash_ctx trailer;
};
| |
/* Pack offset of a delta whose base has not been resolved yet. */
struct delta_info {
	off64_t delta_off;
};
| |
/* Return the pack's checksum; filled in from the trailer hash by
 * git_indexer_commit(). */
const git_oid *git_indexer_hash(const git_indexer *idx)
{
	return &idx->hash;
}
| |
| static int parse_header(struct git_pack_header *hdr, struct git_pack_file *pack) |
| { |
| int error; |
| git_map map; |
| |
| if ((error = p_mmap(&map, sizeof(*hdr), GIT_PROT_READ, GIT_MAP_SHARED, pack->mwf.fd, 0)) < 0) |
| return error; |
| |
| memcpy(hdr, map.data, sizeof(*hdr)); |
| p_munmap(&map); |
| |
| /* Verify we recognize this pack file format. */ |
| if (hdr->hdr_signature != ntohl(PACK_SIGNATURE)) { |
| git_error_set(GIT_ERROR_INDEXER, "wrong pack signature"); |
| return -1; |
| } |
| |
| if (!pack_version_ok(hdr->hdr_version)) { |
| git_error_set(GIT_ERROR_INDEXER, "wrong pack version"); |
| return -1; |
| } |
| |
| return 0; |
| } |
| |
| static int objects_cmp(const void *a, const void *b) |
| { |
| const struct entry *entrya = a; |
| const struct entry *entryb = b; |
| |
| return git_oid__cmp(&entrya->oid, &entryb->oid); |
| } |
| |
/* Initialize 'opts' with the defaults for the given structure 'version'. */
int git_indexer_options_init(git_indexer_options *opts, unsigned int version)
{
	GIT_INIT_STRUCTURE_FROM_TEMPLATE(
		opts, version, git_indexer_options, GIT_INDEXER_OPTIONS_INIT);
	return 0;
}
| |
/* Alias for git_indexer_options_init(), kept for API compatibility. */
int git_indexer_init_options(git_indexer_options *opts, unsigned int version)
{
	return git_indexer_options_init(opts, version);
}
| |
| int git_indexer_new( |
| git_indexer **out, |
| const char *prefix, |
| unsigned int mode, |
| git_odb *odb, |
| git_indexer_options *in_opts) |
| { |
| git_indexer_options opts = GIT_INDEXER_OPTIONS_INIT; |
| git_indexer *idx; |
| git_buf path = GIT_BUF_INIT, tmp_path = GIT_BUF_INIT; |
| static const char suff[] = "/pack"; |
| int error, fd = -1; |
| |
| if (in_opts) |
| memcpy(&opts, in_opts, sizeof(opts)); |
| |
| idx = git__calloc(1, sizeof(git_indexer)); |
| GIT_ERROR_CHECK_ALLOC(idx); |
| idx->odb = odb; |
| idx->progress_cb = opts.progress_cb; |
| idx->progress_payload = opts.progress_cb_payload; |
| idx->mode = mode ? mode : GIT_PACK_FILE_MODE; |
| git_buf_init(&idx->entry_data, 0); |
| |
| if ((error = git_hash_ctx_init(&idx->hash_ctx)) < 0 || |
| (error = git_hash_ctx_init(&idx->trailer)) < 0 || |
| (error = git_oidmap_new(&idx->expected_oids)) < 0) |
| goto cleanup; |
| |
| idx->do_verify = opts.verify; |
| |
| if (git_repository__fsync_gitdir) |
| idx->do_fsync = 1; |
| |
| error = git_buf_joinpath(&path, prefix, suff); |
| if (error < 0) |
| goto cleanup; |
| |
| fd = git_futils_mktmp(&tmp_path, git_buf_cstr(&path), idx->mode); |
| git_buf_dispose(&path); |
| if (fd < 0) |
| goto cleanup; |
| |
| error = git_packfile_alloc(&idx->pack, git_buf_cstr(&tmp_path)); |
| git_buf_dispose(&tmp_path); |
| |
| if (error < 0) |
| goto cleanup; |
| |
| idx->pack->mwf.fd = fd; |
| if ((error = git_mwindow_file_register(&idx->pack->mwf)) < 0) |
| goto cleanup; |
| |
| *out = idx; |
| return 0; |
| |
| cleanup: |
| if (fd != -1) |
| p_close(fd); |
| |
| if (git_buf_len(&tmp_path) > 0) |
| p_unlink(git_buf_cstr(&tmp_path)); |
| |
| if (idx->pack != NULL) |
| p_unlink(idx->pack->pack_name); |
| |
| git_buf_dispose(&path); |
| git_buf_dispose(&tmp_path); |
| git__free(idx); |
| return -1; |
| } |
| |
| void git_indexer__set_fsync(git_indexer *idx, int do_fsync) |
| { |
| idx->do_fsync = !!do_fsync; |
| } |
| |
| /* Try to store the delta so we can try to resolve it later */ |
| static int store_delta(git_indexer *idx) |
| { |
| struct delta_info *delta; |
| |
| delta = git__calloc(1, sizeof(struct delta_info)); |
| GIT_ERROR_CHECK_ALLOC(delta); |
| delta->delta_off = idx->entry_start; |
| |
| if (git_vector_insert(&idx->deltas, delta) < 0) |
| return -1; |
| |
| return 0; |
| } |
| |
| static int hash_header(git_hash_ctx *ctx, off64_t len, git_object_t type) |
| { |
| char buffer[64]; |
| size_t hdrlen; |
| int error; |
| |
| if ((error = git_odb__format_object_header(&hdrlen, |
| buffer, sizeof(buffer), (size_t)len, type)) < 0) |
| return error; |
| |
| return git_hash_update(ctx, buffer, hdrlen); |
| } |
| |
| static int hash_object_stream(git_indexer*idx, git_packfile_stream *stream) |
| { |
| ssize_t read; |
| |
| assert(idx && stream); |
| |
| do { |
| if ((read = git_packfile_stream_read(stream, idx->objbuf, sizeof(idx->objbuf))) < 0) |
| break; |
| |
| if (idx->do_verify) |
| git_buf_put(&idx->entry_data, idx->objbuf, read); |
| |
| git_hash_update(&idx->hash_ctx, idx->objbuf, read); |
| } while (read > 0); |
| |
| if (read < 0) |
| return (int)read; |
| |
| return 0; |
| } |
| |
| /* In order to create the packfile stream, we need to skip over the delta base description */ |
| static int advance_delta_offset(git_indexer *idx, git_object_t type) |
| { |
| git_mwindow *w = NULL; |
| |
| assert(type == GIT_OBJECT_REF_DELTA || type == GIT_OBJECT_OFS_DELTA); |
| |
| if (type == GIT_OBJECT_REF_DELTA) { |
| idx->off += GIT_OID_RAWSZ; |
| } else { |
| off64_t base_off = get_delta_base(idx->pack, &w, &idx->off, type, idx->entry_start); |
| git_mwindow_close(&w); |
| if (base_off < 0) |
| return (int)base_off; |
| } |
| |
| return 0; |
| } |
| |
| /* Read from the stream and discard any output */ |
| static int read_object_stream(git_indexer *idx, git_packfile_stream *stream) |
| { |
| ssize_t read; |
| |
| assert(stream); |
| |
| do { |
| read = git_packfile_stream_read(stream, idx->objbuf, sizeof(idx->objbuf)); |
| } while (read > 0); |
| |
| if (read < 0) |
| return (int)read; |
| |
| return 0; |
| } |
| |
| static int crc_object(uint32_t *crc_out, git_mwindow_file *mwf, off64_t start, off64_t size) |
| { |
| void *ptr; |
| uint32_t crc; |
| unsigned int left, len; |
| git_mwindow *w = NULL; |
| |
| crc = crc32(0L, Z_NULL, 0); |
| while (size) { |
| ptr = git_mwindow_open(mwf, &w, start, (size_t)size, &left); |
| if (ptr == NULL) |
| return -1; |
| |
| len = min(left, (unsigned int)size); |
| crc = crc32(crc, ptr, len); |
| size -= len; |
| start += len; |
| git_mwindow_close(&w); |
| } |
| |
| *crc_out = htonl(crc); |
| return 0; |
| } |
| |
| static int add_expected_oid(git_indexer *idx, const git_oid *oid) |
| { |
| /* |
| * If we know about that object because it is stored in our ODB or |
| * because we have already processed it as part of our pack file, we do |
| * not have to expect it. |
| */ |
| if ((!idx->odb || !git_odb_exists(idx->odb, oid)) && |
| !git_oidmap_exists(idx->pack->idx_cache, oid) && |
| !git_oidmap_exists(idx->expected_oids, oid)) { |
| git_oid *dup = git__malloc(sizeof(*oid)); |
| GIT_ERROR_CHECK_ALLOC(dup); |
| git_oid_cpy(dup, oid); |
| return git_oidmap_set(idx->expected_oids, dup, dup); |
| } |
| |
| return 0; |
| } |
| |
| static int check_object_connectivity(git_indexer *idx, const git_rawobj *obj) |
| { |
| git_object *object; |
| git_oid *expected; |
| int error; |
| |
| if (obj->type != GIT_OBJECT_BLOB && |
| obj->type != GIT_OBJECT_TREE && |
| obj->type != GIT_OBJECT_COMMIT && |
| obj->type != GIT_OBJECT_TAG) |
| return 0; |
| |
| if ((error = git_object__from_raw(&object, obj->data, obj->len, obj->type)) < 0) |
| goto out; |
| |
| if ((expected = git_oidmap_get(idx->expected_oids, &object->cached.oid)) != NULL) { |
| git_oidmap_delete(idx->expected_oids, &object->cached.oid); |
| git__free(expected); |
| } |
| |
| /* |
| * Check whether this is a known object. If so, we can just continue as |
| * we assume that the ODB has a complete graph. |
| */ |
| if (idx->odb && git_odb_exists(idx->odb, &object->cached.oid)) |
| return 0; |
| |
| switch (obj->type) { |
| case GIT_OBJECT_TREE: |
| { |
| git_tree *tree = (git_tree *) object; |
| git_tree_entry *entry; |
| size_t i; |
| |
| git_array_foreach(tree->entries, i, entry) |
| if (add_expected_oid(idx, entry->oid) < 0) |
| goto out; |
| |
| break; |
| } |
| case GIT_OBJECT_COMMIT: |
| { |
| git_commit *commit = (git_commit *) object; |
| git_oid *parent_oid; |
| size_t i; |
| |
| git_array_foreach(commit->parent_ids, i, parent_oid) |
| if (add_expected_oid(idx, parent_oid) < 0) |
| goto out; |
| |
| if (add_expected_oid(idx, &commit->tree_id) < 0) |
| goto out; |
| |
| break; |
| } |
| case GIT_OBJECT_TAG: |
| { |
| git_tag *tag = (git_tag *) object; |
| |
| if (add_expected_oid(idx, &tag->target) < 0) |
| goto out; |
| |
| break; |
| } |
| case GIT_OBJECT_BLOB: |
| default: |
| break; |
| } |
| |
| out: |
| git_object_free(object); |
| |
| return error; |
| } |
| |
| static int store_object(git_indexer *idx) |
| { |
| int i, error; |
| git_oid oid; |
| struct entry *entry; |
| off64_t entry_size; |
| struct git_pack_entry *pentry; |
| off64_t entry_start = idx->entry_start; |
| |
| entry = git__calloc(1, sizeof(*entry)); |
| GIT_ERROR_CHECK_ALLOC(entry); |
| |
| pentry = git__calloc(1, sizeof(struct git_pack_entry)); |
| GIT_ERROR_CHECK_ALLOC(pentry); |
| |
| git_hash_final(&oid, &idx->hash_ctx); |
| entry_size = idx->off - entry_start; |
| if (entry_start > UINT31_MAX) { |
| entry->offset = UINT32_MAX; |
| entry->offset_long = entry_start; |
| } else { |
| entry->offset = (uint32_t)entry_start; |
| } |
| |
| if (idx->do_verify) { |
| git_rawobj rawobj = { |
| idx->entry_data.ptr, |
| idx->entry_data.size, |
| idx->entry_type |
| }; |
| |
| if ((error = check_object_connectivity(idx, &rawobj)) < 0) |
| goto on_error; |
| } |
| |
| git_oid_cpy(&pentry->sha1, &oid); |
| pentry->offset = entry_start; |
| |
| if (git_oidmap_exists(idx->pack->idx_cache, &pentry->sha1)) { |
| git_error_set(GIT_ERROR_INDEXER, "duplicate object %s found in pack", git_oid_tostr_s(&pentry->sha1)); |
| git__free(pentry); |
| goto on_error; |
| } |
| |
| if ((error = git_oidmap_set(idx->pack->idx_cache, &pentry->sha1, pentry)) < 0) { |
| git__free(pentry); |
| git_error_set_oom(); |
| goto on_error; |
| } |
| |
| git_oid_cpy(&entry->oid, &oid); |
| |
| if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0) |
| goto on_error; |
| |
| /* Add the object to the list */ |
| if (git_vector_insert(&idx->objects, entry) < 0) |
| goto on_error; |
| |
| for (i = oid.id[0]; i < 256; ++i) { |
| idx->fanout[i]++; |
| } |
| |
| return 0; |
| |
| on_error: |
| git__free(entry); |
| |
| return -1; |
| } |
| |
/* Check whether 'id' was already recorded in this pack's index cache. */
GIT_INLINE(bool) has_entry(git_indexer *idx, git_oid *id)
{
	return git_oidmap_exists(idx->pack->idx_cache, id);
}
| |
/*
 * Record a finished entry at 'entry_start': store the offset, insert
 * 'pentry' into the pack's index cache, 'entry' into the objects vector
 * and bump the fanout counts.
 *
 * On success, ownership of 'pentry' (cache) and 'entry' (vector)
 * transfers to the indexer.  NOTE(review): if only the vector insertion
 * fails, 'pentry' is already owned by the cache while callers free both
 * on error — confirm against call sites before relying on this.
 */
static int save_entry(git_indexer *idx, struct entry *entry, struct git_pack_entry *pentry, off64_t entry_start)
{
	int i;

	/* Offsets beyond 31 bits go into the index's large-offset table */
	if (entry_start > UINT31_MAX) {
		entry->offset = UINT32_MAX;
		entry->offset_long = entry_start;
	} else {
		entry->offset = (uint32_t)entry_start;
	}

	pentry->offset = entry_start;

	if (git_oidmap_exists(idx->pack->idx_cache, &pentry->sha1) ||
	    git_oidmap_set(idx->pack->idx_cache, &pentry->sha1, pentry) < 0) {
		git_error_set(GIT_ERROR_INDEXER, "cannot insert object into pack");
		return -1;
	}

	/* Add the object to the list */
	if (git_vector_insert(&idx->objects, entry) < 0)
		return -1;

	/* Every fanout bucket at or above the first id byte gains one entry */
	for (i = entry->oid.id[0]; i < 256; ++i) {
		idx->fanout[i]++;
	}

	return 0;
}
| |
/*
 * Hash a fully-inflated (delta-resolved) object and record it via
 * save_entry().
 *
 * On this function's own error paths 'obj->data', 'entry' and 'pentry'
 * are freed.  NOTE(review): if save_entry() itself fails, none of them
 * are released here — confirm how callers handle that case.
 */
static int hash_and_save(git_indexer *idx, git_rawobj *obj, off64_t entry_start)
{
	git_oid oid;
	size_t entry_size;
	struct entry *entry;
	struct git_pack_entry *pentry = NULL;

	entry = git__calloc(1, sizeof(*entry));
	GIT_ERROR_CHECK_ALLOC(entry);

	/* The object's id is the hash of its raw (loose) form */
	if (git_odb__hashobj(&oid, obj) < 0) {
		git_error_set(GIT_ERROR_INDEXER, "failed to hash object");
		goto on_error;
	}

	/* NOTE(review): if this allocation fails we return without freeing 'entry' */
	pentry = git__calloc(1, sizeof(struct git_pack_entry));
	GIT_ERROR_CHECK_ALLOC(pentry);

	git_oid_cpy(&pentry->sha1, &oid);
	git_oid_cpy(&entry->oid, &oid);
	entry->crc = crc32(0L, Z_NULL, 0);

	entry_size = (size_t)(idx->off - entry_start);
	if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0)
		goto on_error;

	return save_entry(idx, entry, pentry, entry_start);

on_error:
	git__free(pentry);
	git__free(entry);
	git__free(obj->data);
	return -1;
}
| |
| static int do_progress_callback(git_indexer *idx, git_indexer_progress *stats) |
| { |
| if (idx->progress_cb) |
| return git_error_set_after_callback_function( |
| idx->progress_cb(stats, idx->progress_payload), |
| "indexer progress"); |
| return 0; |
| } |
| |
| /* Hash everything but the last 20B of input */ |
| static void hash_partially(git_indexer *idx, const uint8_t *data, size_t size) |
| { |
| size_t to_expell, to_keep; |
| |
| if (size == 0) |
| return; |
| |
| /* Easy case, dump the buffer and the data minus the last 20 bytes */ |
| if (size >= GIT_OID_RAWSZ) { |
| git_hash_update(&idx->trailer, idx->inbuf, idx->inbuf_len); |
| git_hash_update(&idx->trailer, data, size - GIT_OID_RAWSZ); |
| |
| data += size - GIT_OID_RAWSZ; |
| memcpy(idx->inbuf, data, GIT_OID_RAWSZ); |
| idx->inbuf_len = GIT_OID_RAWSZ; |
| return; |
| } |
| |
| /* We can just append */ |
| if (idx->inbuf_len + size <= GIT_OID_RAWSZ) { |
| memcpy(idx->inbuf + idx->inbuf_len, data, size); |
| idx->inbuf_len += size; |
| return; |
| } |
| |
| /* We need to partially drain the buffer and then append */ |
| to_keep = GIT_OID_RAWSZ - size; |
| to_expell = idx->inbuf_len - to_keep; |
| |
| git_hash_update(&idx->trailer, idx->inbuf, to_expell); |
| |
| memmove(idx->inbuf, idx->inbuf + to_expell, to_keep); |
| memcpy(idx->inbuf + to_keep, data, size); |
| idx->inbuf_len += size - to_expell; |
| } |
| |
| static int write_at(git_indexer *idx, const void *data, off64_t offset, size_t size) |
| { |
| git_file fd = idx->pack->mwf.fd; |
| size_t mmap_alignment; |
| size_t page_offset; |
| off64_t page_start; |
| unsigned char *map_data; |
| git_map map; |
| int error; |
| |
| assert(data && size); |
| |
| if ((error = git__mmap_alignment(&mmap_alignment)) < 0) |
| return error; |
| |
| /* the offset needs to be at the mmap boundary for the platform */ |
| page_offset = offset % mmap_alignment; |
| page_start = offset - page_offset; |
| |
| if ((error = p_mmap(&map, page_offset + size, GIT_PROT_WRITE, GIT_MAP_SHARED, fd, page_start)) < 0) |
| return error; |
| |
| map_data = (unsigned char *)map.data; |
| memcpy(map_data + page_offset, data, size); |
| p_munmap(&map); |
| |
| return 0; |
| } |
| |
| static int append_to_pack(git_indexer *idx, const void *data, size_t size) |
| { |
| off64_t new_size; |
| size_t mmap_alignment; |
| size_t page_offset; |
| off64_t page_start; |
| off64_t current_size = idx->pack->mwf.size; |
| int fd = idx->pack->mwf.fd; |
| int error; |
| |
| if (!size) |
| return 0; |
| |
| if ((error = git__mmap_alignment(&mmap_alignment)) < 0) |
| return error; |
| |
| /* Write a single byte to force the file system to allocate space now or |
| * report an error, since we can't report errors when writing using mmap. |
| * Round the size up to the nearest page so that we only need to perform file |
| * I/O when we add a page, instead of whenever we write even a single byte. */ |
| new_size = current_size + size; |
| page_offset = new_size % mmap_alignment; |
| page_start = new_size - page_offset; |
| |
| if (p_lseek(fd, page_start + mmap_alignment - 1, SEEK_SET) < 0 || |
| p_write(idx->pack->mwf.fd, data, 1) < 0) { |
| git_error_set(GIT_ERROR_OS, "cannot extend packfile '%s'", idx->pack->pack_name); |
| return -1; |
| } |
| |
| return write_at(idx, data, idx->pack->mwf.size, size); |
| } |
| |
/*
 * Consume one object (or as much of it as is buffered) from the pack
 * data.  Returns GIT_EBUFS when more input is needed; in that case
 * idx->off is rewound (and idx->have_stream left set for a partially
 * inflated object) so the next append resumes where we stopped.
 */
static int read_stream_object(git_indexer *idx, git_indexer_progress *stats)
{
	git_packfile_stream *stream = &idx->stream;
	off64_t entry_start = idx->off;
	size_t entry_size;
	git_object_t type;
	git_mwindow *w = NULL;
	int error;

	/* Don't parse into the last 20 bytes: they may be the trailer */
	if (idx->pack->mwf.size <= idx->off + 20)
		return GIT_EBUFS;

	if (!idx->have_stream) {
		error = git_packfile_unpack_header(&entry_size, &type, &idx->pack->mwf, &w, &idx->off);
		if (error == GIT_EBUFS) {
			idx->off = entry_start;
			return error;
		}
		if (error < 0)
			return error;

		git_mwindow_close(&w);
		idx->entry_start = entry_start;
		git_hash_init(&idx->hash_ctx);
		git_buf_clear(&idx->entry_data);

		if (type == GIT_OBJECT_REF_DELTA || type == GIT_OBJECT_OFS_DELTA) {
			/* Skip the base description so the stream starts at the data */
			error = advance_delta_offset(idx, type);
			if (error == GIT_EBUFS) {
				idx->off = entry_start;
				return error;
			}
			if (error < 0)
				return error;

			idx->have_delta = 1;
		} else {
			idx->have_delta = 0;

			/* Non-deltas are hashed as loose objects: header first */
			error = hash_header(&idx->hash_ctx, entry_size, type);
			if (error < 0)
				return error;
		}

		idx->have_stream = 1;
		idx->entry_type = type;

		error = git_packfile_stream_open(stream, idx->pack, idx->off);
		if (error < 0)
			return error;
	}

	if (idx->have_delta) {
		/* A delta can't be hashed until resolved; just advance past it */
		error = read_object_stream(idx, stream);
	} else {
		error = hash_object_stream(idx, stream);
	}

	idx->off = stream->curpos;
	if (error == GIT_EBUFS)
		return error;

	/* We want to free the stream resources no matter what here */
	idx->have_stream = 0;
	git_packfile_stream_dispose(stream);

	if (error < 0)
		return error;

	if (idx->have_delta) {
		error = store_delta(idx);
	} else {
		error = store_object(idx);
	}

	if (error < 0)
		return error;

	/* Deltas only count as indexed once they are resolved */
	if (!idx->have_delta) {
		stats->indexed_objects++;
	}
	stats->received_objects++;

	if ((error = do_progress_callback(idx, stats)) != 0)
		return error;

	return 0;
}
| |
| int git_indexer_append(git_indexer *idx, const void *data, size_t size, git_indexer_progress *stats) |
| { |
| int error = -1; |
| struct git_pack_header *hdr = &idx->hdr; |
| git_mwindow_file *mwf = &idx->pack->mwf; |
| |
| assert(idx && data && stats); |
| |
| if ((error = append_to_pack(idx, data, size)) < 0) |
| return error; |
| |
| hash_partially(idx, data, (int)size); |
| |
| /* Make sure we set the new size of the pack */ |
| idx->pack->mwf.size += size; |
| |
| if (!idx->parsed_header) { |
| unsigned int total_objects; |
| |
| if ((unsigned)idx->pack->mwf.size < sizeof(struct git_pack_header)) |
| return 0; |
| |
| if ((error = parse_header(&idx->hdr, idx->pack)) < 0) |
| return error; |
| |
| idx->parsed_header = 1; |
| idx->nr_objects = ntohl(hdr->hdr_entries); |
| idx->off = sizeof(struct git_pack_header); |
| |
| if (idx->nr_objects <= git_indexer__max_objects) { |
| total_objects = (unsigned int)idx->nr_objects; |
| } else { |
| git_error_set(GIT_ERROR_INDEXER, "too many objects"); |
| return -1; |
| } |
| |
| if (git_oidmap_new(&idx->pack->idx_cache) < 0) |
| return -1; |
| |
| idx->pack->has_cache = 1; |
| if (git_vector_init(&idx->objects, total_objects, objects_cmp) < 0) |
| return -1; |
| |
| if (git_vector_init(&idx->deltas, total_objects / 2, NULL) < 0) |
| return -1; |
| |
| stats->received_objects = 0; |
| stats->local_objects = 0; |
| stats->total_deltas = 0; |
| stats->indexed_deltas = 0; |
| stats->indexed_objects = 0; |
| stats->total_objects = total_objects; |
| |
| if ((error = do_progress_callback(idx, stats)) != 0) |
| return error; |
| } |
| |
| /* Now that we have data in the pack, let's try to parse it */ |
| |
| /* As the file grows any windows we try to use will be out of date */ |
| git_mwindow_free_all(mwf); |
| |
| while (stats->indexed_objects < idx->nr_objects) { |
| if ((error = read_stream_object(idx, stats)) != 0) { |
| if (error == GIT_EBUFS) |
| break; |
| else |
| goto on_error; |
| } |
| } |
| |
| return 0; |
| |
| on_error: |
| git_mwindow_free_all(mwf); |
| return error; |
| } |
| |
/*
 * Rewrite the final path component of 'path' as "pack-<hash><suffix>",
 * using the indexer's computed pack hash.
 */
static int index_path(git_buf *path, git_indexer *idx, const char *suffix)
{
	const char prefix[] = "pack-";
	size_t slash = (size_t)path->size;

	/* search backwards for '/' */
	while (slash > 0 && path->ptr[slash - 1] != '/')
		slash--;

	/* Reserve the full final length up front: git_oid_fmt() below writes
	 * directly into the buffer without growing it */
	if (git_buf_grow(path, slash + 1 + strlen(prefix) +
		GIT_OID_HEXSZ + strlen(suffix) + 1) < 0)
		return -1;

	git_buf_truncate(path, slash);
	git_buf_puts(path, prefix);
	git_oid_fmt(path->ptr + git_buf_len(path), &idx->hash);
	/* git_oid_fmt() does not update the buffer's size; account for it */
	path->size += GIT_OID_HEXSZ;
	git_buf_puts(path, suffix);

	return git_buf_oom(path) ? -1 : 0;
}
| |
| /** |
| * Rewind the packfile by the trailer, as we might need to fix the |
| * packfile by injecting objects at the tail and must overwrite it. |
| */ |
| static void seek_back_trailer(git_indexer *idx) |
| { |
| idx->pack->mwf.size -= GIT_OID_RAWSZ; |
| git_mwindow_free_all(&idx->pack->mwf); |
| } |
| |
| static int inject_object(git_indexer *idx, git_oid *id) |
| { |
| git_odb_object *obj; |
| struct entry *entry; |
| struct git_pack_entry *pentry = NULL; |
| git_oid foo = {{0}}; |
| unsigned char hdr[64]; |
| git_buf buf = GIT_BUF_INIT; |
| off64_t entry_start; |
| const void *data; |
| size_t len, hdr_len; |
| int error; |
| |
| seek_back_trailer(idx); |
| entry_start = idx->pack->mwf.size; |
| |
| if (git_odb_read(&obj, idx->odb, id) < 0) { |
| git_error_set(GIT_ERROR_INDEXER, "missing delta bases"); |
| return -1; |
| } |
| |
| data = git_odb_object_data(obj); |
| len = git_odb_object_size(obj); |
| |
| entry = git__calloc(1, sizeof(*entry)); |
| GIT_ERROR_CHECK_ALLOC(entry); |
| |
| entry->crc = crc32(0L, Z_NULL, 0); |
| |
| /* Write out the object header */ |
| hdr_len = git_packfile__object_header(hdr, len, git_odb_object_type(obj)); |
| if ((error = append_to_pack(idx, hdr, hdr_len)) < 0) |
| goto cleanup; |
| |
| idx->pack->mwf.size += hdr_len; |
| entry->crc = crc32(entry->crc, hdr, (uInt)hdr_len); |
| |
| if ((error = git_zstream_deflatebuf(&buf, data, len)) < 0) |
| goto cleanup; |
| |
| /* And then the compressed object */ |
| if ((error = append_to_pack(idx, buf.ptr, buf.size)) < 0) |
| goto cleanup; |
| |
| idx->pack->mwf.size += buf.size; |
| entry->crc = htonl(crc32(entry->crc, (unsigned char *)buf.ptr, (uInt)buf.size)); |
| git_buf_dispose(&buf); |
| |
| /* Write a fake trailer so the pack functions play ball */ |
| |
| if ((error = append_to_pack(idx, &foo, GIT_OID_RAWSZ)) < 0) |
| goto cleanup; |
| |
| idx->pack->mwf.size += GIT_OID_RAWSZ; |
| |
| pentry = git__calloc(1, sizeof(struct git_pack_entry)); |
| GIT_ERROR_CHECK_ALLOC(pentry); |
| |
| git_oid_cpy(&pentry->sha1, id); |
| git_oid_cpy(&entry->oid, id); |
| idx->off = entry_start + hdr_len + len; |
| |
| error = save_entry(idx, entry, pentry, entry_start); |
| |
| cleanup: |
| if (error) { |
| git__free(entry); |
| git__free(pentry); |
| } |
| |
| git_odb_object_free(obj); |
| return error; |
| } |
| |
/*
 * Inject one missing REF_DELTA base from the ODB so that delta resolution
 * can make progress on a thin pack.  resolve_deltas() calls this again
 * whenever a full sweep makes no progress, so one base per call suffices.
 */
static int fix_thin_pack(git_indexer *idx, git_indexer_progress *stats)
{
	int error, found_ref_delta = 0;
	unsigned int i;
	struct delta_info *delta;
	size_t size;
	git_object_t type;
	git_mwindow *w = NULL;
	off64_t curpos = 0;
	unsigned char *base_info;
	unsigned int left = 0;
	git_oid base;

	assert(git_vector_length(&idx->deltas) > 0);

	if (idx->odb == NULL) {
		git_error_set(GIT_ERROR_INDEXER, "cannot fix a thin pack without an ODB");
		return -1;
	}

	/* Loop until we find the first REF delta */
	git_vector_foreach(&idx->deltas, i, delta) {
		if (!delta)
			continue;

		curpos = delta->delta_off;
		error = git_packfile_unpack_header(&size, &type, &idx->pack->mwf, &w, &curpos);
		if (error < 0)
			return error;

		if (type == GIT_OBJECT_REF_DELTA) {
			found_ref_delta = 1;
			break;
		}
	}

	if (!found_ref_delta) {
		git_error_set(GIT_ERROR_INDEXER, "no REF_DELTA found, cannot inject object");
		return -1;
	}

	/* curpos now points to the base information, which is an OID */
	base_info = git_mwindow_open(&idx->pack->mwf, &w, curpos, GIT_OID_RAWSZ, &left);
	if (base_info == NULL) {
		git_error_set(GIT_ERROR_INDEXER, "failed to map delta information");
		return -1;
	}

	git_oid_fromraw(&base, base_info);
	git_mwindow_close(&w);

	/* Base already present (e.g. injected earlier): nothing to do */
	if (has_entry(idx, &base))
		return 0;

	if (inject_object(idx, &base) < 0)
		return -1;

	stats->local_objects++;

	return 0;
}
| |
| static int resolve_deltas(git_indexer *idx, git_indexer_progress *stats) |
| { |
| unsigned int i; |
| int error; |
| struct delta_info *delta; |
| int progressed = 0, non_null = 0, progress_cb_result; |
| |
| while (idx->deltas.length > 0) { |
| progressed = 0; |
| non_null = 0; |
| git_vector_foreach(&idx->deltas, i, delta) { |
| git_rawobj obj = {0}; |
| |
| if (!delta) |
| continue; |
| |
| non_null = 1; |
| idx->off = delta->delta_off; |
| if ((error = git_packfile_unpack(&obj, idx->pack, &idx->off)) < 0) { |
| if (error == GIT_PASSTHROUGH) { |
| /* We have not seen the base object, we'll try again later. */ |
| continue; |
| } |
| return -1; |
| } |
| |
| if (idx->do_verify && check_object_connectivity(idx, &obj) < 0) |
| /* TODO: error? continue? */ |
| continue; |
| |
| if (hash_and_save(idx, &obj, delta->delta_off) < 0) |
| continue; |
| |
| git__free(obj.data); |
| stats->indexed_objects++; |
| stats->indexed_deltas++; |
| progressed = 1; |
| if ((progress_cb_result = do_progress_callback(idx, stats)) < 0) |
| return progress_cb_result; |
| |
| /* remove from the list */ |
| git_vector_set(NULL, &idx->deltas, i, NULL); |
| git__free(delta); |
| } |
| |
| /* if none were actually set, we're done */ |
| if (!non_null) |
| break; |
| |
| if (!progressed && (fix_thin_pack(idx, stats) < 0)) { |
| return -1; |
| } |
| } |
| |
| return 0; |
| } |
| |
| static int update_header_and_rehash(git_indexer *idx, git_indexer_progress *stats) |
| { |
| void *ptr; |
| size_t chunk = 1024*1024; |
| off64_t hashed = 0; |
| git_mwindow *w = NULL; |
| git_mwindow_file *mwf; |
| unsigned int left; |
| |
| mwf = &idx->pack->mwf; |
| |
| git_hash_init(&idx->trailer); |
| |
| |
| /* Update the header to include the numer of local objects we injected */ |
| idx->hdr.hdr_entries = htonl(stats->total_objects + stats->local_objects); |
| if (write_at(idx, &idx->hdr, 0, sizeof(struct git_pack_header)) < 0) |
| return -1; |
| |
| /* |
| * We now use the same technique as before to determine the |
| * hash. We keep reading up to the end and let |
| * hash_partially() keep the existing trailer out of the |
| * calculation. |
| */ |
| git_mwindow_free_all(mwf); |
| idx->inbuf_len = 0; |
| while (hashed < mwf->size) { |
| ptr = git_mwindow_open(mwf, &w, hashed, chunk, &left); |
| if (ptr == NULL) |
| return -1; |
| |
| hash_partially(idx, ptr, left); |
| hashed += left; |
| |
| git_mwindow_close(&w); |
| } |
| |
| return 0; |
| } |
| |
/*
 * Finalize the pack: verify the trailer, resolve all deltas (fixing a
 * thin pack if needed), check connectivity, write the v2 .idx file and
 * rename the packfile into its final, hash-derived name.
 */
int git_indexer_commit(git_indexer *idx, git_indexer_progress *stats)
{
	git_mwindow *w = NULL;
	unsigned int i, long_offsets = 0, left;
	int error;
	struct git_pack_idx_header hdr;
	git_buf filename = GIT_BUF_INIT;
	struct entry *entry;
	git_oid trailer_hash, file_hash;
	git_filebuf index_file = {0};
	void *packfile_trailer;

	if (!idx->parsed_header) {
		git_error_set(GIT_ERROR_INDEXER, "incomplete pack header");
		return -1;
	}

	/* Test for this before resolve_deltas(), as it plays with idx->off */
	if (idx->off + 20 < idx->pack->mwf.size) {
		git_error_set(GIT_ERROR_INDEXER, "unexpected data at the end of the pack");
		return -1;
	}
	if (idx->off + 20 > idx->pack->mwf.size) {
		git_error_set(GIT_ERROR_INDEXER, "missing trailer at the end of the pack");
		return -1;
	}

	/* The last GIT_OID_RAWSZ bytes of the file are the sender's trailer */
	packfile_trailer = git_mwindow_open(&idx->pack->mwf, &w, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left);
	if (packfile_trailer == NULL) {
		git_mwindow_close(&w);
		goto on_error;
	}

	/* Compare the packfile trailer as it was sent to us and what we calculated */
	git_oid_fromraw(&file_hash, packfile_trailer);
	git_mwindow_close(&w);

	git_hash_final(&trailer_hash, &idx->trailer);
	if (git_oid_cmp(&file_hash, &trailer_hash)) {
		git_error_set(GIT_ERROR_INDEXER, "packfile trailer mismatch");
		return -1;
	}

	/* Freeze the number of deltas */
	stats->total_deltas = stats->total_objects - stats->indexed_objects;

	if ((error = resolve_deltas(idx, stats)) < 0)
		return error;

	if (stats->indexed_objects != stats->total_objects) {
		git_error_set(GIT_ERROR_INDEXER, "early EOF");
		return -1;
	}

	/* Objects were injected to fix a thin pack: the header and trailer we
	 * wrote earlier are stale and must be recomputed */
	if (stats->local_objects > 0) {
		if (update_header_and_rehash(idx, stats) < 0)
			return -1;

		git_hash_final(&trailer_hash, &idx->trailer);
		write_at(idx, &trailer_hash, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ);
	}

	/*
	 * Is the resulting graph fully connected or are we still
	 * missing some objects? In the second case, we can
	 * bail out due to an incomplete and thus corrupt
	 * packfile.
	 */
	if (git_oidmap_size(idx->expected_oids) > 0) {
		git_error_set(GIT_ERROR_INDEXER, "packfile is missing %"PRIuZ" objects",
			git_oidmap_size(idx->expected_oids));
		return -1;
	}

	/* The index format requires entries sorted by oid */
	git_vector_sort(&idx->objects);

	/* Use the trailer hash as the pack file name to ensure
	 * files with different contents have different names */
	git_oid_cpy(&idx->hash, &trailer_hash);

	/* Derive the .idx path by swapping the temp name's "pack" suffix */
	git_buf_sets(&filename, idx->pack->pack_name);
	git_buf_shorten(&filename, strlen("pack"));
	git_buf_puts(&filename, "idx");
	if (git_buf_oom(&filename))
		return -1;

	if (git_filebuf_open(&index_file, filename.ptr,
		GIT_FILEBUF_HASH_CONTENTS |
		(idx->do_fsync ? GIT_FILEBUF_FSYNC : 0),
		idx->mode) < 0)
		goto on_error;

	/* Write out the header */
	hdr.idx_signature = htonl(PACK_IDX_SIGNATURE);
	hdr.idx_version = htonl(2);
	git_filebuf_write(&index_file, &hdr, sizeof(hdr));

	/* Write out the fanout table */
	for (i = 0; i < 256; ++i) {
		uint32_t n = htonl(idx->fanout[i]);
		git_filebuf_write(&index_file, &n, sizeof(n));
	}

	/* Write out the object names (SHA-1 hashes) */
	git_vector_foreach(&idx->objects, i, entry) {
		git_filebuf_write(&index_file, &entry->oid, sizeof(git_oid));
	}

	/* Write out the CRC32 values */
	git_vector_foreach(&idx->objects, i, entry) {
		git_filebuf_write(&index_file, &entry->crc, sizeof(uint32_t));
	}

	/* Write out the offsets */
	git_vector_foreach(&idx->objects, i, entry) {
		uint32_t n;

		/* Large offsets get an index into the 64-bit table instead */
		if (entry->offset == UINT32_MAX)
			n = htonl(0x80000000 | long_offsets++);
		else
			n = htonl(entry->offset);

		git_filebuf_write(&index_file, &n, sizeof(uint32_t));
	}

	/* Write out the long offsets */
	git_vector_foreach(&idx->objects, i, entry) {
		uint32_t split[2];

		if (entry->offset != UINT32_MAX)
			continue;

		split[0] = htonl(entry->offset_long >> 32);
		split[1] = htonl(entry->offset_long & 0xffffffff);

		git_filebuf_write(&index_file, &split, sizeof(uint32_t) * 2);
	}

	/* Write out the packfile trailer to the index */
	if (git_filebuf_write(&index_file, &trailer_hash, GIT_OID_RAWSZ) < 0)
		goto on_error;

	/* Write out the hash of the idx */
	if (git_filebuf_hash(&trailer_hash, &index_file) < 0)
		goto on_error;

	git_filebuf_write(&index_file, &trailer_hash, sizeof(git_oid));

	/* Figure out what the final name should be */
	if (index_path(&filename, idx, ".idx") < 0)
		goto on_error;

	/* Commit file */
	if (git_filebuf_commit_at(&index_file, filename.ptr) < 0)
		goto on_error;

	git_mwindow_free_all(&idx->pack->mwf);

	/* Truncate file to undo rounding up to next page_size in append_to_pack */
	if (p_ftruncate(idx->pack->mwf.fd, idx->pack->mwf.size) < 0) {
		git_error_set(GIT_ERROR_OS, "failed to truncate pack file '%s'", idx->pack->pack_name);
		return -1;
	}

	if (idx->do_fsync && p_fsync(idx->pack->mwf.fd) < 0) {
		git_error_set(GIT_ERROR_OS, "failed to fsync packfile");
		goto on_error;
	}

	/* We need to close the descriptor here so Windows doesn't choke on commit_at */
	if (p_close(idx->pack->mwf.fd) < 0) {
		git_error_set(GIT_ERROR_OS, "failed to close packfile");
		goto on_error;
	}

	idx->pack->mwf.fd = -1;

	if (index_path(&filename, idx, ".pack") < 0)
		goto on_error;

	/* And don't forget to rename the packfile to its new place. */
	if (p_rename(idx->pack->pack_name, git_buf_cstr(&filename)) < 0)
		goto on_error;

	/* And fsync the parent directory if we're asked to. */
	if (idx->do_fsync &&
		git_futils_fsync_parent(git_buf_cstr(&filename)) < 0)
		goto on_error;

	idx->pack_committed = 1;

	git_buf_dispose(&filename);
	return 0;

on_error:
	git_mwindow_free_all(&idx->pack->mwf);
	git_filebuf_cleanup(&index_file);
	git_buf_dispose(&filename);
	return -1;
}
| |
/*
 * Release all indexer resources.  The packfile is closed (and presumably
 * its temporary file removed — the 'true' argument to git_packfile_close
 * below; confirm in pack.c) unless the pack was committed.
 */
void git_indexer_free(git_indexer *idx)
{
	const git_oid *key;
	git_oid *value;
	size_t iter;

	if (idx == NULL)
		return;

	/* A stream may still be open if append stopped mid-object */
	if (idx->have_stream)
		git_packfile_stream_dispose(&idx->stream);

	git_vector_free_deep(&idx->objects);

	/* The cache values are heap-allocated pack entries we own */
	if (idx->pack->idx_cache) {
		struct git_pack_entry *pentry;
		git_oidmap_foreach_value(idx->pack->idx_cache, pentry, {
			git__free(pentry);
		});

		git_oidmap_free(idx->pack->idx_cache);
	}

	git_vector_free_deep(&idx->deltas);

	/* Packfiles are shared mwindow state: hold the global mutex while
	 * closing and freeing */
	if (!git_mutex_lock(&git__mwindow_mutex)) {
		if (!idx->pack_committed)
			git_packfile_close(idx->pack, true);

		git_packfile_free(idx->pack);
		git_mutex_unlock(&git__mwindow_mutex);
	}

	/* Free any still-unresolved expected oids (key and value alias) */
	iter = 0;
	while (git_oidmap_iterate((void **) &value, idx->expected_oids, &iter, &key) == 0)
		git__free(value);

	git_hash_ctx_cleanup(&idx->trailer);
	git_hash_ctx_cleanup(&idx->hash_ctx);
	git_buf_dispose(&idx->entry_data);
	git_oidmap_free(idx->expected_oids);
	git__free(idx);
}