The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define C_LUCY_COMPOUNDFILEWRITER
#include "Lucy/Util/ToolSet.h"

#include "Lucy/Store/CompoundFileWriter.h"
#include "Lucy/Store/Folder.h"
#include "Lucy/Store/InStream.h"
#include "Lucy/Store/OutStream.h"
#include "Lucy/Util/IndexFileNames.h"
#include "Lucy/Util/Json.h"

// Format version number; S_do_consolidate records it (as a string) under
// the "format" key of the compound file metadata.
int32_t CFWriter_current_file_format = 2;

// Helper which does the heavy lifting for CFWriter_consolidate: absorbs the
// folder's non-".json" files into "cf.dat" and writes the "cfmeta.json"
// metadata describing where each one landed.
static void
S_do_consolidate(CompoundFileWriter *self);

// Clean up files which may be left over from previous merge attempts
// ("cf.dat" and "cfmeta.json.temp").  Throws if one exists but can't be
// deleted.
static void
S_clean_up_old_temp_files(CompoundFileWriter *self);

// Constructor: obtain a blank COMPOUNDFILEWRITER object and pass it to
// CFWriter_init together with `folder`.
CompoundFileWriter*
CFWriter_new(Folder *folder) {
    CompoundFileWriter *writer;
    writer = (CompoundFileWriter*)VTable_Make_Obj(COMPOUNDFILEWRITER);
    return CFWriter_init(writer, folder);
}

// Initializer: INCREF `folder` and store it in self->folder.  Returns
// `self`.
CompoundFileWriter*
CFWriter_init(CompoundFileWriter *self, Folder *folder) {
    Folder *retained = (Folder*)INCREF(folder);
    self->folder = retained;
    return self;
}

// Destructor: DECREF the stored folder, then invoke SUPER_DESTROY for the
// rest of the teardown.
void
CFWriter_destroy(CompoundFileWriter *self) {
    Folder *held = self->folder;
    DECREF(held);
    SUPER_DESTROY(self, COMPOUNDFILEWRITER);
}

// Consolidate the folder's files into a compound file.
//
// If "cfmeta.json" is already present the merge is considered done and an
// error is thrown.  Otherwise leftovers from earlier attempts are removed
// and the consolidation proper is run.
void
CFWriter_consolidate(CompoundFileWriter *self) {
    CharBuf *meta_name = (CharBuf*)ZCB_WRAP_STR("cfmeta.json", 11);

    if (!Folder_Exists(self->folder, meta_name)) {
        S_clean_up_old_temp_files(self);
        S_do_consolidate(self);
    }
    else {
        THROW(ERR, "Merge already performed for %o",
              Folder_Get_Path(self->folder));
    }
}

// Remove "cf.dat" and "cfmeta.json.temp" if they exist, in that order.
// Throws if either file exists but can't be deleted.
static void
S_clean_up_old_temp_files(CompoundFileWriter *self) {
    CharBuf *stale_data = (CharBuf*)ZCB_WRAP_STR("cf.dat", 6);
    CharBuf *stale_meta = (CharBuf*)ZCB_WRAP_STR("cfmeta.json.temp", 16);
    Folder  *dir        = self->folder;

    // Short-circuit: deletion is only attempted when the file exists.
    if (Folder_Exists(dir, stale_data) && !Folder_Delete(dir, stale_data)) {
        THROW(ERR, "Can't delete '%o'", stale_data);
    }
    if (Folder_Exists(dir, stale_meta) && !Folder_Delete(dir, stale_meta)) {
        THROW(ERR, "Can't delete '%o'", stale_meta);
    }
}

// Build the compound file for self->folder.
//
// Every entry returned by Folder_List whose name does not end in ".json" is
// appended to "cf.dat", followed by padding so that the next sub-file begins
// on a file position that is a multiple of 8.  The offset and length of each
// sub-file are recorded (as strings) under the "files" key of a metadata
// hash, alongside the "format" version.  Once "cf.dat" has been closed, the
// metadata is written to "cfmeta.json.temp" and renamed to "cfmeta.json";
// finally the absorbed originals are deleted.
//
// Throws if the output stream or any input stream can't be opened, if the
// rename fails, or if an absorbed file can't be deleted.
static void
S_do_consolidate(CompoundFileWriter *self) {
    Folder    *folder    = self->folder;
    Hash      *metadata  = Hash_new(0);
    Hash      *sub_files = Hash_new(0);
    // The listing is taken before "cf.dat" is opened for writing;
    // presumably this keeps the compound file out of its own input list.
    // NOTE(review): the listing is used unchecked -- confirm that
    // Folder_List cannot return NULL here.
    VArray    *files     = Folder_List(folder, NULL);
    VArray    *merged    = VA_new(VA_Get_Size(files));
    CharBuf   *cf_file   = (CharBuf*)ZCB_WRAP_STR("cf.dat", 6);
    OutStream *outstream = Folder_Open_Out(folder, cf_file);

    if (!outstream) { RETHROW(INCREF(Err_get_error())); }

    // Start metadata.  The metadata hash holds its own reference to
    // sub_files, so both are released separately below.
    Hash_Store_Str(metadata, "files", 5, INCREF(sub_files));
    Hash_Store_Str(metadata, "format", 6,
                   (Obj*)CB_newf("%i32", CFWriter_current_file_format));

    // Absorb files in sorted order.
    VA_Sort(files, NULL, NULL);
    for (uint32_t i = 0, max = VA_Get_Size(files); i < max; i++) {
        CharBuf *infilename = (CharBuf*)VA_Fetch(files, i);

        if (!CB_Ends_With_Str(infilename, ".json", 5)) {
            InStream *instream = Folder_Open_In(folder, infilename);
            if (!instream) { RETHROW(INCREF(Err_get_error())); }

            // Absorb the file.
            int64_t offset = OutStream_Tell(outstream);
            OutStream_Absorb(outstream, instream);
            int64_t len = OutStream_Tell(outstream) - offset;

            // Record offset and length, keyed by the sub-file's name.
            Hash *file_data = Hash_new(2);
            Hash_Store_Str(file_data, "offset", 6,
                           (Obj*)CB_newf("%i64", offset));
            Hash_Store_Str(file_data, "length", 6,
                           (Obj*)CB_newf("%i64", len));
            Hash_Store(sub_files, (Obj*)infilename, (Obj*)file_data);

            // Remember the file so that it can be deleted once the
            // compound file is complete.
            VA_Push(merged, INCREF(infilename));

            // Add filler NULL bytes so that every sub-file begins on a file
            // position multiple of 8.
            OutStream_Align(outstream, 8);

            InStream_Close(instream);
            DECREF(instream);
        }
    }

    // Finish the data file *before* publishing the metadata, so that
    // "cfmeta.json" never appears while "cf.dat" is still open.
    OutStream_Close(outstream);
    DECREF(outstream);

    // Write metadata to a temp file, then rename it into place.
    CharBuf *cfmeta_temp = (CharBuf*)ZCB_WRAP_STR("cfmeta.json.temp", 16);
    CharBuf *cfmeta_file = (CharBuf*)ZCB_WRAP_STR("cfmeta.json", 11);
    Json_spew_json((Obj*)metadata, folder, cfmeta_temp);
    if (!Folder_Rename(folder, cfmeta_temp, cfmeta_file)) {
        RETHROW(INCREF(Err_get_error()));
    }

    // Clean up.
    DECREF(files);
    DECREF(metadata);
    DECREF(sub_files);

    // Delete the originals which now live inside the compound file.
    for (uint32_t i = 0, max = VA_Get_Size(merged); i < max; i++) {
        CharBuf *merged_file = (CharBuf*)VA_Fetch(merged, i);
        if (!Folder_Delete(folder, merged_file)) {
            // Build the message first: merged_file is owned by `merged`.
            CharBuf *mess = MAKE_MESS("Can't delete '%o'", merged_file);
            DECREF(merged);
            Err_throw_mess(ERR, mess);
        }
    }
    DECREF(merged);
}