#include "dataquery.h"

#include <algorithm>
#include <cstddef>
#include <fstream>
#include <iostream>
#include <ranges>
#include <string>
#include <utility>
#include <vector>

#include "escape.h"

/* In the first step, the JSON text is split into a vector, using " as the separator.
 * After this step, the square brackets, braces and commas that act as JSON syntax
 * are exposed at the beginning or the end of the elements of the vector.
 * In the second step, the square brackets and braces are stripped off one by one.
 * Each square bracket or brace becomes a separate element of the vector, but
 * [] or {} is kept as a single element, because [] and {} act as a value
 * rather than as syntax.
 * Commas that act as syntax, as well as spaces used only for layout,
 * are all removed.
 * After the above process, the vector dict shows all keys and values in a clear and
 * simple hierarchy.
 * Each square bracket or brace that acts as syntax becomes a separate element of
 * the vector, which makes later queries simple and easy to handle.
 */
// The characters treated as marks when splitting JSON text. Each one is stored as a
// one-character string so that it can be compared directly with keys and text fragments.
std::vector<std::string> DataQuery::marks = {"[", "]", "{", "}", ","};

/* Removes all leading and trailing space characters (' ') from the string.
 * Only the space character is stripped; tabs and other whitespace are kept.
 * An empty string, or a string that consists only of spaces, yields an empty string.
 */
std::string DataQuery::removeTwoSideSpace(std::string string) {
    const auto firstKept = string.find_first_not_of(' ');

    // Nothing but spaces (or nothing at all): there is no character to keep.
    if (firstKept == std::string::npos) {
        string.clear();
        return string;
    }

    // A non-space character exists, so find_last_not_of cannot return npos here.
    const auto lastKept = string.find_last_not_of(' ');

    // Cut the tail first so that the index of the first kept character stays valid.
    string.erase(lastKept + 1);
    string.erase(0, firstKept);

    return string;
}

/* Cuts the text into pieces at every double quotation mark ".
 * The quotation marks themselves are dropped, while the text before, between and
 * after them is kept, including empty pieces.
 * An empty text produces an empty vector.
 */
std::vector<std::string> DataQuery::splitJSON(const std::string& text) {
    std::vector<std::string> pieces;

    if (text.empty()) {
        return pieces;
    }

    std::string::size_type pieceStart = 0;

    while (true) {
        const auto quotePosition = text.find('"', pieceStart);

        if (quotePosition == std::string::npos) {
            // The rest of the text (possibly empty) is the last piece.
            pieces.emplace_back(text, pieceStart);
            break;
        }

        pieces.emplace_back(text, pieceStart, quotePosition - pieceStart);
        pieceStart = quotePosition + 1;
    }

    return pieces;
}

/* Separates the marks at the end of the text from whatever precedes them.
 * Starting from the last character, every character that is one of the marks is
 * taken off and becomes an element of its own; spaces around the remaining text
 * are removed after each step.
 * The returned vector holds the remaining text first (only if it is not empty),
 * followed by the marks in the order in which they appear in the text.
 * An empty text yields an empty vector.
 */
std::vector<std::string> DataQuery::splitNest(std::string text) {
    // The marks are collected from right to left and put in order at the end.
    std::vector<std::string> nest;

    while (!text.empty()) {
        const std::string lastChar(1, text.back());

        if (!std::ranges::contains(marks, lastChar)) {
            break;
        }

        nest.emplace_back(lastChar);
        text.pop_back();

        // When only spaces (or nothing) remain there is nothing left to trim,
        // and the loop ends on the next check.
        if (text.find_first_not_of(' ') == std::string::npos) {
            text.clear();
        } else {
            text = removeTwoSideSpace(text);
        }
    }

    if (!text.empty()) {
        nest.emplace_back(std::move(text));
    }

    std::ranges::reverse(nest);

    return nest;
}

// Reads the file line by line and joins the lines, so that the JSON text becomes
// a single line without the line breaks that separated them.
// If the file cannot be opened, no line is read and an empty string is returned.
std::string DataQuery::loadJSONFile(const std::filesystem::path& filePath) {
    std::string joinedText;
    std::ifstream input(filePath);

    for (std::string currentLine; std::getline(input, currentLine);) {
        joinedText += currentLine;
    }

    return joinedText;
}

/* Combines a JSON text into a single line by removing every line feed '\n'.
 * Only real line feed characters are removed; an escaped sequence written as the two
 * characters \ and n is left untouched, and so is the carriage return '\r'.
 */
std::string DataQuery::loadJSONText(const std::string& jsonTextRaw) {
    std::string jsonTextCombined;
    jsonTextCombined.reserve(jsonTextRaw.size());

    // Dropping the line feeds directly gives the same text as reading the input
    // line by line and concatenating the lines, without needing a string stream.
    for (const char character : jsonTextRaw) {
        if (character != '\n') {
            jsonTextCombined += character;
        }
    }

    return jsonTextCombined;
}

/* Returns true when both vectors have the same size and hold equal strings at
 * every position, and false otherwise. Two empty vectors are regarded as the same.
 */
bool DataQuery::compareVectors(
    const std::vector<std::string>& vector1,
    const std::vector<std::string>& vector2
    ) {
    // The equality operator of std::vector compares the sizes first and then the
    // elements in order, stopping at the first difference.
    return vector1 == vector2;
}

/* Records the key as the current key at the depth braceIndex and returns the
 * updated hierarchy. The element at the index d of the hierarchy is the key that
 * was seen most recently at the depth d.
 *
 * A mark or an empty key never changes the hierarchy.
 * Otherwise:
 *   - every element deeper than braceIndex is removed, because those keys belong to
 *     objects that have been closed since they were recorded (several objects may
 *     be closed between two keys);
 *   - if the hierarchy already has an element at braceIndex, it is replaced by key;
 *   - if the hierarchy ends exactly one level above, key is appended;
 *   - if the hierarchy is shorter than that, it is left as it is.
 * A negative braceIndex means that the key is not inside any object, so no level
 * is valid and the hierarchy is emptied.
 */
std::vector<std::string> DataQuery::updateHierarchy(
    std::vector<std::string> hierarchyVector,
    const std::string& key,
    int braceIndex
    ) {
    if (key.empty() || std::ranges::contains(marks, key)) {
        return hierarchyVector;
    }

    if (braceIndex < 0) {
        hierarchyVector.clear();
        return hierarchyVector;
    }

    // The depth is not negative here, so the conversion cannot wrap around.
    const auto depth = static_cast<std::size_t>(braceIndex);

    if (hierarchyVector.size() > depth + 1) {
        hierarchyVector.resize(depth + 1);
    }

    if (hierarchyVector.size() > depth) {
        hierarchyVector[depth] = key;
    } else if (hierarchyVector.size() == depth) {
        hierarchyVector.emplace_back(key);
    }

    return hierarchyVector;
}

/* Parses the query conditions and sorts them into two groups.
 *
 * queryConditions is a list of conditions separated by commas, each written as
 * "key = value", where the key is a path whose segments are separated by dots.
 * Spaces around the key and around the value are removed.
 * A condition is ignored when it does not consist of exactly two parts around one
 * equals sign, or when its key or its value is blank.
 *
 * A condition is in the same hierarchy as the query when its key has as many
 * segments as queryString and all segments except the last one are equal.
 *
 * The first element of the returned tuple holds the conditions in the same hierarchy
 * as the query, and the second element holds all other conditions. Each condition is
 * a pair of the key's segments and the expected value.
 */
std::tuple<
    std::vector<std::pair<std::vector<std::string>, std::string>>,
    std::vector<std::pair<std::vector<std::string>, std::string>>
>
DataQuery::recognizeConditions(
    const std::string& queryConditions, const std::string& queryString
    ) {
    using Condition = std::pair<std::vector<std::string>, std::string>;

    std::vector<Condition> sameHierarchyConditionVector;
    std::vector<Condition> conditionVector;

    const auto queryVector = queryString
    | std::views::split('.')
    | std::ranges::to<std::vector<std::string>>();

    const auto conditions = queryConditions
    | std::views::split(',')
    | std::ranges::to<std::vector<std::string>>();

    // A part without any character other than a space cannot be a key or a value.
    const auto isBlank = [](const std::string& part) {
        return part.find_first_not_of(' ') == std::string::npos;
    };

    for (const auto& condition : conditions) {
        const auto conditionPairVector = condition
        | std::views::split('=')
        | std::ranges::to<std::vector<std::string>>();

        if (conditionPairVector.size() != 2
            || isBlank(conditionPairVector[0])
            || isBlank(conditionPairVector[1])
        ) {
            continue;
        }

        const auto conditionKeys = removeTwoSideSpace(conditionPairVector[0]);
        auto conditionValue = removeTwoSideSpace(conditionPairVector[1]);

        auto conditionKeyVector = conditionKeys
        | std::views::split('.')
        | std::ranges::to<std::vector<std::string>>();

        // Only the parent segments are compared; the last segment is the key itself
        // and is expected to differ between the query and the condition.
        const bool isSameHierarchy =
            !queryVector.empty()
            && queryVector.size() == conditionKeyVector.size()
            && std::equal(queryVector.begin(), queryVector.end() - 1, conditionKeyVector.begin());

        auto& target = isSameHierarchy ? sameHierarchyConditionVector : conditionVector;
        target.emplace_back(std::move(conditionKeyVector), std::move(conditionValue));
    }

    return std::make_tuple(std::move(sameHierarchyConditionVector), std::move(conditionVector));
}

/* The function is designed for generating a flattening dictionary.
 * The structure of an element in the vector dict is like {key, {value's type, value}}.
 * The mark {, }, [ or ] is also stored as a key, while it's value is empty.
 */
std::vector<std::pair<std::string, std::pair<std::string, std::string>>>
DataQuery::generateDict(const std::string& jsonText) {
    std::vector<
        std::pair<
            std::string,
            std::pair<std::string, std::string>
        >
    > dict;

    /* A JSON text may contain the character '\n' that is not escaped, so the JSON text
     * should be combined into one line for making further processing easier.
     * When a JSON text is loaded from a file through loadJSONFile, it has been combined
     * into a line.
     * However, when a JSON text is from any other source, it may contains \n that is not
     * escaped, and should be processed by loadJSONText for combining into one line.
     * For simply processing, no matter which source the JSON text is from, it is always
     * processed using loadJSONText.
     * The characters "\n" which have been escaped are not handled at this step.
     */
    auto jsonTextCombined = loadJSONText(jsonText);

    /* As JSON uses double quotation marks to enclose key names and string values,
     * double quotation marks inside a string value are generally written as \" for escaping.
     * The library splits the JSON text by using " as a separator, so before splitting
     * the escaped double quotation mark \" is replaced with the mark's Unicode encoding \u0022.
     */
    auto escapedJSONText = Escape::escapeDoubleQuotationMarks(jsonTextCombined);

    escapedJSONText = removeTwoSideSpace(escapedJSONText);

    auto splitJSONVector = splitJSON(escapedJSONText);

    int j = 0;
    while (j < splitJSONVector.size()) {
        /* As the common structure of JSON is like "key: value",
         * three variables cleanI, cleanII and cleanIII are used for the analysis of the structure.
         */
        auto cleanI = removeTwoSideSpace(splitJSONVector[j]);

        std::string cleanII;
        if (j + 1 < splitJSONVector.size()) {
            cleanII = removeTwoSideSpace(splitJSONVector[j + 1]);
        }

        std::string cleanIII;
        if (j + 2 < splitJSONVector.size()) {
            cleanIII = removeTwoSideSpace(splitJSONVector[j + 2]);
        }

        /* The first judgement is for recognizing a value which type is a string.
         * In JSON, a key is always enclosed in double quotation marks, while
         * a value which type is a string is also enclosed in double quotation marks.
         * After using the double quotation mark " as the separator to split a JSON text,
         * keys are clearly separated, and values which type is a string are also clearly
         * separated. A colon implies the positions of a key and a value.
         * Therefore, when a value's type is a string, the key, the colon and the value
         * are three separate elements in the vector from split JSON.
         * And here is the only chance to recognize whether a value's type is a string
         * because in the vector dict that is generated later all values are located in
         * the same structure.
         */
        if (cleanII == ":" && !cleanI.empty() && !cleanIII.empty()) {
            std::pair<
                std::string, std::pair<std::string, std::string>
            > pair = {cleanI, {"string", cleanIII}};

            dict.emplace_back(pair);

            j += 3;
        }

        /* The second judgement is for recognizing a value which type is not a string.
         * In JSON, when a value's type is not a string, it is not enclosed in
         * double quotation marks, so, after splitting JSON, the colon and the value
         * are not separated into two elements.
         * And the element may also contain braces or square brackets.
         * The function splitNest is designed for separating marks and any possible value.
         * As a layout may add spaces, the spaces between marks should be removed.
         * An empty object {} or an empty array [] is kept in an element.
         * Otherwise, {, }, [ and ] are always separated into different elements, and
         * they are regarded as the key of a pair.
         */
        else if (!cleanI.empty() && cleanII[0] == ':') {
            std::string s = cleanII.erase(0, 1);

            if (s[s.size() - 1] == ',') {
                s = s.erase(s.size() - 1, 1);
            }

            s = removeTwoSideSpace(s);

            if (!s.empty()) {
                std::string lastChar;
                lastChar += s[s.size() - 1];

                /* The characters between a colon and a comma may be an object, an array
                 * or a simple value. A colon separates a key and a value, while a comma
                 * separates different pairs.
                 * After removing the colon and the comma, the last character of the
                 * remaining string can be the part of a simple value, the right brace },
                 * or the right square bracket ].
                 * And, it can also be the left brace { or the left square bracket [
                 * if the end of the string looks like "},{" or "],[".
                 * The following process distinguishes whether the last character is
                 * a mark or not.
                 */
                if (std::ranges::contains(marks, lastChar)) {
                    auto nest = splitNest(s);

                    if (!nest.empty()) {
                        std::pair<std::string, std::pair<std::string, std::string>> pairKey;

                        /* The function splitNest splits the characters which are
                         * on the right side of a colon into several elements.
                         * If the first element is not a mark, it is a simple value.
                         * It may be a numerical value, or a boolean value or null.
                         * At this time it is not necessary to recognize what its type is,
                         * as the purpose of the library is for making a query
                         * but not for fully parsing.
                         * So here the type is just left empty.
                         */
                        if (!std::ranges::contains(marks, nest[0])) {
                            pairKey = {cleanI, {"", nest[0]}};
                            nest.erase(nest.begin());
                        }

                        /* For an empty array or an empty object, [] or {} is directly regarded
                         * as a value.
                         */
                        else if (
                            (nest[0] == "[" && nest[1] == "]") || (nest[0] == "{" && nest[1] == "}")
                        ) {
                            pairKey = {cleanI, {"", nest[0]}};
                            pairKey.second.second += nest[1];
                            nest.erase(nest.begin(), nest.begin() + 2);
                        }

                        /* If there is not a simple value, and there is not an empty array or
                         * an empty object, it should be the beginning of an array or an object,
                         * or the end of an array or an object, or the end of one and the beginning
                         * of the next one.
                         * In this case, cleanI is added to the vector dict as a key at the first.
                         */
                        else {
                            pairKey = {cleanI, {"", ""}};
                        }

                        dict.emplace_back(pairKey);

                        /* The remaining elements in the vector generated by the function splitNest
                         * are marks, and they are separately added to the vector dict as keys.
                         * As in JSON keys are enclosed in double quotation marks, after splitting
                         * JSON using " as a separator, keys are clearly separated, so for a
                         * string that mixes a value and marks, after recognizing a simple value,
                         * the remaining characters should be marks.
                         */
                        int k = 0;
                        while (k < nest.size()) {
                            if (nest[k] != ",") {
                                std::pair<
                                    std::string, std::pair<std::string, std::string>
                                > pair = {nest[k], {"", ""}};

                                dict.emplace_back(pair);
                            }

                            ++k;
                        }
                    }
                } else {
                    /* When the last character of a string between a colon and a comma
                     * is not a mark, it is a simple value.
                     */
                    std::pair<
                        std::string, std::pair<std::string, std::string>
                    > pair = {cleanI, {"", s}};

                    dict.emplace_back(pair);
                }
            }

            j += 2;
        }

        /* In this case, the remaining elements of the vector from split JSON
         * should be the marks which are located behind a string-type value in JSON.
         * For example, it may be a comma, the right brace, the right square bracket
         * or mixed marks like "},{" or "]},{".
         * Moreover, if an array in JSON contains multiple string-type elements
         * like ["a", "b", "c"], it is recognized here because the elements
         * inside the array do not follow JSON's common structure "key: value".
         * If an array in JSON contains multiple non-string elements like [1, 2, 3],
         * it is recognized at the above step because it is generally in a string
         * like ":[1, 2, 3]" after splitting JSON. Any string that starts with a colon
         * does not appear here, but has been processed in the above step.
         */
        else {
            if (!cleanI.empty()) {
                if (cleanI[cleanI.size() - 1] == ',') {
                    cleanI.erase(cleanI.size() - 1, 1);
                }

                cleanI = removeTwoSideSpace(cleanI);

                if (!cleanI.empty()) {
                    std::string lastChar;
                    lastChar += cleanI[cleanI.size() - 1];

                    /* If a string is like "true]", its type is not a string in JSON.
                     * So, it is clear that if the end of a string is not a mark,
                     * it's type should be a string in JSON.
                     * Because in an array like ["a", "b", "c"] there is no key,
                     * the elements in the array are regarded as values in the vector dict,
                     * and in this case the key of the pair is left empty.
                     * It can be clearly differentiated from the case that the value
                     * of a key is empty.
                     */
                    if (!std::ranges::contains(marks, lastChar)) {
                        std::pair<
                            std::string, std::pair<std::string, std::string>
                        > pair = {"", {"string", cleanI}};

                        dict.emplace_back(pair);
                    } else {
                        auto nest = splitNest(cleanI);

                        /* Here the remaining characters are only marks. */
                        for (auto& part : nest) {
                            if (part != ",") {
                                std::pair<
                                    std::string, std::pair<std::string, std::string>
                                > pair = {part, {"", ""}};

                                dict.emplace_back(pair);
                            }
                        }
                    }
                }
            }

            ++j;
        }
    }

    return dict;
}

/* The function makes a query through a one-time visit to all elements of the
 * vector dict generated from jsonData.
 *
 * queryString is the path of the wanted key, with its segments separated by dots.
 * queryConditions is a list of conditions like "key = value" separated by commas;
 * it is parsed by recognizeConditions.
 *
 * The returned pair is {value's type, value} of the found element.
 * If the queried key has no value of its own but is followed by an array, the
 * elements of the array are joined with ", " into the value (an element that has a
 * key is written as "key : value").
 * The pair is empty when the query string is empty, when nothing is found, or when
 * at least one condition is not matched.
 * If a condition in the same hierarchy as the query was matched inside an array, the
 * result is taken from the object with the same position in the array; otherwise the
 * first possible result is returned.
 *
 * Currently, it is able to process a query to a simple JSON text that complies
 * with JSON standards.
 * For multi-layer complicated JSON, there is a lot of work to do.
 */
std::pair<std::string, std::string> DataQuery::dataQuery(
    const std::string& jsonData, const std::string& queryString, const std::string& queryConditions
    ) {
    std::pair<std::string, std::string> queryResult;

    const auto queryVector = queryString
    | std::views::split('.')
    | std::ranges::to<std::vector<std::string>>();

    // An empty query string has no segment to compare with, so nothing can be found.
    if (queryVector.empty()) {
        return queryResult;
    }

    const auto dict = generateDict(jsonData);
    const int dictSize = static_cast<int>(dict.size());

    const auto [sameHierarchyConditionVector, conditionVector] = recognizeConditions(queryConditions, queryString);

    // The depth of arrays and of objects at the current element; -1 means outside.
    int squareBracketIndex = -1;
    int braceIndex = -1;

    // The position of the current object among the objects opened since the last
    // square bracket; -1 when no object has been opened since then.
    int pairIndexInArray = -1;

    // For each condition: the index of the key segment expected next, whether the
    // condition has been matched, and the hierarchy of keys followed for it.
    const int sameHierarchyConditionCount = static_cast<int>(sameHierarchyConditionVector.size());
    std::vector<int> sameHierarchyConditionIndexVector(sameHierarchyConditionCount, 0);
    std::vector<bool> sameHierarchyConditionMatchVector(sameHierarchyConditionCount, false);
    std::vector<std::vector<std::string>> sameHierarchyConditionHierarchy(sameHierarchyConditionCount);

    const int conditionCount = static_cast<int>(conditionVector.size());
    std::vector<int> conditionIndexVector(conditionCount, 0);
    std::vector<bool> conditionMatchVector(conditionCount, false);
    std::vector<std::vector<std::string>> conditionHierarchy(conditionCount);

    std::vector<std::string> queryHierarchy;

    // Each possible result is stored with the value of pairIndexInArray at which it was found.
    std::vector<std::pair<int, std::pair<std::string, std::string>>> possibleQueryResult;

    // The value of pairIndexInArray at which a condition in the same hierarchy was matched.
    int foundPairIndexInArray = -1;

    int i = 0;
    int queryVectorIndex = 0;

    bool isInArray = false;

    while (i < dictSize) {
        if (dict[i].first == "{") {
            ++braceIndex;
            if (isInArray) {
                ++pairIndexInArray;
            }
        }

        else if (dict[i].first == "}") {
            --braceIndex;
        }

        else if (dict[i].first == "[") {
            ++squareBracketIndex;
            pairIndexInArray = -1;
            isInArray = true;
        }

        else if (dict[i].first == "]") {
            --squareBracketIndex;
            pairIndexInArray = -1;
            isInArray = false;
        }

        // Conditions whose keys are not in the same hierarchy as the query.
        for (int j = 0; j < conditionCount; ++j) {
            if (conditionMatchVector[j]) {
                continue;
            }

            const auto& [conditionKeys, conditionValue] = conditionVector[j];

            conditionHierarchy[j] = updateHierarchy(conditionHierarchy[j], dict[i].first, braceIndex);

            if (braceIndex == conditionIndexVector[j]
                && dict[i].first == conditionKeys[conditionIndexVector[j]]
                ) {
                if (conditionIndexVector[j] + 1 < conditionKeys.size()) {
                    ++conditionIndexVector[j];
                }

                if (compareVectors(conditionHierarchy[j], conditionKeys)
                    && dict[i].second.second == conditionValue
                ) {
                    conditionMatchVector[j] = true;
                }
            }
        }

        // Conditions in the same hierarchy as the query; a match also records the
        // position of the object in the array where it happened.
        for (int k = 0; k < sameHierarchyConditionCount; ++k) {
            if (sameHierarchyConditionMatchVector[k]) {
                continue;
            }

            const auto& [conditionKeys, conditionValue] = sameHierarchyConditionVector[k];

            sameHierarchyConditionHierarchy[k] = updateHierarchy(
                sameHierarchyConditionHierarchy[k], dict[i].first, braceIndex);

            if (braceIndex == sameHierarchyConditionIndexVector[k]
                && dict[i].first == conditionKeys[sameHierarchyConditionIndexVector[k]]
                ) {
                if (sameHierarchyConditionIndexVector[k] + 1 < conditionKeys.size()) {
                    ++sameHierarchyConditionIndexVector[k];
                }

                if (compareVectors(sameHierarchyConditionHierarchy[k], conditionKeys)
                    && dict[i].second.second == conditionValue
                ) {
                    sameHierarchyConditionMatchVector[k] = true;
                    foundPairIndexInArray = pairIndexInArray;
                }
            }
        }

        queryHierarchy = updateHierarchy(queryHierarchy, dict[i].first, braceIndex);

        if (braceIndex == queryVectorIndex && dict[i].first == queryVector[queryVectorIndex]) {
            if (queryVectorIndex + 1 < queryVector.size()) {
                ++queryVectorIndex;
            }

            if (compareVectors(queryHierarchy, queryVector)) {
                if (!dict[i].second.second.empty()) {
                    std::pair<int, std::pair<std::string, std::string>> possiblePair = {
                        pairIndexInArray, dict[i].second
                    };
                    possibleQueryResult.emplace_back(possiblePair);
                } else {
                    // The key has no value of its own. If an array follows, its elements
                    // are collected. The key may be the last element of the vector dict,
                    // so the next element is only read when it exists.
                    if (i + 1 < dictSize && dict[i + 1].first == "[") {
                        int squareBracketIndexInArray = squareBracketIndex;
                        int vectorIndexInArray = i + 2;

                        // The walk ends at the square bracket that closes the array, or at
                        // the end of the vector dict when the array is never closed.
                        while (vectorIndexInArray < dictSize
                            && squareBracketIndexInArray >= squareBracketIndex
                        ) {
                            if (!std::ranges::contains(marks, dict[vectorIndexInArray].first)) {
                                if (dict[vectorIndexInArray].first.empty()) {
                                    // An element without a key: only its value is collected.
                                    if (possibleQueryResult.empty()) {
                                        std::pair<
                                            int, std::pair<std::string, std::string>
                                        > possiblePair = {
                                            pairIndexInArray, dict[vectorIndexInArray].second
                                        };

                                        possibleQueryResult.emplace_back(possiblePair);
                                    } else {
                                        for (auto& [key, value] : possibleQueryResult) {
                                            if (key == pairIndexInArray) {
                                                value.second += ", ";
                                                value.second += dict[vectorIndexInArray].second.second;
                                            }
                                        }
                                    }
                                } else {
                                    // An element with a key is collected as "key : value".
                                    if (possibleQueryResult.empty()) {
                                        std::pair<
                                            int, std::pair<std::string, std::string>
                                        > possiblePair = {
                                            pairIndexInArray,
                                            {"", dict[vectorIndexInArray].first}
                                        };

                                        possiblePair.second.second += " : ";
                                        possiblePair.second.second += dict[vectorIndexInArray].second.second;

                                        possibleQueryResult.emplace_back(possiblePair);
                                    } else {
                                        for (auto& [key, value] : possibleQueryResult) {
                                            if (key == pairIndexInArray) {
                                                value.second += ", ";
                                                value.second += dict[vectorIndexInArray].first;
                                                value.second += " : ";
                                                value.second += dict[vectorIndexInArray].second.second;
                                            }
                                        }
                                    }
                                }
                            }

                            ++vectorIndexInArray;

                            if (vectorIndexInArray >= dictSize) {
                                break;
                            }

                            if (dict[vectorIndexInArray].first == "[") {
                                squareBracketIndexInArray += 1;
                            } else if (dict[vectorIndexInArray].first == "]") {
                                squareBracketIndexInArray -= 1;
                            }
                        }
                    }
                }
            }
        }

        ++i;
    }

    // A result is only returned when every condition has been matched.
    bool isMatching = true;

    for (const auto& conditionMatch : conditionMatchVector) {
        if (!conditionMatch) {
            isMatching = false;
        }
    }

    for (const auto& conditionMatch : sameHierarchyConditionMatchVector) {
        if (!conditionMatch) {
            isMatching = false;
        }
    }

    if (isMatching) {
        if (foundPairIndexInArray != -1) {
            for (const auto& [key, value] : possibleQueryResult) {
                if (key == foundPairIndexInArray) {
                    queryResult = value;
                }
            }
        } else if (!possibleQueryResult.empty()) {
            queryResult = possibleQueryResult[0].second;
        }
    }

    return queryResult;
}