import { encode } from "gpt-3-encoder";
import type { Paragraph, Tag, Chunk, Mode, StringMap } from "../types";
import { v4 as uuidv4 } from "uuid";
import { log } from "console";

/**
 * Renders a list of tags as a single human-readable string.
 *
 * @param tags tags whose `name` values are listed, in their original order
 * @returns the tag names separated by ", " (an empty string for an empty list)
 */
export const tagsToText = (tags: Tag[]) => {
  const names = tags.map(({ name }) => name);
  return names.join(', ');
};

/**
 * Copies a single property from one set of paragraphs onto another set,
 * pairing paragraphs by their `block_id`.
 *
 * Neither input is mutated: every returned paragraph is a shallow copy.
 * A paragraph of `intoParagraphs` that has no counterpart in `fromParagraphs`
 * is returned as-is (its own value of `property`, if any, is kept) instead of
 * crashing the whole restore.
 *
 * @param fromParagraphs paragraphs holding the values to restore
 * @param intoParagraphs paragraphs that should receive the value
 * @param property name of the property to copy over
 * @returns a new array, in the order of `intoParagraphs`
 */
export const restorePropertyFrom = (fromParagraphs, intoParagraphs, property) => {
  // A Map (rather than a plain object) so that ids such as "constructor"
  // can never resolve to something inherited from Object.prototype.
  // Keys are stringified to keep the coercion a plain-object index would apply.
  const sourceByBlockId = new Map<string, any>();
  for (const paragraph of fromParagraphs) {
    sourceByBlockId.set(String(paragraph.block_id), paragraph);
  }

  return intoParagraphs.map(paragraph => {
    const source = sourceByBlockId.get(String(paragraph.block_id));
    if (!source) {
      // Nothing to restore from: keep the paragraph untouched.
      return { ...paragraph };
    }
    return { ...paragraph, [property]: source[property] };
  });
};

/**
 * Flattens structured content (an array of sections, each with a `data`
 * array of paragraphs) into a single array of compact paragraphs.
 *
 * Each compact paragraph carries only `id`, `block_id` and `text`, plus
 * whatever is passed in `extraFields` (the paragraph's own three fields win
 * over same-named extra fields).
 *
 * @param contentSet array of sections
 * @param extraFields fields merged into every produced paragraph
 * @param includeOnlySectionTypes when non-empty, only sections whose `type`
 * is listed contribute paragraphs; when empty, every section is included
 * @returns the paragraphs of all included sections, in content order
 */
export const contentToParagraphs = (contentSet, extraFields = {}, includeOnlySectionTypes: string[] = []) => {
  // Elements also carry `extraFields`, so the element type is left loose.
  const resultingParagraphs: any[] = [];
  const filterByType = includeOnlySectionTypes.length > 0;

  for (const section of contentSet) {
    // Decide once per section, before touching its data, so an excluded
    // section is skipped regardless of what its `data` looks like.
    if (filterByType && !includeOnlySectionTypes.includes(section.type)) {
      continue;
    }

    for (const paragraph of section.data) {
      const compactParagraph: Paragraph = {
        ...extraFields,
        id: paragraph.id,
        block_id: paragraph.block_id,
        text: paragraph.text,
      };
      resultingParagraphs.push(compactParagraph);
    }
  }

  return resultingParagraphs;
};

/**
 * Builds the `sound.before` object for a paragraph out of its tags.
 *
 * Only tags that are OWN keys of `immersiveTags` are considered, so a tag
 * like "constructor" or "toString" can never resolve to a member inherited
 * from Object.prototype and produce a bogus sound entry.
 *
 * TEMP (note from the original author): this logic should move to a proper place.
 *
 * @param tags comma-separated tag names (assumed to be a string, as in the
 * original code; anything without `.split` will throw)
 * @param immersiveTags map of tag name -> sound file name
 * @returns an object keyed by file name, each entry with fixed effect settings
 */
const soundsBeforeFromTags = (tags, immersiveTags: StringMap) => {
  const sounds: Record<string, { category: string; volume: number }> = {};

  for (const rawTag of tags.split(',')) {
    const tag = rawTag.trim();
    if (!Object.prototype.hasOwnProperty.call(immersiveTags, tag)) {
      continue;
    }
    const fileName = immersiveTags[tag];
    if (fileName) {
      sounds[fileName] = {
        category: 'effects',
        volume: 0.9,
      };
    }
  }

  return sounds;
};

/**
 * Merges processed paragraphs back into the original structured content.
 *
 * Earlier steps flatten the content into a single array of paragraphs and
 * process them. This function takes that flat array (`paragraphs`) and the
 * original, unaltered structure (`episodeContent`) and returns a copy of the
 * structure where each paragraph's `text` is replaced by the processed text
 * of the paragraph with the same `block_id`.
 *
 * Paragraphs of the original content that have NO processed counterpart are
 * kept as they were (and logged). According to the original authors this is
 * deliberate: ignored sections such as "chat" are not sent for processing
 * and must survive in the final output.
 *
 * When `immersiveTags` is given and a processed paragraph has `tags`, the
 * paragraph's `sound` is replaced by `{ before: ... }` built from those tags.
 *
 * @param paragraphs flat array of processed paragraphs
 * @param episodeContent original structured content (array of sections)
 * @param immersiveTags optional map of tag name -> sound file name
 * @returns new content array; the inputs are not mutated
 */
export const paragraphsToContent = (paragraphs, episodeContent, immersiveTags?: StringMap) => {
  // A Map instead of a plain object so that a block_id can never match an
  // inherited Object.prototype member. Keys are stringified to keep the
  // coercion a plain-object index would apply; later duplicates win.
  const processedByBlockId = new Map<string, any>();
  for (const paragraph of paragraphs) {
    processedByBlockId.set(String(paragraph.block_id), {
      text: paragraph.text,
      tags: paragraph.tags,
    });
  }

  return episodeContent.map((contentEntry) => {
    const newData = contentEntry.data.map((dataEntry) => {
      const output = processedByBlockId.get(String(dataEntry.block_id));

      // No processed version: fall back to the original entry on purpose.
      if (!output) {
        console.log("Block not found (could be ignored on purpose, like chat blocks)", dataEntry.block_id, dataEntry.id);
        return { ...dataEntry };
      }

      if (immersiveTags && output.tags) {
        const beforeSoundEntry = soundsBeforeFromTags(output.tags, immersiveTags);
        return { ...dataEntry, text: output.text, sound: { before: beforeSoundEntry } };
      }

      return { ...dataEntry, text: output.text };
    });

    return { ...contentEntry, data: newData };
  });
};

/**
 * Counts the tokens of a string using `encode` from "gpt-3-encoder".
 *
 * @param input text to measure
 * @returns the length of the encoded token array, or 0 when `input` is
 * empty (or otherwise falsy), in which case the encoder is not called
 */
export const countTokens = (input: string) => {
  if (!input) {
    return 0;
  }
  return encode(input).length;
};

/**
 * Creates a unique placeholder string of the form `[ph_<uuid>]`.
 * In this file it stands in for a skipped (non-prose) section.
 */
const createUUIDPlaceholder = () => {
  const uniqueId = uuidv4();
  return `[ph_${uniqueId}]`;
};
/**
 * Creates a unique point-of-view placeholder of the form
 * `[POV: <character> <uuid>]`. The `[POV:` prefix is what the chunking code
 * in this file looks for to start a new chunk.
 */
const createPOVPlaceholder = (character) => {
  const uniqueId = uuidv4();
  return `[POV: ${character} ${uniqueId}]`;
};

/**
 * Splits an array of paragraphs (objects or plain strings) into consecutive
 * chunks that each fit into a token budget.
 *
 * Rules:
 *  - Paragraph objects are measured on their JSON serialization, strings on
 *    themselves.
 *  - A paragraph is added to the current chunk only while its token count is
 *    strictly lower than the tokens still available in that chunk.
 *  - An entry starting with `[POV:` always starts a new chunk (unless the
 *    current chunk is still empty).
 *  - Paragraphs without any latin letter (e.g. "***") are dropped. They cost
 *    no tokens, so a dropped separator can no longer shrink the budget of the
 *    paragraphs that follow it or trigger a false "exceeds" error.
 *  - Empty chunks are never emitted.
 *
 * @param contentArray paragraphs or strings to distribute; not mutated
 * @param maxTokens token budget of each chunk
 * @param tokenCounter function used to measure a string; defaults to
 * `countTokens`
 * @returns array of chunks, each chunk being an array of the original entries
 * @throws Error("paragraph-exceeds-maxtokens") when a single paragraph does
 * not fit into an empty chunk
 */
export const divideInChunks = (
  contentArray: (Paragraph[]) | string[],
  maxTokens: number,
  tokenCounter: (input: string) => number = countTokens,
) => {
  // Reversed copy used as a stack: pop() hands paragraphs out in input order.
  const inputBucket = [...contentArray].reverse() as Array<Paragraph | string>;
  const hasLatinLetter = /[a-zA-Z]/;

  const chunks: any[] = [];

  // Spend `maxTokens` on the biggest chunk possible, then start over with a
  // fresh budget until the bucket is empty.
  while (inputBucket.length) {
    let remainingTokens = maxTokens;
    const chunk: any[] = [];

    while (inputBucket.length) {
      const next = inputBucket[inputBucket.length - 1];
      const serialized = typeof next === 'string' ? next : JSON.stringify(next);

      // A POV marker opens a new chunk.
      if (serialized.startsWith('[POV:') && chunk.length) {
        break;
      }

      // Drop paragraphs with no letters without charging the budget.
      const text = typeof next === 'string' ? next : next.text;
      if (!hasLatinLetter.test(text)) {
        inputBucket.pop();
        continue;
      }

      const paragraphTokens = tokenCounter(serialized);
      if (paragraphTokens >= remainingTokens) {
        // Nothing was added to this chunk yet: the paragraph alone is too
        // big for a single request and we can't decide how to split it.
        if (!chunk.length) {
          throw new Error("paragraph-exceeds-maxtokens");
        }
        break;
      }

      chunk.push(inputBucket.pop());
      remainingTokens -= paragraphTokens;
    }

    // Only happens when the remaining paragraphs were all dropped.
    if (chunk.length) {
      chunks.push(chunk);
    }
  }

  return chunks;
};

/**
 * Turns structured content into text-only chunks, replacing everything that
 * must not be processed with placeholders that can be resolved afterwards.
 *
 * The returned tuple is:
 *  0. the chunks (arrays of strings) that fit the token budget,
 *  1. a map placeholder -> skipped (non-"prose") SECTION,
 *  2. a map POV placeholder -> metadata (everything but `text`) of the
 *     paragraph that follows the placeholder,
 *  3. the non-prose section found at index 0, if any. Per the original
 *     authors, placeholders at the very start of a chapter tend to get lost
 *     in the model response, so that section is stored apart and re-added at
 *     the start instead of being represented by a placeholder.
 *
 * @param originalEpisodeContent array of sections
 * @param prompt prompt sent along with each chunk; its tokens are deducted
 * @param maxTokens total token budget of a request
 * @param padding extra tokens deducted from the budget
 * @param mode when 'tag-matching', placeholders are still recorded in the
 * maps but are NOT inserted into the text
 * @throws Error('paragraph-exceeds-maxtokens') when prompt and padding leave
 * no tokens for content
 */
export const contentToTextOnlyProcessableChunksWithPlaceholders = (
  originalEpisodeContent: any[],
  prompt: string,
  maxTokens: number,
  padding: number,
  mode?: Mode,
): [Chunk[], any, any, any] => {

  // --- part 1: collect plain text plus everything we must restore later ---

  const skippedSectionsByPlaceholder: Record<string, any> = {};
  const skippedSectionsAtStart: any[] = [];
  const metadataForNextParagraphByPlaceholder: Record<string, any> = {};

  // Paragraph texts in reading order, interleaved with placeholders.
  const textOnlyParagraphs: string[] = [];

  const placeholdersGoIntoText = mode !== 'tag-matching';

  for (const [sectionIndex, section] of originalEpisodeContent.entries()) {
    if (section.type === 'prose') {
      section.data.forEach((paragraph) => {
        if (paragraph?.pov) {
          // Keep every field except the text so it can be put back later.
          const povPlaceholder = createPOVPlaceholder(paragraph.pov);
          const { text, ...metadata } = paragraph;
          metadataForNextParagraphByPlaceholder[povPlaceholder] = metadata;

          if (placeholdersGoIntoText) {
            textOnlyParagraphs.push(povPlaceholder);
          }
        }
        textOnlyParagraphs.push(paragraph.text);
      });
      continue;
    }

    // Non-prose section opening the content: stored apart, no placeholder.
    if (sectionIndex === 0) {
      skippedSectionsAtStart.push(section);
      continue;
    }

    const sectionPlaceholder = createUUIDPlaceholder();
    skippedSectionsByPlaceholder[sectionPlaceholder] = section;
    if (placeholdersGoIntoText) {
      textOnlyParagraphs.push(sectionPlaceholder);
    }
  }

  // --- part 2: build the chunks with what is left of the budget ---

  const finalMaxTokens = maxTokens - countTokens(prompt) - padding;
  if (finalMaxTokens <= 0) {
    throw new Error('paragraph-exceeds-maxtokens');
  }

  return [
    divideInChunks(textOnlyParagraphs, finalMaxTokens),
    skippedSectionsByPlaceholder,
    metadataForNextParagraphByPlaceholder,
    skippedSectionsAtStart,
  ];
}

/**
 * Flattens structured content into one string.
 *
 * The texts of each section's paragraphs are joined with `paragraphGlue`,
 * and the resulting section strings are joined with the same glue.
 *
 * @param originalEpisodeContent array of sections, each with a `data` array
 * @param paragraphGlue separator placed between paragraphs and between sections
 * @returns the concatenated text
 */
export const contentToText = (originalEpisodeContent: any[], paragraphGlue = '\n\n'): string => {
  const joinParagraphTexts = (paragraphs) =>
    paragraphs.map(({ text }) => text).join(paragraphGlue);

  return originalEpisodeContent
    .map(({ data }) => joinParagraphTexts(data))
    .join(paragraphGlue);
};

/**
 * Reduces structured content to minimal paragraphs and groups them into
 * chunks that each fit a token budget.
 *
 * The content is first flattened with `contentToParagraphs` (keeping only
 * `id`, `block_id` and `text` of every paragraph, to spend as few tokens as
 * possible) and then distributed with `divideInChunks`.
 *
 * Budget of a chunk: (maxTokens - tokens of the prompt), halved when
 * `symmetricTokenCount` is set, minus `padding`, rounded down.
 *
 * TODO (original authors): get the type for originalEpisodeContent from galatea commons.
 *
 * @param originalEpisodeContent array of sections
 * @param prompt prompt sent with every chunk; its tokens are deducted
 * @param maxTokens total token budget of a request
 * @param symmetricTokenCount when true only half of the remaining tokens are
 * used for content (presumably to leave room for an equally sized response)
 * @param padding extra tokens deducted from the budget
 * @param includeOnlySectionTypes when non-empty, only sections of these types
 * are included
 * @throws Error("paragraph-exceeds-maxtokens") when no tokens are left for content
 */
export const contentToMinimalProcessableChunksWithIDs = (
  originalEpisodeContent: any[],
  prompt: string,
  maxTokens: number,
  symmetricTokenCount: boolean,
  padding: number,
  includeOnlySectionTypes: string[] = []
): Chunk[] => {
  const promptTokens = countTokens(prompt);
  const budgetDivisor = symmetricTokenCount ? 2 : 1;
  const finalMaxTokens = Math.floor((maxTokens - promptTokens) / budgetDivisor - padding);

  if (finalMaxTokens <= 0) {
    throw new Error("paragraph-exceeds-maxtokens");
  }

  // Flatten first, then pack the flat paragraphs into chunks.
  const minimalParagraphs = contentToParagraphs(originalEpisodeContent, {}, includeOnlySectionTypes);
  return divideInChunks(minimalParagraphs, finalMaxTokens);
};


/**
 * Rebuilds a content structure out of processed text chunks.
 *
 * Content structure produced:
 * [
 *    { // a section
 *      "id": 0,            // zero-based position in the output
 *      "type": "prose",
 *      "title": "",        // rebuilt prose sections never get a title
 *      "data": [
 *        { // a paragraph
 *          "id": 0,        // zero-based position in its section
 *          "text": "Surprisingly, we lost sound.",
 *          "block_id": "7ba45d39-1585-46a5-9587-53d8980947c6",
 *          "first": false,
 *          "sound": { "before": {} },
 *          "vibrate": {},
 *          "separator": false
 *        }
 *      ]
 *    }
 * ]
 *
 * Every chunk is split on blank lines ("\n\n") and each piece is handled as:
 *  - a placeholder of a skipped section: the prose section being built is
 *    closed and the stored section is inserted (with a fresh `id`);
 *  - a metadata (POV) placeholder: dropped from the text, its metadata is
 *    applied to the next regular paragraph (overriding `id` and `text`);
 *  - anything else: a new paragraph with a freshly generated `block_id`.
 *
 * Placeholders are matched against OWN keys of the maps only, so ordinary
 * text such as "constructor" is never mistaken for a placeholder. If a piece
 * does not match exactly, its whitespace-trimmed form is tried as well.
 *
 * @param stringChunks processed chunks, each one a string of paragraphs
 * separated by blank lines
 * @param skippedByPlaceholder map placeholder -> skipped SECTION
 * @param metadataCollectionForNextParagraph map placeholder -> paragraph metadata
 * @param skippedButAtStart sections to put back at the very start of the content
 * @returns the content array (sections)
 */
export const contentFromStringChunks = (stringChunks: string[], skippedByPlaceholder, metadataCollectionForNextParagraph, skippedButAtStart) => {
  const newSections: any[] = [];
  let currentSectionParagraphs: any[] = []; // paragraphs of the prose section being built
  let metadataForNextParagraph: any = null;

  // Own-property lookup; a missing map behaves like an empty one.
  const ownEntry = (map, key: string) =>
    map != null && Object.prototype.hasOwnProperty.call(map, key) ? map[key] : undefined;

  // Exact match first, then tolerate whitespace around the placeholder.
  const findByPlaceholder = (map, text: string) =>
    ownEntry(map, text) || ownEntry(map, text.trim());

  // Closes the prose section being built, if it has any paragraph.
  const closeProseSection = () => {
    if (!currentSectionParagraphs.length) {
      return;
    }
    newSections.push({
      id: newSections.length,
      data: currentSectionParagraphs,
      type: "prose",
      title: "", // rebuilt sections are brand new, there is no title to restore
    });
    currentSectionParagraphs = [];
  };

  for (const skippedAtStart of skippedButAtStart ?? []) {
    newSections.push({
      ...skippedAtStart,
      id: newSections.length,
    });
  }

  for (const processedText of stringChunks) {
    if (processedText == null) {
      continue;
    }

    for (const text of processedText.split("\n\n")) {
      // Placeholder of a skipped section: cut the prose section, insert it.
      const skippedSection = findByPlaceholder(skippedByPlaceholder, text);
      if (skippedSection) {
        closeProseSection();
        newSections.push({
          ...skippedSection,
          id: newSections.length,
        });
        continue;
      }

      // Metadata placeholder: remember the metadata, drop the placeholder.
      const metadata = findByPlaceholder(metadataCollectionForNextParagraph, text);
      if (metadata) {
        metadataForNextParagraph = metadata;
        continue;
      }

      // Paragraph right after a metadata placeholder: restore its metadata
      // (most importantly the POV) around the processed text.
      if (metadataForNextParagraph) {
        currentSectionParagraphs.push({
          ...metadataForNextParagraph,
          id: currentSectionParagraphs.length,
          text,
        });
        metadataForNextParagraph = null;
        continue;
      }

      // Regular text: brand new paragraph with default (hardcoded) fields.
      currentSectionParagraphs.push({
        id: currentSectionParagraphs.length,
        text,
        block_id: uuidv4(),
        separator: false,
        first: false,
        sound: {
          before: {},
        },
        vibrate: {},
      });
    }
  }

  closeProseSection();

  return newSections;
};
