import * as pdfjsLib from 'pdfjs-dist';
import {calculateTokens} from "@/utils/tokens";
// Point pdf.js at its worker script on cdnjs. The path embeds `pdfjsLib.version`, so the
// worker that gets loaded matches the version of the imported pdfjs-dist package. The URL
// is protocol-relative (starts with `//`), so it is resolved against the hosting page's protocol.
pdfjsLib.GlobalWorkerOptions.workerSrc = `//cdnjs.cloudflare.com/ajax/libs/pdf.js/${pdfjsLib.version}/pdf.worker.mjs`;

/*export const readFileWithOCR = async (file: File): Promise<string[]> => {
  alert('OCR Working');
  const objectURL = URL.createObjectURL(file);
  const texts = await pdfOCR(objectURL, { lang: "spa+eng" });
  URL.revokeObjectURL(objectURL);
  return texts.map((text: any) => text.trim());
}*/

/**
 * Extracts the text of every page of a PDF file.
 *
 * The file is handed to pdf.js through a temporary object URL. Both the URL
 * and the pdf.js loading task are released in a `finally` block, so nothing
 * leaks when the document cannot be loaded or a page fails to parse.
 *
 * @param file - The PDF file to read.
 * @returns One trimmed string per page, in page order. Within a page, the
 *   `str` values of the text items are joined with single spaces; items
 *   without a `str` contribute an empty string.
 * @throws Re-throws any rejection coming from pdf.js while loading the
 *   document or reading a page.
 */
export const readFile = async (file: File): Promise<string[]> => {
  const objectURL = URL.createObjectURL(file);
  const loadingTask = pdfjsLib.getDocument(objectURL);
  try {
    const pdf = await loadingTask.promise;
    const texts = await Promise.all(
      Array.from({ length: pdf.numPages }, async (_, i) => {
        // pdf.js page numbers are 1-based, the array index is 0-based.
        const page = await pdf.getPage(i + 1);
        const content = await page.getTextContent();
        return content.items.map((item: any) => item.str || '').join(' ');
      })
    );
    return texts.map((text) => text.trim());
  } finally {
    // Always release the blob URL and the resources held by the loading task,
    // on success as well as on failure.
    URL.revokeObjectURL(objectURL);
    await loadingTask.destroy();
  }
};

/**
 * Extracts the text of a selection of pages of a PDF file.
 *
 * The file is handed to pdf.js through a temporary object URL. Both the URL
 * and the pdf.js loading task are released in a `finally` block, so nothing
 * leaks when the document cannot be loaded or a page fails to parse.
 *
 * @param file - The PDF file to read.
 * @param pages - 0-based page indexes to extract.
 * @returns One trimmed string per requested page, in the iteration order of
 *   `pages`. Within a page, the `str` values of the text items are joined
 *   with single spaces; items without a `str` contribute an empty string.
 * @throws Re-throws any rejection coming from pdf.js while loading the
 *   document or reading a page.
 */
export const readPages = async (file: File, pages: Set<number>): Promise<string[]> => {
  const fileUrl = URL.createObjectURL(file);
  const loadingTask = pdfjsLib.getDocument(fileUrl);
  try {
    const pdf = await loadingTask.promise;
    const texts = await Promise.all(
      Array.from(pages, async (pageNumber) => {
        // pdf.js page numbers are 1-based, while this module works with 0-based indexes.
        const page = await pdf.getPage(pageNumber + 1);
        const content = await page.getTextContent();
        return content.items.map((item: any) => item.str || '').join(' ');
      })
    );
    return texts.map((text) => text.trim());
  } finally {
    // Always release the blob URL and the resources held by the loading task,
    // on success as well as on failure.
    URL.revokeObjectURL(fileUrl);
    await loadingTask.destroy();
  }
};

/**
 * Builds an identifier string for a file from its metadata.
 *
 * @param file - The file to identify.
 * @returns The file's name, size and last-modified timestamp joined with `-`.
 */
export function getUniqueFileIdentifier(file: File) {
    const { name, size, lastModified } = file;
    return `${name}-${size}-${lastModified}`;
}

/**
 * Concatenates the text content of the given files, skipping duplicates.
 *
 * Two files are treated as duplicates when they share the same name and
 * size; only the first occurrence is read. Files are read one after another
 * in input order.
 *
 * @param files - The files whose text should be combined.
 * @returns The text of each distinct file (its pages joined with a single
 *   space), appended back to back with no separator between files.
 */
export async function getUniqueFileContentString(files: File[]): Promise<string> {
    // Keys of the files already processed, built from name and size only.
    const processedKeys = new Set<string>();
    const chunks: string[] = [];

    for (const current of files) {
        const key = `${current.name}-${current.size}`;
        if (processedKeys.has(key)) {
            continue;
        }
        processedKeys.add(key);

        // Merge the per-page texts of this file into one string.
        const pageTexts = await readFile(current);
        chunks.push(pageTexts.join(' '));
    }

    return chunks.join('');
}

/**
 * Counts the tokens in the text of a file.
 *
 * @param file - The file to measure.
 * @returns Whatever `calculateTokens` yields for the file's page texts
 *   joined with single spaces.
 */
export const getFileTokens = async (file : File) => {
    const pageTexts = await readFile(file);
    const fullText = pageTexts.join(' ');
    return await calculateTokens(fullText);
};
