// SPDX-FileCopyrightText: 2022 Georg-August-Universität Göttingen
// SPDX-FileCopyrightText: 2024 Universität Göttingen
//
// SPDX-License-Identifier: CC0-1.0
// SPDX-License-Identifier: EUPL-1.2

import React, { useEffect, useState } from "react";
import cheerio from "cheerio";
import ContentTable from "../components/TableOfContent";

/**
 * One top-level section of the parsed document, built by `ParsedXML` from an
 * element matching `tei-div[type='section']`.
 */
interface Section {
  /** Taken from the element's `id` attribute. */
  id: string;
  /** Taken from the element's `sameas` attribute. */
  xpath: string;
  /** Whitespace-normalized text of the first `tei-head` descendant. */
  title: string;
  /** One entry per `TEI-head` descendant of the section element. */
  heads: Head[];
  /**
   * One entry per `TEI-p` descendant, minus entries whose id equals the id of
   * a paragraph collected for a subsection.
   */
  paragraphs: Paragraph[];
  /** One entry per `TEI-q` descendant of the section element. */
  quotes: Quote[];
  /** Entries for the `tei-div[type='subsection']` descendants. */
  subsections: Subsection[];
}

/**
 * A heading extracted from the document.
 *
 * The source attributes differ by nesting level: section-level heads read
 * `xml:id` / `data-info`, subsection-level heads read `id` / `sameas`.
 */
interface Head {
  /** Identifier attribute of the heading element (`xml:id` or `id`, see above). */
  id: string;
  /**
   * Whitespace-normalized text. Section-level heads use the element's full
   * text; subsection-level heads use the text of its `tei-seg` descendants.
   */
  content: string;
  /** Taken from the `data-info` (section) or `sameas` (subsection) attribute. */
  xpath: string;
}

/** A paragraph extracted from a `TEI-p` element. */
interface Paragraph {
  /** Taken from the element's `xml:id` attribute. */
  id: string;
  /** Text of the element with whitespace runs collapsed and ends trimmed. */
  content: string;
  /** Taken from the `data-info` (section) or `sameas` (subsection) attribute. */
  xpath: string;
}

/** A quotation extracted from a `TEI-q` element. */
interface Quote {
  /** Taken from the element's `xml:id` attribute. */
  id: string;
  /** Text of the element with whitespace runs collapsed and ends trimmed. */
  content: string;
  /** Taken from the `data-info` (section) or `sameas` (subsection) attribute. */
  xpath: string;
}

/**
 * A subsection built by `ParsedXML` from an element matching
 * `tei-div[type='subsection']`. It has the same shape as `Section` and can
 * nest further subsections.
 */
interface Subsection {
  /** Taken from the element's `id` attribute. */
  id: string;
  /** Taken from the element's `sameas` attribute. */
  xpath: string;
  /** Whitespace-normalized text of the first `tei-seg` descendant. */
  title: string;
  /** One entry per `tei-head` descendant of the subsection element. */
  heads: Head[];
  /** One entry per `TEI-p` descendant of the subsection element. */
  paragraphs: Paragraph[];
  /** One entry per `TEI-q` descendant of the subsection element. */
  quotes: Quote[];
  /** Entries for the `tei-div[type='subsection']` descendants of this element. */
  subsections: Subsection[];
}

/** Shape of the parse result that `ParsedXML` keeps in component state. */
interface parsedData {
  /** Parsed sections, in the order the section selector matched them. */
  sections: Section[];
}

/** Props accepted by the `ParsedXML` component. */
interface ParsedXMLProps {
  /** Markup string that is loaded with cheerio in XML mode and parsed. */
  xml: string;
}

/** Collapses every run of whitespace into one space and trims both ends. */
const collapseWhitespace = (text: string): string =>
  text.replace(/\s+/g, " ").trim();

/**
 * Parses the given markup into sections and (nested) subsections and renders
 * them through `ContentTable`.
 *
 * The markup is re-parsed whenever the `xml` prop changes. Parsing happens in
 * an effect, so the first render shows an empty table of contents.
 *
 * @param xml Markup string to parse; loaded with cheerio in XML mode.
 */
const ParsedXML: React.FC<ParsedXMLProps> = ({ xml }) => {
  const [parsedData, setParsedData] = useState<parsedData>({ sections: [] });

  useEffect(() => {
    const $ = cheerio.load(xml, {
      xmlMode: true,
    });

    // Derived from `$` itself so no cheerio type name has to be imported.
    type Selection = ReturnType<typeof $>;
    // Head, Paragraph and Quote all share this shape.
    type TextItem = { id: string; content: string; xpath: string };

    /** Reads an attribute, falling back to "" so the `string` fields stay strings. */
    const readAttr = (node: Selection, name: string): string =>
      node.attr(name) ?? "";

    /**
     * Builds one item per descendant of `scope` matching `selector`.
     * `textOf` picks the raw text of a match; it defaults to the full text.
     */
    const collectItems = (
      scope: Selection,
      selector: string,
      idAttr: string,
      xpathAttr: string,
      textOf: (node: Selection) => string = (node) => node.text()
    ): TextItem[] =>
      scope
        .find(selector)
        .toArray()
        .map((element) => {
          const node = $(element);
          return {
            id: readAttr(node, idAttr),
            content: collapseWhitespace(textOf(node)),
            xpath: readAttr(node, xpathAttr),
          };
        });

    // NOTE(review): the selectors below mix `tei-` and `TEI-` prefixes and the
    // attribute names differ between sections (`xml:id` / `data-info`) and
    // subsections (`id` / `sameas`). With `xmlMode` enabled tag matching is
    // presumably case-sensitive, so some of these may never match - confirm
    // against the markup that is actually passed in. They are kept exactly as
    // they were.

    /** Recursively collects every subsection found below `scope`. */
    const processSubsections = (scope: Selection): Subsection[] =>
      scope
        .find("tei-div[type='subsection']")
        .toArray()
        .map((element) => {
          const node = $(element);
          return {
            id: readAttr(node, "id"),
            xpath: readAttr(node, "sameas"),
            title: collapseWhitespace(node.find("tei-seg").first().text()),
            heads: collectItems(node, "tei-head", "id", "sameas", (head) =>
              head.find("tei-seg").text()
            ),
            paragraphs: collectItems(node, "TEI-p", "xml:id", "sameas"),
            quotes: collectItems(node, "TEI-q", "xml:id", "sameas"),
            subsections: processSubsections(node),
          };
        });

    const sections: Section[] = $("tei-div[type='section']")
      .toArray()
      .map((element) => {
        const node = $(element);
        return {
          id: readAttr(node, "id"),
          xpath: readAttr(node, "sameas"),
          title: collapseWhitespace(node.find("tei-head").first().text()),
          heads: collectItems(node, "TEI-head", "xml:id", "data-info"),
          paragraphs: collectItems(node, "TEI-p", "xml:id", "data-info"),
          quotes: collectItems(node, "TEI-q", "xml:id", "data-info"),
          subsections: processSubsections(node),
        };
      });

    // A section's paragraph selector also matches paragraphs that live inside
    // its subsections. Drop those from every section so they are only listed
    // under the subsection. Ids are gathered once instead of re-filtering all
    // sections for each subsection paragraph.
    const subsectionParagraphIds = new Set<string>();
    for (const section of sections) {
      for (const subsection of section.subsections) {
        for (const paragraph of subsection.paragraphs) {
          subsectionParagraphIds.add(paragraph.id);
        }
      }
    }
    for (const section of sections) {
      section.paragraphs = section.paragraphs.filter(
        (paragraph) => !subsectionParagraphIds.has(paragraph.id)
      );
    }

    setParsedData({ sections });
    // Re-run when the markup changes; an empty dependency list would keep
    // showing the table of contents of the first document.
  }, [xml]);

  return <ContentTable parsedSections={parsedData.sections} />;
};

export default ParsedXML;
