import { decode } from "html-entities"
import { Section } from "../../api"
import { languages } from "../../constants"
import { sentry } from "../../utils"

/**
 * Metadata describing one caption track advertised by a video page.
 * Instances are produced by `getAvailableTranscripts`.
 */
export interface TranscriptDetails {
    /** Lower-cased first two characters of the track's `languageCode`. */
    code: string
    /** The track's `baseUrl`; fetched to obtain the transcript XML. */
    url: string
    /** Display name taken from the track's `name.simpleText`. */
    name: string
    /** True when the track's `kind` is `"asr"`. */
    isAutogenerated: boolean
}

// Keys of the project's `languages` constant. `getTranscriptByLanguageCode` walks
// this list in order when the requested language has no matching caption track.
const languageCodes = Object.keys(languages)

/**
 * A video chapter as derived by `getChaptersFromYouTube`.
 */
interface Chapter {
    /** Chapter start, in seconds (parsed from the chapter's time label). */
    start: number
    /** Start of the following chapter, or `start + 100000` for the last chapter. */
    end: number
    /** Chapter title text. */
    title: string
}

/**
 * A single transcript snippet, as produced by `parseTranscript`.
 * NOTE(review): time values are presumably seconds, since they are compared
 * against chapter times expressed in seconds — confirm against the caption XML.
 */
export interface Transcript {
    /** Value of the snippet's `start` attribute. */
    start: number
    /** `start + duration`. */
    end: number
    /** Snippet text with HTML entities decoded. */
    text: string
    /** Value of the snippet's `dur` attribute. */
    duration: number
}

/**
 * Resolves the caption track for `language` from the page HTML and downloads it.
 *
 * @param html - Raw HTML of the video page.
 * @param language - Requested language code, or `"auto"`.
 * @returns The parsed transcript snippets, or `null` when no track URL could be
 *          resolved or when any step throws (the error is reported to Sentry).
 */
const fetchTranscript = async (html: string, language: string): Promise<Transcript[] | null> => {
    try {
        const transcriptUrl = getTranscriptUrl(html, language)
        return transcriptUrl ? await getTranscript(transcriptUrl) : null
    } catch (error) {
        // Best effort: report the failure and let the caller see "no transcript".
        sentry.captureException(error as Error)
        return null
    }
}

/**
 * Picks the URL of the caption track to download.
 *
 * For `language === "auto"`, the language of the first auto-generated track is
 * used; a manually authored track in that same language is preferred over the
 * auto-generated one. When there is no auto-generated track, or for any other
 * `language`, selection is delegated to `getTranscriptByLanguageCode`.
 *
 * @param html - Raw HTML of the video page.
 * @param language - Requested language code, or `"auto"`.
 * @returns The track URL, or `null` when no track is available.
 */
const getTranscriptUrl = (html: string, language: string) => {
    const tracks = listAvailableTranscripts(html)
    if (!tracks) return null

    if (language === "auto") {
        const generated = tracks.find((track) => track.isAutogenerated)

        if (generated) {
            const manual = tracks.find(
                (track) => !track.isAutogenerated && track.code === generated.code
            )
            return (manual ?? generated).url
        }
    }

    const match = getTranscriptByLanguageCode(tracks, language)
    return match ? match.url : null
}

/**
 * Collapses transcript snippets into display lines, three snippets per line.
 *
 * Each line has the form `[<start of first snippet>] <texts joined by spaces>`.
 * The final line may hold fewer than three snippets.
 *
 * @param transcript - Snippets in playback order.
 * @returns One formatted string per group; empty when `transcript` is empty.
 */
const formatTranscript = (transcript: Transcript[]) => {
    const groupSize = 3
    const groupCount = Math.ceil(transcript.length / groupSize)

    return Array.from({ length: groupCount }, (_, groupIndex): string => {
        const offset = groupIndex * groupSize
        const snippets = transcript.slice(offset, offset + groupSize)
        const joinedText = snippets.map(({ text }) => text).join(" ")

        return `[${snippets[0].start}] ${joinedText}`
    })
}

/**
 * Non-throwing wrapper around `getAvailableTranscripts`.
 *
 * @param html - Raw HTML of the video page.
 * @returns The caption tracks found in the page, or `null` when none are
 *          present or extraction throws (the error is reported to Sentry).
 */
const listAvailableTranscripts = (html: string) => {
    try {
        return getAvailableTranscripts(html)
    } catch (error) {
        sentry.captureException(error as Error)
        return null
    }
}

/**
 * Splits a transcript into sections that follow the video's chapters.
 *
 * @param html - Raw HTML of the video page.
 * @param language - Requested language code; only used to verify that some
 *                   caption track can be selected for the page.
 * @param transcript - Already-fetched transcript snippets.
 * @returns The sections, or `null` when the page carries no `ytInitialData`,
 *          no caption tracks, or no usable chapter list.
 */
const fetchSections = (html: string, language: string, transcript: Transcript[]) => {
    const initData = getInitData(html)
    // Without ytInitialData there is nothing to read chapters from; bail out
    // instead of handing `null` to the chapter extractor.
    if (!initData) return null

    const availableTranscripts = getAvailableTranscripts(html)
    if (!availableTranscripts) return null

    const transcriptDetails = getTranscriptByLanguageCode(availableTranscripts, language)
    if (!transcriptDetails) return null

    const { chapters } = getChaptersFromYouTube(initData)
    if (!chapters) return null

    const sections: Section[] = createSections(transcript, chapters)

    return sections
}

/**
 * Extracts the chapter list from a page's `ytInitialData` object.
 *
 * The chapter markers are read from the last engagement panel that exposes
 * `macroMarkersListRenderer.contents`. When the first entry of that list is a
 * `macroMarkersInfoItemRenderer`, the chapters are flagged as auto-generated
 * and that entry is skipped.
 *
 * @param initData - Parsed `ytInitialData`; `null`/`undefined` is tolerated.
 * @returns `areAutoGenerated` plus the `chapters`. `chapters` is `null` when no
 *          non-empty marker list exists or when the first chapter does not
 *          start at 0 seconds.
 */
const getChaptersFromYouTube = (initData: any) => {
    // Seconds per unit, indexed from the rightmost component of a time label:
    // seconds, minutes, hours, days.
    const multipliers = [1, 60, 3600, 86400]
    // The last chapter has no successor, so it is given this span (in seconds)
    // as a stand-in for "until the end of the video".
    const lastChapterSpan = 100000

    // Converts a label such as "1:02:03" into a number of seconds.
    function parseTime(timeStr: string) {
        const timeParts = timeStr.split(":").reverse()
        let totalSeconds = 0

        for (let i = 0; i < timeParts.length; i++) {
            totalSeconds += parseInt(timeParts[i], 10) * multipliers[i]
        }

        return totalSeconds
    }

    let contents: any = null

    const engagementPanels = initData?.engagementPanels || []

    for (const engagementPanel of engagementPanels) {
        const candidate =
            engagementPanel?.engagementPanelSectionListRenderer?.content?.macroMarkersListRenderer
                ?.contents

        if (candidate) contents = candidate
    }

    // An empty list carries no chapters and must not be indexed below.
    if (!Array.isArray(contents) || contents.length === 0) {
        return { areAutoGenerated: false, chapters: null }
    }

    const firstEntry = contents[0]
    const areAutoGenerated =
        typeof firstEntry === "object" &&
        firstEntry !== null &&
        "macroMarkersInfoItemRenderer" in firstEntry
    const chaptersData: any[] = areAutoGenerated ? contents.slice(1) : contents

    // Keep only entries that are real chapter markers with a time label, so an
    // unexpected entry type does not abort the whole extraction.
    const rawChapters = chaptersData
        .filter(
            (item: any) =>
                typeof item?.macroMarkersListItemRenderer?.timeDescription?.simpleText === "string"
        )
        .map(({ macroMarkersListItemRenderer }: any) => ({
            title: macroMarkersListItemRenderer.title?.simpleText ?? "",
            time: parseTime(macroMarkersListItemRenderer.timeDescription.simpleText),
        }))

    // A chapter list that does not begin at 0 is rejected.
    if (rawChapters.length > 0 && rawChapters[0].time !== 0) {
        return { areAutoGenerated, chapters: null }
    }

    const chapters: Chapter[] = rawChapters.map((item, index) => {
        const next = rawChapters[index + 1]

        return {
            start: item.time,
            end: next ? next.time : item.time + lastChapterSpan,
            title: item.title,
        }
    })

    return { areAutoGenerated, chapters }
}

/**
 * Extracts and parses the `ytInitialData` object embedded in a video page.
 *
 * @param html - Raw HTML of the video page.
 * @returns The parsed object, or `null` when the page contains no
 *          `var ytInitialData = …;</script>` assignment.
 * @throws SyntaxError (from `JSON.parse`) when the captured text is not valid JSON.
 */
const getInitData = (html: string) => {
    // Lazy quantifier: stop at the FIRST `;</script>` after the assignment. A
    // greedy match runs to the last one on the line, which on single-line HTML
    // swallows every following script and makes the capture unparseable.
    const ytInitialDataMatch = html.match(/var ytInitialData = (.*?);<\/script>/)
    if (ytInitialDataMatch && ytInitialDataMatch[1]) {
        return JSON.parse(ytInitialDataMatch[1])
    }
    return null
}

/**
 * Chooses the caption track that best matches `language`.
 *
 * Preference order: a track whose code equals `language`; otherwise the first
 * code in `languageCodes` that has a track; otherwise the first listed track.
 *
 * @param availableTranscripts - Tracks advertised by the page.
 * @param language - Requested language code.
 * @returns The selected track, or `null` when the list is missing or empty.
 */
const getTranscriptByLanguageCode = (
    availableTranscripts: TranscriptDetails[],
    language: string
) => {
    if (!availableTranscripts || !availableTranscripts.length) return null

    const findByCode = (wanted: string) =>
        availableTranscripts.find((track) => track.code === wanted)

    // Requested language first, then the project's known languages in order.
    for (const candidate of [language, ...languageCodes]) {
        const hit = findByCode(candidate)
        if (hit) return hit
    }

    return availableTranscripts[0]
}

/**
 * Downloads a caption track and parses it into transcript snippets.
 *
 * @param url - Caption track URL.
 * @returns The snippets parsed from the response body.
 */
const getTranscript = async (url: string) => {
    const transcriptXML = await (await fetch(url)).text()
    return parseTranscript(transcriptXML)
}

/**
 * Parses caption XML into transcript snippets.
 *
 * Every `<text>` element becomes one snippet: `start` and `dur` attributes are
 * converted with `Number(...)`, `end` is their sum, and the element's text
 * content is run through `decode` to resolve HTML entities.
 *
 * @param transcriptXML - Caption document as a string.
 * @returns One snippet per `<text>` element, in document order.
 */
const parseTranscript = (transcriptXML: string) => {
    const xmlDoc = new DOMParser().parseFromString(transcriptXML, "text/xml")
    const readNumber = (node: Element, attribute: string) => Number(node.getAttribute(attribute))

    return Array.from(xmlDoc.getElementsByTagName("text"), (node) => {
        const start = readNumber(node, "start")
        const duration = readNumber(node, "dur")

        return {
            start,
            duration,
            end: start + duration,
            text: decode(node.textContent),
        }
    })
}

/**
 * Groups transcript snippets into sections, one per chapter.
 *
 * Without chapters, a single untitled section covering the whole transcript is
 * returned; its `end` is the end of the last snippet, or 0 for an empty
 * transcript. With chapters, a snippet belongs to the chapter whose half-open
 * interval `[start, end)` contains the snippet's start, so a snippet that
 * starts exactly on a boundary lands only in the later chapter. Chapters that
 * receive no snippets are dropped.
 *
 * @param transcript - Snippets in playback order.
 * @param chapters - Chapters to split by; may be empty.
 * @returns The resulting sections.
 */
const createSections = (transcript: Transcript[], chapters: Chapter[]): Section[] => {
    if (!chapters || chapters.length === 0) {
        // `undefined` for an empty transcript; guarded below instead of dereferenced.
        const lastSnippet = transcript[transcript.length - 1]

        return [
            {
                heading: "",
                sentences: formatTranscript(transcript),
                start: 0,
                end: lastSnippet ? lastSnippet.start + lastSnippet.duration : 0,
            },
        ]
    }

    return chapters
        .map((chapter) => {
            // Exclusive upper bound: `chapter.end` is the next chapter's start.
            const chapterSentences = transcript.filter(
                (snippet) => snippet.start >= chapter.start && snippet.start < chapter.end
            )

            return {
                heading: chapter.title,
                sentences: formatTranscript(chapterSentences),
                start: chapter.start,
                end: chapter.end,
            }
        })
        .filter((section) => section.sentences.length > 0)
}

/**
 * Reads the caption track list out of a video page.
 *
 * The JSON value that follows `"captions":` (up to `,"videoDetails`) is cut out
 * of the page text and parsed; the tracks are taken from its
 * `playerCaptionsTracklistRenderer.captionTracks`.
 *
 * @param page - Raw HTML of the video page.
 * @returns One entry per caption track, or `null` when the page has no
 *          captions block or the block lists no tracks.
 * @throws SyntaxError (from `JSON.parse`) when the extracted text is not valid JSON.
 */
const getAvailableTranscripts = (page: string): null | TranscriptDetails[] => {
    const [, afterCaptionsKey] = page.split('"captions":')
    const captions = afterCaptionsKey?.split(',"videoDetails')?.[0]?.replace("\n", "")
    if (!captions) return null

    const tracklist = JSON.parse(captions)?.playerCaptionsTracklistRenderer
    const captionTracks = tracklist?.captionTracks
    if (!captionTracks) return null

    return captionTracks.map((track: any) => {
        const url = track.baseUrl
        const code = track.languageCode.toLowerCase().slice(0, 2)
        const name = track.name.simpleText
        const isAutogenerated = track.kind === "asr"

        return { url, code, name, isAutogenerated }
    })
}

/**
 * Public surface of this module: transcript download, caption track listing,
 * chapter-based sectioning, and transcript formatting.
 */
export const youtubeTranscriptService = {
    fetchTranscript,
    listAvailableTranscripts,
    fetchSections,
    formatTranscript,
}
