import slugifyPkg from 'slugify';
import { ArticleData, Citation, RichCitation } from './types';
import { htmlToText } from 'html-to-text';
import dayjs from 'dayjs';
import utc from 'dayjs/plugin/utc';
import customParseFormat from 'dayjs/plugin/customParseFormat';
import { splitSentences } from '@xyla/util';

// Register the dayjs plugins used in this module. parseArticleDate calls
// dayjs with explicit format strings in strict mode (customParseFormat) and
// converts the result with `.utc(true)` (utc).
dayjs.extend(utc);
dayjs.extend(customParseFormat);

/**
 * Blank ArticleData value: every text field is the empty string and every
 * collection field is an empty array.
 *
 * NOTE(review): this is a single exported object and is not frozen;
 * presumably callers should copy it rather than mutate it — confirm.
 */
export const DEFAULT_ARTICLE_DATA: ArticleData = {
  title: '',
  byline: '',
  what_is_this_page_html: '',
  page_type: '',
  cms_type: '',
  medically_reviewed_by: '',
  medically_audited_by: '',
  audited_by: '',
  articlesection_set: [],
  intervention_as: [],
  condition_as: [],
  lead_image_url: '',
  image_url: '',
  related_article_slugs: [],
};

/**
 * Build a slug from free text.
 *
 * The input is lower-cased, every character that is not a word character
 * (`\w`), a space or a hyphen is removed, and the remainder is handed to the
 * `slugify` package.
 *
 * @param s Text to convert.
 * @returns The slug produced by the `slugify` package for the cleaned text.
 */
export function slugify(s: string): string {
  const lowered = s.toLowerCase();
  const stripped = lowered.replace(/[^\w -]/g, '');
  return slugifyPkg(stripped);
}

/**
 * Merge several class-name maps into one.
 *
 * For every key, the values from all inputs are joined with a single space in
 * argument order. An empty value contributes nothing, so no leading space is
 * produced when an earlier map holds `''` for a key.
 *
 * @param allStyles Maps from key to class-name string, merged left to right.
 * @returns A new plain object holding the combined value for every key seen.
 */
export const combineStyles = (
  ...allStyles: { [key: string]: string }[]
): { [key: string]: string } => {
  // Accumulate in a Map rather than a `{}` literal: looking a key up on a
  // plain object also finds inherited members, so a key such as
  // "constructor" or "toString" would be treated as already present and get
  // the native function's source text prepended.
  const combined = new Map<string, string>();
  for (const styles of allStyles) {
    for (const [key, value] of Object.entries(styles)) {
      const existing = combined.get(key);
      combined.set(key, existing ? `${existing} ${value}` : value);
    }
  }
  return Object.fromEntries(combined);
};

/**
 * Parse an article date string into a Date.
 *
 * Two input shapes are recognised:
 * - timestamps carrying a zone designator (`Z`, `+hh:mm` or `-hh:mm`), which
 *   are treated as ISO-8601 and handed to the native Date parser;
 * - `MMM D, YYYY` / `MMMM D, YYYY` strings written by the old gdoc parser,
 *   which are parsed strictly with dayjs.
 *
 * @param stringDate Raw date string, or undefined when none is available.
 * @returns The parsed Date, or undefined when the input is missing or cannot
 *   be parsed. An Invalid Date is never returned.
 */
export function parseArticleDate(
  stringDate: string | undefined
): Date | undefined {
  if (stringDate === undefined) {
    return undefined;
  }

  // A negative UTC offset contains neither '+' nor 'Z', so detect it
  // separately; anchoring on the `T` time part keeps the date's own hyphens
  // from matching.
  const hasNegativeOffset = /T[\d:.]+-\d{2}(?::?\d{2})?$/.test(stringDate);

  if (
    stringDate.includes('+') ||
    stringDate.includes('Z') ||
    hasNegativeOffset
  ) {
    // this is hopefully an ISO-8601 date; if the native parser rejects it,
    // report "no date" instead of leaking an Invalid Date to callers
    const parsed = new Date(stringDate);
    return Number.isNaN(parsed.getTime()) ? undefined : parsed;
  }

  // parse some dates which were formatted by old gdoc parser

  // NB(micah) workaround for iamkun/dayjs#1980
  const stringDateCleaned = stringDate.replace(/\./g, '');

  const strict = true;
  const dt = dayjs(stringDateCleaned, ['MMM D, YYYY', 'MMMM D, YYYY'], strict);

  // `.utc(true)` switches to UTC while keeping the parsed wall-clock time.
  return dt.isValid() ? dt.utc(true).toDate() : undefined;
}

/**
 * Format an article date as a long-form date string for display on the page.
 *
 * @param dt Date to format. When it is undefined or an Invalid Date the
 *   function returns an empty string.
 * @param page_type Should be set to articleData.page_type if available. If the
 *   content is from Xyla CMS, this should be set to 'Science Writer Article'.
 * @returns The date rendered with a numeric year, long month name and numeric
 *   day, or '' when there is no usable date.
 */
export function formatArticleDate(dt?: Date, page_type?: string): string {
  // An Invalid Date would otherwise render as the literal text "Invalid Date".
  if (dt === undefined || Number.isNaN(dt.getTime())) {
    return '';
  }

  // Use the browser's language when a window exists; otherwise fall back to
  // 'en-US'.
  const locale = typeof window !== 'undefined' ? navigator.language : 'en-US';

  // NOTE(micah) dates from CMS are datetimes at midnight start of day UTC.
  // This happens because the CMS datepicker stores every "date"
  // (year/month/day) as a "datetime" at UTC midnight start of day. These
  // datetimes then need to be rendered as "dates" (logically the date piece
  // of that datetime, also in UTC). All other datetimes should be localized
  // to the user's current timezone.
  const utcPageTypes = ['Science Writer Article', 'Trending Topics'];
  const renderInUtc =
    page_type !== undefined && utcPageTypes.includes(page_type);

  return dt.toLocaleDateString(locale, {
    year: 'numeric',
    month: 'long',
    day: 'numeric',
    timeZone: renderInUtc ? 'UTC' : undefined,
  });
}

/**
 * Optional raw date strings attached to an article.
 *
 * getDatePublished and getDateUpdated each pick the first of these fields
 * that is not null/undefined, in their own priority order, and pass it to
 * parseArticleDate.
 */
interface ArticleDates {
  /** First choice for the published date, last choice for the updated date. */
  dt_published?: string;
  /** Second choice for both the published and the updated date. */
  article_date?: string;
  /** First choice for the updated date, last choice for the published date. */
  last_modified_date?: string;
}

/**
 * Report whether a published date can be derived from the given date fields.
 *
 * @param articleDates Raw date strings of the article.
 * @returns true when getDatePublished yields a value other than undefined.
 */
export function hasDateAvailable(articleDates: ArticleDates): boolean {
  const published = getDatePublished(articleDates);
  return published !== undefined;
}

/**
 * Resolve the published date of an article.
 *
 * The first field that is not null/undefined is used, in the order
 * dt_published, article_date, last_modified_date.
 *
 * @param articleDates Raw date strings of the article.
 * @returns The result of parseArticleDate for the chosen string.
 */
export function getDatePublished(articleDates: ArticleDates): Date | undefined {
  const { dt_published, article_date, last_modified_date } = articleDates;
  return parseArticleDate(dt_published ?? article_date ?? last_modified_date);
}

/**
 * Resolve the last-updated date of an article.
 *
 * The first field that is not null/undefined is used, in the order
 * last_modified_date, article_date, dt_published.
 *
 * @param articleDates Raw date strings of the article.
 * @returns The result of parseArticleDate for the chosen string.
 */
export function getDateUpdated(articleDates: ArticleDates): Date | undefined {
  const { last_modified_date, article_date, dt_published } = articleDates;
  return parseArticleDate(last_modified_date ?? article_date ?? dt_published);
}

/**
 * Flatten an article's body into a single line of plain text.
 *
 * Span texts are collected from every section except the first, spans whose
 * text starts with 'REACTCOMPONENT' are dropped, the remaining HTML is
 * converted to text, and all whitespace runs are collapsed to one space.
 *
 * @param article Article whose sections, paragraphs and spans are read.
 * @returns The concatenated plain text (not trimmed).
 */
export function getRawText(article: ArticleData): string {
  // Convert one HTML fragment to text; anchors are formatted with
  // `ignoreHref` so link targets are not included.
  const toPlainText = (html: string): string =>
    htmlToText(html, {
      selectors: [
        {
          selector: 'a',
          format: 'anchor',
          options: {
            ignoreHref: true,
          },
        },
      ],
    });

  // warning: the first section is "always" skipped
  const bodySections = article.articlesection_set.slice(1);

  const spanTexts = bodySections.flatMap((section) =>
    section.articleparagraph_set.flatMap((paragraph) =>
      paragraph.articlespan_set.flatMap((span) => span.text)
    )
  );

  return (
    spanTexts
      // drop embedded React component placeholders
      .filter((raw) => !raw.startsWith('REACTCOMPONENT'))
      .map((raw) => toPlainText(raw))
      .join(' ')
      // newlines are whitespace too, so one pass collapses everything
      .replace(/\s+/g, ' ')
  );
}

/**
 * Generates an og:description based on article content: the first two
 * sentences of the article's raw text, joined with a space.
 */
export function getArticleDescription(article: ArticleData): string {
  const sentences = splitSentences(getRawText(article));
  const leading = sentences.slice(0, 2);
  return leading.join(' ');
}

/**
 * Type guard for rich citations.
 *
 * @param reference Citation to inspect.
 * @returns true when the citation is not a plain string and its
 *   `metadata.citation_detail` is truthy.
 */
export function isRichCitation(reference: Citation): reference is RichCitation {
  if (typeof reference === 'string') {
    return false;
  }
  return Boolean(reference.metadata.citation_detail);
}

/**
 * Turn lightly marked-up text into an HTML fragment.
 *
 * The steps run in a fixed order, each on the output of the previous one:
 * bold markers, bullet markers, numbered lists, then citation-marker removal
 * and whitespace / line-break normalisation.
 *
 * @param input Text to convert.
 * @returns The converted HTML string.
 */
export function parseHtmlAndWhiteSpace(input: string): string {
  // `**text**` becomes <strong>; this has to happen before the bullet step,
  // which also reacts to a leading `*`.
  const withBold = input.replace(/\*\*(.*?)\*\*/gm, '<strong>$1</strong>');

  // A leading `-`, `+` or `*` on a line becomes a bullet character.
  let html = withBold.replace(/^(\s*)[-+*]/gm, '$1•');

  const hasNumberedList = /(^|\n)\s*(\d+\.\s)/g.test(html);
  if (hasNumberedList) {
    html = html
      // each numbered line becomes an <li>
      .replace(/^(\s*)\d+\.\s(.*?)(?=\n\d+\.|\n|$)/gm, '$1<li>$2</li>')
      // each run of consecutive <li> elements is wrapped in an <ol>
      .replace(
        /((?:\s*<li>.*?<\/li>\n?)+)/g,
        '<ol style="margin-top: 0; margin-bottom: 0;">$1</ol>'
      );
  }

  return (
    html
      // drop bracketed numbers such as [12]
      .replace(/\[\d+\]/g, '')
      // pull punctuation up onto the previous line
      .replace(/\n([.,!?;:()])/g, '$1')
      .trim()
      // break before a bullet that follows neither a newline nor an html tag
      .replace(/([^>\n])•/g, '$1<br>•')
      // render m^2 units properly
      .replace(/m\n2\n?/g, 'm<sup>2</sup>')
      // remaining newlines become <br> unless an html tag follows
      .replace(/(\r?\n)+(?!(\s*<\w+>))/g, '<br>')
      // remove empty html tags and xml:space="preserve", which add extra space
      .replace(/<(\w+)>(&nbsp;|\s)*<\/\1>|xml:space="preserve"/g, '')
  );
}

/**
 * Clean up markdown text before it is sent to a Markdown parser.
 *
 * Applies the clean-up subset of parseHtmlAndWhiteSpace without generating
 * any list, bold or line-break markup.
 *
 * @param input Markdown text to clean.
 * @returns The cleaned text.
 */
export function fixMarkdown(input: string): string {
  return (
    input
      // drop bracketed numbers such as [12]
      .replace(/\[\d+\]/g, '')
      // pull punctuation up onto the previous line
      .replace(/\n([.,!?;:()])/g, '$1')
      .trim()
      // render m^2 units properly
      .replace(/m\n2\n?/g, 'm<sup>2</sup>')
      // remove empty html tags and xml:space="preserve", which add extra space
      .replace(/<(\w+)>(&nbsp;|\s)*<\/\1>|xml:space="preserve"/g, '')
  );
}
