import dl from "datalib";
import { useCallback } from "react";
import type { AccessToken } from "src/Auth0/accessToken";
import { downloadRecordsAsCSV } from "src/Common/DownloadLink";
import { Group } from "src/ImageViewerNew/types";
import { getViewImagesUrlForGroups } from "src/ImageViewerNew/utils";
import { metadataToKey } from "src/imaging/util";
import { DEFAULT_TIMEPOINT } from "src/timeseries/constants";
import { datasetApi } from "src/util/api-client";
import { buildSort } from "src/util/build-sort";
import invariant from "tiny-invariant";
import { Fetchable } from "@spring/core/result";
import {
  MultiSelectCondition,
  Operator,
  SelectCondition,
} from "../../Control/FilterSelector/operations/filter-by";
import {
  Filter,
  FilterSet,
  SelectFilter,
} from "../../Control/FilterSelector/types";
import {
  serializeToIndividualSqlClause,
  serializeToSqlClause,
} from "../../Control/FilterSelector/utils";
import {
  DatasetId,
  UntypedSampleMetadataRow,
  UntypedTimepointSampleMetadataRow,
  UntypedWellSampleMetadataRow,
  WorkspaceId,
  isTimeSeriesMetadata,
} from "../../types";
import { defaultComparator } from "../../util/sorting";
import { DB, queryDBAsRecords, sql, useQueryAsRecords } from "../../util/sql";
import { useFeatureSetManagementContext } from "../context";
import { UmapRow } from "../types";

// Fixed seed handed to the `REPEATABLE (...)` clause of the sampling queries below, so that
// the sample stays stable across refreshes.
const ROW_SAMPLING_SEED = 32;

/**
 * Report whether a filter selects *for* data (a "positive" condition).
 *
 * - Number, text, multiline-text and checkbox filters are always positive.
 * - Select and multi-select filters are positive only when their condition is one of the
 *   conditions listed in the corresponding case below.
 * - Any other filter type is treated as not positive.
 *
 * @param filter The filter to classify.
 * @returns `true` when the filter is positive, `false` otherwise.
 */
function isPositiveFilter(filter: Filter): boolean {
  switch (filter.type) {
    // Filtering for certain values of data is considered positive
    case "number":
    case "text":
    case "multilineText":
    case "checkbox":
      return true;

    case "select":
      return [
        SelectCondition.IS,
        SelectCondition.IS_ANY_OF,
        SelectCondition.IS_EMPTY,
        SelectCondition.IS_NOT_EMPTY,
      ].includes(filter.condition);

    case "multiSelect":
      // The condition must actually be looked up in the list: returning the array itself
      // would be truthy for every multi-select filter.
      return [
        MultiSelectCondition.HAS_ANY_OF,
        MultiSelectCondition.HAS_ALL_OF,
        MultiSelectCondition.IS_EXACTLY,
        MultiSelectCondition.HAS_AT_LEAST_TWO_OF,
        MultiSelectCondition.HAS_AT_LEAST_THREE_OF,
        MultiSelectCondition.IS_EMPTY,
        MultiSelectCondition.IS_NOT_EMPTY,
      ].includes(filter.condition);

    default:
      return false;
  }
}

/**
 * Sample metadata rows indexed by plate, then well, then timepoint.
 *
 * Every level is allowed to be `undefined`, so lookups for an unknown plate, well or
 * timepoint must be treated as possibly missing.
 */
type DedupedMetadata = {
  [plate: string]:
    | {
        [well: string]:
          | { [timepoint: string]: UntypedSampleMetadataRow | undefined }
          | undefined;
      }
    | undefined;
};

/**
 * Expand a Filter into an equivalent list of Filters that each match a single value.
 *   ex. "is any of A, B, C" becomes ["is A", "is B", "is C"]
 *
 * Only "select / is any of" (with an array query) and "multiSelect / has any of" filters are
 * expanded; every other filter comes back unchanged as a one-element list.
 *
 * Sampling uses this so that each of the user's conditions can contribute data.
 */
function flattenFilter(filter: Filter): Filter[] {
  switch (filter.type) {
    case "select": {
      const canSplit =
        filter.condition === SelectCondition.IS_ANY_OF &&
        Array.isArray(filter.queryText);
      if (!canSplit) {
        return [filter];
      }
      // One copy of the filter per requested value.
      return filter.queryText.map((value) => ({
        ...filter,
        queryText: [value] as string[] | number[],
      }));
    }

    case "multiSelect": {
      if (filter.condition !== MultiSelectCondition.HAS_ANY_OF) {
        return [filter];
      }
      return filter.queryText.map((value) => ({
        ...filter,
        queryText: [value] as string[] | number[],
      }));
    }

    // Non-select filters are already flat
    default:
      return [filter];
  }
}

/**
 * Potentially sample a dataset to ensure it fits in some size limit and returns the
 * serialized SQL filter for the sample.
 *
 * Given a dataset and the user's configured plates and filters, create additional
 * filtering based on intelligent sampling. This is an unfortunate, but necessary operation
 * when the user's input results in potentially too much data being loaded from the server.
 *
 * Any additional filters will be visible (read only) in the data filter control.
 *
 * Sampling happens in two passes:
 *  1. Each positive (flattened) filter condition gets `floor(maxSize / #conditions)` rows,
 *     restricted to the selected plates and to wells that were not sampled already.
 *  2. Whatever budget is left is spread over the plates (smallest plate first), again
 *     skipping wells that were already sampled.
 *
 * @param metadataDB Database holding the `sample_metadata` table.
 * @param plates Plates selected by the user; an empty list means "no plate restriction".
 * @param filterSet The user's filters.
 * @param maxSize Maximum number of rows to sample.
 * @returns A tuple of the consolidated filter and the metadata after applying that filter.
 *   When nothing could be sampled the filter is the always-false clause `1 = 0` and the
 *   metadata is empty.
 */
export async function createFilterForSamplingAsync(
  metadataDB: DB,
  plates: string[],
  filterSet: FilterSet,
  maxSize: number,
): Promise<
  [string, UntypedTimepointSampleMetadataRow[] | UntypedWellSampleMetadataRow[]]
> {
  // Render a value as a single-quoted SQL string literal, doubling embedded quotes so that
  // a plate or well name containing "'" cannot break (or inject into) the query.
  const toSqlLiteral = (value: unknown): string =>
    `'${String(value).replace(/'/g, "''")}'`;
  const toPlateWellTuple = (row: UntypedSampleMetadataRow): string =>
    `(${toSqlLiteral(row.plate)}, ${toSqlLiteral(row.well)})`;
  // Clause (with leading AND) that excludes every well sampled so far.
  const excludeSampledWells = (sampled: string[]): string =>
    sampled.length === 0
      ? ""
      : ` AND (plate, well) NOT IN (${sampled.join(", ")})`;

  const filterSerialized = serializeToSqlClause(filterSet);
  const plateRestriction =
    plates.length > 0
      ? `plate IN (${plates.map((plate) => toSqlLiteral(plate)).join(",")}) AND `
      : "";

  // TODO(michaelwiest): need to handle timepoints in this crazy logic!
  // Take into account the user's plate selection and desired filters
  const metadata = await queryDBAsRecords<UntypedSampleMetadataRow>(
    metadataDB,
    sql`SELECT * FROM sample_metadata
    WHERE ${plateRestriction}(${filterSerialized})`,
  );

  const platesWithCounts = dl
    .groupby("plate")
    .count()
    .execute(
      // Hack(davidsharff): make the sampling stable across refreshes
      metadata.sort((a, b) => defaultComparator(a.plate, b.plate)),
    );

  // We want the plates with the lowest counts first, to ensure we have enough wells to sample from
  platesWithCounts.sort(
    (
      { count: count1 }: { count: number },
      { count: count2 }: { count: number },
    ) => count1 - count2,
  );

  const samplePlateAndWells: string[] = [];
  const sampleMetadata: UntypedWellSampleMetadataRow[] = [];

  // Take wells from each of the user's positive conditions to ensure that all specified conditions
  // are represented in the result
  const positiveFilters = filterSet.filters
    .filter(isPositiveFilter)
    .flatMap((filter) => flattenFilter(filter));

  // If the filter operator is an AND, we should always apply the filters to make sure any data
  // point we select will meet all conditions
  const prefilter =
    filterSet.operator === Operator.AND ? `(${filterSerialized}) AND ` : "";

  // Divide the samples up evenly among the conditions. When there are more conditions than
  // `maxSize`, the per-condition share rounds down to zero and this pass samples nothing.
  //
  // NOTE: This is definitely introducing bias. We are erring on the side of better UX (showing the
  // user the data they selected for) over better analytical practices.
  // Ideally we would like to remove the need for sampling altogether.
  const numToSamplePerFilter = Math.floor(maxSize / positiveFilters.length);

  // If there isn't enough data from the conditions to fill the sample, we will fill in the rest
  // with random wells later
  let numLeftToSample = maxSize;

  for (const filter of positiveFilters) {
    // A zero-row sample cannot contribute anything; skip the round trip.
    if (numToSamplePerFilter < 1) {
      break;
    }

    const conditionSerialized = serializeToIndividualSqlClause(filter);

    // Use DuckDB's reservoir sampling to take the random samples. Restricting to the
    // selected plates keeps the sample inside the user's selection, and excluding wells that
    // were already taken keeps one well from being counted for several conditions.
    const samples = await queryDBAsRecords<UntypedSampleMetadataRow>(
      metadataDB,
      sql`SELECT * FROM
          (SELECT * FROM sample_metadata
          WHERE ${plateRestriction}${prefilter}(${conditionSerialized})${excludeSampledWells(
            samplePlateAndWells,
          )})
        USING SAMPLE(${numToSamplePerFilter} ROWS)
        REPEATABLE (${ROW_SAMPLING_SEED})`,
    );

    sampleMetadata.push(...samples);
    samplePlateAndWells.push(...samples.map(toPlateWellTuple));

    numLeftToSample -= samples.length;
  }

  // Sample random wells from all plates to ensure that all plates are represented in the result.
  // Each iteration targets a different plate, so the exclusion list only needs the wells
  // gathered by the per-condition pass above.
  const alreadySampledExclusion = excludeSampledWells(samplePlateAndWells);
  let numPlatesRemaining = platesWithCounts.length;

  for (const { plate } of platesWithCounts) {
    const numToSample = Math.floor(numLeftToSample / numPlatesRemaining);

    // If this plate doesn't have enough wells, the remainder will be distributed among the rest
    numPlatesRemaining -= 1;

    if (numToSample < 1) {
      continue;
    }

    // Use DuckDB's reservoir sampling to take the random samples
    const samples = await queryDBAsRecords<UntypedSampleMetadataRow>(
      metadataDB,
      sql`SELECT * FROM
          (SELECT * FROM sample_metadata
          WHERE plate=${toSqlLiteral(
            plate,
          )} AND (${filterSerialized})${alreadySampledExclusion})
        USING SAMPLE(${numToSample} ROWS)
        REPEATABLE (${ROW_SAMPLING_SEED})`,
    );

    sampleMetadata.push(...samples);
    samplePlateAndWells.push(...samples.map(toPlateWellTuple));

    numLeftToSample -= samples.length;
  }

  // `(plate, well) IN ()` is not valid SQL, so express "nothing sampled" as an always-false
  // clause instead.
  if (samplePlateAndWells.length === 0) {
    return ["1 = 0", []];
  }

  return [
    `(plate, well) IN (${samplePlateAndWells.join(", ")})`,
    isTimeSeriesMetadata(sampleMetadata[0])
      ? (sampleMetadata as UntypedTimepointSampleMetadataRow[])
      : (sampleMetadata as UntypedWellSampleMetadataRow[]),
  ];
}

/**
 * Turn a metadata value into the text shown for it.
 *
 * `null` is rendered as the placeholder `<null>`; every other value (including `undefined`)
 * goes through ordinary template-string conversion.
 */
export function renderMetadataValue(value: any): string {
  return value === null ? "<null>" : `${value}`;
}

/**
 * Request feature data for a dataset from the dataset API and download it.
 *
 * Posts `{ features, columns, plates }` to the dataset's "download" route and passes the
 * result to the client's `download` step under the name `<dataset>.csv`.
 *
 * Fails the invariant (throws) when no workspace is given.
 */
export function handleDownloadFeatureData(
  accessToken: AccessToken,
  workspace: WorkspaceId | undefined,
  dataset: DatasetId,
  {
    features,
    columns,
    plates,
  }: { features: string[]; columns?: string[]; plates: string[] },
): Promise<void> {
  invariant(workspace, "Attempting to download data without a workspace");

  const client = datasetApi({ accessToken, workspace, dataset });
  const downloadRequest = client
    .route("download")
    .post({ features, columns, plates });

  return downloadRequest.download(`${dataset}.csv`);
}

/**
 * Download the UMAP points, joined with their sample metadata, as `<dataset>-UMAP.csv`.
 *
 * Each CSV row is the metadata row that shares the point's `metadataToKey` key (if any),
 * plus the point's coordinates as `x` / `y` and, when the data carries cluster labels, a
 * 1-based `cluster` column.
 *
 * @param dataset Dataset id, used to build the file name.
 * @param data The UMAP rows; must already be successfully loaded.
 * @param metadata Sample metadata rows to join onto the points.
 * @param options.points When given, only points whose key is in this set are downloaded.
 * @returns A promise that resolves once the download has been handed off. It rejects when
 *   `data` is not ready, or when building the CSV throws.
 */
export function handleDownloadData(
  dataset: DatasetId,
  data: Fetchable<UmapRow[]>,
  metadata: UntypedSampleMetadataRow[],
  options: { points?: Set<string> } = {},
): Promise<void> {
  if (!data?.successful) {
    return Promise.reject(new Error("Started download prior to data ready"));
  }

  const umapRows = data.value;
  const { points } = options;

  // The executor runs synchronously; anything it throws turns into a rejection.
  return new Promise<void>((resolve) => {
    // TODO(benkomalo): for now, we download the actual UMAPed data. It's possible
    // the user intent when clicking download here is to download the original data
    // that powers the UMAP, but it's ambiguous, and we probably want more fleshed
    // out designs that has a "download options" modal or popup. Until then, just
    // clean up the data we've already got for the UMAP and save it.
    const filename = `${dataset}-UMAP.csv`;
    const metadataByKey: { [key: string]: UntypedSampleMetadataRow } = {};
    for (const metadataRow of metadata) {
      metadataByKey[metadataToKey(metadataRow)] = metadataRow;
    }

    const rowsToDownload =
      points === undefined
        ? umapRows
        : umapRows.filter((row) => points.has(metadataToKey(row)));

    // Guard the peek at the first row: a successful but empty result has no row to inspect.
    const hasClusters =
      umapRows.length > 0 && umapRows[0].cluster_label !== undefined;

    downloadRecordsAsCSV(
      rowsToDownload.map((row) => {
        const rowMetadata = metadataByKey[metadataToKey(row)];
        const csvRow: Record<string, string | number> = {
          ...rowMetadata,
          // Rename "0" and "1" to "x" and "y".
          x: row[0],
          y: row[1],
        };

        if (hasClusters) {
          // Cluster labels are shifted by one for the CSV.
          csvRow["cluster"] = row["cluster_label"]! + 1;
        }

        return csvRow;
      }),
      filename,
    );
    resolve();
  });
}

/**
 * Randomly sample the elements of select filters until the resulting "view images" URL is
 * short enough, and return that URL.
 *
 * Sampling is delegated to `sampleAtMostN` and maintains element order.
 *
 * Strategy:
 *  1. Determine the maximum elements that could be evenly distributed across all filters.
 *  2. Increase this number based on the remainder of each filter that is below this threshold.
 *  3. Spread the resulting total evenly over the remaining filters that need to be sampled.
 *
 * NOTE: the `queryText` arrays of the filters inside `filterGroups` are replaced in place.
 *
 * @param filterGroups Groups whose filters should be sampled.
 * @param targetPathname Pathname the URL is built for.
 * @param maxTotalFilterElements Total number of filter elements allowed across all filters.
 *   It is lowered by 100 and the sampling retried whenever the URL is still too long.
 * @returns The URL, or `null` when the filters cannot be reduced far enough.
 */
export function getSampledFilterUrl(
  filterGroups: Group<SelectFilter<string[]>>[],
  targetPathname: string,
  maxTotalFilterElements = 1100,
): string | null {
  const maxUrlLength = 12000;
  const retryDecrement = 100;
  const cannotReduceMessage =
    "Unable to reduce the number of filter elements below the required threshold.";

  // The url is built from the entire filterGroups object, so sampling works by directly
  // mutating the queryText array on each filter object. Shortest filters go first so that
  // their unused share of the budget can be handed to the longer ones.
  const filters = filterGroups
    .flatMap((group) => group.filterSet.filters)
    .sort(buildSort((f: SelectFilter<string[]>) => f.queryText.length));

  let remainingElements = maxTotalFilterElements;
  let remainingFilters = filters.length;

  for (const filter of filters) {
    const maxAllowed = Math.floor(remainingElements / remainingFilters);
    // `<= 0` rather than `=== 0`: a negative budget must also stop here.
    if (maxAllowed <= 0) {
      console.error(cannotReduceMessage);
      return null;
    }

    // Directly mutate the array on the parent filter object.
    filter.queryText = sampleAtMostN(filter.queryText, maxAllowed);

    remainingElements -= filter.queryText.length;
    remainingFilters -= 1;
  }

  const url = getViewImagesUrlForGroups(targetPathname, filterGroups);
  if (url.length <= maxUrlLength) {
    return url;
  }

  // If the url is still too long, continue sampling.
  // Details:
  //   We sample on filter elements because it is far more efficient than checking the url len
  //   each iteration. However, it is an imprecise proxy for the total url length because of varying
  //   numbers of groups and possible changes to the filter set objects.
  const nextBudget = maxTotalFilterElements - retryDecrement;

  // Stop when another round cannot shrink anything: either there is nothing to sample, or the
  // budget is exhausted. Without this the retry would recurse without end.
  if (filters.length === 0 || nextBudget <= 0) {
    console.error(cannotReduceMessage);
    return null;
  }

  return getSampledFilterUrl(filterGroups, targetPathname, nextBudget);
}

/**
 * Pick a random, order-preserving sample of at most `max` elements from `list`.
 *
 * A list that already has `max` or fewer elements is returned as-is (same array, no copy).
 * Otherwise the list is walked once and each element is kept with probability
 * (slots still to fill) / (elements still to visit). That probability reaches 1 as soon as
 * every remaining element is needed and 0 once the sample is full, so for
 * `0 <= max < list.length` the sample holds exactly `max` elements.
 */
function sampleAtMostN<T>(list: T[], max: number) {
  if (list.length <= max) {
    return list;
  }

  const picked: T[] = [];

  for (const [index, item] of list.entries()) {
    const slotsLeft = max - picked.length;
    const itemsLeft = list.length - index;

    if (Math.random() < slotsLeft / itemsLeft) {
      picked.push(item);
    }
  }

  return picked;
}

/**
 * Hook returning a memoized `(plate, well) => wellId` function.
 *
 * The synthetic well ID (expected by Compare Images to efficiently filter arbitrary groups of
 * wells) is the plate's 1-based position among the distinct plates of `sample_metadata`,
 * ordered by plate ascending, immediately followed by the well.
 *
 * The returned function fails its invariants (throws) when the plate query has not
 * succeeded or when the plate is not among the query results.
 */
export function useGetWellIdForPoint() {
  const { metadataDB: fullMetadataDB } = useFeatureSetManagementContext();

  const distinctPlates = useQueryAsRecords<{ plate: string }>(
    fullMetadataDB,
    sql`SELECT DISTINCT plate FROM sample_metadata ORDER BY plate ASC`,
  );

  return useCallback(
    (plate: string, well: string) => {
      invariant(distinctPlates?.successful);

      const position = distinctPlates.value.findIndex(
        (record) => record.plate === plate,
      );
      invariant(position !== -1);

      // Plate numbers are 1-based.
      return `${position + 1}${well}`;
    },
    [distinctPlates],
  );
}
/**
 * Bucket the UMAP rows by the rendered value of one metadata column.
 *
 * Each row's metadata is looked up by plate, well and timepoint (falling back to
 * `DEFAULT_TIMEPOINT` when the row has none). Rows whose metadata, or whose value for the
 * column, is `undefined` are left out; `null` values are kept and keyed by their rendered
 * text.
 *
 * @returns A tuple of the buckets keyed by rendered value, and the same buckets as
 *   `[value, rows]` entries sorted by value with `defaultComparator`. Both are empty when no
 *   column is selected or the data has not loaded successfully.
 */
export function getGroupedMetadata(
  dedupedMetadata: DedupedMetadata,
  coloringMetadataColumn: string | null,
  data: Fetchable<UmapRow[]>,
): [
  { [meta: string]: UmapRow[] | undefined },
  [string, UmapRow[] | undefined][],
] {
  const rowsByValue: { [meta: string]: UmapRow[] | undefined } = {};

  // Nothing to group without a column to colour by, or before the data is available.
  if (!coloringMetadataColumn || !data?.successful) {
    return [rowsByValue, []];
  }

  for (const row of data.value) {
    const timepoint = row.timepoint ?? DEFAULT_TIMEPOINT;
    const value =
      // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
      dedupedMetadata[row.plate]?.[row.well]?.[timepoint]?.[
        coloringMetadataColumn
      ];
    if (value === undefined) {
      continue;
    }

    const label = renderMetadataValue(value);
    const bucket = rowsByValue[label] ?? (rowsByValue[label] = []);
    bucket.push(row);
  }

  const sortedGroups = Object.entries(rowsByValue).sort(
    ([labelA], [labelB]) => defaultComparator(labelA, labelB),
  );

  return [rowsByValue, sortedGroups];
}
/**
 * Index metadata rows by plate, then well, then timepoint.
 *
 * Rows that are not time-series metadata are stored under `DEFAULT_TIMEPOINT`. When several
 * rows share the same plate, well and timepoint, the last one wins. `undefined` input yields
 * an empty index.
 */
export function getDedupedMetadata(
  metadata: UntypedSampleMetadataRow[] | undefined,
): DedupedMetadata {
  const byPlate: DedupedMetadata = {};

  if (metadata !== undefined) {
    for (const row of metadata) {
      // Create the intermediate levels on first sight of a plate / well.
      const byWell = (byPlate[row.plate] ??= {});
      const byTimepoint = (byWell[row.well] ??= {});
      const timepoint = isTimeSeriesMetadata(row)
        ? row.timepoint
        : DEFAULT_TIMEPOINT;

      byTimepoint[timepoint] = row;
    }
  }

  return byPlate;
}
