import * as Sentry from "@sentry/react";
import isEqual from "lodash.isequal";
import { useMemo } from "react";
import { DatasetId } from "src/types";
import { parseTaggedString } from "@spring/core/log-utils";
import { Failure, Fetchable, Result, Success } from "@spring/core/result";
import { FeatureSetGroup, useFeatureSets } from "../hooks/features";
import { RecordsArray } from "../util/sql";
import {
  ColumnsLookupFn,
  FeatureSetLevel,
  FeatureSetListing,
  FeatureSetPlateGroup,
  FeatureSetSelection,
  FeatureSetType,
  NormalFeatureSetSelection,
  PlateBasedFeatureSetListing,
  SerializedColumnIndices,
  SerializedSelection,
  UnreifiedFeatureSetSelection,
  UnvalidatedFeatureParams,
} from "./types";

/**
 * Marker text used to recognise prediction feature sets by name:
 * `inferTypeFromFeatureSetName` treats any name containing it as a
 * "prediction", and `cleanPredictionName` strips it when it is a trailing
 * suffix.
 */
// TODO(you): Fix this no-unused-exports rule violation
// ts-unused-exports:disable-next-line
export const PHENOSORTER_SUFFIX: string = "Prediction";

/**
 * Classify a feature set purely from its name.
 *
 * A name containing "Embedding" is an embedding; otherwise a name containing
 * PHENOSORTER_SUFFIX is a prediction; anything else is numerical. The
 * embedding check takes precedence when both markers are present.
 */
export function inferTypeFromFeatureSetName(name: string): FeatureSetType {
  const looksLikeEmbedding = name.includes("Embedding");
  if (looksLikeEmbedding) {
    return "embedding";
  }

  const looksLikePrediction = name.includes(PHENOSORTER_SUFFIX);
  if (looksLikePrediction) {
    return "prediction";
  }

  return "numerical";
}

/**
 * Drop the trailing PHENOSORTER_SUFFIX from a feature name.
 *
 * Names that do not end with the suffix are returned unchanged.
 */
export function cleanPredictionName(name: string): string {
  if (!name.endsWith(PHENOSORTER_SUFFIX)) {
    return name;
  }
  const keptLength = name.length - PHENOSORTER_SUFFIX.length;
  return name.substring(0, keptLength);
}

/**
 * Type guard: does `maybePayload` carry structured labeled-set information?
 *
 * A payload qualifies when it is non-nullish, its `stains` is an array whose
 * entries are all strings, and its `labeled_set_id` is a string, `null`, or
 * absent.
 *
 * @param maybePayload Arbitrary, unvalidated value.
 * @returns `true` when the payload can be treated as LabeledSetFeatureSetArgs.
 */
export function hasStructuredLabelSetInformation(
  maybePayload: any,
): maybePayload is LabeledSetFeatureSetArgs {
  // Reading a property off null/undefined throws, so reject those up front
  // instead of crashing the caller that is merely probing the payload.
  if (maybePayload == null) {
    return false;
  }

  // TODO(michaelwiest): we can put other structured fields to check for in here.
  const { stains, labeled_set_id: labeledSetId } = maybePayload;
  return (
    Array.isArray(stains) &&
    stains.every((s: unknown) => typeof s === "string") &&
    // LabeledSetFeatureSetArgs declares `string | null`, so null must pass;
    // a missing key is still accepted, as it was before.
    (labeledSetId == null || typeof labeledSetId === "string")
  );
}

/**
 * Type guard: does `maybePayload` carry structured supervised-learner info?
 *
 * @param maybePayload Arbitrary, unvalidated value.
 * @returns `true` when the payload is non-nullish and has a string `model_id`.
 */
export function hasStructuredSupervisedLearnerInfo(
  maybePayload: any,
): maybePayload is SupervisedLEarnerFeatureSetArgs {
  // Guard against null/undefined first: property access on them would throw.
  return maybePayload != null && typeof maybePayload.model_id === "string";
}

/**
 * Infer the aggregation level ("well", "field" or "cell") of a feature set
 * from its name alone.
 */
export function inferLevelFromFeatureSetName(name: string): FeatureSetLevel {
  // Sometimes for analytical reasons we normalize after well aggregation,
  // such that the normalization prefix comes first.
  const isNormalizedWellAggregate =
    name.includes("WellAggregated") && name.includes("Normalized");
  const isWellLevel =
    name.startsWith("WellAggregated") ||
    name.endsWith("Projection") ||
    isNormalizedWellAggregate;
  if (isWellLevel) {
    return "well";
  }

  // Neuronal Health Scores is a custom feature for arvinas in arvinas-neurons-20230622.
  // AggregateProportionCovered is a custom feature for arvinas in arvinas-solubility-20230621.
  const fieldLevelNames = [
    "Images",
    "CellCount",
    "Neuronal Health Scores",
    "AggregateProportionCovered",
  ];
  return fieldLevelNames.includes(name) ? "field" : "cell";
}

/**
 * Given rows of a feature set, infer the FeatureSetLevel from which keys are
 * present on the first row.
 *
 *  - "column", "row", "field" and "well" all present -> "cell"
 *  - "field" and "well" present                      -> "field"
 *  - only "well" present                             -> "well"
 *
 * Only the first row is inspected.
 *
 * @param row Rows of the feature set.
 * @returns The inferred level.
 * @throws Error if there are no rows, or if the first row has no "well" key.
 */
export function inferLevelFromFeatureSetRows(
  row: RecordsArray,
): FeatureSetLevel {
  const firstRow: { [key: string]: unknown } | undefined = row[0];
  if (firstRow == null) {
    // Without this guard the `in` checks below would fail with an opaque
    // TypeError that says nothing about feature sets.
    throw new Error("Cannot infer FeatureSetLevel from an empty set of rows.");
  }

  // Every recognised level requires a "well" key.
  if (!("well" in firstRow)) {
    // Throw a real Error (not a bare string) so callers get a stack trace.
    throw new Error("Unexpected FeatureSetLevel.");
  }
  if (!("field" in firstRow)) {
    return "well";
  }
  return "column" in firstRow && "row" in firstRow ? "cell" : "field";
}

/**
 * Decide from its name whether a feature set should be hidden: true when the
 * name starts with one of the aggregated/skimmed prefixes listed below.
 */
// TODO(you): Fix this no-unused-exports rule violation
// ts-unused-exports:disable-next-line
export function inferIsHiddenFromFeatureSetName(name: string): boolean {
  const hiddenPrefixes = [
    "WellAggregated",
    "FieldAggregated",
    "FieldSkimmed",
    "WellSkimmed",
  ];
  return hiddenPrefixes.some((hiddenPrefix) => name.startsWith(hiddenPrefix));
}

/**
 * Map a selection to the names of its well-aggregated feature sets.
 *
 * Embedding selections contribute all of their `names`; numerical and
 * prediction selections contribute their single `name`. Each name gets a
 * "WellAggregated" prefix unless it is already a normalized well aggregate.
 */
export function extractWellAggregatedNamesFromSelection(
  selection: FeatureSetSelection,
): string[] {
  const baseNames: string[] =
    selection.type === "embedding" ? selection.names : [selection.name];

  return baseNames.map((baseName) => {
    // Sometimes for analytical reasons we normalize after well aggregation,
    // such that the normalization prefix comes first.
    const alreadyWellAggregated =
      baseName.startsWith("Normalized") && baseName.includes("WellAggregated");
    return alreadyWellAggregated ? baseName : `WellAggregated${baseName}`;
  });
}

/**
 * Feature set plate groups bucketed by type: one FeatureSetPlateGroup array
 * for every FeatureSetType key.
 */
export type FeatureSetsByType = {
  [key in FeatureSetType]: FeatureSetPlateGroup[];
};

/** Minimal shape accepted by `excludeHiddenFeatures`: anything with a `name`. */
interface NamedItem {
  name: string;
}

/**
 * Payload shape that the `hasStructuredLabelSetInformation` type guard
 * narrows to.
 */
// TODO(you): Fix this no-unused-exports rule violation
// ts-unused-exports:disable-next-line
export type LabeledSetFeatureSetArgs = {
  // Labeled set identifier, or null.
  labeled_set_id: string | null;
  // Stains, given as strings.
  stains: string[];
};

/**
 * Payload shape that the `hasStructuredSupervisedLearnerInfo` type guard
 * narrows to.
 */
// TODO(you): Fix this no-unused-exports rule violation
// ts-unused-exports:disable-next-line
export type SupervisedLEarnerFeatureSetArgs = {
  // Model identifier, as a string.
  model_id: string;
};

/**
 * React hook: load the feature sets for `dataset` and regroup them by type.
 *
 * When `dataset` is falsy, `useFeatureSets` is called with `{ skip: true }`
 * instead of a dataset. The regrouped value is memoized on the fetch result.
 */
export function useFeatureSetsGrouped(
  dataset: DatasetId | null,
): Fetchable<FeatureSetsByType> {
  const fetchedGroups = useFeatureSets(!dataset ? { skip: true } : { dataset });

  return useMemo(() => {
    return fetchedGroups?.map((groupingByPlate) =>
      hackilyTransformFeatureNamesToStructuredEntries(groupingByPlate),
    );
  }, [fetchedGroups]);
}

/**
 * Remove synthetic/derivative features from a user-visible feature list.
 *
 * Some features are derivative views of other features, such as
 * pre-aggregated or pre-sampled values. They are useful in certain products
 * but are conceptually the same as their source feature, so they are left out
 * of user-visible selection lists.
 *
 * Returns a new array; items whose name is classified as hidden by
 * `inferIsHiddenFromFeatureSetName` are omitted.
 */
export function excludeHiddenFeatures<T extends NamedItem>(items: T[]): T[] {
  const isVisible = (item: T): boolean =>
    !inferIsHiddenFromFeatureSetName(item.name);
  return items.filter(isVisible);
}

/**
 * Parse FeatureSet names to structured data according to Spring internal conventions.
 *
 * This is a stopgap measure until we determine what "FeatureSet"s look like eventually,
 * and what metadata might be available for us in a given "features index" that has
 * yet to be built. For now, we infer the following info from the names based on the
 * Spring internal conventions (since we're only working with Spring data thus far):
 *  - timestamp of creation
 *  - grouping by base name (to see if it's the latest of the "kind" or not)
 *
 * Entries whose name cannot be parsed (no datestamp) are reported to Sentry
 * and skipped.
 *
 * @param groupingByPlate Raw listing from the server, grouped by plate.
 * @returns For every base name, the newest listing per plate, bucketed by the
 *   type inferred from the base name.
 */
// TODO(you): Fix this no-unused-exports rule violation
// ts-unused-exports:disable-next-line
export function hackilyTransformFeatureNamesToStructuredEntries(
  groupingByPlate: FeatureSetGroup[],
): FeatureSetsByType {
  // Collect all results by name and plate. Maps (rather than plain objects)
  // are used because the keys are arbitrary names: a name such as
  // "constructor" or "__proto__" would otherwise collide with members
  // inherited from Object.prototype.
  const byNameAndPlate = new Map<
    string,
    Map<string | null, FeatureSetListing[]>
  >();

  // The server returns us the raw data grouped by plate.
  for (const { plate, featureSets } of groupingByPlate) {
    // We, however, want to re-group by FeatureSet name, and _then_ by plate.
    for (const nameWithSuffix of featureSets) {
      const parts = parseTaggedString(nameWithSuffix);
      if (parts?.datestamp == null) {
        Sentry.captureMessage(
          `Unexpected Featureset format: ${nameWithSuffix}`,
        );
        continue;
      }

      // The datestamp is read as YYYYMMDD; Date months are zero-based.
      const year = Number(parts.datestamp.slice(0, 4));
      const month = Number(parts.datestamp.slice(4, 6)) - 1;
      const day = Number(parts.datestamp.slice(6, 8));
      const name = parts.baseName;
      const type = inferTypeFromFeatureSetName(name);
      const created = new Date(year, month, day);

      // Only plate-based listings carry a level and a plate.
      const structured: FeatureSetListing | PlateBasedFeatureSetListing =
        plate === null
          ? { id: nameWithSuffix, type, name, created }
          : {
              id: nameWithSuffix,
              type,
              level: inferLevelFromFeatureSetName(name),
              plate,
              name,
              created,
            };

      let byPlate = byNameAndPlate.get(name);
      if (byPlate === undefined) {
        byPlate = new Map<string | null, FeatureSetListing[]>();
        byNameAndPlate.set(name, byPlate);
      }

      const versions = byPlate.get(plate);
      if (versions === undefined) {
        byPlate.set(plate, [structured]);
      } else {
        versions.push(structured);
      }
    }
  }

  const results: FeatureSetsByType = {
    embedding: [],
    numerical: [],
    prediction: [],
  };

  // Note: If FeatureSets have the same (name, plate) but different suffixes, that
  // implies they're just different versions of the same FeatureSet. We don't
  // really expose this kind of versioning to the user but the server is returning us
  // all of the constituent listings -- just drop the older versions on the floor here.
  for (const [name, byPlate] of byNameAndPlate) {
    const plateGroup: FeatureSetPlateGroup = { name, featureSets: [] };
    for (const versions of byPlate.values()) {
      plateGroup.featureSets.push(getNewestVersion(versions));
    }
    results[inferTypeFromFeatureSetName(name)].push(plateGroup);
  }

  return results;
}

/**
 * Pick the listing with the latest `created` value.
 *
 * On ties the earliest such entry in `items` wins, because a later entry
 * only replaces the current pick when it is strictly newer.
 */
function getNewestVersion(items: FeatureSetListing[]): FeatureSetListing {
  return items
    .slice(1)
    .reduce(
      (newestSoFar, candidate) =>
        candidate.created > newestSoFar.created ? candidate : newestSoFar,
      items[0],
    );
}

/**
 * Extract the stain from an embedding name: the text between the first and
 * second " - " separators (or to the end when there is only one separator).
 * @param name Something like "Vgg5Embeddings-20210101-uuid - MitoTracker"
 * @throws Error when the name has no (or an empty) stain part.
 */
export function stainFromName(name: string): string {
  const stain = name.split(" - ")[1];
  if (stain) {
    return stain;
  }
  throw new Error(
    `Malformed embedding FeatureSet. No stain suffix found: ${name}`,
  );
}

/**
 * Extract the base name from an embedding name: everything before the first
 * " - " separator, or the whole name when there is no separator.
 * @param name Something like "Vgg5Embeddings-20210101-uuid - MitoTracker"
 */
export function baseNameFromName(name: string): string {
  const separatorAt = name.indexOf(" - ");
  return separatorAt === -1 ? name : name.slice(0, separatorAt);
}

/**
 * Build an embedding name from a base name and a stain, joined by " - ".
 * The inverse of stainFromName.
 */
export function nameFromParts(baseName: string, stain: string): string {
  return `${baseName} - ${stain}`;
}

/**
 * Convert a selection into its query-param representation.
 *
 * Embedding selections serialize to their list of names. Numerical and
 * prediction selections serialize to a name plus a column specification
 * produced by `serializeColumns`, using the full column list that
 * `getColumns` returns for that name.
 */
export function serializeToQueryParams(
  selection: FeatureSetSelection,
  getColumns: ColumnsLookupFn,
): SerializedSelection {
  if (selection.type === "embedding") {
    return {
      featureType: "embedding",
      featureNames: selection.names,
    };
  }

  const { type, name } = selection;
  const availableColumns = getColumns(name);
  return {
    featureType: type,
    featureName: name,
    featureColumns: serializeColumns(selection, availableColumns),
  };
}

// Exported for testing.
/**
 * Serialize the column part of a selection.
 *
 *  - "all" when the selection includes all columns;
 *  - the column names themselves when fewer than 20 are selected;
 *  - otherwise a compact index string, built from the positions in
 *    `allColumns` of the selected columns (selected columns that do not
 *    appear in `allColumns` are left out).
 */
export function serializeColumns(
  selection: NormalFeatureSetSelection,
  allColumns: string[],
): string[] | "all" | SerializedColumnIndices {
  if (selection.includesAllColumns) {
    return "all";
  }

  const { columns } = selection;
  if (columns.length < 20) {
    return columns;
  }

  const wanted = new Set(columns);
  const positions: number[] = allColumns.flatMap((column, position) =>
    wanted.has(column) ? [position] : [],
  );
  return serializeColumnIndices(positions);
}

// Exported for testing.
/**
 * Serialize a list of column indices into a single compact string.
 *
 * This representation is useful when many columns are selected. It assumes
 * the most common selection (other than "all" columns) picks groups of
 * related columns that sit next to each other in the column list. Lone
 * indices are written as-is, and every run of consecutive indices is
 * compacted to `start:end` (start inclusive, end exclusive). Entries are
 * comma-separated and the whole string is prefixed with "::".
 * e.g. [0, 3, 4, 5, 6, 7, 8, 9, 14, 15] serializes to "::0,3:10,14:16".
 *
 * It's assumed indices are sorted.
 */
export function serializeColumnIndices(
  indices: number[],
): SerializedColumnIndices {
  const tokens: string[] = [];
  let runStart: number | null = null;
  let runEnd: number | null = null;

  // Emit the run collected so far (if any) as either "n" or "from:toExclusive".
  const flushRun = (): void => {
    if (runStart === null || runEnd === null) {
      return;
    }
    tokens.push(
      runStart === runEnd ? String(runEnd) : `${runStart}:${runEnd + 1}`,
    );
  };

  for (const index of indices) {
    if (runEnd !== null && index === runEnd + 1) {
      // Still consecutive: grow the current run.
      runEnd = index;
    } else {
      // Gap (or first value): close the previous run and open a new one.
      flushRun();
      runStart = index;
      runEnd = index;
    }
  }
  flushRun();

  return `::${tokens.join(",")}`;
}

/**
 * Type guard: a string is a serialized column-index list when it begins
 * with the "::" marker.
 */
export function isSerializedColumnIndices(
  s: string,
): s is SerializedColumnIndices {
  const marker = "::";
  return s.slice(0, marker.length) === marker;
}

/**
 * Expand a serialized column-index string back into the list of indices.
 *
 * The input is the "::"-prefixed, comma-separated format in which each entry
 * is either a single index or a `start:end` range (start inclusive, end
 * exclusive). An input with nothing after the prefix yields an empty list.
 *
 * @param indices Serialized indices, e.g. "::0,3:10,14:16".
 * @returns The indices in the order they appear in the string.
 */
export function deserializeColumnIndices(
  indices: SerializedColumnIndices,
): number[] {
  return (
    indices
      .slice(2)
      .split(",")
      // "".split(",") yields [""] and Number("") is 0, so without this filter
      // an empty serialization ("::") would wrongly decode to [0].
      .filter((token) => token !== "")
      .flatMap((token) => {
        if (token.includes(":")) {
          const [start, end] = token.split(":").map(Number);
          return Array.from(
            { length: end - start },
            (_, offset) => start + offset,
          );
        }
        return [Number(token)];
      })
  );
}

/**
 * Rebuild a (not yet reified) feature selection from unvalidated query params.
 *
 * @param features The known feature sets, bucketed by type; used to check that
 *   every referenced feature exists.
 * @param params Raw query params.
 * @returns
 *  - `Success(null)` when no `featureType` is present;
 *  - `Success(selection)` when the params describe a known feature;
 *  - `Failure` when required params are missing, a referenced feature is not
 *    found in `features`, or the feature type is not recognised.
 */
export function deserializeFromQueryParams(
  features: FeatureSetsByType,
  params: UnvalidatedFeatureParams,
): Result<UnreifiedFeatureSetSelection | null> {
  if (!params.featureType) {
    // No type at all means we likely don't have any relevant params.
    return Success.of(null);
  }

  switch (params.featureType) {
    case "embedding": {
      if (!params.featureNames || params.featureNames.length === 0) {
        return Failure.of(new Error("No feature names detected"));
      }

      // Array values are encoded as repeated keys in the query string, so a
      // single name can arrive as a bare string rather than a one-element
      // array. Wrap it; iterating a bare string would otherwise look up each
      // character as if it were a feature name.
      const featureNames = (
        Array.isArray(params.featureNames)
          ? params.featureNames
          : [params.featureNames]
      ) as string[];

      for (const name of featureNames) {
        const matchedGroup = features["embedding"].find(
          (group) => group.name === name,
        );
        if (!matchedGroup) {
          return Failure.of(
            new Error(`Unable to find matching feature: ${name}`),
          );
        }
      }
      return Success.of({
        type: "embedding",
        names: featureNames,
      });
    }
    case "numerical":
    case "prediction": {
      if (
        !params.featureName ||
        !params.featureColumns ||
        params.featureColumns.length === 0
      ) {
        return Failure.of(new Error("Missing feature specification"));
      }

      const matchedFeatureGroup = features[params.featureType].find(
        ({ name }) => name === params.featureName,
      );
      if (!matchedFeatureGroup) {
        return Failure.of(new Error(`Unable to resolve ${params.featureName}`));
      }

      // Ideally a single column is in an array by itself, but when reading from the
      // query params, there's ambiguity here because array values are encoded as
      // repeated key values (e.g. "?key=value1&key=value2") by default. This means
      // that if you have a single value, it's ambiguous if it should be an array or
      // a string from the query param's perspective. We handle that here, and consider
      // any string value that isn't "all" (or a serialized index list) to be a column.
      const rawColumns = params.featureColumns;
      let columns: string[] | "all" | SerializedColumnIndices;
      if (Array.isArray(rawColumns)) {
        columns = rawColumns;
      } else if (rawColumns === "all" || isSerializedColumnIndices(rawColumns)) {
        columns = rawColumns;
      } else {
        columns = [rawColumns];
      }

      return Success.of({
        type: params.featureType,
        name: params.featureName as string,
        featureSets: matchedFeatureGroup.featureSets,
        columns,
      });
    }

    default:
      return Failure.of(
        new Error(`Unrecognized feature type: ${params.featureType}`),
      );
  }
}

// Note(davidsharff): adding a "Similarity" group would conflict with the similarity score
// column handling in the MegaMap score methodology editor.
/**
 * Group columns by their CellProfiler-style prefix, when the feature set
 * looks like CellProfiler output.
 *
 * If any entry of `allColumns` starts with one of the well-known CellProfiler
 * prefixes, each column is filed under the text before its first "_"
 * (columns without "_" go under "Other"). Otherwise everything is returned
 * under a single "all" key.
 *
 * @param allColumns Every column of the feature set; decides whether to group.
 * @param columns The columns to group; defaults to `allColumns`.
 */
export function maybeGroupCellProfilerColumns(
  allColumns: string[],
  columns?: string[],
): {
  [group: string]: string[];
} {
  const columnsToGroup = columns === undefined ? allColumns : columns;

  // Note: not critical that this is super exhaustive because the failure case is
  // just a lack of grouping. Just enumerate the most common CP features...
  const cellProfilerPrefixes = [
    "AreaShape_",
    "Intensity_",
    "RadialDistribution_",
    "Correlation_",
    "Granularity_",
  ];

  // Also note: we test whether grouping is required based on allColumns, even though
  // we're grouping the (possibly filtered) columns argument.
  const looksLikeCellProfiler = allColumns.some((column) =>
    cellProfilerPrefixes.some((cpPrefix) => column.startsWith(cpPrefix)),
  );
  if (!looksLikeCellProfiler) {
    return { all: columnsToGroup };
  }

  const grouped: { [group: string]: string[] } = {};
  for (const column of columnsToGroup) {
    const group = column.includes("_") ? column.split("_")[0] : "Other";
    if (group in grouped) {
      grouped[group].push(column);
    } else {
      grouped[group] = [column];
    }
  }
  return grouped;
}

/**
 * Compare two selections for equality.
 *
 * Two embedding selections are equal when their `names` are deeply equal.
 * Two non-embedding selections are equal when they share a `name` and their
 * `columns` are deeply equal. An embedding never equals a non-embedding.
 */
export function areSelectionsEqual(
  a: FeatureSetSelection,
  b: FeatureSetSelection,
) {
  if (a.type === "embedding") {
    return b.type === "embedding" && isEqual(a.names, b.names);
  }

  return (
    b.type !== "embedding" &&
    a.name === b.name &&
    isEqual(a.columns, b.columns)
  );
}

/**
 * Group embedding plate groups by the prefix of their name.
 *
 * Embeddings need to coalesce the same kinds of embeddings from every channel
 * into one logical listing. Names are expected to look like
 * "<prefix> - <stain>"; hidden features are excluded first, and names that
 * lack either part are skipped with a console warning.
 *
 * @param plateGroups Plate groups to bucket.
 * @returns A plain object mapping each prefix to the plate groups that share it.
 */
export function groupFeaturesByPrefix(plateGroups: FeatureSetPlateGroup[]): {
  [prefix: string]: FeatureSetPlateGroup[];
} {
  // Accumulate in a Map: `get` is honestly typed as possibly-undefined (so no
  // lint suppression is needed for the existence check), and arbitrary prefixes
  // such as "constructor" cannot collide with Object.prototype members.
  const groupsByPrefix = new Map<string, FeatureSetPlateGroup[]>();

  for (const plateGroup of excludeHiddenFeatures(plateGroups)) {
    const { name } = plateGroup;
    const [prefix, stain] = name.split(" - ");
    if (!prefix || !stain) {
      console.warn(`Unrecognized embedding feature name: ${name}`);
      continue;
    }

    const existing = groupsByPrefix.get(prefix);
    if (existing === undefined) {
      groupsByPrefix.set(prefix, [plateGroup]);
    } else {
      existing.push(plateGroup);
    }
  }

  // Callers expect a plain object keyed by prefix.
  return Object.fromEntries(groupsByPrefix);
}

/**
 * Produce a friendlier display label for an embedding prefix, given all of
 * the prefixes that are being shown together.
 *
 *  - A single prefix is always "Unbiased Embeddings".
 *  - With exactly two prefixes, where the first is a normalized version of
 *    the second, they become "Normalized Embeddings" / "Unbiased Embeddings";
 *    any other pair is left untouched.
 *  - With any other count, if one prefix is a substring of all of them, that
 *    one becomes "Unbiased Embeddings" and the others have its first
 *    occurrence removed.
 *  - Otherwise the prefix is returned as-is.
 */
export function maybePrettifyEmbedding(
  prefix: string,
  allPrefixes: string[],
): string {
  const prefixCount = allPrefixes.length;

  if (prefixCount === 1) {
    return "Unbiased Embeddings";
  }

  if (prefixCount === 2) {
    const [first, second] = allPrefixes;
    // The logic here is to identify if an embedding is a normalized version of another.
    // For example:
    // NormalizedM0ControlWellAggregatedSingleCellRepLKNetL384In21kAvgPool128dMinMaxEmbeddings
    // SingleCellRepLKNetL384In21kAvgPool128dMinMaxEmbeddings
    // Where the first is a normalized version of the second.
    const isNormalizedPair =
      first.includes("Normalized") && first.endsWith(second);
    if (!isNormalizedPair) {
      return prefix;
    }
    return prefix.startsWith("Normalized")
      ? "Normalized Embeddings"
      : "Unbiased Embeddings";
  }

  // Maybe find the entry in allPrefixes that is a substring of all other entries in
  // which case we can strip it out and slightly clean up the display.
  const sharedPrefix = allPrefixes.find((candidate) =>
    allPrefixes.every((other) => other.includes(candidate)),
  );
  if (sharedPrefix === undefined) {
    return prefix;
  }
  return prefix === sharedPrefix
    ? "Unbiased Embeddings"
    : prefix.replace(sharedPrefix, "");
}
