import { captureException } from "@sentry/react";
import { FileFormat, FILE_FORMATS_BY_KEY } from "types/FileFormats";
import { FileStructure, FILE_STRUCTURES_BY_KEY } from "types/FileStructures";
import { FileType, FILE_TYPES_BY_KEY } from "types/FileTypes";
import { parquetMetadata } from "hyparquet";
import assert from "assert";
import { File as H5WasmFile, Group, h5wasm } from "h5wasm";
import { isNonNull } from "./isNonNull";

/**
 * Extracts the lower-cased file extension from a filename.
 *
 * The extension is the text after the last "." in the name. A name that
 * contains no "." has no extension, so an empty string is returned rather
 * than the whole filename (otherwise a file literally named "csv" would be
 * treated as a csv file).
 * @param filename name of the file, e.g. "recording.ISXD"
 * @returns the extension without the leading dot (e.g. "isxd"), or "" when
 * the filename has no extension
 */
const getFileExtensionFromFilename = (filename: string) => {
  const lastDotIndex = filename.lastIndexOf(".");
  if (lastDotIndex === -1) {
    // no dot at all: there is no extension to report
    return "";
  }
  return filename.slice(lastDotIndex + 1).toLowerCase();
};

/**
 * Resolves the file format for a filename based on its extension.
 *
 * "tif" and "tiff" both resolve to the "tiff" format. Any extension that is
 * not an own key of `FILE_FORMATS_BY_KEY` resolves to the "unknown" format.
 * @param filename name of the file
 * @returns the matching entry of `FILE_FORMATS_BY_KEY`, or its "unknown" entry
 */
export const getFileFormatFromFilename = (filename: string) => {
  const fileExtension = getFileExtensionFromFilename(filename);
  if (["tif", "tiff"].includes(fileExtension)) {
    return FILE_FORMATS_BY_KEY["tiff"];
  }
  // Only accept own keys: a plain property lookup would also resolve inherited
  // members such as "constructor" (e.g. for a file named "x.constructor"),
  // which is not a file format and would bypass the `??` fallback below.
  const knownFormat = Object.prototype.hasOwnProperty.call(
    FILE_FORMATS_BY_KEY,
    fileExtension,
  )
    ? FILE_FORMATS_BY_KEY[fileExtension]
    : undefined;
  return knownFormat ?? FILE_FORMATS_BY_KEY["unknown"];
};

/****************
 TYPESCRIPT TYPES
 ****************/

/**
 * Shape of the JSON metadata parsed from an isxd file by `readIsxdMetadata`.
 * Only `type` is declared here; any other fields are elided (see `// ...`).
 */
type IsxdMetadata = {
  // numeric isxd datatype id, used as the lookup key into `isxdDatatypeMapping`
  type: number;
  // ...
};

/******************
 TYPESCRIPT HELPERS
 ******************/

/**
 * Interprets the first four bytes of a blob as an unsigned 32-bit
 * little-endian integer. Any bytes after the first four are ignored.
 * @param blob blob holding at least four bytes
 * @returns the decoded unsigned integer (the returned promise rejects when
 * the blob is shorter than four bytes)
 */
const readIntegerFromBlob = async (blob: Blob) => {
  const LITTLE_ENDIAN = true;
  const view = new DataView(await blob.arrayBuffer());
  return view.getUint32(0, LITTLE_ENDIAN);
};

/******************
 ERROR HANDLING
 ******************/

/**
 * Reports an error to Sentry at "warning" level, attaching the file's name,
 * type and size plus a descriptive message as extra context.
 * @param file file the error relates to
 * @param error the error (or thrown value) to report
 * @param message human-readable description of what failed
 */
const captureFileError = (file: File, error: unknown, message: string) => {
  const { name: fileName, type: fileType, size: fileSize } = file;
  captureException(error, {
    level: "warning",
    extra: { fileName, fileType, fileSize, message },
  });
};

/******************
 SCRIPT TRANSLATION
 ******************/

// This mapping is copied over from another repo:
// https://github.com/inscopix/ideas-toolbox-idps/blame/develop/toolbox/utils/identify_file.py
// Mapping: isxd datatype id -> [file type key, file structure key].
// Ids 2 and 6 use plain string literals instead of FILE_TYPES_BY_KEY entries
// because those file types are not currently supported on IDEAS.
// `as const` keeps every entry a readonly tuple of literal keys.
const isxdDatatypeMapping = {
  0: [FILE_TYPES_BY_KEY.miniscope_movie.key, FILE_STRUCTURES_BY_KEY.movie.key],
  1: [FILE_TYPES_BY_KEY.cell_set.key, FILE_STRUCTURES_BY_KEY.binary.key],
  2: [
    // not currently supported on IDEAS
    "isxd_behavioral_movie",
    FILE_STRUCTURES_BY_KEY.movie.key,
  ],
  3: [
    FILE_TYPES_BY_KEY.gpio_data.key,
    FILE_STRUCTURES_BY_KEY.sparse_time_series.key,
  ],
  4: [FILE_TYPES_BY_KEY.miniscope_image.key, FILE_STRUCTURES_BY_KEY.image.key],
  5: [FILE_TYPES_BY_KEY.neural_events.key, FILE_STRUCTURES_BY_KEY.binary.key],
  6: [
    // not currently supported on IDEAS
    "isxd_metrics",
    FILE_STRUCTURES_BY_KEY.binary.key,
  ],
  7: [FILE_TYPES_BY_KEY.imu_data.key, FILE_STRUCTURES_BY_KEY.time_series.key],
  8: [FILE_TYPES_BY_KEY.vessel_set.key, FILE_STRUCTURES_BY_KEY.binary.key],
} as const;

/**
 * Reads the JSON metadata footer of an isxd file.
 *
 * Layout read from the end of the file:
 *   [ JSON footer (footerSize bytes) ][ 1 byte ][ 8-byte size field ]
 * `footerSize` is decoded by `readIntegerFromBlob` from the start of the
 * 8-byte size field.
 *
 * Any failure (file too short, invalid JSON, footer that is not a JSON
 * object) is reported through `captureFileError` and yields `undefined`; this
 * function never rejects for those cases.
 * @param file the isxd file to read
 * @returns the parsed metadata object, or `undefined` when it cannot be read
 */
const readIsxdMetadata = async (
  file: File,
): Promise<IsxdMetadata | undefined> => {
  try {
    const footerSizeOffset = 8;
    const footerSize = await readIntegerFromBlob(
      file.slice(-footerSizeOffset),
    );
    // NOTE(review): the extra +1 skips a single byte between the JSON footer
    // and the size field — presumably a terminator; confirm against the isxd
    // format specification.
    const offset = footerSize + footerSizeOffset + 1;
    const footerBlob = file.slice(-offset, -offset + footerSize);
    const metadata: unknown = JSON.parse(await footerBlob.text());
    // JSON.parse can legitimately return null, a primitive or an array; none
    // of those is usable metadata, so treat them as a parse failure instead of
    // handing callers a value that only pretends to be IsxdMetadata.
    if (
      typeof metadata !== "object" ||
      metadata === null ||
      Array.isArray(metadata)
    ) {
      throw new Error("isxd metadata footer is not a JSON object");
    }
    return metadata as IsxdMetadata;
  } catch (error) {
    captureFileError(file, error, "Failed to parse isxd metadata");
    return undefined;
  }
};

/**
 * Identifies a file's type, format and structure.
 *
 * The file format is derived from the file extension alone. The file type and
 * structure are derived from the extension and, for some formats, from the
 * file contents:
 * - isxd: the datatype id in the metadata footer is looked up in
 *   `isxdDatatypeMapping`
 * - parquet: recognised as experiment annotations when the pandas metadata
 *   declares an integer "frame", a float "time" and a categorical "state"
 *   column
 * - h5: recognised as timestamp events when it has a non-empty "events" group
 *   whose every child group contains a "Time" key
 *
 * Content inspection is best effort: when it fails, the file type and
 * structure stay "unknown".
 * @param file the file to identify
 * @returns `fileType` and `fileFormat` as ids, and `fileStructure` as a key
 */
export const identifyFile = async (file: File) => {
  // initialize file info
  let fileType: FileType["key"] | "isxd_behavioral_movie" | "isxd_metrics" =
    FILE_TYPES_BY_KEY["unknown"].key;
  let fileStructure: FileStructure["key"] =
    FILE_STRUCTURES_BY_KEY["unknown"].key;
  const fileFormat: FileFormat["key"] = getFileFormatFromFilename(
    file.name,
  ).key;
  const fileExtension = getFileExtensionFromFilename(file.name);

  // extract file type, file structure, and file metadata
  if (fileExtension === "isxd") {
    // ISXD File
    const metadata = await readIsxdMetadata(file);
    const isxdDatatype = metadata?.type;
    // Only map datatype ids that are actually in the table. An unknown or
    // missing id would otherwise index `undefined` and throw, rejecting the
    // whole identification instead of leaving the file as "unknown".
    if (
      isxdDatatype !== undefined &&
      Object.prototype.hasOwnProperty.call(isxdDatatypeMapping, isxdDatatype)
    ) {
      const mapping =
        isxdDatatypeMapping[isxdDatatype as keyof typeof isxdDatatypeMapping];
      fileType = mapping[0];
      fileStructure = mapping[1];
    }
  } else if (fileExtension === "isxc") {
    // Compressed ISXD File
    fileType = FILE_TYPES_BY_KEY["miniscope_movie"].key;
    fileStructure = FILE_STRUCTURES_BY_KEY["movie"].key;
  } else if (fileExtension === "isxb") {
    // Behavior Movie
    fileType = FILE_TYPES_BY_KEY["nvision_movie"].key;
    fileStructure = FILE_STRUCTURES_BY_KEY["movie"].key;
  } else if (fileExtension === "gpio") {
    fileType = FILE_TYPES_BY_KEY["gpio_data"].key;
    fileStructure = FILE_STRUCTURES_BY_KEY["binary"].key;
  } else if (fileExtension === "imu") {
    fileType = FILE_TYPES_BY_KEY["imu_data"].key;
    fileStructure = FILE_STRUCTURES_BY_KEY["binary"].key;
  } else if (["mp4", "avi"].includes(fileExtension)) {
    fileType = FILE_TYPES_BY_KEY["movie"].key;
    fileStructure = FILE_STRUCTURES_BY_KEY["movie"].key;
  } else if (fileExtension === "png") {
    fileType = FILE_TYPES_BY_KEY["image"].key;
    fileStructure = FILE_STRUCTURES_BY_KEY["image"].key;
  } else if (fileExtension === "csv" || fileExtension === "tsv") {
    // Do we want to process generic csv and tsv just to extract num col and num rows?
    fileStructure = FILE_STRUCTURES_BY_KEY["table"].key;
  } else if (fileExtension === "nwb") {
    fileType = FILE_TYPES_BY_KEY.nwb_data.key;
    fileStructure = FILE_STRUCTURES_BY_KEY.binary.key;
  } else if (["tif", "tiff"].includes(fileExtension)) {
    fileType = FILE_TYPES_BY_KEY.image.key;
    fileStructure = FILE_STRUCTURES_BY_KEY.image_stack.key;
  } else if (fileExtension === "zip") {
    // file format already set based on file extension
    // not processed for now
  } else if (fileExtension === "parquet") {
    try {
      const buffer = await file.arrayBuffer();
      const parquetMetadataData = parquetMetadata(buffer) as {
        key_value_metadata?: { key: string; value: string }[];
      };

      // https://pandas.pydata.org/pandas-docs/stable/development/developer.html#column-metadata
      const PANDAS_INT_TYPES = [
        "int8",
        "int16",
        "int32",
        "int64",
        "uint8",
        "uint16",
        "uint32",
        "uint64",
      ] as const;
      const PANDAS_FLOAT_TYPES = ["float16", "float32", "float64"] as const;
      const PANDAS_CATEGORICAL_TYPE = "categorical" as const;

      // pandas encodes metadata in the parquet file to a freeform metadata field called key_value_metadata
      // this supplies information outside of the generic parquet specification when the file is saved from pandas or loaded into pandas
      // https://pandas.pydata.org/pandas-docs/stable/development/developer.html#column-metadata
      const pandasMetadata = JSON.parse(
        parquetMetadataData.key_value_metadata?.find(
          ({ key }) => key === "pandas",
        )?.value ?? JSON.stringify({ columns: [] }),
      ) as {
        columns: {
          name: string;
          pandas_type: string;
        }[];
      };

      const frameCol = pandasMetadata.columns.find(
        ({ name }) => name === "frame",
      );
      const timeCol = pandasMetadata.columns.find(
        ({ name }) => name === "time",
      );
      const stateCol = pandasMetadata.columns.find(
        ({ name }) => name === "state",
      );

      // each failed assertion throws, which leaves the file as unknown
      assert(
        PANDAS_INT_TYPES.some((intType) => intType === frameCol?.pandas_type),
      );

      assert(
        PANDAS_FLOAT_TYPES.some(
          (floatType) => floatType === timeCol?.pandas_type,
        ),
      );

      assert(stateCol?.pandas_type === PANDAS_CATEGORICAL_TYPE);

      fileType = FILE_TYPES_BY_KEY.experiment_annotations.key;
      fileStructure = FILE_STRUCTURES_BY_KEY.table.key;
    } catch {
      // no error here, file is just left as unknown
    }
  } else if (fileExtension === "h5") {
    try {
      const buffer = await file.arrayBuffer();

      const { FS } = await h5wasm.ready;

      // Everything from here to the end of the `finally` below is synchronous,
      // so concurrent identifyFile calls cannot interleave on this shared path.
      const WASM_FILENAME = "WASM_FILENAME";
      let wasmH5File: H5WasmFile | undefined;

      try {
        FS.writeFile(WASM_FILENAME, new Uint8Array(buffer));
        wasmH5File = new H5WasmFile(WASM_FILENAME, "r");

        const keys = wasmH5File.keys();
        assert(keys.some((key) => key === "events"));
        const events = wasmH5File.get("events") as Group | null;
        assert(isNonNull(events));
        const eventKeys = events.keys();
        assert(eventKeys.length > 0);

        for (const eventKey of eventKeys) {
          const eventGroup = events.get(eventKey) as Group | null;
          assert(isNonNull(eventGroup));
          const eventGroupKeys = eventGroup.keys();
          assert(eventGroupKeys.some((key) => key === "Time"));
        }

        fileType = FILE_TYPES_BY_KEY.timestamp_events.key;
        fileStructure = FILE_STRUCTURES_BY_KEY.sparse_time_series.key;
      } finally {
        wasmH5File?.close();
        // Remove the copy written to the wasm virtual filesystem; otherwise
        // the file's bytes stay there after identification is done.
        FS.unlink(WASM_FILENAME);
      }
    } catch {
      // no error here
      // this means it's not an events file and will set to unknown
      // (cleanup failures are swallowed too: identification is best effort)
    }
  }

  // define structure to store file information
  const fileInfo = {
    // isxd types that are not supported on IDEAS have no FILE_TYPES_BY_KEY
    // entry and therefore fall back to the "unknown" id
    fileType:
      FILE_TYPES_BY_KEY[fileType]?.id ?? FILE_TYPES_BY_KEY["unknown"].id,
    fileFormat: FILE_FORMATS_BY_KEY[fileFormat].id,
    fileStructure,
  };

  return fileInfo;
};
