import { z } from 'zod';
import Papa from 'papaparse';
import { DatasetConfiguration } from '../../../domain/datasets';
import { DatasetParseError, DatasetParserProvider } from '../../../app/datasets';
import { FileParser } from '../../../app/file';
import { isCurrencyType, isDateType, isIntegerType } from '../../../domain/attributes';
import { DatasetParserConfig } from './parserDatasetConfig';

/**
 * Creates a provider that builds a file parser for a given dataset.
 *
 * @param config - Parser options. Accepted for API compatibility; this
 *   function does not read it.
 * @returns A provider whose `create` builds a parser from the dataset's
 *   `configuration`.
 */
export function createDatasetParserProvider(
    config: DatasetParserConfig = {}
): DatasetParserProvider {
    return {
        create: (dataset) => createParser(dataset.configuration),
    };
}

/**
 * Builds the zod object schema used to validate a single dataset row.
 *
 * Each entry of `item.schema` contributes one key to the resulting object
 * schema:
 * - date types are validated as a coerced `z.date`,
 * - integer and currency types are validated as a coerced `z.number`,
 * - everything else is validated as a `z.string`.
 *
 * Properties whose `isRequired` flag is falsy are made optional, whatever
 * their type.
 *
 * @param item - Dataset configuration whose `schema` lists the properties.
 * @returns A zod object schema keyed by each property's `key`.
 */
function createZodSchema(item: DatasetConfiguration) {
    return z.object(
        item.schema.reduce((acc, property) => {
            let valueSchema: z.Schema;
            // The date check comes first so it wins if a type matches more
            // than one predicate.
            if (isDateType(property.type)) {
                valueSchema = z.date({
                    coerce: true,
                    required_error: 'is required but missing',
                });
            } else if (isIntegerType(property.type) || isCurrencyType(property.type)) {
                valueSchema = z.number({
                    coerce: true,
                    required_error: 'is required but missing',
                });
            } else {
                valueSchema = z.string({
                    required_error: 'is required but missing',
                });
            }
            // Optionality is applied after the type is chosen so that it
            // covers every type uniformly, including dates: a missing
            // optional value is accepted instead of being coerced.
            if (!property.isRequired) {
                valueSchema = valueSchema.optional();
            }
            return { ...acc, [property.key]: valueSchema };
        }, {})
    );
}

/**
 * Builds a file parser that reads a CSV file with a header row and returns
 * its rows validated against the dataset's schema.
 *
 * The returned parser works in four steps:
 * 1. parse the file with Papa Parse, replacing whitespace-only cells with
 *    `undefined`;
 * 2. drop columns whose header is empty or whitespace-only;
 * 3. keep only the keys declared in `item.schema` (a key with no matching
 *    column is present with the value `undefined`);
 * 4. validate every row with the zod schema.
 *
 * @param item - Dataset configuration describing the expected properties.
 * @returns An async parser resolving to the validated rows.
 * @throws DatasetParseError from the returned parser, for the first row that
 *   fails validation; it receives the row, its zero-based index and the zod
 *   error. Errors reported through Papa Parse's `error` callback reject the
 *   returned promise.
 */
function createParser(item: DatasetConfiguration): FileParser<Record<string, unknown>[]> {
    const rowSchema = createZodSchema(item);

    return async (file) => {
        // Papa Parse is callback-based, so wrap it in a promise.
        const rawRows = await new Promise<unknown[]>((resolve, reject) => {
            Papa.parse(file, {
                header: true,
                skipEmptyLines: 'greedy',
                // Whitespace-only cells are reported as undefined.
                transform: (cell) => (cell.trim() === '' ? undefined : cell),
                complete: (results) => resolve(results.data),
                error: (error) => reject(error),
            });
        });

        // Remove columns that have no usable header.
        const withoutBlankColumns = rawRows.map((rawRow) =>
            Object.fromEntries(
                Object.entries(rawRow as object).filter(([header]) => header.trim() !== '')
            )
        );

        // Project every row onto exactly the keys declared in the schema.
        const projected = withoutBlankColumns.map((row) => {
            const picked: Record<string, unknown> = Object.fromEntries(
                item.schema.map((property) => [property.key, row[property.key]] as const)
            );
            return picked;
        });

        // Validate in order; the first invalid row aborts the whole parse.
        return projected.map((row, rowIndex) => {
            const outcome = rowSchema.safeParse(row);
            if (outcome.success) {
                return outcome.data;
            }
            throw new DatasetParseError(row, rowIndex, outcome.error);
        });
    };
}
