#define PERL_NO_GET_CONTEXT
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#include <regex.h>

static SV * new_coerce (SV * type, CV * coerce) {
	dTHX;
	HV * hash = newHV();
	hv_store(hash, "name", 4, type, 0);
	hv_store(hash, "coerce", 6, (SV*)coerce, 0);
	return sv_bless(newRV_noinc((SV*)hash), gv_stashsv(newSVpv("Basic::Coercion::XS", 19), 0));
}

char *get_caller(void) {
	dTHX;
	char *callr = HvNAME((HV*)CopSTASH(PL_curcop));
	return callr;
}

static char* translate_perl_regex(const char *pattern) {
	size_t len = strlen(pattern);
	const char *src = pattern;
	size_t needed = 0;
	while (*src) {
		if (*src == '\\') {
			src++;
			switch (*src) {
				case 'd':
					needed += 5; // "[0-9]"
					src++;
					break;
				case 'D':
					needed += 6; // "[^0-9]"
					src++;
					break;
				case 'w':
					needed += 11; // "[A-Za-z0-9_]"
					src++;
					break;
				case 'W':
					needed += 12; // "[^A-Za-z0-9_]"
					src++;
					break;
				case 's':
					needed += 9; // "[ \t\r\n\f]"
					src++;
					break;
				case 'S':
					needed += 10; // "[^ \t\r\n\f]"
					src++;
					break;
				case '\\':
					needed += 2; // "\\"
					src++;
					break;
				default:
					needed += 2; // "\X"
					if (*src) src++;
					break;
			}
		} else {
			needed++;
			src++;
		}
	}
	needed++;
	char *buf = (char *)malloc(needed);
	if (!buf) return NULL;
	src = pattern;
	char *dst = buf;
	while (*src) {
		if (*src == '\\') {
			src++;
			switch (*src) {
				case 'd':
					strcpy(dst, "[0-9]");
					dst += 5;
					src++;
					break;
				case 'D':
					strcpy(dst, "[^0-9]");
					dst += 6;
					src++;
					break;
				case 'w':
					strcpy(dst, "[A-Za-z0-9_]");
					dst += 11;
					src++;
					break;
				case 'W':
					strcpy(dst, "[^A-Za-z0-9_]");
					dst += 12;
					src++;
					break;
				case 's':
					strcpy(dst, "[ \t\r\n\f]");
					dst += 9;
					src++;
					break;
				case 'S':
					strcpy(dst, "[^ \t\r\n\f]");
					dst += 10;
					src++;
					break;
				case '\\':
					*dst++ = '\\';
					*dst++ = '\\';
					src++;
					break;
				default:
					*dst++ = '\\';
					if (*src) *dst++ = *src++;
					break;
			}
		} else {
			*dst++ = *src++;
		}
	}
	*dst = '\0';
	return buf;
}

AV* split_by_regex(const char *input, const char *pattern) {
	dTHX;
	regex_t regex;
	regmatch_t pmatch[1];
	int reti;
	size_t input_len = strlen(input);
	size_t start = 0, end = 0;
	AV *result = newAV();

	char *translated_pattern = translate_perl_regex(pattern);
	if (!translated_pattern) {
		return result;
	}

	if (regcomp(&regex, translated_pattern, REG_EXTENDED)) {
		free(translated_pattern);
		return result;
	}

	while (start < input_len) {
		reti = regexec(&regex, input + start, 1, pmatch, 0);
		if (!reti && pmatch[0].rm_so != -1) {
			end = start + pmatch[0].rm_so;
			size_t len = end - start;
			SV *token = newSVpvn(input + start, len);
			av_push(result, token);
			start += pmatch[0].rm_eo;
			if (pmatch[0].rm_so == pmatch[0].rm_eo) {
				start++;
			}
		} else {
			SV *token = newSVpvn(input + start, input_len - start);
			av_push(result, token);
			break;
		}
	}

	regfree(&regex);
	free(translated_pattern);
	return result;
}

static char * get_error_message (SV * self, const char * type) {
	dTHX;
	SV ** sv = hv_fetch((HV*)SvRV(self), "message", 7, 0);
	if (sv) {
		STRLEN retlen;
		char * msg = SvPV(*sv, retlen);
		if (retlen > 0) {
			return msg;
		}
	}
	size_t len = 40 + strlen(type);
	char *buffer = (char *)malloc(len);
	snprintf(buffer, len, "value did not pass coerce constraint \"%s\"", type);
	return buffer;
}

MODULE = Basic::Coercion::XS    PACKAGE = Basic::Coercion::XS
PROTOTYPES: ENABLE
FALLBACK: TRUE

SV *
_StrToArray(...)
	CODE:
		SV * self = CvXSUBANY(cv).any_ptr;
		if (!self || !SvOK(self)) {
			croak("StrToArray type constraint not initialized");
		}

		SV * param = ST(0);

		if (!SvOK(param) || SvROK(param) || SvTYPE(param) != SVt_PV) {
			char * custom_error = get_error_message(self, "StrToArray");
			croak("%s", custom_error);
		}

		STRLEN len;
		const char *input = SvPV(param, len);

		SV **pattern_sv = hv_fetch((HV*)SvRV(self), "by", 2, 0);
		const char *pattern = (pattern_sv && SvOK(*pattern_sv)) ? SvPV_nolen(*pattern_sv) : "\\s+";

		AV *result = split_by_regex(input, pattern);
		
		RETVAL = newRV_noinc((SV*)result);
	OUTPUT:
		RETVAL

SV *
StrToArray(...)
	CODE:
		CV *type = newXS(NULL, NULL, __FILE__);
		CvXSUB(type) = (XSUBADDR_t)(
			XS_Basic__Coercion__XS__StrToArray
		);
		SvREFCNT_inc(type);
		RETVAL = new_coerce(newSVpv("StrToArray", 10), type);
		SvREFCNT_inc(type);	
		CvXSUBANY(type).any_ptr = (void *)RETVAL;
		SvREFCNT_inc(RETVAL);
		HV * self = (HV*)SvRV(RETVAL);
		hv_store(self, "coerce", 6, newRV_noinc((SV*)type), 0);
		if (items % 2 != 0) {
			croak("StrToArray type constraint requires an even number of arguments");
		}
		for (int i = 0; i < items; i += 2) {
			SV * key = ST(i);
			SV * value = ST(i + 1);
			if (!SvOK(key) || SvTYPE(key) != SVt_PV) {
				croak("key must be a string");
			}
			if (!SvOK(value)) {
				croak("value must be defined");
			}
			STRLEN keylen;
			char * keystr = SvPV(key, keylen);
			hv_store(self, keystr, keylen, newSVsv(value), 0);
		}
	OUTPUT:
		RETVAL

CV *
coerce(...)
	OVERLOAD: &{}
	CODE:
		SV * self = ST(0);
		if (!SvROK(self) || SvTYPE(SvRV(self)) != SVt_PVHV) {
			croak("first argument must be a Basic::Coercion::XS object");
		}
		SV * cb = *hv_fetch((HV*)SvRV(self), "coerce", 6, 0);
		RETVAL = (CV*)SvRV(cb);
	OUTPUT:
		RETVAL

void
import( ...)
	CODE:
		char *pkg = get_caller();
		STRLEN retlen;
		int i = 1;
		for (i = 1; i < items; i++) {
			char * ex = SvPV(ST(i), retlen);
			char name [strlen(pkg) + 2 + retlen];
			sprintf(name, "%s::%s", pkg, ex);
			if (strcmp(ex, "StrToArray") == 0) {
				newXS(name, XS_Basic__Coercion__XS_StrToArray, __FILE__);
			}
		}
