#include <Windows.h>
#include "fvec.h"
#include "SimClothEngine.h"
#include "CollisionMesh.h"

// Set the simulation environment
void SimClothEngine::setConnection(SimClothConnection *connection) {
	this->connection=connection;
}

// Prepares the simulation
void SimClothEngine::beginSimulation(void) {
	totalParticles=0;
	for (int i=connection->numDynamicObjects()-1; i>=0; i--) {
		DynamicObject *obj=connection->getDynamicObject(i);
		if (!obj->isDeflector()) totalParticles+=obj->numParts;
		cdetector.addCollisionMesh(obj);
		*(objects.newElement())=obj;
		obj->collisionPriority=i;
	}
	connection->getSimClothEngineParams(params);

	// Init the implicit target
	r=(Vlado::Vector*) _mm_malloc(sizeof(Vlado::Vector)*(totalParticles*2), 16);
	p=(Vlado::Vector*) _mm_malloc(sizeof(Vlado::Vector)*(totalParticles*2), 16);
	r1=(Vlado::Vector*) _mm_malloc(sizeof(Vlado::Vector)*(totalParticles*2), 16);

	integrationSteps=1;
	fastSelfCollide=true;

	substeps=params.substeps;
	incStep=3;
	sum=0;
	numSum=0;
	substepWait=0;
	subdivLevel=0;
	okSteps=4;

	collisionsOnly=false;
	savePos=true;
}

// Make one simulation step
int SimClothEngine::step(float timeStep) {
	int err=0;

	fastSelfCollide=false;
	int first=true;
	// cdetector.update(true, timeStep);
	// cdetector.collide(dt, params.collisionTolerance, CT_VERTEX_FACE | CT_EDGE_EDGE, CT_VERTEX_FACE | CT_EDGE_EDGE, fastSelfCollide);

	TCHAR str[512];
	_stprintf(str, "Substeps=%i\n", substeps);
	OutputDebugStr(str);

	if (params.checkIntersections) {
		for (int i=0; i<objects.count(); i++) {
			DynamicObject &obj=*(objects[i]);
			for (int k=0; k<obj.numParts; k++) {
				Vlado::Particle<DynamicObject> &p=obj.parts[k];
				p.sv=p.v;
				p.sp=p.p;
				p.spp=p.pp;
			}
		}
	}

	while (1) {
		if (params.checkIntersections) savePos=true;

		float inv=1.0f/float(substeps);
		float dt=timeStep*inv;

		connection->setFrameProgressPos(0);
		connection->setFrameProgressRange(0, substeps);

		TCHAR str[512];

		for (int k=0; k<substeps; k++) {
			if (savePos) {
				for (int i=0; i<objects.count(); i++) {
					DynamicObject &obj=*(objects[i]);
					for (int k=0; k<obj.numParts; k++) {
						Vlado::Particle<DynamicObject> &p=obj.parts[k];
						p.pp=p.p;
					}
				}
			}

			err=makeSubstep(dt, k, float(k)*inv, inv);
			OutputDebugString(".");

			// If there are errors, exit
			if (err) return err;
			if (connection->getAbort()) return -2;

			cdetector.update(first, dt);
			first=false;
			cdetector.collide(dt, params.collisionTolerance, CT_VERTEX_FACE | CT_EDGE_EDGE, CT_VERTEX_FACE | CT_EDGE_EDGE, fastSelfCollide);
			connection->setNumCollisions(cdetector.clist->count());

			int correctPos=true;
			cdetector.clist->resetCounter();
			Vlado::Collision<DynamicObject> *c;
			while (c=cdetector.clist->nextElement()) if (c->ipath) {
				float k=c->relPos()*c->normal;
				if (k/c->dist<-1e-3f) {
					correctPos=false;
					break;
				}
			}

			if (!correctPos) savePos=false;
			else savePos=true;

			connection->setFrameProgressPos(k+1);
		}

		if (params.checkIntersections && cdetector.checkIntersection() && subdivLevel<params.adaptiveSubdivs) {
			connection->displayMessage(_T("Reducing time step (intersections found)"));
			OutputDebugString("Reducing time step (intersections found)\n");
			for (int i=0; i<objects.count(); i++) {
				DynamicObject &obj=*(objects[i]);
				for (int k=0; k<obj.numParts; k++) {
					Vlado::Particle<DynamicObject> &p=obj.parts[k];
					p.v=p.sv;
					p.p=p.sp;
					p.pp=p.spp;
				}
			}
			substeps*=2;
			subdivLevel++;
			substepWait=incStep;
		} else {
			if (substepWait>0) {
				connection->displayMessage(_T(""));
				substepWait--;
			} else if (subdivLevel>0) {
				connection->displayMessage(_T("Increasing time step"));
				substeps/=2;
				subdivLevel--;
				substepWait=incStep;
			}
			break;
		}

		OutputDebugString("\n");
	}

/*
		// Detect collisions
		if (params.checkIntersections) {
			cdetector.update(first, dt);
			if (cdetector.checkIntersection() && substeps<(1<<params.adaptiveSubdivs)) {
				OutputDebugString("Reducing time step\n");
				for (i=0; i<objects.count(); i++) {
					DynamicObject &obj=*(objects[i]);
					for (int k=0; k<obj.numParts; k++) {
						Vlado::Particle<DynamicObject> &p=obj.parts[k];
						p.v=p.sv;
						p.p=p.sp;
						p.pp=p.spp;
					}
				}

				substeps*=2;
				subdivLevel++;

				inv=1.0f/float(substeps);
				dt=timeStep*inv;

				substepWait=3;
				if (k>0) k--;
				k*=2;
				continue;
			}
		}

		k++;

		if (substepWait>0) substepWait--;
		else if (subdivLevel>0 && (k&1)==1) {
			OutputDebugString("Increasing time step\n");
			substeps/=2;
			subdivLevel--;
			inv=1.0f/float(substeps);
			dt=timeStep*inv;
			substepWait=3;
			k/=2;
		}

		for (int i=0; i<objects.count(); i++) {
			DynamicObject &obj=*(objects[i]);
			for (int k=0; k<obj.numParts; k++) {
				Vlado::Particle<DynamicObject> &p=obj.parts[k];
				p.sv=p.v;
				p.sp=p.p;
				p.spp=p.pp;
			}
		}
	}*/

	return err;
}

// Cleanes up after simulation
void SimClothEngine::endSimulation(void) {
	cdetector.freeData();
	objects.freeData();
	if (r) _mm_free(r);
	if (p) _mm_free(p);
	if (r1) _mm_free(r1);
	r=r1=p=NULL;
}

int SimClothEngine::makeSubstep(float dt, int first, float substepOffset, float substepSize) {
	int err=0;
	err=implicitEuler(dt, substepOffset, substepSize);
	return err;
/*
	// Save the particle positions
	for (int i=0; i<objects.count(); i++) {
		DynamicObject &obj=*(objects[i]);
		for (int k=0; k<obj.numParts; k++) {
			Vlado::Particle<DynamicObject> &p=obj.parts[k];
			p.sv=p.v;
			p.sp=p.p;
			p.spp=p.pp;
		}
	}

	float s=0.0f;
	float ds=1.0f;
	fastSelfCollide=true;

	int frst=true;
	while (1) {
		if (GetAsyncKeyState(VK_ESCAPE)) break;
		// Make one step
		// cdetector.update(first==0, dt);
		// cdetector.collide(dt, params.collisionTolerance, CT_VERTEX_FACE | CT_EDGE_EDGE, CT_VERTEX_FACE | CT_EDGE_EDGE, fastSelfCollide);
		int err=implicitEuler(dt*ds, substepOffset+substepSize*s, substepSize*ds);
		if (err) return err;

		s+=ds;

		// Check for intersections
		cdetector.update(frst, dt);
		frst=false;
		cdetector.collide(dt, params.collisionTolerance, CT_VERTEX_FACE | CT_EDGE_EDGE, CT_VERTEX_FACE | CT_EDGE_EDGE, fastSelfCollide);

		// Check if there are intersections
		int correctPos=true;
		cdetector.clist->resetCounter();
		Vlado::Collision<DynamicObject> *c;
		while (c=cdetector.clist->nextElement()) {
			if (c->relPos()*c->normal<0.0f) { correctPos=false; break; }
		}

		// Save the particle positions if we are at a correct position
		if (correctPos) {
			for (i=0; i<objects.count(); i++) {
				DynamicObject &obj=*(objects[i]);
				for (int k=0; k<obj.numParts; k++) {
					Vlado::Particle<DynamicObject> &p=obj.parts[k];
					p.pp=p.p;
				}
			}
		}

		if (!params.checkIntersections || !cdetector.checkIntersection()) {
			if (fabsf(1.0f-s)<1e-6f) {
				// All is ok
				fastSelfCollide=true;
				return 0;
			} else {
				// Try to make the remaining step
				ds=1.0f-s;

				for (int i=0; i<objects.count(); i++) {
					DynamicObject &obj=*(objects[i]);
					for (int k=0; k<obj.numParts; k++) {
						Vlado::Particle<DynamicObject> &p=obj.parts[k];
						p.sv=p.v;
						p.sp=p.p;
						p.spp=p.pp;
					}
				}

				continue;
			}
		}

		// Cannot subdivide further, just go on simulating
		if (ds<1.0f/float(1<<params.adaptiveSubdivs)) continue;

		// An intersection has occurred, must increase the substeps
		// First roll back
		s-=ds;
		for (i=0; i<objects.count(); i++) {
			DynamicObject &obj=*(objects[i]);
			for (int k=0; k<obj.numParts; k++) {
				Vlado::Particle<DynamicObject> &p=obj.parts[k];
				p.v=p.sv;
				p.p=p.sp;
				p.pp=p.spp;
			}
		}
		fastSelfCollide=false;
		cdetector.collide(dt, params.collisionTolerance, CT_VERTEX_FACE | CT_EDGE_EDGE, CT_VERTEX_FACE | CT_EDGE_EDGE, fastSelfCollide);

		ds*=0.5f;
	}

	return 0;*/
}

/**************************************
	Integration

	SimCloth uses implicit Euler as the integration method:

	v(t+dt)=v(t)+M^(-1)*f(x+dt)*dt
	x(t+dt)=x(t)+v(x+dt)*dt

	Both the position and the velocity are considered unknowns, in difference from
	other methods like Baraff and Volino, where only the velocity is considered an
	unknown and the position is deduced from that; or Verlet integration where the
	position is considered unknown and the velocity is derived from that. Reliable
	collision response requires us to have control over both the position and velocity
	of the particles - in one time step. Otherwise we stumble across various
	difficulties and artifacts.

***************************************/

enum {
	solve_position,
	solve_velocity,
	solve_positionAndVelocity
} solveFor=solve_positionAndVelocity;

// The number of variables is equal to totalParticles*2 - these the particles'
// position and velocity
int SimClothEngine::getNumVars(void) {
	if (solveFor==solve_positionAndVelocity) return totalParticles*2;
	return totalParticles;
}

void SimClothEngine::reset(float frameTime, float substepSize) {
	for (int i=0; i<objects.count(); i++) {
		DynamicObject &obj=*(objects[i]);
		for (int k=0; k<obj.numParts; k++) {
			Vlado::Particle<DynamicObject> &p=obj.parts[k];
			if (p.invMass>1e-6f) {
				p.np=p.p;
				p.nv=p.v;
				p.constraintVector.makeZero();
			} else {
				p.p=p.np=p.q0+(p.q1-p.q0)*frameTime;
				p.v=p.nv=p.qv0+(p.qv1-p.qv0)*frameTime;
				p.constraintVector.makeZero();
			}
		}
	}
}

// Adds the given vectors to the simulated variables
void SimClothEngine::addChanges(Vlado::Vector *x, Vlado::real scale) {
	int cnt=0;
	for (int i=0; i<objects.count(); i++) {
		DynamicObject &obj=*(objects[i]);
		if (obj.objType!=obj_deflector) {
			for (int k=0; k<obj.numParts; k++) {
				Vlado::Particle<DynamicObject> &p=obj.parts[k];

				p.np+=x[cnt++]*scale;
				p.nv+=x[cnt++]*scale*512.0f;

				// p.np+=x[cnt++]*scale;
				// p.nv=(p.np-p.p)*dt_inv;

				// p.nv+=x[cnt++]*scale*dt_inv;
				// p.np=p.p+p.nv*dt;
			}
		}
	}
}

/**************************************
	Computes the difference between the current variables and the target state
	This is computed from the above formulas as follows:

	v(t+dt)=v(t)+M^(-1)*f(x+dt)*dt translates into
	0=v(t)+M^(-1)*f(x+dt)*dt-v(t+dt)
	0=p.v+(p.invMass*dt)*p.f-p.nv for each particle p

	x(t+dt)=x(t)+v(x+dt)*dt translates into
	0=x(t)+v(x+dt)*dt-x(t+dt)
	0=p.p+nv*dt-p.np for each particle p

	The condition for the velocity is additionally scaled by dt to bring the positions and
	velocities to roughly the same scale, since the velocities are usually much larger numbers
	that the positions, which slows down the implicit solver, since higher precision is required
***************************************/

inline Vlado::Vector cut(Vlado::Vector &a, Vlado::Vector &b, float k) {
	float lenSq=b.lengthSqr();
	if (lenSq<1e-6f) return a;
	return a-b*(a*b)*k/(lenSq);
}

void SimClothEngine::computeChanges(Vlado::Vector *x, int firstCall) {
	if (!collisionsOnly) applyForces(dt, firstCall);
	applyCollisions(dt);

	int cnt=0;
	for (int i=0; i<objects.count(); i++) {
		DynamicObject &obj=*(objects[i]);
		if (obj.objType!=obj_deflector) {
			for (int k=0; k<obj.numParts; k++) {
				Vlado::Particle<DynamicObject> &p=obj.parts[k];

				Vlado::Vector nv=p.v+p.f*(p.invMass*dt);
				Vlado::Vector np=p.p+nv*dt;

				x[cnt++]=cut(np-p.np, p.posImp[1], 0.8f)+p.posImp[0]+p.posImp[1];
				x[cnt++]=(cut(nv-p.nv, p.velImp[1], 0.8f)+p.velImp[0]+p.velImp[1])/512.0f;
				
				// Vlado::Vector np=2.0f*p.p-p.rp+p.f*(p.invMass*dt*dt)+(p.velImp[0]+p.velImp[1])*dt;
				// x[cnt++]=cut(np-p.np, p.posImp[1], 0.8f)+(p.posImp[0]+p.posImp[1]);

				// Vlado::Vector c=(p.velImp[0]+p.velImp[1])+(p.posImp[0]+p.posImp[1])*dt_inv;
				// Vlado::Vector nv=p.v+p.f*(p.invMass*dt);
				// x[cnt++]=(cut(nv-p.nv, c, 0.8f)+c)*dt;
			}
		}
	}
}

// Integration - computes the position and velocity of the particles at the next moment of time
int SimClothEngine::implicitEuler(float timeStep, float substepOffset, float substepSize) {
	dt=timeStep/float(integrationSteps);
	dt_inv=1.0f/this->dt;

	int adaptive=true;

	int err=0;

	int numIter=0;

	for (int k=0; k<integrationSteps;) {
		if (connection->getAbort()) { err=-2; break; }
		// Initialize the "next" state variables and compute the positions
		// and velocties of the fixed particles
		reset(substepOffset+substepSize*float(k+1)/float(integrationSteps), substepSize);

		// Solve for the next state positions and velocities
		int numSteps=(params.useSSE)?
			Vlado::cgSolveVectorTarget(*this, params.solverPrecision, 100, 0.9f, true) : 
			Vlado::cgSolveVectorTarget(*this, params.solverPrecision, 100, 0.9f, false);
		if (numSteps<0) { err=-1; break; }

		if (numSteps<100) numIter+=numSteps;

		if (!adaptive) k++;
		else {
			// Adaptively determine the timestep
			if (numSteps>=40 && integrationSteps<16) {
				integrationSteps*=2;
				dt*=0.5f;
				dt_inv*=2.0f;
				k*=2;
				continue;
			}
			k++;
			if (numSteps>20 && integrationSteps<16) {
				integrationSteps*=2;
				dt*=0.5f;
				dt_inv*=2.0f;
				k*=2;
			} else if (numSteps<4 && integrationSteps>1 && k/2<integrationSteps/2 && (k&1)==0) {
				integrationSteps/=2;
				dt*=2.0f;
				dt_inv*=0.5f;
				k/=2;
			}
		}

		// Set the computed positions and velocities as current
		for (int i=0; i<objects.count(); i++) {
			DynamicObject &obj=*(objects[i]);
			if (obj.objType!=obj_deflector) {
				for (int k=0; k<obj.numParts; k++) {
					Vlado::Particle<DynamicObject> &p=obj.parts[k];
					p.rp=p.p;
					p.p=p.np;
					p.v=p.nv;
				}
			}
		}
	}

	connection->setNumCGIterations(numIter);
	return err;
}

// Compute the forces acting on the particles
void SimClothEngine::applyForces(float dt, int firstCall) {
	for (int i=0; i<objects.count(); i++) {
		DynamicObject &obj=*objects[i];
		if (obj.objType!=obj_deflector) obj.clearForces();
	}

	for (i=0; i<objects.count(); i++) {
		DynamicObject *obj=objects[i];
		if (obj->objType!=obj_deflector) {
			obj->applyForces(dt, firstCall);
			obj->applyGravity(params.gravity);
		}
	}
}

// Apply the collision response impulses to the particles
void SimClothEngine::applyCollisions(float dt) {
	Vlado::Collision<DynamicObject> *c;
	cdetector.clist->resetCounter();
	while (c=cdetector.clist->nextElement()) {
		c->dispatchFrictionForce();
		c->dispatchVel();
		c->dispatchPos();
	}
}