#include "Headers.h"

#include "Utils.h"
#include "SimClothEngine.h"
#include "resource.h"
#include "SimClothModifier.h"
#include "SimClothToMax.h"
#include "ToMax.h"
#include "VertexGroup.h"
#include "tomax.h"

#define FORCES_USE_SSE

#ifdef FORCES_USE_SSE
#	include "fvec.h"
#endif

extern ParamBlockDesc2 sc3_global_pblock;

// Dialog procedure for the simulation statistics window created by SimClothToMax::simulate().
// The SimClothToMax instance is passed through CreateDialogParam's lParam and is kept in the
// window's GWLP_USERDATA slot so that later messages can reach it.
// Returns TRUE for the messages handled here and FALSE for everything else.
INT_PTR CALLBACK statsDlgProc(HWND hWnd, UINT uMsg, WPARAM wParam, LPARAM lParam) {
	switch (uMsg) {
		case WM_INITDIALOG: {
			SetWindowLongPtr(hWnd, GWLP_USERDATA, lParam);
			SimClothToMax *connection=(SimClothToMax*) lParam;

			// Reset the per-frame progress bar to the range 0..100
			HWND progressBar=GetDlgItem(hWnd, pb_frame);
			SendMessage(progressBar, PBM_SETPOS, 0, 0);
			SendMessage(progressBar, PBM_SETRANGE32, 0, 100);

			// Without an owner there is nowhere to keep the font handle; use the default font
			if (!connection) return TRUE;

			// Build a bold 8pt Tahoma font for the statistics read-outs
			LOGFONT logFont;
			memset(&logFont, 0, sizeof(logFont));

			HDC hDC=GetDC(hWnd);
			logFont.lfHeight=-MulDiv(8, GetDeviceCaps(hDC, LOGPIXELSY), 72);
			logFont.lfWeight=FW_BOLD;
			_tcscpy(logFont.lfFaceName, _T("Tahoma"));
			ReleaseDC(hWnd, hDC);

			connection->hFont=CreateFontIndirect(&logFont);

			// WM_SETFONT carries the font handle in wParam and the redraw flag in lParam
			static const int boldItems[]={ st_collisions, st_lastFrameTime, st_cgIterations, st_simulationTime, st_estimatedTimeLeft };
			for (int k=0; k<int(sizeof(boldItems)/sizeof(boldItems[0])); k++) {
				SendDlgItemMessage(hWnd, boldItems[k], WM_SETFONT, (WPARAM) connection->hFont, FALSE);
			}

			return TRUE;
		}
		case WM_DESTROY: {
			// Release the font created in WM_INITDIALOG
			SimClothToMax *connection=(SimClothToMax*) GetWindowLongPtr(hWnd, GWLP_USERDATA);
			if (connection) {
				if (connection->hFont) DeleteObject(connection->hFont);
				connection->hFont=NULL;
			}
			return TRUE;
		}
		case WM_COMMAND: {
			// The cancel button only raises the abort flag; the flag is cleared again by simulate()
			SimClothToMax *connection=(SimClothToMax*) GetWindowLongPtr(hWnd, GWLP_USERDATA);
			if (connection && LOWORD(wParam)==bn_cancel) connection->abort=true;
			return TRUE;
		}
	}
	return FALSE;
}

// Formats a duration given in milliseconds into str as "S.Ss", "Mm S.Ss" or "Hh Mm S.Ss",
// depending on whether it is shorter than a minute, shorter than an hour, or longer.
// str must point to a buffer large enough for the result; no bounds checking is done.
// The format strings are wrapped in _T() so that they match the TCHAR-based _stprintf
// in both ANSI and Unicode builds.
void printTime(TCHAR *str, int milliseconds) {
	const int msPerMinute=60*1000;
	const int msPerHour=60*msPerMinute;

	if (milliseconds<msPerMinute) {
		_stprintf(str, _T("%.1fs"), float(milliseconds)/1000.0f);
	} else if (milliseconds<msPerHour) {
		int minutes=milliseconds/msPerMinute;
		milliseconds-=minutes*msPerMinute;
		_stprintf(str, _T("%im %.1fs"), minutes, float(milliseconds)/1000.0f);
	} else {
		int hours=milliseconds/msPerHour;
		milliseconds-=hours*msPerHour;
		int minutes=milliseconds/msPerMinute;
		milliseconds-=minutes*msPerMinute;
		_stprintf(str, _T("%ih %im %.1fs"), hours, minutes, float(milliseconds)/1000.0f);
	}
}

// Runs the simulation over the configured frame range.
// For every frame the scene objects are updated, the engine is stepped by one frame time,
// the resulting state is cached and the statistics dialog is refreshed.
// Returns the last value returned by SimClothEngine::step() (0 if no step reported an error).
int SimClothToMax::simulate(SimClothEngine *scEngine) {
	scEngine->setConnection(this);

	Interface *ip=GetCOREInterface();
	int ticksPerFrame=GetTicksPerFrame();

	// Determine the simulation interval
	int startFrame, endFrame;
	if (sc3_global_pblock.GetInt(sc3_global_useActiveTimeSegment, 0)) {
		Interval animRange=ip->GetAnimRange();
		startFrame=animRange.Start()/ticksPerFrame;
		endFrame=animRange.End()/ticksPerFrame;
	} else {
		startFrame=sc3_global_pblock.GetInt(sc3_global_startFrame, 0);
		endFrame=sc3_global_pblock.GetInt(sc3_global_endFrame, 0);
	}

	// The loop index is shared by several loops below, so it is declared once here instead of
	// relying on a for-statement variable staying visible after its loop.
	int i;

	// Prepare the objects in the simulation
	for (i=0; i<simClothObjects.count(); i++) simClothObjects[i]->beginSimulation(startFrame*ticksPerFrame);

	// Prepare the SimCloth engine
	scEngine->beginSimulation();

	// Record the start of the simulation (system ticks)
	int ticksStart=GetTickCount();

	int res=0;

	// Show the statistics dialog; statsDlgProc receives `this` through lParam
	hWnd=CreateDialogParam(hInstance, MAKEINTRESOURCE(IDD_STATS), ip->GetMAXHWnd(), &statsDlgProc, (LPARAM) this);

	HWND simProgress=GetDlgItem(hWnd, pb_simulation);
	SendMessage(simProgress, PBM_SETPOS, startFrame, 0);
	SendMessage(simProgress, PBM_SETRANGE32, startFrame, endFrame);
	abort=false;

	// Loop through all the frames in simulation and compute the state of the objects
	float dt=1.0f/float(GetFrameRate());
	TCHAR str[512];
	for (i=startFrame; i<=endFrame; i++) {
		int frameTicks=GetTickCount();

		TimeValue time=i*ticksPerFrame;
		int j;

		// Update the objects for the current frame
		for (j=0; j<simClothObjects.count(); j++) simClothObjects[j]->updateSimulation(time+ticksPerFrame);

		// Call the SimCloth engine to compute the object states for the frame
		res=scEngine->step(dt);
		if (res!=0) break;

		// Save the state of the objects
		for (j=0; j<simClothObjects.count(); j++) simClothObjects[j]->saveCache(time);

		// Update the viewports
		ip->SetTime(time);

		// Stop if an abort has been requested
		if (getAbort()) break;

		printTime(str, GetTickCount()-frameTicks);
		SetWindowText(GetDlgItem(hWnd, st_lastFrameTime), str);

		int simTime=GetTickCount()-ticksStart;
		printTime(str, simTime);
		SetWindowText(GetDlgItem(hWnd, st_simulationTime), str);

		// remaining frames * average time per finished frame; the product is formed in double
		// because (frames left)*(elapsed ms) can exceed the range of int on long simulations
		int estimatedTimeLeft=int(double(endFrame-i)*double(simTime)/double(i-startFrame+1));
		printTime(str, estimatedTimeLeft);
		SetWindowText(GetDlgItem(hWnd, st_estimatedTimeLeft), str);

		SendMessage(simProgress, PBM_SETPOS, i, 0);
		_stprintf(str, _T("Total simulation (frame %i of %i)"), i-startFrame+1, endFrame-startFrame+1);
		SetWindowText(GetDlgItem(hWnd, st_simulation), str);
	}

	// Record the end of the simulation and compute the total simulation time (in system ticks)
	int ticksEnd=GetTickCount();
	DebugPrint(_T("Simulation took %i ticks\n"), ticksEnd-ticksStart);

	// Free any data associated with the SimCloth engine
	scEngine->endSimulation();

	// Free any data associated with the objects in the simulation
	for (i=0; i<simClothObjects.count(); i++) simClothObjects[i]->endSimulation();

	DestroyWindow(hWnd);
	hWnd=NULL;

	return res;
}

// Recursively walks the children of root and, for every visible node, scans its chain of
// derived objects for enabled SimCloth3 modifiers. For each one found a DeflectorObject,
// ClothObject or RigidObject (chosen by the modifier's sc3_general_type parameter) is
// created and appended to simClothObjects.
// Children are visited before the hidden test, so visible children of hidden nodes are
// still collected.
void SimClothToMax::listNodes(INode *root) {
	for (int i=0; i<root->NumberOfChildren(); i++) {
		INode *node=root->GetChildNode(i);
		listNodes(node);
		if (node->IsNodeHidden()) continue;

		// Walk down the modifier stack; stop at the first non-derived (or missing) object
		Object *obj=node->GetObjectRef();
		while (obj && obj->SuperClassID()==GEN_DERIVOB_CLASS_ID) {
			IDerivedObject *derivObj=(IDerivedObject*) obj;

			for (int m=0; m<derivObj->NumModifiers(); m++) {
				Modifier *mod=derivObj->GetModifier(m);
				if (!mod || !mod->IsEnabled()) continue;

				if (mod->ClassID()==SIMCLOTH3_CLASS_ID) {
					ModContext *mc=derivObj->GetModContext(m);
					if (!mc) continue;

					// Make sure the modifier instance has its per-node data
					if (mc->localData==NULL) mc->localData=new SimClothLocalData;

					SimCloth3 *sc3=(SimCloth3*) mod;
					SimClothObject *scObj=NULL;
					switch(sc3->GetParamBlockByID(sc3_general)->GetInt(sc3_general_type)) {
						case 0: scObj=new DeflectorObject(node, sc3, (SimClothLocalData*) mc->localData); break;
						case 1: scObj=new ClothObject(node, sc3, (SimClothLocalData*) mc->localData); break;
						case 2: scObj=new RigidObject(node, sc3, (SimClothLocalData*) mc->localData); break;
					}
					if (scObj) *simClothObjects.newElement()=scObj;
				}
			}
			obj=derivObj->GetObjRef();
		}
	}
}

void SimClothToMax::init(void) {
	// Scan the scene for simcloth objects
	Interface *ip=GetCOREInterface();
	INode *root=ip->GetRootNode();
	listNodes(root);
}

void SimClothToMax::freeData(void) {
	for (int i=0; i<simClothObjects.count(); i++) {
		simClothObjects[i]->freeData();
		delete simClothObjects[i];
	}
	simClothObjects.freeData();
}

// Copies the global simulation settings from sc3_global_pblock into params.
// Gravity is stored as a vector along the negative Z axis.
void SimClothToMax::getSimClothEngineParams(SimClothEngineParams &params) {
	ParamBlockDesc2 &globals=sc3_global_pblock;

	// Integer / flag settings
	params.substeps=globals.GetInt(sc3_global_substeps, 0);

	// Floating point settings
	const float gravityAmount=globals.GetFloat(sc3_global_gravity, 0);
	params.gravity.set(0.0f, 0.0f, -gravityAmount);
	params.collisionTolerance=globals.GetFloat(sc3_global_collisionTolerance, 0);
	params.solverPrecision=globals.GetFloat(sc3_global_solverPrecision, 0);

	params.checkIntersections=globals.GetInt(sc3_global_checkIntersections, 0);
	params.adaptiveSubdivs=globals.GetInt(sc3_global_adaptiveSubdivs, 0);
	params.useSSE=globals.GetInt(sc3_global_useSSE, 0);
}

//******************************************************
// SimClothObject

// Default constructor: clears every pointer that freeData() later inspects, so that the
// destructor is safe even if init() was never called, then initializes the base class.
SimClothObject::SimClothObject(void) {
	// Scene back-references (not owned)
	node=NULL; mod=NULL; localData=NULL;

	// Mesh topology arrays
	objVerts=NULL;
	edges=NULL;
	faces=NULL;

	// Force element arrays
	triplets=NULL;
	bends=NULL;
	stretchShears=NULL;

	// Normal arrays
	vertexNormals=NULL;
	edgeNormals=NULL;

	DynamicObject::init();
}

// Destructor: all owned arrays are released through freeData().
SimClothObject::~SimClothObject(void) {
	this->freeData();
}

// Binds this object to a scene node, its SimCloth3 modifier and the modifier's local data,
// reads the simulation settings from the modifier's parameter blocks, resets the geometry
// arrays and collects the force fields of the space warps listed in the modifier.
void SimClothObject::init(INode *node, SimCloth3 *mod, SimClothLocalData *localData) {
	this->node=node;
	this->mod=mod;
	this->localData=localData;
	DynamicObject::init();

	// Internal force model
	stretchType=mod->pblockIntegrity->GetInt(sc3_integrity_stretchType);
	bendEnabled=mod->pblockIntegrity->GetInt(sc3_integrity_bendEnabled);
	bendType=mod->pblockIntegrity->GetInt(sc3_integrity_bendType);

	springsStiffness=mod->pblockIntegrity->GetFloat(sc3_integrity_springsStiffness, 0);
	springsDamping=mod->pblockIntegrity->GetFloat(sc3_integrity_springsDamping, 0);

	stretch_stiffness=mod->pblockIntegrity->GetFloat(sc3_integrity_stretch_stiffness, 0);
	shear_stiffness=mod->pblockIntegrity->GetFloat(sc3_integrity_shear_stiffness, 0);
	stretchShear_damping=mod->pblockIntegrity->GetFloat(sc3_integrity_stretchShear_damping, 0);

	bendAngleStiffness=mod->pblockIntegrity->GetFloat(sc3_integrity_bendAngle_stiffness, 0);
	bendAngleDamping=mod->pblockIntegrity->GetFloat(sc3_integrity_bendAngle_damping, 0);

	bendSpringStiffness=mod->pblockIntegrity->GetFloat(sc3_integrity_bendSpring_stiffness, 0);
	bendSpringDamping=mod->pblockIntegrity->GetFloat(sc3_integrity_bendSpring_damping, 0);

	skipSelectedEdges=mod->pblockIntegrity->GetInt(sc3_integrity_excludeSelectedEdges, 0);

	// Object type: 0 is a deflector and 2 is a rigid body (see the comparisons below)
	const int type=mod->pblockGeneral->GetInt(sc3_general_type, 0);
	objType=DynamicObjectType(type);

	deflector=type==0;
	rigid=type==2;
	particleMass=mod->pblockGeneral->GetFloat(sc3_general_particleMass, 0);

	// The air drag parameter is scaled by 10 before use
	airDrag=mod->pblockIntegrity->GetFloat(sc3_integrity_airDrag, 0)*10.0f;

	selfCollideOn=mod->pblockCollisions->GetInt(sc3_collisions_selfCollide, 0)==TRUE;
	is_selfCollide=!deflector && !rigid && selfCollide();
	is_friction=friction=mod->pblockCollisions->GetFloat(sc3_collisions_friction, 0);

	useSSE=sc3_global_pblock.GetInt(sc3_global_useSSE, 0);

	numVerts=0;
	objVerts=NULL;

	numEdges=0;
	edges=NULL;

	numFaces=0;
	faces=NULL;

	numBends=0;
	bends=NULL;

	numStretchShears=0;
	stretchShears=NULL;

	// Collect a force field from every listed node whose base object is a space warp object.
	// Entries without a node or without an object are skipped instead of being dereferenced.
	forceFields.clear();
	for (int n=0; n<mod->pblockIntegrity->Count(sc3_integrity_spacewarps); n++) {
		INode *warpNode=mod->pblockIntegrity->GetINode(sc3_integrity_spacewarps, 0, n);
		if (!warpNode) continue;

		Object *warpRef=warpNode->GetObjectRef();
		if (!warpRef) continue;

		Object *obj=warpRef->FindBaseObject();
		if (obj && obj->SuperClassID()==WSM_OBJECT_CLASS_ID) {
			WSMObject *wsmObject=(WSMObject*) obj;
			ForceField *ff=wsmObject->GetForceField(warpNode);
			if (ff) *forceFields.newElement()=ff;
		}
	}

	theMesh=NULL;
}

// Releases everything this object allocated for a simulation run and returns it to the
// empty state: base-class data, springs, geometry arrays, force elements, force fields
// and normals. Safe to call repeatedly because every pointer is reset to NULL.
void SimClothObject::freeData(void) {
	DynamicObject::freeData();

	// Drop the scene back-references
	node=NULL;
	mod=NULL;
	localData=NULL;

	springsArray.freeMem();

	// Geometry (delete[] accepts a null pointer, so no explicit test is needed)
	delete[] objVerts;
	objVerts=NULL;
	numVerts=0;

	delete[] edges;
	edges=NULL;
	numEdges=0;

	delete[] faces;
	faces=NULL;
	numFaces=0;

	// Force elements
	delete[] bends;
	bends=NULL;
	numBends=0;

	delete[] triplets;
	triplets=NULL;

	delete[] stretchShears;
	stretchShears=NULL;
	numStretchShears=0;

	// Force fields obtained from space warps are released through DeleteThis()
	const int fieldCount=forceFields.count();
	for (int k=0; k<fieldCount; k++) forceFields[k]->DeleteThis();
	forceFields.freeData();

	theMesh=NULL;

	// Normals
	delete[] vertexNormals;
	vertexNormals=NULL;
	delete[] edgeNormals;
	edgeNormals=NULL;
}

// Evaluates node at time t and returns its object converted to a TriObject, or NULL when
// the node has no evaluated object or the object cannot be converted.
// deleteIt is set to TRUE only when a new object was created by the conversion; in that
// case the caller is responsible for deleting the returned object when done with it.
TriObject *GetTriObjectFromNode(TimeValue t, INode *node, int &deleteIt) {
	deleteIt=FALSE;
	Object *obj=node->EvalWorldState(t).obj;
	if (!obj || !obj->CanConvertToType(triObjectClassID)) return NULL;

	TriObject *tri=(TriObject *) obj->ConvertToType(t, triObjectClassID);
	// Never ask the caller to delete a failed (NULL) conversion
	if (tri && obj!=tri) deleteIt=TRUE;
	return tri;
}

// Prepares the object for a new simulation starting at time t: clears the cached result,
// evaluates the node with the modifier's dontModify flag raised, builds the simulation
// data from the resulting mesh and notifies the modifier that the simulation starts.
void SimClothObject::beginSimulation(TimeValue t) {
	localData->cache.freeData();

	node->InvalidateWS();

	// dontModify stays raised while the node is evaluated below
	mod->dontModify=true;
	mod->NotifyDependents(FOREVER, PART_ALL, REFMSG_CHANGE);
	int deleteIt;
	Matrix3 tm=node->GetObjTMAfterWSM(t);
	TriObject *tri=GetTriObjectFromNode(t, node, deleteIt);
	// GetTriObjectFromNode returns NULL when the object cannot be converted to a mesh; in
	// that case the object is left without geometry rather than dereferencing a null pointer
	if (tri) {
		prepareMesh(tri, tri->GetMesh(), tm);
		if (deleteIt) tri->DeleteMe();
	}
	mod->dontModify=false;

	worldTime=t;
	clearForces();

	mod->beginSimulation(t, this);
}

void SimClothObject::endSimulation(void) {
	mod->endSimulation();
	freeData();
}

// Appends a cache key for time t holding the current vertex positions, converted back to
// object space with the inverse of the stored object transformation.
// Non-rigid objects store the particle positions directly; rigid bodies rebuild the body
// transformation from their four particles and apply it to the stored objVerts.
void SimClothObject::saveCache(TimeValue t) {
	SimClothKey *key=localData->cache.keys.newElement();
	key->time=t;
	key->numPoints=numVerts;
	key->points=new Point3[numVerts];

	Matrix3 worldToObject=Inverse(tm);

	if (isRigidBody()) {
		// Frame spanned by particle 0 (origin) and the offsets to particles 1..3
		rigidBodyPosTm=Vlado::Transform(Vlado::Matrix(parts[1].np-parts[0].np, parts[2].np-parts[0].np, parts[3].np-parts[0].np), parts[0].np);
		for (int k=0; k<numVerts; k++) {
			Vlado::Vector worldPos=rigidBodyPosTm*objVerts[k];
			key->points[k]=worldToObject*toPoint3(worldPos);
		}
	} else {
		for (int k=0; k<numVerts; k++) {
			// Particles with (near) zero inverse mass use np, all others use p
			Vlado::Vector &src=(parts[k].invMass>1e-6f)? parts[k].p : parts[k].np;
			key->points[k]=worldToObject*toPoint3(src);
		}
	}

	mod->NotifyDependents(FOREVER, PART_ALL, REFMSG_CHANGE);
}

// Find the eigenvalues of a symmetric 3x3 matrix
//   | ixx ixy izx |
//   | ixy iyy iyz |
//   | izx iyz izz |
// Algorithm from Graphics Gems IV (trigonometric solution of the characteristic cubic).
// The results are written to l0, l1, l2 (in no particular order); negative results are
// clamped to zero.
// Degenerate input is handled explicitly: when all three eigenvalues coincide the cubic
// collapses to a triple root, and the acos argument is clamped to [-1, 1] so that rounding
// errors cannot produce NaN.
void eigenValues(float ixx, float iyy, float izz, float ixy, float iyz, float izx, double &l0, double &l1, double &l2) {
	const double kPi=3.14159265358979323846;

	// Characteristic polynomial l^3 + p*l^2 + q*l + r = 0
	double c=ixx*iyy, d=iyz*iyz, e=ixy*ixy, f=izx*izx;
	double p=-ixx-iyy-izz;
	double q=c+(ixx+iyy)*izz-d-e-f;
	double r=(e-c)*izz+d*ixx-2.0*(ixy*iyz*izx)+f*iyy;

	// Depressed cubic t^3 + a*t + b = 0 with l = t - p/3
	double a=q-p*p/3.0;
	double b=2.0*p*p*p/27.0-p*q/3.0+r;

	double shift=-p/3.0;
	double radicand=-a/3.0;

	if (!(radicand>0.0)) {
		// a is zero (or slightly positive from rounding): triple root at -p/3
		l0=l1=l2=shift;
	} else {
		double m=2.0*sqrt(radicand);

		double arg=3.0*b/(a*m);
		if (!(arg<=1.0)) arg=1.0; // also catches NaN from an underflowed denominator
		if (arg<-1.0) arg=-1.0;

		double theta=acos(arg)/3.0;

		l0=m*cos(theta)+shift;
		l1=m*cos(theta+2.0*kPi/3.0)+shift;
		l2=m*cos(theta+4.0*kPi/3.0)+shift;
	}

	if (l0<0.0) l0=0.0;
	if (l1<0.0) l1=0.0;
	if (l2<0.0) l2=0.0;
}

// Returns a normalized vector built from the symmetric matrix
//   | ixx ixy izx |
//   | ixy iyy iyz |
//   | izx iyz izz |
// The component `col` of the result is fixed to 1, where (row, col) is the position of the
// largest-magnitude element of the inverse matrix; the other two components are obtained
// by solving the 2x2 system formed by the two remaining rows (Cramer's rule).
// No check is made for a singular 2x2 system.
Vlado::Vector findSolution(float ixx, float iyy, float izz, float ixy, float iyz, float izx) {
	Vlado::Matrix m;
	m.f[0].set(ixx, ixy, izx);
	m.f[1].set(ixy, iyy, iyz);
	m.f[2].set(izx, iyz, izz);
	Vlado::Matrix im=Vlado::inverse(m);

	// Find the largest element in the inverse matrix
	float largest=0.0f;
	int row=0, col=0;
	for (int r=0; r<3; r++) {
		for (int c=0; c<3; c++) {
			if (fabs(im.f[r][c])>largest) {
				largest=fabs(im.f[r][c]);
				row=r;
				col=c;
			}
		}
	}

	// Cyclic successors of the pivot row and column
	const int row1=(row+1)%3, row2=(row+2)%3;
	const int col0=(col)%3, col1=(col+1)%3, col2=(col+2)%3;

	Vlado::Vector res;
	res[col]=1.0f;

	// 2x2 system  | m11 m12 | |x1|   |rhs1|
	//             | m21 m22 | |x2| = |rhs2|
	float m11=m.f[row1][col1];
	float m12=m.f[row1][col2];

	float m21=m.f[row2][col1];
	float m22=m.f[row2][col2];

	float rhs1=-m.f[row1][col0];
	float rhs2=-m.f[row2][col0];

	float det=m11*m22-m12*m21;
	res[col1]=(rhs1*m22-m12*rhs2)/det;
	res[col2]=(m11*rhs2-rhs1*m21)/det;

	return Vlado::normalize(res);
}

// Returns the vector findSolution() yields for the given symmetric matrix with l
// subtracted from its diagonal, i.e. for (M - l*I).
Vlado::Vector eigenVector(float ixx, float iyy, float izz, float ixy, float iyz, float izx, double l) {
	const float shiftedXX=ixx-float(l);
	const float shiftedYY=iyy-float(l);
	const float shiftedZZ=izz-float(l);
	return findSolution(shiftedXX, shiftedYY, shiftedZZ, ixy, iyz, izx);
}

// Computes the reference frame used to represent a rigid body with four particles.
// The returned transformation has three axes of equal length m (the largest extent of the
// mesh bounding box) pairwise 60 degrees apart, and an offset chosen so that the average
// of the four frame points (origin and the three axis tips) is the bounding box center.
//
// NOTE: the covariance / eigenvector computation in the middle of the function is still
// carried out, but the frame it produces is overwritten by the bounding-box based frame
// at the end, so it does not influence the returned value.
Vlado::Transform computeInertiaTransform(Mesh &mesh) {
	// Shared loop index, declared once because it is used by more than one loop
	int i;

	// Center of gravity of the vertices (all vertices have unit mass)
	Vlado::Vector gcenter(0.0f, 0.0f, 0.0f);
	float wsum=0.0f;
	for (i=0; i<mesh.numVerts; i++) {
		Vlado::Vector p=toVector(mesh.verts[i]);
		float mass=1.0f;

		gcenter+=p*mass;
		wsum+=mass;
	}
	gcenter/=wsum;


	float ixx, iyy, izz, ixy, iyz, izx;
	ixx=iyy=izz=ixy=iyz=izx=0.0f;

	Vlado::Box b;
	b.init();

	// Accumulate the bounding box and the second moments about the center of gravity
	for (i=0; i<mesh.numVerts; i++) {
		Vlado::Vector p=toVector(mesh.verts[i]);
		float mass=1.0f;

		b+=p;

		Vlado::Vector rp=p-gcenter;
		ixx+=mass*rp.x*rp.x;
		iyy+=mass*rp.y*rp.y;
		izz+=mass*rp.z*rp.z;
		ixy+=mass*(rp.x*rp.y);
		iyz+=mass*(rp.y*rp.z);
		izx+=mass*(rp.z*rp.x);
	}
	ixx/=wsum;
	iyy/=wsum;
	izz/=wsum;
	ixy/=wsum;
	iyz/=wsum;
	izx/=wsum;

	// Find the inertia matrix
	Vlado::Matrix I;
	I.f[0].set(ixx, ixy, izx);
	I.f[1].set(ixy, iyy, iyz);
	I.f[2].set(izx, iyz, izz);

	// Find a rotation that will convert the inertia matrix
	// to a diagonal matrix

	// Find the eigenvalues of the inertia matrix
	double l0, l1, l2;
	eigenValues(ixx, iyy, izz, ixy, iyz, izx, l0, l1, l2);

	Vlado::Vector u=eigenVector(ixx, iyy, izz, ixy, iyz, izx, l0);
	Vlado::Vector v=eigenVector(ixx, iyy, izz, ixy, iyz, izx, l1);
	Vlado::Vector w=eigenVector(ixx, iyy, izz, ixy, iyz, izx, l2);

	Vlado::Transform res;

	if (l0<1e-6) l0=1.0f;
	if (l1<1e-6) l1=1.0f;
	if (l2<1e-6) l2=1.0f;

	res.m.set(u*sqrtf(l0), v*sqrtf(l1), w*sqrtf(l2));
	res.offs=gcenter;

	// Replace the frame computed above by one derived from the bounding box
	Vlado::Vector wd=b.width();
	int maxd=Vlado::maxComponent(wd);
	float m=wd[maxd];

	res.m.f[0].set(m, 0.0f, 0.0f);
	res.m.f[1].set(m*0.5f, m*sqrtf(3.0f)*0.5f, 0.0f);
	res.m.f[2].set(m*0.5f, m*sqrtf(3.0f)/6.0f, m*sqrtf(6.0f)/3.0f);

	res.offs=b.center()-(res.m.f[0]+res.m.f[1]+res.m.f[2])*0.25f; // b.center()-Vlado::Vector(0.25f, 0.25f, 0.25f);

	return res;
}

// Returns the modifier's collide setting for the vertex with the given index.
inline int SimClothObject::getParticleCollide(int index) {
	const int flag=mod->getVertexCollide(index);
	return flag;
}

// Returns the modifier's self-collide setting for the vertex with the given index.
inline int SimClothObject::getParticleSelfCollide(int index) {
	const int flag=mod->getVertexSelfCollide(index);
	return flag;
}

// Returns the modifier's check-intersections setting for the vertex with the given index.
inline int SimClothObject::getParticleCheckIntersections(int index) {
	const int flag=mod->getVertexCheckIntersections(index);
	return flag;
}

// Builds all simulation data for the object from a mesh.
//   obj  - the TriObject the mesh belongs to (used for the vertex selection test)
//   mesh - the mesh providing vertices, faces and edge selection
//   tm   - object-to-world transformation; stored in this->tm
// Steps: allocate vertex arrays, create the particles (one per vertex for non-rigid objects,
// four for non-deflector rigid bodies), copy the faces and give each a local 2D parametrization,
// enumerate the edges, build the normals and finally create the springs, bend and
// stretch/shear force elements according to the object settings.
void SimClothObject::prepareMesh(TriObject *obj, Mesh &mesh, Matrix3 &tm) {
	this->tm=tm;

	// Shared loop index, declared once because it is used by more than one loop
	int i;

	float *vweights=mesh.getVSelectionWeights();

	numVerts=mesh.numVerts;
	objVerts=new Vlado::Vector[numVerts];
	vertexNormals=new Vlado::Vector[numVerts];

	if (!isRigidBody()) {
		setNumParts(mesh.numVerts);
	}

	Vlado::Transform objToWorld=toTransform(tm);

	Vlado::Transform inertiaMatrix=computeInertiaTransform(mesh);
	Vlado::Transform inertiaMatrixInverse=Vlado::inverse(inertiaMatrix);

	for (i=0; i<numVerts; i++) {
		// Soft selection weight if available, otherwise 1 for selected and 0 for unselected vertices
		float softSel=1.0f;
		if (vweights) softSel=vweights[i];
		else if (!obj->IsPointSelected(i)) softSel=0.0f;

		// Vertex position expressed in the rigid body reference frame
		objVerts[i]=inertiaMatrixInverse*toVector(mesh.verts[i]);

		if (!isRigidBody()) {
			// Fully selected vertices get zero inverse mass; deflector vertices always do
			parts[i].invMass=(1.0f-softSel)/particleMass;
			if (deflector) parts[i].invMass=0.0f;
			parts[i].origInvMass=parts[i].invMass;

			parts[i].rp=parts[i].q0=parts[i].q1=parts[i].q2=parts[i].p=parts[i].pp=parts[i].np=toVector(tm*mesh.verts[i]);
			parts[i].posImp[0].makeZero();
			parts[i].velImp[0].makeZero();
			parts[i].posImp[1].makeZero();
			parts[i].velImp[1].makeZero();
			parts[i].v.makeZero();
			parts[i].nv.makeZero();
			parts[i].qv0.makeZero();
			parts[i].qv1.makeZero();
			parts[i].obj=this;
			if (deflector) { parts[i].collide=true; parts[i].selfCollide=false; }
		}
	}

	if (isRigidBody() && !deflector) {
		// A rigid body is represented by four particles: the origin and the three axis tips
		// of the reference frame, transformed to world space
		setNumParts(4);
		parts[0].rp=parts[0].q0=parts[0].q1=parts[0].q2=parts[0].pp=parts[0].p=parts[0].np=objToWorld*(inertiaMatrix*Vlado::Vector(0.0f, 0.0f, 0.0f));
		parts[1].rp=parts[1].q0=parts[1].q1=parts[1].q2=parts[1].pp=parts[1].p=parts[1].np=objToWorld*(inertiaMatrix*Vlado::Vector(1.0f, 0.0f, 0.0f));
		parts[2].rp=parts[2].q0=parts[2].q1=parts[2].q2=parts[2].pp=parts[2].p=parts[2].np=objToWorld*(inertiaMatrix*Vlado::Vector(0.0f, 1.0f, 0.0f));
		parts[3].rp=parts[3].q0=parts[3].q1=parts[3].q2=parts[3].pp=parts[3].p=parts[3].np=objToWorld*(inertiaMatrix*Vlado::Vector(0.0f, 0.0f, 1.0f));

		for (int i=0; i<numParts; i++) {
			parts[i].v.makeZero();
			parts[i].nv.makeZero();
			parts[i].qv0.makeZero();
			parts[i].qv1.makeZero();
			parts[i].velImp[0].makeZero();
			parts[i].posImp[0].makeZero();
			parts[i].velImp[1].makeZero();
			parts[i].posImp[1].makeZero();
			parts[i].origInvMass=parts[i].invMass=1.0f/particleMass;
			parts[i].obj=this;
			parts[i].collide=true;
			parts[i].selfCollide=false;
		}
	}

	numFaces=mesh.numFaces;
	faces=new Vlado::FaceData[numFaces];
	triplets=new Vlado::ForceTriplet[numFaces];

	Point3 *mverts=mesh.verts;
	for (i=0; i<mesh.numFaces; i++) {
		for (int k=0; k<3; k++) faces[i].v[k]=mesh.faces[i].v[k];

		Vlado::ForceTriplet &tri=triplets[i];

		tri.v0=faces[i].v[0];
		tri.v1=faces[i].v[1];
		tri.v2=faces[i].v[2];

		// Think of some uv coordinates: an orthonormal basis (u, v) in the plane of the
		// face, with the first vertex at the origin
		Vlado::Vector e0=toVector(mverts[tri.v1]-mverts[tri.v0]);
		Vlado::Vector e1=toVector(mverts[tri.v2]-mverts[tri.v0]);

		Vlado::Vector n=e0^e1;
		Vlado::Vector u=Vlado::Vector(0.0f, 1.0f, 0.0f)^n;
		// Fall back to the X axis when the normal is parallel to the Y axis
		if (u.lengthSqr()<1e-12f) u=n^Vlado::Vector(1.0f, 0.0f, 0.0f);
		u.makeNormalized();
		Vlado::Vector v=Vlado::normalize(n^u);

		tri.uc[0]=0.0f; tri.vc[0]=0.0f;
		tri.uc[1]=e0*u; tri.vc[1]=e0*v;
		tri.uc[2]=e1*u; tri.vc[2]=e1*v;
	}

	theMesh=&mesh;
	Vlado::EdgeLister<SimClothObject> edgeLister;
	edgeLister.enumEdges(*this);
	edgeNormals=new Vlado::Vector[numEdges];

	buildNormals();

	if (isRigidBody()) {
		if (!deflector) {
			// Connect every pair of the rigid body particles with a spring
			springsArray.setCount(numParts*(numParts-1)/2, useSSE);
			for (int i=0; i<numParts-1; i++) {
				for (int j=i+1; j<numParts; j++) {
					springsArray.addSpring(parts[i], parts[j], springsStiffness*10000.0f*particleMass, springsDamping*10000.0f*particleMass);
				}
			}
			// numSprings=3;
			// Disabled alternative using a single rigid spring force:
			//   numRigidSprings=1;
			//   rigidSprings=new Vlado::RigidSpringForce<SimClothObject>[numRigidSprings];
			//   rigidSprings[0].init(*this, 0, 1, 2, 3);
			numStretchShears=0;
			numBends=0;
		}
	} else {
		// Room for one stretch spring and one bend spring per edge
		springsArray.setCount(numEdges*2, useSSE);

		numStretchShears=0;
		stretchShears=new Vlado::StretchShearForce<Vlado::Particle<DynamicObject> >[numFaces];

		numBends=0;
		bends=new Vlado::BendForce<Vlado::Particle<DynamicObject> >[numEdges];

		if (!deflector) {
			for (int i=0; i<numEdges; i++) {
				Vlado::EdgeData &edge=edges[i];

				// When requested, skip edges that are selected in the mesh on either adjacent face
				if (skipSelectedEdges) {
					if (mesh.edgeSel[edge.f[0]*3+edge.i[0]]) continue;
					if (edge.numFaces>1) {
						if (mesh.edgeSel[edge.f[1]*3+edge.i[1]]) continue;
					}
				}

				// Stretch type 0: a spring along every edge
				if (stretchType==0) springsArray.addSpring(parts[edge.v[0]], parts[edge.v[1]], springsStiffness*10000.0f*particleMass, springsDamping*10000.0f*particleMass);

				// Bending is only defined for edges shared by two faces
				if (edge.numFaces>1 && bendEnabled) {
					int v0=edge.v[0], v1=edge.v[1];
					int tv0=edge.tv[0], tv1=edge.tv[1];
					if (bendType==0) {
						// Bend type 0: a spring between the two vertices opposite the edge
						if (bendSpringStiffness>0.0f) {
							springsArray.addSpring(parts[tv0], parts[tv1], bendSpringStiffness*10000.0f*particleMass, bendSpringDamping*10000.0f*particleMass);
						}
					} else {
						// Otherwise: an angle-based bend force over the two adjacent faces
						if (bendAngleStiffness>0.0f && v0!=v1) {
							bends[numBends++].init(parts[v0], parts[v1], parts[tv0], parts[tv1], triplets[edge.f[0]], triplets[edge.f[1]], bendAngleStiffness*1000.0f*particleMass, bendAngleDamping*1000.0f*particleMass);
						}
					}
				}
			}

			// Stretch type 1: one stretch/shear force element per face
			if (stretchType==1) {
				numStretchShears=0;
				for (int i=0; i<numFaces; i++) {
					Vlado::ForceTriplet &tri=triplets[i];
					stretchShears[numStretchShears++].init(parts[tri.v0], parts[tri.v1], parts[tri.v2], tri, stretch_stiffness*1000.0f, shear_stiffness*1000.0f, stretchShear_damping*1000.0f);
				}
			}
		}

		// Particles with (near) zero inverse mass take their position from the mesh
		for (i=0; i<numParts; i++) {
			if (parts[i].invMass<1e-6f) {
				parts[i].q0=parts[i].q1;
				parts[i].q1=toVector(tm*mesh.verts[i]);
			}
		}

		// Face normals from the current particle positions
		for (i=0; i<numFaces; i++) {
			Vlado::Vector &p0=parts[faces[i].v[0]].np;
			Vlado::Vector &p1=parts[faces[i].v[1]].np;
			Vlado::Vector &p2=parts[faces[i].v[2]].np;

			Vlado::Vector n=(p1-p0)^(p2-p0);

			triplets[i].normalLength=n.length();
			triplets[i].invLength=1.0f/triplets[i].normalLength;
			triplets[i].normal=n*triplets[i].invLength;
		}
	}
}

// A class representing four 3D vectors that can be manipulated as one
BEGIN_VLADO

struct FourVectors {
	__m128 x,y,z;

	FourVectors(void) {}

	FourVectors(Vector &a, Vector &b, Vector &c, Vector &d) {
		x=_mm_set_ps(d.x, c.x, b.x, a.x);
		y=_mm_set_ps(d.y, c.y, b.y, a.y);
		z=_mm_set_ps(d.z, c.z, b.z, a.z);
	}

	FourVectors(Vector *a) {
		x=F32vec4(a[3].x, a[2].x, a[1].x, a[0].x);
		y=F32vec4(a[3].y, a[2].y, a[1].y, a[0].y);
		z=F32vec4(a[3].z, a[2].z, a[1].z, a[0].z);
	}

	// Vector getVector(int i) { return Vector(x[i], y[i], z[i]); }
	// void setVector(int i, Vector &a) { x[i]=a.x; y[i]=a.y; z[i]=a.z; }

	// Set the first vector
	void setVector(Vector &a) {
		x=_mm_move_ss(x, _mm_set_ss(a.x));
		y=_mm_move_ss(y, _mm_set_ss(a.y));
		z=_mm_move_ss(z, _mm_set_ss(a.z));
	}

	// Get the first vector
	/*Vector getVector(void) {
		return Vector(x[0], y[0], z[0]);
	}*/

	void rotateLeft(void) {
		x=_mm_shuffle_ps(x, x, _MM_SHUFFLE(2, 1, 0, 3));
		y=_mm_shuffle_ps(y, y, _MM_SHUFFLE(2, 1, 0, 3));
		z=_mm_shuffle_ps(z, z, _MM_SHUFFLE(2, 1, 0, 3));
	}

	void rotateRight(void) {
		x=_mm_shuffle_ps(x, x, _MM_SHUFFLE(0, 3, 2, 1));
		y=_mm_shuffle_ps(y, y, _MM_SHUFFLE(0, 3, 2, 1));
		z=_mm_shuffle_ps(z, z, _MM_SHUFFLE(0, 3, 2, 1));
	}

	__m128 length(void) {
		return _mm_sqrt_ps(_mm_add_ps(_mm_mul_ps(x, x), _mm_add_ps(_mm_mul_ps(y, y), _mm_mul_ps(z, z))));
	}

	void operator*=(__m128 &a) {
		x=_mm_mul_ps(x, a);
		y=_mm_mul_ps(y, a);
		z=_mm_mul_ps(z, a);
	}

	void operator-=(Vector &a) {
		x=_mm_sub_ss(x, _mm_set_ss(a.x));
		y=_mm_sub_ss(y, _mm_set_ss(a.y));
		z=_mm_sub_ss(z, _mm_set_ss(a.z));
	}
};

// Lane-wise dot product: lane k of the result is the dot product of vector k of a and b.
INLINE __m128 operator*(FourVectors &a, FourVectors &b) {
	__m128 xx=_mm_mul_ps(a.x, b.x);
	__m128 yy=_mm_mul_ps(a.y, b.y);
	__m128 zz=_mm_mul_ps(a.z, b.z);
	return _mm_add_ps(xx, _mm_add_ps(yy, zz));
}

// Lane-wise sum of two packs of four vectors.
INLINE FourVectors operator+(FourVectors &a, FourVectors &b) {
	FourVectors sum;
	sum.z=_mm_add_ps(a.z, b.z);
	sum.y=_mm_add_ps(a.y, b.y);
	sum.x=_mm_add_ps(a.x, b.x);
	return sum;
}

END_VLADO

inline void SpringsArray::doSprings4(int index) {
	Vlado::FourVectors d;
	{
		for (int i=0; i<4; i++) {
			Vlado::Vector *a=p1[index+i]->getPosAddr();
			Vlado::Vector *b=p0[index+i]->getPosAddr();

			d.x=_mm_move_ss(d.x, _mm_sub_ss(_mm_load_ss(&a->x), _mm_load_ss(&b->x)));
			d.y=_mm_move_ss(d.y, _mm_sub_ss(_mm_load_ss(&a->y), _mm_load_ss(&b->y)));
			d.z=_mm_move_ss(d.z, _mm_sub_ss(_mm_load_ss(&a->z), _mm_load_ss(&b->z)));

			d.rotateRight();
		}
	}

	F32vec4 dlen=d.length();
	F32vec4 restLen(_mm_load_ps(restLen+index));

	F32vec4 dp;
	{
		F32vec4 scale(_mm_load_ps(scale+index));
		F32vec4 ks(_mm_load_ps(ks+index));
		dp=(scale-dlen/restLen)*ks;
	}

	F32vec4 dv;
	{
		Vlado::FourVectors v;
		F32vec4 kd(_mm_load_ps(kd+index));
		for (int i=0; i<4; i++) {
			Vlado::Vector *a=p1[index+i]->getVelAddr();
			Vlado::Vector *b=p0[index+i]->getVelAddr();

			v.x=_mm_move_ss(v.x, _mm_sub_ss(_mm_load_ss(&a->x), _mm_load_ss(&b->x)));
			v.y=_mm_move_ss(v.y, _mm_sub_ss(_mm_load_ss(&a->y), _mm_load_ss(&b->y)));
			v.z=_mm_move_ss(v.z, _mm_sub_ss(_mm_load_ss(&a->z), _mm_load_ss(&b->z)));

			v.rotateRight();
		}
		dv=((v*d)/(dlen*restLen))*kd;
	}

	d*=__m128((dp-dv)/dlen);

	{
		for (int i=0; i<4; i++) {
			Vlado::Vector *a=p1[index+i]->getForceAddr();
			Vlado::Vector *b=p0[index+i]->getForceAddr();

			_mm_store_ss(&a->x, _mm_add_ss(d.x, _mm_load_ss(&a->x)));
			_mm_store_ss(&a->y, _mm_add_ss(d.y, _mm_load_ss(&a->y)));
			_mm_store_ss(&a->z, _mm_add_ss(d.z, _mm_load_ss(&a->z)));

			_mm_store_ss(&b->x, _mm_sub_ss(_mm_load_ss(&b->x), d.x));
			_mm_store_ss(&b->y, _mm_sub_ss(_mm_load_ss(&b->y), d.y));
			_mm_store_ss(&b->z, _mm_sub_ss(_mm_load_ss(&b->z), d.z));

			d.rotateRight();
		}
	}
}

inline void SpringsArray::doSprings1(int index) {
	Vlado::Vector d, v;
	d=p1[index]->getPos()-p0[index]->getPos();
	v=p1[index]->getVel()-p0[index]->getVel();

	float dlen=d.length();

	float dp=(scale[index]-dlen/restLen[index])*ks[index];
	float dv=((v*d)/(dlen*restLen[index]))*kd[index];

	d*=(dp-dv)/dlen;

	p0[index]->applyForce(-d);
	p1[index]->applyForce(d);
}

// Applies the forces of all idx springs. With SSE enabled the springs are processed in
// groups of four and the remaining (up to three) springs one at a time.
void SpringsArray::applyForces(void) {
	int k=0;
	if (useSSE) {
		// Whole groups of four
		for (; k+4<=idx; k+=4) doSprings4(k);
	}
	// Everything not handled above
	for (; k<idx; k++) doSprings1(k);
}

// Accumulates all forces acting on the particles of this object: springs, stretch/shear
// and bend elements, external force fields and air drag. Force fields and air drag are
// only applied to objects that are neither rigid nor deflectors, and only along the face
// normals. The dt and firstCall arguments are not used here.
void SimClothObject::applyForces(float dt, int firstCall) {
	// Shared loop index, declared once because it is used by more than one loop
	int i;

	// Apply spring forces
	springsArray.applyForces();

	// Compute normals for the stretch/shear and the bend forces
	if (numBends>0 || numStretchShears>0) {
		for (i=0; i<numFaces; i++) {
			Vlado::Vector &p0=parts[faces[i].v[0]].np;
			Vlado::Vector &p1=parts[faces[i].v[1]].np;
			Vlado::Vector &p2=parts[faces[i].v[2]].np;

			Vlado::Vector n=(p1-p0)^(p2-p0);

			triplets[i].normalLength=n.length();
			triplets[i].normal=n/triplets[i].normalLength;
		}
	}

	// Compute stretch/shear forces
	for (i=0; i<numStretchShears; i++) stretchShears[i].applyForces();

	// Compute bend forces
	for (i=0; i<numBends; i++) bends[i].applyForces();

	// Compute force fields
	if (!rigid && !deflector) {
		for (i=0; i<forceFields.count(); i++) {
			ForceField *ff=forceFields[i];
			for (int k=0; k<numFaces; k++) {
				// Sample the field at the face center, using the average vertex velocity
				Vlado::Vector pos=(parts[faces[k].v[0]].np+parts[faces[k].v[1]].np+parts[faces[k].v[2]].np)/3.0f;
				Vlado::Vector vel=(parts[faces[k].v[0]].nv+parts[faces[k].v[1]].nv+parts[faces[k].v[2]].nv)/3.0f;

				Vlado::Vector f=toVector(ff->Force(worldTime, toPoint3(pos), toPoint3(vel), 0))*1e6f;
				// Keep only the component along the face normal
				f=triplets[k].normal*(triplets[k].normal*f);

				parts[faces[k].v[0]].f+=f;
				parts[faces[k].v[1]].f+=f;
				parts[faces[k].v[2]].f+=f;
			}
		}
	}

	// Compute air drag
	if (!rigid && !deflector && airDrag>1e-6f) {
		for (int k=0; k<numFaces; k++) {
			Vlado::Vector pos=(parts[faces[k].v[0]].np+parts[faces[k].v[1]].np+parts[faces[k].v[2]].np)/3.0f;
			Vlado::Vector vel=(parts[faces[k].v[0]].nv+parts[faces[k].v[1]].nv+parts[faces[k].v[2]].nv)/3.0f;

			// Drag opposes the velocity component along the face normal
			Vlado::Vector f=triplets[k].normal*((triplets[k].normal*vel)*(-airDrag));

			parts[faces[k].v[0]].f+=f;
			parts[faces[k].v[1]].f+=f;
			parts[faces[k].v[2]].f+=f;
		}
	}
}

// Brings the object up to date for time t: lets the modifier update itself, evaluates the
// node with the modifier's dontModify flag raised, feeds the resulting mesh to updateMesh()
// and refreshes the per-force scale factors.
void SimClothObject::updateSimulation(TimeValue t) {
	mod->dontModify=true;
	mod->updateSimulation(t, this);

	int deleteIt;
	// updateMesh() takes the matrix by non-const reference, so it needs a named object
	Matrix3 objTM=node->GetObjTMAfterWSM(t);
	TriObject *tri=GetTriObjectFromNode(t, node, deleteIt);
	// GetTriObjectFromNode returns NULL when the object cannot be converted to a mesh
	if (tri) {
		updateMesh(tri, tri->GetMesh(), objTM);
		if (deleteIt) tri->DeleteMe();
	}
	mod->dontModify=false;

	updateForceScale();

	worldTime=t;
}

// Recomputes the scale factor of every spring and every stretch/shear element.
// The factor is the average `scale` of all vertex groups of the modifier that contain all
// particles of the element, or 1 when no group does. Groups whose point count differs
// from the number of particles are ignored.
void SimClothObject::updateForceScale(void) {
	// Shared loop index, declared once because it is used by more than one loop
	int j;

	for (j=0; j<springsArray.count(); j++) {
		springsArray.scale[j]=1.0f;

		int numGroups=0;
		float sum=0.0f;

		// Particle indices of the spring end points
		int v0=springsArray.p0[j]-parts;
		int v1=springsArray.p1[j]-parts;

		for (int i=0; i<mod->groups.Count(); i++) {
			VertexGroup &group=*(mod->groups[i]);
			if (group.numPts==numParts && group.selected[v0] && group.selected[v1]) { sum+=group.scale; numGroups++; }
		}

		if (numGroups>0) springsArray.scale[j]=sum/float(numGroups);
	}

	for (j=0; j<numStretchShears; j++) {
		stretchShears[j].scale=1.0f;

		int numGroups=0;
		float sum=0.0f;

		// Particle indices of the three element corners
		int v0=stretchShears[j].parts[0]-parts;
		int v1=stretchShears[j].parts[1]-parts;
		int v2=stretchShears[j].parts[2]-parts;

		for (int i=0; i<mod->groups.Count(); i++) {
			VertexGroup &group=*(mod->groups[i]);
			if (group.numPts==numParts && group.selected[v0] && group.selected[v1] && group.selected[v2]) { sum+=group.scale; numGroups++; }
		}

		if (numGroups>0) stretchShears[j].scale=sum/float(numGroups);
	}
}

// Recomputes the face normals (stored in triplets), the vertex normals and the edge normals.
// Face corners come from the particle positions for non-rigid objects and from vertexPos()
// for rigid bodies. Vertex normals are the normalized sum of the unnormalized normals of
// the adjacent faces; edge normals are the normalized sum of the unit normals of the
// adjacent faces (or the single face normal for border edges).
void SimClothObject::buildNormals(void) {
	// Shared loop index, declared once because it is used by more than one loop
	int i;

	for (i=0; i<numVerts; i++) vertexNormals[i].makeZero();

	for (i=0; i<numFaces; i++) {
		Vlado::Vector n;
		if (!isRigidBody()) {
			Vlado::Vector &p0=parts[faces[i].v[0]].np;
			Vlado::Vector &p1=parts[faces[i].v[1]].np;
			Vlado::Vector &p2=parts[faces[i].v[2]].np;

			n=(p1-p0)^(p2-p0);
		} else {
			Vlado::Vector &p0=vertexPos(faces[i].v[0]);
			Vlado::Vector &p1=vertexPos(faces[i].v[1]);
			Vlado::Vector &p2=vertexPos(faces[i].v[2]);

			n=(p1-p0)^(p2-p0);
		}

		// Same bookkeeping for both kinds of object
		triplets[i].normalLength=n.length();
		triplets[i].normal=n/triplets[i].normalLength;

		for (int j=0; j<3; j++) vertexNormals[faces[i].v[j]]+=n;
	}

	for (i=0; i<numVerts; i++) vertexNormals[i].makeNormalized();

	for (i=0; i<numEdges; i++) {
		Vlado::EdgeData &edge=edges[i];
		if (edge.numFaces<2) edgeNormals[i]=triplets[edge.f[0]].normal;
		else edgeNormals[i]=Vlado::normalize(triplets[edge.f[0]].normal+triplets[edge.f[1]].normal);
	}
}

// Per-frame update from the evaluated mesh: stores the new object transformation, shifts
// the q0/q1/q2 position history of every particle, pins attached cloth particles and
// particles with zero original inverse mass to their target positions, refreshes the
// collision flags of cloth particles and rebuilds the normals.
void SimClothObject::updateMesh(TriObject *obj, Mesh &mesh, Matrix3 &tm) {
	// Update the transformation matrix
	this->tm=tm;

	const bool isCloth=(objType==obj_cloth);
	// Converts a per-frame displacement into a velocity
	const float framesPerSecond=float(GetFrameRate());

	// Update the position of attached particles
	for (int k=0; k<numParts; k++) {
		// Shift the history: q0 <- q1 <- q2, qv0 <- qv1
		parts[k].q0=parts[k].q1;
		parts[k].q1=parts[k].q2;
		parts[k].qv0=parts[k].qv1;

		if (isCloth) {
			Vlado::Vector attachPos;
			if (!mod->getAttachPos(k, attachPos)) {
				// Not attached: restore the original mass and follow the simulated position
				parts[k].invMass=parts[k].origInvMass;
				parts[k].q2=parts[k].np;
			} else {
				// Attached: move with the attachment point and make the particle immovable
				parts[k].q2=attachPos;
				parts[k].qv1=(parts[k].q2-parts[k].q1)*framesPerSecond;
				parts[k].invMass=0.0f;
			}
		}

		// Particles that were created with zero inverse mass follow the mesh vertex
		if (parts[k].origInvMass<1e-6f) {
			parts[k].q2=toVector(tm*mesh.verts[k]);
			parts[k].qv1=(parts[k].q2-parts[k].q1)*framesPerSecond;
		}
	}

	// Update the collision and self-collision flags
	if (isCloth) {
		for (int k=0; k<numParts; k++) {
			parts[k].collide=getParticleCollide(k);
			parts[k].selfCollide=getParticleSelfCollide(k);
			parts[k].checkIntersections=getParticleCheckIntersections(k);
		}
	}

	buildNormals();
}