Henrik Kragh
Member
- Joined
- Nov 24, 2020
- Messages
- 8
- Programming Experience
- 10+
I have a big multithreaded system in C#, and I realized that performance was very different between two threads.
Now I have designed two nearly identical threads, where one runs 4-5 times faster than the other (and the gap scales linearly with the number of loops they have to run).
And the difference?
One clumsy condition surrounding the actual heavy code of one of them.
It makes no sense to me, and I feel powerless in optimizing going forward, if such a minor detail can have such a huge impact.
This was tested in Unity, and as such it could be that the result is different in other environments.
ThreadA finish time: 2.8 seconds.
ThreadB finish time: 0.6 seconds.
Mind you, "ThreadB" is the one that has the condition (which evaluates to true immediately, on the first iteration of the while loop).
How can such a stupid addition to the code make the actual payload (the for loops and the number crunching) run so much faster?
Also, if I replace the "delay" variable with a literal "0.0" directly in the condition of ThreadB, it performs like ThreadA again.
In other words: a single double, and whether it is a hardcoded value or a reference to a variable, makes a performance difference of a factor of 4-5.
Never mind the actual algorithm, which is only there to make the computer crunch some numbers.
I know I am comparing the same data again and again; that is beside the point.
I am no compilation nerd, and I have no way of probing how this differs in the actual machine/assembly code.
I just know that the difference is huge, and nonsensical to me.
What am I missing?
I discovered this by accident, and in the future I may have no way of knowing that a given thread is running at 20% of its possible speed, and that one slight change could fix it.
Please.
I need an expert to make this go from pure magic to "Oh, that's why...!
Now I know how to avoid it in the future...".
I know compilation of C# is surrounded by layers of managed stuff, but there must be a logical reason. Right?
Here is some test code, with some simple structs to support it. If anyone has the time to check whether they get the same results as I do, I would be happy.
Now I have designed two nearly identical threads, where one runs 4-5 times faster than the other (and the gap scales linearly with the number of loops they have to run).
And the difference?
One clumsy condition surrounding the actual heavy code of one of them.
It makes no sense to me, and I feel powerless in optimizing going forward, if such a minor detail can have such a huge impact.
This was tested in Unity, and as such it could be that the result is different in other environments.
ThreadA finish time: 2.8 seconds.
ThreadB finish time: 0.6 seconds.
Mind you, "ThreadB" is the one that has the condition (which evaluates to true immediately, on the first iteration of the while loop).
How can such a stupid addition to the code make the actual payload (the for loops and the number crunching) run so much faster?
Also, if I replace the "delay" variable with a literal "0.0" directly in the condition of ThreadB, it performs like ThreadA again.
In other words: a single double, and whether it is a hardcoded value or a reference to a variable, makes a performance difference of a factor of 4-5.
Never mind the actual algorithm, which is only there to make the computer crunch some numbers.
I know I am comparing the same data again and again; that is beside the point.
I am no compilation nerd, and I have no way of probing how this differs in the actual machine/assembly code.
I just know that the difference is huge, and nonsensical to me.
What am I missing?
I discovered this by accident, and in the future I may have no way of knowing that a given thread is running at 20% of its possible speed, and that one slight change could fix it.
Please.
I need an expert to make this go from pure magic to "Oh, that's why...!
Now I know how to avoid it in the future...".
I know compilation of C# is surrounded by layers of managed stuff, but there must be a logical reason. Right?
Here is some test code, with some simple structs to support it. If anyone has the time to check whether they get the same results as I do, I would be happy.
C#:
using System.Threading;
/// <summary>
/// Timing repro: runs the same nested-loop floating-point workload on two threads and
/// records how long each took. ThreadA runs the workload directly; ThreadB runs the
/// same workload inside an elapsed-time check against a local <c>delay</c> of 0.
/// Each thread stores its measured duration, in milliseconds, in
/// <c>elapsedTimeA</c> / <c>elapsedTimeB</c>.
/// </summary>
/// <remarks>
/// NOTE(review): the two thread bodies are textually duplicated; this looks deliberate,
/// since the only difference between them (the surrounding condition) is what is being
/// measured. Do not refactor the payload into a shared helper without re-measuring.
/// </remarks>
public class ThreadTest
{
// Worker threads; re-created on every call to StartThreads.
Thread threadA;
Thread threadB;
// Loop flags: set to true by StartThreads, cleared by the worker itself.
// NOTE(review): these are plain (non-volatile) fields written from one thread and read
// from another — confirm whether visibility guarantees matter for the real system.
bool runThreadA = false;
bool runThreadB = false;
// Single stopwatch shared by both workers as their time source.
System.Diagnostics.Stopwatch stopWatch;
// Last measured duration of each worker's payload, in milliseconds.
// Nothing in this class reads these back; presumably inspected via debugger or
// code outside this listing — TODO confirm.
double elapsedTimeA = 0;
double elapsedTimeB = 0;
/// <summary>
/// Creates the stopwatch and immediately calls <see cref="StartThreads"/>, so
/// constructing an instance starts both worker threads.
/// </summary>
public ThreadTest()
{
stopWatch = new System.Diagnostics.Stopwatch();
StartThreads();
}
/// <summary>
/// Restarts the stopwatch from zero, creates two new threads, arms both run flags,
/// clears the previous results and starts ThreadA followed by ThreadB.
/// </summary>
public void StartThreads ()
{
stopWatch.Reset();
stopWatch.Start();
threadA = new Thread(ThreadA);
threadB = new Thread(ThreadB);
runThreadA = true;
runThreadB = true;
elapsedTimeA = 0;
elapsedTimeB = 0;
threadA.Start();
threadB.Start();
}
/// <summary>
/// Worker A: runs the payload once with no surrounding condition and stores the
/// elapsed time in milliseconds in <c>elapsedTimeA</c>.
/// </summary>
void ThreadA ()
{
while (runThreadA)
{
// Clear the flag first so the loop body runs once per arming of the flag.
runThreadA = false;
// ElapsedTicks is a long; it is implicitly converted to double here.
double preTicks = stopWatch.ElapsedTicks;
Line3Double lineA = new Line3Double(new Vector3DoublePrecision(10, 20, 30), new Vector3DoublePrecision(100, 140, 180));
Line3Double lineB = new Line3Double(new Vector3DoublePrecision(-10, -20, -30), new Vector3DoublePrecision(-100, -140, -180));
int lines = 1000;
// Four nested loops: 8 * 1000 * 8 * 1000 = 64,000,000 inner iterations,
// all operating on the same two lines.
for (int i = 0; i < 8; i++)
{
for (int j = 0; j < lines; j++)
{
// Copy lineA's fields into locals once per (i, j) iteration.
double aStartX = lineA.startX;
double aStartY = lineA.startY;
double aStartZ = lineA.startZ;
// aEndX/aEndY/aEndZ are assigned but not used anywhere below.
double aEndX = lineA.endX;
double aEndY = lineA.endY;
double aEndZ = lineA.endZ;
double aDirX = lineA.dirX;
double aDirY = lineA.dirY;
double aDirZ = lineA.dirZ;
double aDotSelf = lineA.dotSelf;
for (int k = 0; k < 8; k++)
{
for (int l = 0; l < lines; l++)
{
// w = start of A minus start of B.
double wX = aStartX - lineB.startX;
double wY = aStartY - lineB.startY;
double wZ = aStartZ - lineB.startZ;
// b = dirA . dirB, d = dirA . w, e = dirB . w
double b = aDirX * lineB.dirX + aDirY * lineB.dirY + aDirZ * lineB.dirZ;
double d = aDirX * wX + aDirY * wY + aDirZ * wZ;
double e = lineB.dirX * wX + lineB.dirY * wY + lineB.dirZ * wZ;
// D = |dirA|^2 * |dirB|^2 - (dirA . dirB)^2
double D = aDotSelf * lineB.dotSelf - b * b;
double sc, tc;
if (D < 0.0000001)
{
// the lines are almost parallel
// (0.0f is a float literal; it is implicitly widened to double.)
sc = 0.0f;
tc = (b > lineB.dotSelf ? d / b : e / lineB.dotSelf);
}
else
{
sc = (b * e - lineB.dotSelf * d) / D;
tc = (aDotSelf * e - b * d) / D;
}
// Vector w + sc * dirA - tc * dirB, then its squared length.
double shortestX = wX + (sc * aDirX) - (tc * lineB.dirX);
double shortestY = wY + (sc * aDirY) - (tc * lineB.dirY);
double shortestZ = wZ + (sc * aDirZ) - (tc * lineB.dirZ);
// The result is never read or stored.
// NOTE(review): an optimizing JIT may be free to drop unused computation, which
// could affect what this benchmark measures — confirm from the generated code.
double distance = shortestX * shortestX + shortestY * shortestY + shortestZ * shortestZ;
}
}
}
}
double postTicks = stopWatch.ElapsedTicks;
// ticks / (ticks per second) = seconds; * 1000 = milliseconds.
double time = ((postTicks - preTicks) / System.Diagnostics.Stopwatch.Frequency) * 1000;
elapsedTimeA = time;
}
}
/// <summary>
/// Worker B: same payload as ThreadA, but wrapped in a check that the time elapsed
/// since this method started (in seconds) is at least <c>delay</c>. Stores the
/// payload's elapsed time in milliseconds in <c>elapsedTimeB</c>.
/// </summary>
void ThreadB()
{
long startTicks = stopWatch.ElapsedTicks;
// Local, never modified after this assignment.
double delay = 0;
while (runThreadB)
{
// Elapsed seconds since the thread started, compared against delay (0), so the
// check passes as soon as the elapsed value is non-negative.
// If it were false, the loop would spin without sleeping until it became true.
if ((double)(stopWatch.ElapsedTicks - startTicks) / System.Diagnostics.Stopwatch.Frequency >= delay)
{
// Clear the flag so the payload runs once per arming of the flag.
runThreadB = false;
// ElapsedTicks is a long; it is implicitly converted to double here.
double preTicks = stopWatch.ElapsedTicks;
Line3Double lineA = new Line3Double(new Vector3DoublePrecision(10, 20, 30), new Vector3DoublePrecision(100, 140, 180));
Line3Double lineB = new Line3Double(new Vector3DoublePrecision(-10, -20, -30), new Vector3DoublePrecision(-100, -140, -180));
int lines = 1000;
// Four nested loops: 8 * 1000 * 8 * 1000 = 64,000,000 inner iterations,
// all operating on the same two lines.
for (int i = 0; i < 8; i++)
{
for (int j = 0; j < lines; j++)
{
// Copy lineA's fields into locals once per (i, j) iteration.
double aStartX = lineA.startX;
double aStartY = lineA.startY;
double aStartZ = lineA.startZ;
// aEndX/aEndY/aEndZ are assigned but not used anywhere below.
double aEndX = lineA.endX;
double aEndY = lineA.endY;
double aEndZ = lineA.endZ;
double aDirX = lineA.dirX;
double aDirY = lineA.dirY;
double aDirZ = lineA.dirZ;
double aDotSelf = lineA.dotSelf;
for (int k = 0; k < 8; k++)
{
for (int l = 0; l < lines; l++)
{
// w = start of A minus start of B.
double wX = aStartX - lineB.startX;
double wY = aStartY - lineB.startY;
double wZ = aStartZ - lineB.startZ;
// b = dirA . dirB, d = dirA . w, e = dirB . w
double b = aDirX * lineB.dirX + aDirY * lineB.dirY + aDirZ * lineB.dirZ;
double d = aDirX * wX + aDirY * wY + aDirZ * wZ;
double e = lineB.dirX * wX + lineB.dirY * wY + lineB.dirZ * wZ;
// D = |dirA|^2 * |dirB|^2 - (dirA . dirB)^2
double D = aDotSelf * lineB.dotSelf - b * b;
double sc, tc;
if (D < 0.0000001)
{
// the lines are almost parallel
// (0.0f is a float literal; it is implicitly widened to double.)
sc = 0.0f;
tc = (b > lineB.dotSelf ? d / b : e / lineB.dotSelf);
}
else
{
sc = (b * e - lineB.dotSelf * d) / D;
tc = (aDotSelf * e - b * d) / D;
}
// Vector w + sc * dirA - tc * dirB, then its squared length.
double shortestX = wX + (sc * aDirX) - (tc * lineB.dirX);
double shortestY = wY + (sc * aDirY) - (tc * lineB.dirY);
double shortestZ = wZ + (sc * aDirZ) - (tc * lineB.dirZ);
// The result is never read or stored.
// NOTE(review): an optimizing JIT may be free to drop unused computation, which
// could affect what this benchmark measures — confirm from the generated code.
double distance = shortestX * shortestX + shortestY * shortestY + shortestZ * shortestZ;
}
}
}
}
double postTicks = stopWatch.ElapsedTicks;
// ticks / (ticks per second) = seconds; * 1000 = milliseconds.
double time = ((postTicks - preTicks) / System.Diagnostics.Stopwatch.Frequency) * 1000;
elapsedTimeB = time;
}
}
}
}
/// <summary>
/// Three-component vector whose components are stored as public, mutable
/// double-precision fields.
/// </summary>
public struct Vector3DoublePrecision
{
    /// <summary>The vector's components, declared in x, y, z order.</summary>
    public double x, y, z;

    /// <summary>Creates a vector from its three components.</summary>
    /// <param name="x">Value stored in <see cref="x"/>.</param>
    /// <param name="y">Value stored in <see cref="y"/>.</param>
    /// <param name="z">Value stored in <see cref="z"/>.</param>
    public Vector3DoublePrecision(double x, double y, double z)
    {
        // The assignments are independent; every field is set before returning.
        this.z = z;
        this.y = y;
        this.x = x;
    }
}
/// <summary>
/// Line defined by a start point and an end point. The constructor also stores the
/// direction (end minus start) and that direction's dot product with itself.
/// All values are public, mutable double-precision fields.
/// </summary>
public struct Line3Double
{
    /// <summary>Start point components.</summary>
    public double startX, startY, startZ;

    /// <summary>End point components.</summary>
    public double endX, endY, endZ;

    /// <summary>Direction components: end minus start, per axis.</summary>
    public double dirX, dirY, dirZ;

    /// <summary>Dot product of the direction with itself (its squared length).</summary>
    public double dotSelf;

    /// <summary>Builds a line from its two end points and fills in the derived fields.</summary>
    /// <param name="start">Point copied into the start fields.</param>
    /// <param name="end">Point copied into the end fields.</param>
    public Line3Double(Vector3DoublePrecision start, Vector3DoublePrecision end)
    {
        // Compute the direction once in locals, then publish it to the fields.
        double deltaX = end.x - start.x;
        double deltaY = end.y - start.y;
        double deltaZ = end.z - start.z;

        startX = start.x;
        startY = start.y;
        startZ = start.z;

        endX = end.x;
        endY = end.y;
        endZ = end.z;

        dirX = deltaX;
        dirY = deltaY;
        dirZ = deltaZ;

        dotSelf = deltaX * deltaX + deltaY * deltaY + deltaZ * deltaZ;
    }
}