我建议不要给每个线程一个块 A(1-100) 和 B(101-200),而是为每个线程分配一个模数。例如,A 将采用所有奇数索引,B 将采用所有偶数索引,结果 A {1,3,5,7,9,...,191,193,195,197,199} 和 B {2,4,6,8,..., 190,192,194,196,198,200}。这可能是对线程进行负载平衡的最快和最简单的方法,因为计算的复杂性会均匀分布。
下一个建议是添加一个布尔值,指示是否可以继续处理。然后,在每次计算开始之前,您检查是否可以继续进行。这样,您可以在不终止线程的情况下停止计算,但每次循环都要进行一次额外比较。
#include <windows.h>
#include <stdio.h>
#include <process.h>
#include <queue>
#include <cmath>
#include <iostream>
using namespace std;
// Cooperative stop flag: main() sets it to true before starting the workers,
// every worker checks it on each loop iteration and sets it to false when it
// returns, which makes the other worker leave its loop as well.
// NOTE(review): this is a plain bool read and written by several threads
// outside the critical section - confirm whether an atomic/interlocked flag
// is wanted here.
bool run;
// Primes found so far. priority_queue<int> yields the largest value first,
// so main() prints them in descending order.
priority_queue<int> primes;
// Guards every push into `primes` performed by the worker threads.
CRITICAL_SECTION critical;
// Half-open candidate range [begin, end) handed to one worker thread.
struct args
{
    int begin; // first candidate the worker tests
    int end;   // one past the last candidate the worker tests
};

// Argument blocks for the two worker threads; main() fills them in before
// the threads are started.
args par1, par2;
// Primality test by trial division.
//
// Returns 1 when n is prime and 0 otherwise. Values below 2 (0, 1 and all
// negative numbers) are never prime.
int e_prosto(int n)
{
    if (n < 2)
        return 0;
    // The old test `n & 1 == 0` parsed as `n & (1 == 0)` and was always false;
    // handle even numbers explicitly so the loop can skip them.
    if (n % 2 == 0)
        return n == 2;
    // `i <= n / i` is the same bound as `i * i <= n` but cannot overflow.
    for (int i = 3; i <= n / i; i += 2)
        if (n % i == 0)
            return 0;
    return 1;
}
// Worker thread entry point (signature required by _beginthreadex).
//
// n points to an `args` range. Every candidate in [begin, end) is tested with
// e_prosto() and each hit is pushed onto the shared `primes` queue while
// holding `critical`. The loop also ends as soon as the global `run` flag is
// false. On exit the worker clears `run`, which makes the other worker leave
// its loop too. Always returns 0.
unsigned int __stdcall rabotnik(void* n)
{
    args* range = static_cast<args*>(n);

    for (int candidate = range->begin; candidate < range->end && run; ++candidate)
    {
        if (!e_prosto(candidate))
            continue;

        // The queue is shared between the workers, so guard the push.
        EnterCriticalSection(&critical);
        primes.push(candidate);
        LeaveCriticalSection(&critical);
    }

    // Signal the other worker that this one is done.
    run = false;
    return 0;
}
int main()
{
int foo;
printf(" Tarsene na prosti do: ");
scanf("%d", &foo);
par1.begin=1;
par1.end=foo/2+1;
par2.begin=foo/2+1;
par2.end=foo;
run = true;
HANDLE hvadkaA, hvadkaB;
InitializeCriticalSection(&critical);
SYSTEMTIME st, now, then;
hvadkaA = (HANDLE)_beginthreadex(0, 0, &rabotnik, (void*)&par1, 0, 0);
hvadkaB = (HANDLE)_beginthreadex(0, 0, &rabotnik, (void*)&par2, 0, 0);
::GetSystemTime(&then);
WaitForSingleObject(hvadkaA, INFINITE);
WaitForSingleObject(hvadkaB, INFINITE);
CloseHandle(hvadkaA);
CloseHandle(hvadkaB);
::GetSystemTime(&now);
while(!primes.empty())
{
printf("%d \t", primes.top());
primes.pop();
}
printf("\nGotov za %d milisec", abs(now.wMilliseconds - then.wMilliseconds));
system("pause>nul");
return 0;
}
另一种方法是将您的范围划分为许多块,每当一个线程完成时,就给它一个新的块来处理。这样做的好处是不会给计算本身增加额外的开销,但确实需要更多的代码(因为您要在线程完成后继续向其派发工作,并且要等待任意一个线程完成,而不只是等待某一个线程)。要让这种方法真正有价值,您可能需要更大的范围,并且可能需要根据计算复杂度来改变块大小(例如块 {1-100}、{101-150}、{151-175}、{176-183}、{184-187}、……)。下面是基于您的代码(使用相同的块大小)的一个快速示例:
#include <windows.h>
#include <stdio.h>
#include <process.h>
#include <queue>
#include <cmath>
#include <iostream>
using namespace std;
// Primes found by the worker threads. priority_queue<int> yields the largest
// value first, so main() prints them in descending order.
priority_queue<int> primes;
// Guards every push into `primes` performed by the worker threads.
CRITICAL_SECTION critical;
// Half-open candidate range [begin, end) processed by one worker thread.
// PArgs is the pointer form that is handed to the thread entry point.
typedef struct args
{
    int begin; // first candidate the worker tests
    int end;   // one past the last candidate the worker tests

    // Helper for initializing both bounds in one call.
    // The end bound is an int: it used to be declared bool, which would have
    // collapsed any bound to 0/1, and the body did not store anything.
    void setAll(int inBegin, int inEnd)
    {
        begin = inBegin;
        end = inEnd;
    }
} *PArgs;
// Primality test by trial division.
//
// Returns 1 when n is prime and 0 otherwise. Values below 2 (0, 1 and all
// negative numbers) are never prime.
int e_prosto(int n)
{
    if (n < 2)
        return 0;
    // The old test `n & 1 == 0` parsed as `n & (1 == 0)` and was always false;
    // handle even numbers explicitly so the loop can skip them.
    if (n % 2 == 0)
        return n == 2;
    // `i <= n / i` is the same bound as `i * i <= n` but cannot overflow.
    for (int i = 3; i <= n / i; i += 2)
        if (n % i == 0)
            return 0;
    return 1;
}
// Worker thread entry point (signature required by CreateThread).
//
// lpParam points to an `args` range. Every candidate in [begin, end) is
// tested with e_prosto() and each hit is pushed onto the shared `primes`
// queue while holding `critical`. Always returns 0.
static DWORD WINAPI rabotnik(LPVOID lpParam)
{
    args* const block = static_cast<args*>(lpParam);

    int candidate = block->begin;
    while (candidate < block->end)
    {
        if (e_prosto(candidate))
        {
            // The queue is shared between the workers, so guard the push.
            EnterCriticalSection(&critical);
            primes.push(candidate);
            LeaveCriticalSection(&critical);
        }
        ++candidate;
    }
    return 0;
}
int main()
{
const int NUM_THREAD = 2; //Use named constant incase you want to change later.
DWORD returnedThreadID;
DWORD threadID[NUM_THREAD];
HANDLE threadHandle[NUM_THREAD]; //Holds the handels for the threads.
int foo, //Range size.
fooBlockSize, //Number of elements in a block.
nextBlock;
PArgs par[NUM_THREAD];
printf(" Tarsene na prosti do: ");
scanf("%d", &foo); //Get range size from user.
fooBlockSize = foo / (NUM_THREAD * 10); //Set number of elements in a block.
InitializeCriticalSection(&critical);
SYSTEMTIME st, now, then;
for (int i = 0; i < NUM_THREAD; i++)
{
par[i] = (PArgs) HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(PArgs));
// If the allocation fails, the system is out of memory so terminate execution.
if( par[i] == NULL ){ cout<<"par HeapAlloc failed with error# "<<GetLastError()<<endl<<"Will now quit."<<endl; ExitProcess(2);}
}
for(int i = 0; i < NUM_THREAD; i++)
{
par[i]->begin = (fooBlockSize * i) + 1;
par[i]->end = par[i]->begin + fooBlockSize;
threadHandle[i] = CreateThread(NULL, 0, rabotnik, par[i], CREATE_SUSPENDED, &threadID[i]);
}
nextBlock = NUM_THREAD;
::GetSystemTime(&then);
for (int i = 0; i < NUM_THREAD; i++)
{
ResumeThread(threadHandle[i]); //Start threads
}
while( ((fooBlockSize * nextBlock) + 1) < foo)
{
returnedThreadID = WaitForMultipleObjects(NUM_THREAD, threadHandle, false, INFINITE); //Wait for a thread to complete.
for(int i = 0; i<NUM_THREAD;i++)
{
if(returnedThreadID = threadID[i])
{
//Update the thread's arguments with the new block.
par[i]->begin = (fooBlockSize * nextBlock) + 1;
par[i]->end = par[i]->begin + fooBlockSize;
//Restart the thread.
ResumeThread(threadHandle[i]);
nextBlock++;
}
}
}
for (int i = 0; i < NUM_THREAD; i++)
{
//Return heap memorry (good practice, though Windows should return it all when the process terminates).
if (HeapFree(GetProcessHeap(), 0, par[i]) == 0)
{
cout<<"HeapFree failed for par["<<i<<"]"<<endl;
}
//Not sure we need to close the threads, but it was in original version.
CloseHandle(threadHandle[i]);
}
::GetSystemTime(&now);
while(!primes.empty())
{
printf("%d \t", primes.top());
primes.pop();
}
printf("\nGotov za %d milisec", abs(now.wMilliseconds - then.wMilliseconds));
system("pause>nul");
return 0;
}
在增加块数与增加块大小之间需要权衡取舍。增加块的数量意味着只有一个线程在工作的时间会更少(即线程 [0] 已完成、而线程 [1] 仍在收尾时,没有新的块可供线程 [0] 处理的那段时间),但也意味着会花更多时间等待调度循环分配新的块。根据您的问题陈述,我预计查找较大的素数本身就要花很长时间,因此这点开销无关紧要。
正如其他答案所指出的,不要让所有线程共用同一个堆栈来存储各自找到的素数(花在加锁上的时间会过多)。如果您希望按数字顺序输出素数,我建议重写打印素数的循环,使其同时遍历两个堆栈,每次打印(按顺序的)下一个值。比如:
while(!primes1.empty() && !primes2.empty())
{
if(primes1.top() > primes2.top())
{
printf("%d \t", primes1.top());
primes1.pop();
}
else
{
printf("%d \t", primes2.top());
primes2.pop();
}
}
一旦其中一个堆栈为空,您还需要处理另一个堆栈中剩余的值(或者可以在每个堆栈的底部放置一个 -1,这样当任一堆栈变空时,所有大于 -1 的值都已经打印)。
另一种解决方案是维护一个已排序的素数列表,每次线程返回时都会更新该列表。然后可以将其复制到 par 结构中以更快地检测素数(如果一个数字可以被现有素数整除,则该数字不是素数)。
注意:我没有测试过这些示例,尽管它们应该足够接近,可以让您大致了解。