in api/c/bingo-nosql/src/bingo_matcher.cpp [984:1254]
void TopNSimMatcher::_findTopN()
{
QS_DEF(Array<float>, thrs);
QS_DEF(Array<int>, nhits_per_block);
QS_DEF(Array<int>, blocks);
QS_DEF(Array<int>, cells);
thrs.clear();
nhits_per_block.clear();
blocks.clear();
cells.clear();
SimStorage& sim_storage = _index.getSimStorage();
float thr_low_limit = _query_data->getMin();
int hits_limit = _limit;
_initModelDistribution(thrs, nhits_per_block);
blocks.clear_resize(thrs.size());
cells.clear_resize(thrs.size());
_current_results.clear();
bool already_found = false;
bool too_many = false;
float thr_proc = 0.0f;
float cur_thr = 0.0f;
float thr_too_many = 0.0f;
int nhits = 0;
int start_iter = thrs.size();
int cont_count = 0;
int cells_count = 0;
int min_cell = 0;
int max_cell = 0;
int cur_cell = 0;
int i;
int cnt = 0;
for (i = 0; i < thrs.size(); i++)
{
if (too_many)
{
if (thr_proc == 0.0f)
cur_thr = thr_too_many + (1.0f - thr_too_many) / 2;
else
cur_thr = thr_too_many + (thr_proc - thr_too_many) / 2;
}
else
cur_thr = thrs[i];
resetThresholdLimit(cur_thr);
cont_count = containersCount();
blocks[i] = cont_count;
cells_count = cellsCount();
cells[i] = cells_count;
min_cell = minCell();
max_cell = maxCell();
if ((cont_count > 0) && (nhits == 0))
{
cnt = 0;
_current_results.clear();
SimResult* res = 0;
while (BaseSimilarityMatcher::next())
{
cnt++;
res = &_current_results.push();
res->id = _current_id;
res->sim_value = _current_sim_value;
cur_cell = currentCell();
if ((cnt > hits_limit * 2) && ((max_cell - cur_cell) > cells_count / 2))
{
thr_too_many = cur_thr;
too_many = true;
break;
}
else
too_many = false;
}
if (too_many)
continue;
if ((cnt >= hits_limit) || (cur_thr <= thr_low_limit))
{
already_found = true;
break;
}
nhits = (cnt / cont_count);
if ((nhits == 0) && (cnt > 0))
nhits = 1;
thr_proc = cur_thr;
nhits_per_block[i] = nhits;
if (i < thrs.size() - 2)
start_iter = i + 1;
else
{
already_found = true;
break;
}
if (thr_too_many > 0.0)
{
thrs[i] = thr_proc;
thrs[start_iter] = thr_proc - (thr_proc - thr_too_many) / 2;
if (start_iter < thrs.size() - 2)
thrs[start_iter + 1] = thr_too_many;
}
}
else if ((cont_count == 0) && sim_storage.isSmallBase())
{
cnt = 0;
_current_results.clear();
SimResult* res = 0;
while (BaseSimilarityMatcher::next())
{
cnt++;
res = &_current_results.push();
res->id = _current_id;
res->sim_value = _current_sim_value;
if (cnt > hits_limit * 2)
{
thr_too_many = cur_thr;
too_many = true;
break;
}
else
too_many = false;
}
if (too_many)
continue;
if ((cnt >= hits_limit) || (cur_thr <= thr_low_limit))
{
already_found = true;
break;
}
}
else if (cont_count == 0)
continue;
else
{
nhits_per_block[i] = nhits_per_block[i - 1] * 2;
}
}
if (!already_found)
{
bool adjusted = false;
float next_thr = 0.0f;
float prev_thr = 0.0f;
i = start_iter;
while (i < thrs.size())
{
if ((blocks[i] * nhits_per_block[i] < hits_limit) && (i < (thrs.size() - 1)) && (!adjusted))
{
i++;
continue;
}
cur_thr = thrs[i];
if ((blocks[i] * nhits_per_block[i] > 2 * hits_limit) && (!adjusted))
{
next_thr = cur_thr + (thrs[i - 1] - cur_thr) / 2;
prev_thr = cur_thr;
cur_thr = next_thr;
adjusted = true;
}
too_many = false;
resetThresholdLimit(cur_thr);
cont_count = containersCount();
cells_count = cellsCount();
cells[i] = cells_count;
min_cell = minCell();
max_cell = maxCell();
if (cont_count > 0)
{
cnt = 0;
_current_results.clear();
SimResult* res = 0;
while (BaseSimilarityMatcher::next())
{
cnt++;
res = &_current_results.push();
res->id = _current_id;
res->sim_value = _current_sim_value;
cur_cell = currentCell();
if ((cnt > hits_limit * 2) && (max_cell - cur_cell) > cells_count / 2)
{
next_thr = cur_thr + (thr_proc - cur_thr) / 2;
thr_too_many = cur_thr;
thrs[i] = next_thr;
too_many = true;
adjusted = true;
break;
}
}
if (((cnt >= hits_limit) || (cur_thr <= thr_low_limit)) && (!too_many))
break;
}
else
i++;
if (!too_many)
{
thr_proc = cur_thr;
nhits_per_block[i] = cnt / cont_count;
if (i < (thrs.size() - 2))
nhits_per_block[i + 1] = nhits_per_block[i] * 2;
if (thr_too_many > 0.0)
{
next_thr = cur_thr - (cur_thr - thr_too_many) / 2;
thrs[i] = next_thr;
}
else if (i < (thrs.size() - 2))
{
if ((hits_limit - cnt) < (nhits_per_block[i + 1] * (blocks[i + 1] - cont_count)))
{
next_thr = cur_thr - (cur_thr - thrs[i + 1]) / 2;
thrs[i] = next_thr;
adjusted = true;
}
else
{
i++;
next_thr = thrs[i];
}
}
}
cur_thr = next_thr;
}
}
if (_current_results.size() > 0)
{
_current_results.qsort(_cmp_sim_res, 0);
for (i = 0; i < _current_results.size(); i++)
{
_result_ids.push(_current_results[i].id);
_result_sims.push(_current_results[i].sim_value);
if (i == (hits_limit - 1))
break;
}
}
}