1
0
Fork 0
mirror of https://github.com/sbrl/Pepperminty-Wiki.git synced 2024-12-23 01:55:01 +00:00
Pepperminty-Wiki/experiments/bktree/bktreetest.php
Starbeamrainbowlabs 593f16dfb9
Commit BkTree & Nilsimsa experiments. It's about time!
....I was getting increasingly nervous about not committing these to git. 
Hopefully at some point soon I'll be able to integrate the BkTree into 
Pepperminty Wiki properly - but I still need to implement word removal 
first before I can do that.

Also, feature-search is getting big. It's refactoring time to be sure, 
but I'm uncertain at this stage precisely _how_ I want to go about that. 
I've got 3 ideas:

1. Refactor the engine and the storage box into separate "library 
modules"
2. Refactor them into their own repository/ies or something, and include 
them as extra data
3. Extend the extra data system to support local files and include them 
in the main Pepperminty Wiki repository

Thought is required. If anyone actually reads this message, do get in 
touch with your thoughts!
2020-03-04 01:57:13 +00:00

134 lines
3.3 KiB
PHP

<?php
require("BkTree.php");
/**
 * Invokes a callable with no arguments and measures how long the call takes.
 * @param	callable	$callable	The callable to invoke.
 * @return	array	An associative array holding the callable's return value
 * 					under "value" and the elapsed wall-clock time in seconds
 * 					(as a float) under "time".
 */
function time_callable($callable) {
	$began_at = microtime(true);
	$outcome = $callable();
	$elapsed = microtime(true) - $began_at;
	return [ "value" => $outcome, "time" => $elapsed ];
}
/**
 * Creates a new BkTree on bktree.sqlite and adds every (trimmed) line of
 * enable1.shuf.txt to it, printing how long populating the tree took.
 * @return	BkTree	The populated tree.
 * @throws	RuntimeException	If enable1.shuf.txt can't be opened for reading.
 */
function tree_create() {
	// Open the word list first: if it's missing we want to fail before the tree
	// is constructed (presumably constructing it creates bktree.sqlite, which
	// would make the next run load an empty tree - TODO confirm against BkTree.php).
	$handle = fopen("enable1.shuf.txt", "r");
	if($handle === false)
		throw new RuntimeException("Error: Failed to open enable1.shuf.txt for reading");

	$tree = new BkTree("bktree.sqlite");
	echo("Populating tree - ");
	$time = microtime(true);
	try {
		while(($line = fgets($handle)) !== false)
			$tree->add(trim($line));

		echo("done in ".round((microtime(true) - $time) * 1000, 2)."ms\n");
	}
	finally {
		// Release the file handle even if adding a word throws
		fclose($handle);
	}
	return $tree;
}
/**
 * Announces that the tree is being saved, then calls close() on it.
 * NOTE(review): the CLI banner implies close() is what persists the tree to
 * disk - confirm against BkTree.php.
 * @param	BkTree	$tree	The tree to close.
 * @return	void
 */
function tree_save(BkTree $tree) {
	echo "Saving tree\n";
	$tree->close();
}
/**
 * Opens a BkTree on the bktree.sqlite file in the current working directory.
 * @return	BkTree	The newly constructed tree instance.
 */
function tree_load() {
	$tree = new BkTree("bktree.sqlite");
	return $tree;
}
/**
 * Baseline benchmark: scans enable1.shuf.txt line by line and prints every
 * word whose Levenshtein distance to the query is within the given limit,
 * then prints the total time taken and terminates the script.
 * @param	string	$query			The word to search for. Defaults to "cakke".
 * @param	int		$max_distance	The maximum edit distance to accept. Defaults to 2.
 * @return	void	This function never returns - it always ends in exit().
 * @throws	RuntimeException	If enable1.shuf.txt can't be opened for reading.
 */
function test_search_linear($query = "cakke", $max_distance = 2) {
	$start_time = microtime(true);
	$handle = fopen("enable1.shuf.txt", "r");
	if($handle === false)
		throw new RuntimeException("Error: Failed to open enable1.shuf.txt for reading");

	while(($line = fgets($handle)) !== false) {
		// Trim once & reuse for both the comparison and the output
		$word = trim($line);
		if(levenshtein($query, $word) > $max_distance) continue;
		echo("linear match: ".$word."\n");
	}
	echo("done in ".round((microtime(true) - $start_time) * 1000, 2)."ms\n");
	fclose($handle);
	exit();
}
// Reuse the existing bktree.sqlite if there is one; otherwise build the tree
// from the word list. Either way the call is timed and the duration reported.
$tree_source = file_exists("bktree.sqlite") ? "tree_load" : "tree_create";
$timed = time_callable($tree_source);
echo "Tree created in ", $timed["time"] * 1000, "ms\n";

// $tree is read as a global by test_auto() and by the CLI loop further down
$tree = $timed["value"];
echo "Tree stats: ";
var_dump($tree->stats());
/**
 * Non-interactive benchmark: looks up "cakke" with a maximum distance of 2 in
 * the global $tree, prints the time taken and the number of results, and then
 * terminates the script.
 * @return	void	This function never returns - it always ends in exit().
 */
function test_auto() {
	global $tree;

	$runs = 1; // Number of times to repeat the lookup
	$run = 0;
	while($run < $runs) {
		$began_at = microtime(true);
		$matches = $tree->lookup("cakke", 2);
		$elapsed_ms = round((microtime(true) - $began_at) * 1000, 2);
		$match_count = count($matches);
		echo("Lookup complete in {$elapsed_ms}ms ({$match_count} results found)\n");
		$run++;
	}

	exit();
}
// NOTE: test_auto() finishes by calling exit(), so for as long as this call is
// active nothing below it will run. Comment it out to use the interactive CLI.
test_auto();

echo("BkTree Test CLI\n");
echo("Exit with .exit\n");
echo("This ensures the tree is saved to disk\n");

// Interactive loop: every line entered is looked up in the tree with a maximum
// edit distance of 2. Lines starting with a dot are treated as commands.
while(true) {
	$line = readline("> "); // Newline is removed automatically

	if($line === false) {
		// readline() returns false when there's no more input (e.g. Ctrl+D).
		// strlen(false) is 0, so without this check the loop would spin
		// forever. Treat it as .exit so that the tree still gets closed.
		echo("\n");
		$line = ".exit";
	}
	else {
		if(strlen($line) == 0) continue;
		readline_add_history($line);
	}

	if($line[0] === ".") {
		switch($line) {
			case ".quit":
			case ".exit":
				$result = time_callable(function() use ($tree) {
					tree_save($tree);
				});
				echo("Serialised tree in ".round($result["time"] * 1000, 2)."ms\n");
				exit("exit\n");
			default:
				// Don't silently swallow mistyped commands
				echo("Unknown command $line\n");
				break;
		}
		continue;
	}

	// var_dump($line);

	// Time the lookup itself....
	$time = microtime(true);
	$results = $tree->lookup($line, 2);
	$time = round((microtime(true) - $time)*1000, 2);

	// ....and separately, sorting the results by ascending distance from the query
	$time_sort = microtime(true);
	// Note that adding a cache here doesn't make a significant difference to performance
	// The overhead of calling a function far outweighs that of calling levenshtein(), apparently
	usort($results, function($a, $b) use ($line, $tree) {
		return $tree->edit_distance($a, $line) - $tree->edit_distance($b, $line);
	});
	$time_sort = round((microtime(true) - $time_sort)*1000, 2);

	$i = 0; // Counts the results printed for the summary line below
	foreach($results as $result) {
		echo(
			str_pad($i, 5, " ", STR_PAD_LEFT).": ".
			str_pad($result, 20).
			" dist ".$tree->edit_distance($result, $line).
			"\n"
		);
		$i++;
	}

	// $start_time_inc = microtime(true);
	// $i = 0;
	// foreach($tree->lookup($line, 2) as $result) {
	// 	// var_dump($result);
	// 	echo(
	// 		str_pad(
	// 			str_pad("$i: $result", 20)."dist ".levenshtein($result, $line),
	// 			40
	// 		).
	// 		"+".round((microtime(true) - $start_time_inc)*1000, 2)."ms\n"
	// 	);
	// 	// readline("(press enter to continue)");
	//
	// 	$start_time_inc = microtime(true);
	// 	$i++;
	// }

	echo("Found $i results in {$time}ms (+{$time_sort}ms sort)\n");
}