Finish improvements to pageindex rebuilder

also squash warning from stats engine during the firstrun wizard
This commit is contained in:
Starbeamrainbowlabs 2020-08-08 22:01:12 +01:00
parent 62a3ea9d1e
commit c0fa5b8ae4
Signed by: sbrl
GPG Key ID: 1BE5172E637709C2
4 changed files with 78 additions and 6 deletions

View File

@ -7,6 +7,7 @@ Make sure you have PHP 7.3+ when you update past this point! It isn't the end of
### Added
- [Module Api] Add new `search::invindex_term_getpageids`, `search::invindex_term_getoffsets`, and `search::index_sort_freq` methods
- [Module Api] Add new `ends_with` and `filepath_to_pagename` core functions
- Added new syntax features to PeppermintParsedown, inspired by ParsedownExtreme (which we couldn't get to work, and it wasn't working before as far as I can tell)
- Checkboxes: `[ ]` and `[x]` after a bullet point or at the start of a line
- Marked / highlighted text: `Some text ==marked text== more text`
@ -42,6 +43,7 @@ Make sure you have PHP 7.3+ when you update past this point! It isn't the end of
- A warning is generated in PHP 7.2 and below - [please upgrade](https://www.php.net/supported-versions.php) to PHP 7.3+! (#200)
- [security] The `Secure` cookie flag is now automatically added when clients use HTTPS to prevent downgrade-based session stealing attacks (control this with the new `cookie_secure` setting)
- Standardised prefixes to (most) `error_log()` calls to aid clarity in multi-wiki environments
- Improved pageindex rebuilder algorithm to search for and import history revisions - this helps when converting data from another wiki format
### Fixed
- Squashed a warning when using the fenced code block syntax
@ -58,6 +60,7 @@ Make sure you have PHP 7.3+ when you update past this point! It isn't the end of
- Fixed an obscure warning when previewing PDFs (#202)
- Ensure that the parent page exists when moving a page to be a child of a non-existent parent (#201)
- Fixed templating (#203)
- Fixed warning from statistics engine during firstrun wizard
## v0.21.1-hotfix1

View File

@ -159,6 +159,30 @@ function path_resolve(string $path, string $basePath = null) {
return implode(DIRECTORY_SEPARATOR, $components);
}
/**
 * Converts a filepath to a page name.
 * Strips (in this order) the storage prefix, a trailing revision suffix
 * (".r" followed by digits), and a trailing ".md" extension.
 * @param	string	$filepath	The filepath to convert.
 * @return	string	The extracted pagename.
 */
function filepath_to_pagename(string $filepath) : string {
	global $env;
	
	$prefix = $env->storage_prefix;
	
	// Strip the storage prefix, but only if it isn't a dot
	if($prefix !== "." && starts_with($filepath, $prefix)) {
		$filepath = substr($filepath, strlen($prefix));
		// Strip the forward slash at the beginning, unless the prefix already
		// ended with one. The emptiness check avoids reading offset 0 of an
		// empty string when the filepath is exactly the storage prefix.
		if($filepath !== "" && $filepath[0] === "/" && substr($prefix, -1) !== "/")
			$filepath = substr($filepath, 1);
	}
	
	// Strip the revision suffix, but only when one is actually present.
	// preg_match returns 0 for "no match" and false only on error, so the
	// result must be compared against 1 - otherwise paths without a revision
	// suffix get truncated at an unrelated ".r" (or emptied entirely).
	if(preg_match("/\.r[0-9]+$/", $filepath) === 1)
		$filepath = substr($filepath, 0, strrpos($filepath, ".r"));
	
	if(ends_with($filepath, ".md"))
		$filepath = substr($filepath, 0, -3);
	
	return $filepath;
}
/**
* Gets the name of the parent page to the specified page.
* @apiVersion 0.15.0
@ -303,10 +327,22 @@ function hide_email($str)
* of $haystack.
* @return bool Whether $needle can be found at the beginning of $haystack.
*/
function starts_with($haystack, $needle) {
function starts_with(string $haystack, string $needle) : bool {
	// Binary-safe comparison of $needle against the first strlen($needle)
	// bytes of $haystack; 0 means that leading portion is identical.
	return strncmp($haystack, $needle, strlen($needle)) === 0;
}
/**
 * Checks to see if $haystack ends with $needle.
 * The matching bookend to starts_with.
 * @package core
 * @param	string	$haystack	The haystack to search.
 * @param	string	$needle		The needle to look for.
 * @return	bool	Whether $needle can be found at the end of $haystack. An empty $needle always matches, just as it does in starts_with.
 */
function ends_with(string $haystack, string $needle) : bool {
	$length = strlen($needle);
	// substr($haystack, -0) is the same as substr($haystack, 0) and returns
	// the whole haystack rather than an empty string, so the empty needle
	// has to be handled explicitly.
	if($length === 0)
		return true;
	return (substr($haystack, -$length) === $needle);
}
/**
* Case-insensitively finds all occurrences of $needle in $haystack. Handles

View File

@ -103,16 +103,45 @@ if(!file_exists($paths->pageindex))
// Rebuild per-page history entries in $pageindex from revision files found on disk.
// Only runs when a function named history_add_revision is defined.
if(function_exists("history_add_revision")) {
// Collect candidate revision files: paths under the storage prefix matching "*.r*"
// (presumably searched recursively by glob_recursive - confirm against its definition).
$history_revs = glob_recursive($env->storage_prefix . "*.r*");
// It's very important that we read the history revisions in the right order and that we don't skip any
// Sort ascending by the trailing run of digits in each path (the revision id).
// NOTE(review): the comparator reads $revid_a[0] / $revid_b[0] without checking that
// preg_match found anything, whereas the loop below does check - a path matching "*.r*"
// that doesn't end in digits would hit an undefined offset here.
usort($history_revs, function($a, $b) {
preg_match("/[0-9]+$/", $a, $revid_a);
$revid_a = intval($revid_a[0]);
preg_match("/[0-9]+$/", $b, $revid_b);
$revid_b = intval($revid_b[0]);
return $revid_a - $revid_b;
});
// Length of the storage prefix, used below to store filenames relative to it
$strlen_storageprefix = strlen($env->storage_prefix);
foreach($history_revs as $filename) {
// NOTE(review): this call matches against a hard-coded string literal and its result is
// immediately overwritten by the next preg_match - looks like a leftover / removed line.
preg_match("/[0-9]+$/", "Main Page.md.r0", $revid);
// Extract the trailing digits of the path as the revision id
preg_match("/[0-9]+$/", $filename, $revid);
// Debug output of the raw match array
error_log("raw revid | ".var_export($revid, true));
// Skip paths that don't end in digits
if(count($revid) === 0) continue;
$revid = intval($revid[0]);
// TODO: Extract the pagename here (maybe a function is worth implementing if we haven't already?)
$pagename = filepath_to_pagename($filename);
// Path of the revision file with the storage prefix removed; this is what gets stored in the index
$filepath_stripped = substr($filename, $strlen_storageprefix);
// NOTE(review): the following line is syntactically incomplete ("&& )") and has no matching
// closing brace in this fragment - it appears to be a removed line left over from the diff view.
if($revid == 0 && ) {
// Make sure the page has a history array to append to
if(!isset($pageindex->$pagename->history))
$pageindex->$pagename->history = [];
// Don't overwrite a revision that is already recorded in the page index
if(isset($pageindex->$pagename->history[$revid]))
continue;
// Debug output of the entry about to be amended
error_log("pagename: $pagename, revid: $revid, pageindex entry: ".var_export($pageindex->$pagename, true));
$newsize = filesize($filename);
// Size of the previous revision; stays 0 for the first revision or when the previous one isn't recorded
$prevsize = 0;
// NOTE(review): the check is for history[$revid - 1], but the size is read from the *last*
// history element via end(). Also, the "filename" stored below is the prefix-stripped path,
// so filesize() here receives a path without the storage prefix - confirm both are intended.
if($revid > 0 && isset($pageindex->$pagename->history[$revid - 1])) {
$prevsize = filesize(end($pageindex->$pagename->history)->filename);
}
// Record the revision as an edit by an unknown editor, timestamped with the file's modification time
$pageindex->$pagename->history[$revid] = (object) [
"type" => "edit",
"rid" => $revid,
"timestamp" => filemtime($filename),
"filename" => $filepath_stripped,
"newsize" => $newsize,
"sizediff" => $newsize - $prevsize,
"editor" => "unknown"
];
}
}

View File

@ -1,7 +1,7 @@
<?php
register_module([
"name" => "Statistics",
"version" => "0.4.1",
"version" => "0.4.2",
"author" => "Starbeamrainbowlabs",
"description" => "An extensible statistics calculation system. Comes with a range of built-in statistics, but can be extended by other modules too.",
"id" => "feature-stats",
@ -302,6 +302,10 @@ function update_statistics($update_all = false, $force = false)
{
global $settings, $env, $paths, $statistic_calculators;
// If the firstrun wizard isn't complete, then there's no point in updating the statistics index
if(isset($settings->firstrun_complete) && $settings->firstrun_complete == false)
return;
$stats_mtime = filemtime($paths->statsindex);
// Clear the existing statistics if we are asked to recalculate them all