Make PeppermintParsedown::extract_page_names more multibyte safe to avoid empty statistics

2025-04-07 23:04:53 +00:00 · 2022-04-24 14:38:21 +01:00 · 2022-04-24 14:38:21 +01:00 · 3f76c64b82
commit 3f76c64b82
parent bf2d797d92
3 changed files with 9 additions and 6 deletions
--- a/Changelog.md
+++ b/Changelog.md
@@ -25,6 +25,7 @@ This file holds the changelog for Pepperminty Wiki. This is the master list of t
 - StorageBox: Create SQLite DB if it doesn't exist explicitly with `touch()`, because some systems are weird
 - StorageBox: Fix crash when `index.php` is a symbolic link
 - Fixed erroneous additional entries in complex tables of contents
+ - Make `PeppermintParsedown::extract_page_names` more multibyte safe to avoid empty statistics


 ## v0.23
--- a/modules/feature-stats.php
+++ b/modules/feature-stats.php
@@ -397,5 +397,11 @@ function stats_load()
 function stats_save($stats)
 {
 	global $paths;
+	echo("STATS_SAVE BEGIN, CONTENT_DUMP:\n");
+	var_dump($stats);
+	echo("\nCONTENT_JSON:\n");
+	var_dump(json_encode($stats, JSON_PRETTY_PRINT, 10));
+	echo("\nLAST_ERROR: ".json_last_error()."\n");
+	echo("\nSTATS_SAVE END to $paths->statsindex\n");
 	return file_put_contents($paths->statsindex, json_encode($stats, JSON_PRETTY_PRINT) . "\n");
 }
--- a/modules/parser-parsedown.php
+++ b/modules/parser-parsedown.php
@@ -262,7 +262,6 @@ register_module([
 					$page_content = file_get_contents($env->storage_prefix . $pagedata->filename);
 					
 					$page_links = PeppermintParsedown::extract_page_names($page_content);
-					
 					foreach($page_links as $linked_page) {
 						// We're only interested in pages that don't exist
 						if(!empty($pageindex->$linked_page)) continue;
@@ -1640,17 +1639,14 @@ class PeppermintParsedown extends ParsedownExtra
 	 */
 	public static function extract_page_names($page_text) {
 		global $pageindex;
-		preg_match_all("/\[\[([^\]]+)\]\]/", $page_text, $linked_pages);
+		preg_match_all("/\[\[([^\]]+)\]\]/u", $page_text, $linked_pages);
 		if(count($linked_pages[1]) === 0)
 			return []; // No linked pages here
 		
 		$result = [];
 		foreach($linked_pages[1] as $linked_page) {
 			// Strip everything after the | and the #
-			if(strpos($linked_page, "|") !== false)
-				$linked_page = substr($linked_page, 0, strpos($linked_page, "|"));
-			if(strpos($linked_page, "#") !== false)
-				$linked_page = substr($linked_page, 0, strpos($linked_page, "#"));
+			$linked_page = preg_replace("/[|¦#].*/u", "", $linked_page);
 			if(strlen($linked_page) === 0)
 				continue;
 			// Make sure we try really hard to find this page in the