From 639c896a169aef0001c517b2508f5a71333e26fb Mon Sep 17 00:00:00 2001 From: matthew Date: Thu, 4 Jul 2019 22:28:39 -0500 Subject: [PATCH] Added join_path to cleanly join paths. Added sanitize_path to normalize and deduplicate directory separators. Changed warehouse_path to be able to scale up the number of octet pairs as much as desired. --- core/tests/util.test.php | 139 +++++++++++++++++++++++++++++++++++++++ core/util.php | 84 +++++++++++++++++++---- 2 files changed, 210 insertions(+), 13 deletions(-) create mode 100644 core/tests/util.test.php diff --git a/core/tests/util.test.php b/core/tests/util.test.php new file mode 100644 index 00000000..9087dde5 --- /dev/null +++ b/core/tests/util.test.php @@ -0,0 +1,139 @@ +assertEquals( + "one", + sanitize_path("one") + ); + + $this->assertEquals( + "one".DIRECTORY_SEPARATOR."two", + sanitize_path("one\\two") + ); + + $this->assertEquals( + "one".DIRECTORY_SEPARATOR."two", + sanitize_path("one/two") + ); + + $this->assertEquals( + "one".DIRECTORY_SEPARATOR."two", + sanitize_path("one\\\\two") + ); + + $this->assertEquals( + "one".DIRECTORY_SEPARATOR."two", + sanitize_path("one//two") + ); + + $this->assertEquals( + "one".DIRECTORY_SEPARATOR."two", + sanitize_path("one\\\\\\two") + ); + + $this->assertEquals( + "one".DIRECTORY_SEPARATOR."two", + sanitize_path("one///two") + ); + + $this->assertEquals( + DIRECTORY_SEPARATOR."one".DIRECTORY_SEPARATOR."two".DIRECTORY_SEPARATOR, + sanitize_path("\\/one/\\/\\/two\\/") + ); + + } + + public function test_join_path() + { + $this->assertEquals( + "one", + join_path("one") + ); + + $this->assertEquals( + "one".DIRECTORY_SEPARATOR."two", + join_path("one","two") + ); + + $this->assertEquals( + "one".DIRECTORY_SEPARATOR."two".DIRECTORY_SEPARATOR."three", + join_path("one","two","three") + ); + + $this->assertEquals( + "one".DIRECTORY_SEPARATOR."two".DIRECTORY_SEPARATOR."three", + join_path("one/two","three") + ); + + $this->assertEquals( + 
DIRECTORY_SEPARATOR."one".DIRECTORY_SEPARATOR."two".DIRECTORY_SEPARATOR."three".DIRECTORY_SEPARATOR, + join_path("\\/////\\\\one/\///"."\\//two\/\\//\\//","//\/\\\/three/\\/\/") + ); + } + + public function test_warehouse_path() + { + $hash = "7ac19c10d6859415"; + + $this->assertEquals( + join_path(DATA_DIR,"base",$hash), + warehouse_path("base",$hash,false, 0) + ); + + $this->assertEquals( + join_path(DATA_DIR,"base","7a",$hash), + warehouse_path("base",$hash,false, 1) + ); + + $this->assertEquals( + join_path(DATA_DIR,"base","7a","c1",$hash), + warehouse_path("base",$hash,false, 2) + ); + + $this->assertEquals( + join_path(DATA_DIR,"base","7a","c1","9c",$hash), + warehouse_path("base",$hash,false, 3) + ); + + $this->assertEquals( + join_path(DATA_DIR,"base","7a","c1","9c","10",$hash), + warehouse_path("base",$hash,false, 4) + ); + + $this->assertEquals( + join_path(DATA_DIR,"base","7a","c1","9c","10","d6",$hash), + warehouse_path("base",$hash,false, 5) + ); + + $this->assertEquals( + join_path(DATA_DIR,"base","7a","c1","9c","10","d6","85",$hash), + warehouse_path("base",$hash,false, 6) + ); + + $this->assertEquals( + join_path(DATA_DIR,"base","7a","c1","9c","10","d6","85","94",$hash), + warehouse_path("base",$hash,false, 7) + ); + + $this->assertEquals( + join_path(DATA_DIR,"base","7a","c1","9c","10","d6","85","94","15",$hash), + warehouse_path("base",$hash,false, 8) + ); + + $this->assertEquals( + join_path(DATA_DIR,"base","7a","c1","9c","10","d6","85","94","15",$hash), + warehouse_path("base",$hash,false, 9) + ); + + $this->assertEquals( + join_path(DATA_DIR,"base","7a","c1","9c","10","d6","85","94","15",$hash), + warehouse_path("base",$hash,false, 10) + ); + + } +} diff --git a/core/util.php b/core/util.php index b16dbb8e..ea74a7db 100644 --- a/core/util.php +++ b/core/util.php @@ -5,6 +5,9 @@ require_once "vendor/shish/libcontext-php/context.php"; * Misc * \* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +const DATA_DIR = "data"; 
+ + function mtimefile(string $file): string { $data_href = get_base_href(); @@ -159,28 +162,40 @@ function format_text(string $string): string return $tfe->formatted; } -function warehouse_path(string $base, string $hash, bool $create=true): string +/** + * Generates the path to a file under the data folder based on the file's hash. + * This process creates subfolders based on octet pairs from the file's hash. + * The calculated folder follows this pattern data/$base/octet_pairs/$hash + * @param string $base + * @param string $hash + * @param bool $create + * @param int $splits The number of octet pairs to split the hash into. Caps out at strlen($hash)/2. + * @return string + */ +function warehouse_path(string $base, string $hash, bool $create=true, int $splits = WH_SPLITS): string { - $ab = substr($hash, 0, 2); - $cd = substr($hash, 2, 2); - - $pa = Image::DATA_DIR.'/'.$base.'/'; - - if (WH_SPLITS == 2) { - $pa .= $ab.'/'.$cd.'/'.$hash; - } else { - $pa .= $ab.'/'.$hash; + $dirs =[DATA_DIR, $base]; + $splits = min($splits, strlen($hash) / 2); + for($i = 0; $i < $splits; $i++) { + $dirs[] = substr($hash, $i * 2, 2); } + $dirs[] = $hash; + + $pa = join_path(...$dirs); + if ($create && !file_exists(dirname($pa))) { mkdir(dirname($pa), 0755, true); } return $pa; } -function data_path(string $filename): string +/** + * Determines the path to the specified file in the data folder. + */ +function data_path(string $filename, bool $create = true): string { - $filename = "data/" . $filename; - if (!file_exists(dirname($filename))) { + $filename = join_path("data", $filename); + if ($create&&!file_exists(dirname($filename))) { mkdir(dirname($filename), 0755, true); } return $filename; @@ -325,6 +340,49 @@ function path_to_tags(string $path): string return implode(" ", $tags); } +/** + * Translates all possible directory separators to the appropriate one for the current system, + * and removes any duplicate separators. 
+ */ +function sanitize_path(string $path): string +{ + return preg_replace('|[\\\\/]+|S',DIRECTORY_SEPARATOR,$path); +} + +/** + * Combines all path segments specified, ensuring no duplicate separators occur, + * as well as converting all possible separators to the one appropriate for the current system. + */ +function join_path(string ...$paths): string +{ + $output = ""; + foreach ($paths as $path) { + if(empty($path)) { + continue; + } + $path = sanitize_path($path); + if(empty($output)) { + $output = $path; + } else { + $output = rtrim($output, DIRECTORY_SEPARATOR); + $path = ltrim($path, DIRECTORY_SEPARATOR); + $output .= DIRECTORY_SEPARATOR . $path; + } + } + return $output; +} + +function join_url(string $base, string ...$paths) +{ + $output = $base; + foreach ($paths as $path) { + $output = rtrim($output,"/"); + $path = ltrim($path, "/"); + $output .= "/".$path; + } + return $output; +} + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\ * Debugging functions *