A standalone full-text search engine written in C#.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

Program.cs 7.6KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308
  1. using System;
  2. using System.Collections.Generic;
  3. using System.IO;
  4. using System.Linq;
  5. using System.Text.RegularExpressions;
  6. using System.Threading.Tasks;
  7. using Newtonsoft.Json;
  8. using SBRL.Utilities;
  9. using LibSearchBox;
  10. namespace SearchBoxCLI
  11. {
  /// <summary>
  /// The action requested by the first positional command-line argument.
  /// </summary>
  enum OperatingModes
  {
      /// <summary>Run a query against a saved search index.</summary>
      Query = 0,
      /// <summary>Build an index from the source text and print it.</summary>
      Index = 1,
      /// <summary>Add one or more documents to a saved search index.</summary>
      Add = 2,
      /// <summary>Remove a named document from a saved search index.</summary>
      Remove = 3,
      /// <summary>Update a document. NOTE(review): no handler for this mode is visible in this file.</summary>
      Update = 4,
      /// <summary>Generate a context for a query against a document; spelled "context" on the command line.</summary>
      GenerateContext = 5
  }
  /// <summary>
  /// The output format selected with the "format" option.
  /// </summary>
  enum OutputModes
  {
      /// <summary>Serialise the result as JSON.</summary>
      Json = 0,
      /// <summary>Plain text output (the default).</summary>
      Text = 1,
      /// <summary>HTML output; of the handlers in this file, only context generation produces it.</summary>
      Html = 2
  }
  27. class MainClass {
  // Arguments that do not start with "-", in the order given; the first one selects the mode.
  private static List<string> Extras = new List<string>();

  // The requested operating mode and output format.
  private static OperatingModes Mode = OperatingModes.Query;
  private static OutputModes OutputMode = OutputModes.Text;

  // Set by the "batch" option: the add handler then reads a list of documents from Source.
  private static bool Batch = false;

  // Document name ("name" option); falls back to the source filename when one is given.
  private static string Name = string.Empty;

  // Document tags ("tags" option, comma separated); stays null when the option is absent.
  private static IEnumerable<string> Tags = null;

  // Path of the serialised search index ("index" option).
  private static string SearchIndexFilepath = string.Empty;

  // Where document text is read from: stdin unless the "source" option names a file.
  private static TextReader Source = Console.In;

  // Readers opened by the "old-source" and "new-source" options.
  private static TextReader SourceOld = null;
  private static TextReader SourceNew = null;

  // The query string ("query" option); "-" makes the query handler read it from stdin.
  private static string Query = string.Empty;

  // Paging for query results; a limit that is not positive means "no limit".
  private static int ResultsLimit = -1;
  private static int ResultsOffset = 0;
  /// <summary>
  /// Entry point. Parses the command line into the static fields of this class, then
  /// dispatches to the handler for the requested mode.
  /// </summary>
  /// <param name="args">
  /// The raw command-line arguments. Anything that starts with "-" is treated as an option
  /// (any number of leading dashes is accepted); everything else is a positional argument,
  /// the first of which names the operating mode.
  /// </param>
  /// <returns>
  /// The process exit code: the selected handler's result, 1 for usage errors or when help
  /// is shown, and 128 when the mode is recognised but has no handler.
  /// </returns>
  public static int Main(string[] args)
  {
      string value;
      int number;
      TextReader reader;

      for (int i = 0; i < args.Length; i++)
      {
          if (!args[i].StartsWith("-", StringComparison.Ordinal)) {
              Extras.Add(args[i]);
              continue;
          }

          switch (args[i].TrimStart('-')) {
              case "s":
              case "source":
                  if (!TryTakeValue(args, ref i, out value)) return 1;
                  if (!TryOpenReader(value, out reader)) return 1;
                  Source = reader;
                  // An explicitly supplied name always wins over the filename.
                  Name = Name.Length > 0 ? Name : value;
                  break;
              case "batch":
                  Batch = true;
                  break;
              case "old-source":
                  if (!TryTakeValue(args, ref i, out value)) return 1;
                  if (!TryOpenReader(value, out reader)) return 1;
                  SourceOld = reader;
                  break;
              case "new-source":
                  if (!TryTakeValue(args, ref i, out value)) return 1;
                  if (!TryOpenReader(value, out reader)) return 1;
                  SourceNew = reader;
                  Name = Name.Length > 0 ? Name : value;
                  break;
              case "tags":
                  if (!TryTakeValue(args, ref i, out value)) return 1;
                  Tags = Regex.Split(value, @",\s*");
                  break;
              case "n":
              case "name":
                  if (!TryTakeValue(args, ref i, out value)) return 1;
                  Name = value;
                  break;
              case "index":
                  if (!TryTakeValue(args, ref i, out value)) return 1;
                  SearchIndexFilepath = value;
                  break;
              case "limit":
                  if (!TryTakeInt(args, ref i, out number)) return 1;
                  ResultsLimit = number;
                  break;
              case "offset":
                  if (!TryTakeInt(args, ref i, out number)) return 1;
                  ResultsOffset = number;
                  break;
              case "query":
                  if (!TryTakeValue(args, ref i, out value)) return 1;
                  Query = value;
                  break;
              case "format":
                  if (!TryTakeValue(args, ref i, out value)) return 1;
                  OutputModes parsedFormat;
                  if (!TryParseEnum(value, out parsedFormat)) {
                      Console.Error.WriteLine($"Error: Unknown output format {value}.");
                      return 1;
                  }
                  OutputMode = parsedFormat;
                  break;
              case "help":
                  return HandleHelp();
              default:
                  Console.Error.WriteLine($"Error: Unknown property {args[i]}.");
                  return 1;
          }
      }

      if (Extras.Count < 1) return HandleHelp();

      string modeText = Extras[0];
      Extras.RemoveAt(0);
      // "context" is shorthand for "generatecontext". Compare the whole word so that the
      // long spelling is not mangled into "generategeneratecontext".
      if (string.Equals(modeText, "context", StringComparison.OrdinalIgnoreCase))
          modeText = "generatecontext";

      OperatingModes parsedMode;
      if (!TryParseEnum(modeText, out parsedMode)) {
          Console.Error.WriteLine($"Error: Unknown mode {modeText}.");
          return 1;
      }
      Mode = parsedMode;

      switch (Mode) {
          case OperatingModes.Index: return HandleIndex();
          case OperatingModes.Add: return HandleAdd();
          case OperatingModes.Remove: return HandleRemove();
          case OperatingModes.Query: return HandleQuery();
          case OperatingModes.GenerateContext: return HandleContextGeneration();
          default:
              Console.Error.WriteLine($"Error: Don't know how to handle mode {Mode}.");
              return 128;
      }
  }

  /// <summary>
  /// Consumes the value that follows the option at args[i], reporting an error if the
  /// option is the last argument.
  /// </summary>
  private static bool TryTakeValue(string[] args, ref int i, out string value)
  {
      if (i + 1 >= args.Length) {
          Console.Error.WriteLine($"Error: Missing value for {args[i]}.");
          value = null;
          return false;
      }
      value = args[++i];
      return true;
  }

  /// <summary>
  /// Consumes the value that follows the option at args[i] and parses it as an integer,
  /// reporting an error if it is missing or not a number.
  /// </summary>
  private static bool TryTakeInt(string[] args, ref int i, out int number)
  {
      string option = args[i];
      string text;
      number = 0;
      if (!TryTakeValue(args, ref i, out text)) return false;
      if (int.TryParse(text, out number)) return true;
      Console.Error.WriteLine($"Error: {option} expects an integer, but got '{text}'.");
      return false;
  }

  /// <summary>
  /// Opens a file for reading, reporting an error instead of throwing when it can't be opened.
  /// </summary>
  private static bool TryOpenReader(string path, out TextReader reader)
  {
      try {
          reader = new StreamReader(path);
          return true;
      } catch (Exception error) when (error is IOException || error is UnauthorizedAccessException || error is ArgumentException) {
          Console.Error.WriteLine($"Error: Can't open {path}: {error.Message}");
          reader = null;
          return false;
      }
  }

  /// <summary>
  /// Case-insensitively parses text as a defined member of the enum T.
  /// </summary>
  private static bool TryParseEnum<T>(string text, out T result) where T : struct
  {
      return Enum.TryParse(text, true, out result) && Enum.IsDefined(typeof(T), result);
  }
  /// <summary>
  /// Writes the embedded help text to standard output.
  /// </summary>
  /// <returns>Always 1, whether help was requested explicitly or shown because no mode was given.</returns>
  private static int HandleHelp()
  {
      var helpText = EmbeddedFiles.ReadAllText("SearchBoxCLI.EmbeddedFiles.Help.txt");
      Console.WriteLine(helpText);
      return 1;
  }
  /// <summary>
  /// Adds documents to the search index file, creating the index if the file doesn't exist.
  /// </summary>
  /// <remarks>
  /// In normal mode the whole of Source is one document, stored under Name with Tags.
  /// In batch mode every non-blank line of Source has the form "filename|name|tags",
  /// where tags are comma separated, and each named file is read and added.
  /// The index is only written back when every document was added successfully.
  /// </remarks>
  /// <returns>0 on success, 1 on a usage error or when a batch entry can't be processed.</returns>
  private static int HandleAdd()
  {
      if (Name == string.Empty && !Batch) {
          Console.Error.WriteLine("Error: The document name must be specified when reading from stdin!");
          return 1;
      }
      if (SearchIndexFilepath == string.Empty) {
          Console.Error.WriteLine("Error: No search index file path specified.");
          return 1;
      }

      // --------------------------------------

      SearchBox searchBox;
      if (!File.Exists(SearchIndexFilepath))
          searchBox = new SearchBox();
      else
          searchBox = JsonConvert.DeserializeObject<SearchBox>(File.ReadAllText(SearchIndexFilepath));

      if (!Batch)
          searchBox.AddDocument(Name, Tags, Source.ReadToEnd());
      else {
          try
          {
              // NOTE(review): this calls AddDocument from several threads at once, as the
              // original did - confirm that SearchBox.AddDocument is safe for concurrent use.
              Parallel.ForEach(LineIterator.GetLines(Source), (string nextLine) => {
                  string[] parts = nextLine.Split('|');
                  string filename = parts[0].Trim();
                  if (filename.Length == 0)
                      return;
                  if (parts.Length < 3)
                      throw new FormatException($"Error: Expected 'filename|name|tags', but got '{nextLine}'.");

                  searchBox.AddDocument(
                      parts[1].Trim(),
                      Regex.Split(parts[2], @",\s*"),
                      File.ReadAllText(filename)
                  );
                  Console.Error.WriteLine($"[Searchbox] [add] {filename}");
              });
          } catch (FileNotFoundException error) {
              Console.Error.WriteLine(error.Message);
              return 1;
          } catch (AggregateException error) {
              // Parallel.ForEach wraps whatever its workers throw in an AggregateException,
              // so a missing file or malformed line arrives here rather than in the catch above.
              // Anything that isn't an expected input problem is rethrown by Handle().
              error.Flatten().Handle(inner => {
                  if (!(inner is IOException) && !(inner is FormatException))
                      return false;
                  Console.Error.WriteLine(inner.Message);
                  return true;
              });
              return 1;
          }
      }

      File.WriteAllText(SearchIndexFilepath, JsonConvert.SerializeObject(searchBox));
      Console.Error.WriteLine($"[Searchbox] [save] {Name} -> {SearchIndexFilepath}");
      return 0;
  }
  /// <summary>
  /// Removes the document called Name from the search index file and saves the index back.
  /// </summary>
  /// <returns>0 on success, 1 if the name or index path is missing or the index file doesn't exist.</returns>
  private static int HandleRemove()
  {
      if (string.IsNullOrEmpty(Name)) {
          Console.Error.WriteLine("Error: The document name must be specified when removing a document!");
          return 1;
      }
      // Same validation as the other handlers that load the index.
      if (string.IsNullOrEmpty(SearchIndexFilepath)) {
          Console.Error.WriteLine("Error: No search index file path specified.");
          return 1;
      }
      // There is nothing to remove from an index that was never created.
      if (!File.Exists(SearchIndexFilepath)) {
          Console.Error.WriteLine($"Error: The search index file {SearchIndexFilepath} doesn't exist.");
          return 1;
      }

      // --------------------------------------

      SearchBox searchBox = JsonConvert.DeserializeObject<SearchBox>(
          File.ReadAllText(SearchIndexFilepath)
      );
      searchBox.RemoveDocument(Name);

      File.WriteAllText(SearchIndexFilepath, JsonConvert.SerializeObject(searchBox));
      Console.Error.WriteLine($"[Searchbox] [remove] {Name} <- {SearchIndexFilepath}");
      return 0;
  }
  /// <summary>
  /// Runs Query against the search index file and prints the results, honouring
  /// ResultsOffset and ResultsLimit (a limit that is not positive means "no limit").
  /// </summary>
  /// <remarks>
  /// A query of "-" is replaced by the first line of standard input.
  /// Results can be printed as JSON or as numbered text lines; HTML is rejected.
  /// </remarks>
  /// <returns>0 on success, 1 on a usage error.</returns>
  private static int HandleQuery()
  {
      if (string.IsNullOrEmpty(Query)) {
          Console.Error.WriteLine("Error: No query specified!");
          return 1;
      }
      if (SearchIndexFilepath == string.Empty) {
          Console.Error.WriteLine("Error: No search index file path specified.");
          return 1;
      }
      // Fail loudly instead of printing nothing and reporting success.
      if (OutputMode == OutputModes.Html) {
          Console.Error.WriteLine("Error: HTML output for queries is not supported.");
          return 1;
      }

      // Use the first line of stdin instead of the actual query string if "-" is specified
      if (Query == "-") {
          // ReadLine() returns null once stdin is exhausted.
          string line = Console.ReadLine();
          Query = line == null ? string.Empty : line.Trim();
          if (Query.Length == 0) {
              Console.Error.WriteLine("Error: No query specified!");
              return 1;
          }
      }

      SearchBox searchBox = JsonConvert.DeserializeObject<SearchBox>(
          File.ReadAllText(SearchIndexFilepath)
      );

      IEnumerable<SearchResult> resultsRaw = searchBox.Query(Query, new QuerySettings()).Skip(ResultsOffset);
      List<SearchResult> results = new List<SearchResult>(
          ResultsLimit > 0 ? resultsRaw.Take(ResultsLimit) : resultsRaw
      );

      switch (OutputMode)
      {
          case OutputModes.Json:
              Console.WriteLine(JsonConvert.SerializeObject(results));
              break;
          case OutputModes.Text:
              int i = 0;
              foreach (SearchResult nextResult in results) {
                  Console.WriteLine($"#{i}: {nextResult}");
                  i++;
              }
              break;
      }
      return 0;
  }
  /// <summary>
  /// Prints the context generated for Query against the document called Name, whose
  /// text is read from Source.
  /// </summary>
  /// <remarks>
  /// Text and HTML output are supported; JSON output is rejected with an error.
  /// </remarks>
  /// <returns>0 on success, 1 on a usage error.</returns>
  private static int HandleContextGeneration()
  {
      // Guard clauses: name, query and index path are all mandatory.
      if (string.IsNullOrEmpty(Name))
      {
          Console.Error.WriteLine("Error: No document name specified.");
          return 1;
      }
      if (string.IsNullOrEmpty(Query))
      {
          Console.Error.WriteLine("Error: No query specified.");
          return 1;
      }
      if (SearchIndexFilepath == string.Empty)
      {
          Console.Error.WriteLine("Error: No search index file path specified.");
          return 1;
      }

      string indexJson = File.ReadAllText(SearchIndexFilepath);
      SearchBox index = JsonConvert.DeserializeObject<SearchBox>(indexJson);

      ContextSettings settings = new ContextSettings();
      if (OutputMode == OutputModes.Json)
      {
          Console.Error.WriteLine("Error: JSON output for context generation is not supported.");
          return 1;
      }
      if (OutputMode == OutputModes.Html)
          settings.Html = true;
      else if (OutputMode == OutputModes.Text)
          settings.Html = false;

      string documentText = Source.ReadToEnd();
      var context = index.GenerateContext(Name, documentText, Query, settings);
      Console.WriteLine(context);
      return 0;
  }
  /// <summary>
  /// Builds an Index from everything in Source and prints it as JSON or text.
  /// </summary>
  /// <returns>0 on success, 1 if HTML output was requested, which isn't supported here.</returns>
  private static int HandleIndex()
  {
      // Reject the unsupported format before consuming the source, rather than
      // printing nothing and reporting success.
      if (OutputMode == OutputModes.Html) {
          Console.Error.WriteLine("Error: HTML output for index generation is not supported.");
          return 1;
      }

      Index index = new Index(Source.ReadToEnd());

      switch (OutputMode)
      {
          case OutputModes.Json:
              Console.WriteLine(JsonConvert.SerializeObject(index));
              break;
          case OutputModes.Text:
              Console.WriteLine(index);
              break;
      }
      return 0;
  }
  252. }
  253. }