inilike.common source code

1 /**
2  * Common functions for dealing with entries in ini-like file.
3  * Authors: 
4  *  $(LINK2 https://github.com/MyLittleRobo, Roman Chistokhodov)
5  * Copyright:
6  *  Roman Chistokhodov, 2015-2016
7  * License: 
8  *  $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
9  * See_Also: 
10  *  $(LINK2 http://standards.freedesktop.org/desktop-entry-spec/latest/index.html, Desktop Entry Specification)
11  */
12 
13 module inilike.common;
14 
15 package {
16     import std.algorithm;
17     import std.range;
18     import std.string;
19     import std.traits;
20     import std.typecons;
21     import std.conv : to;
22     
23     static if( __VERSION__ < 2066 ) enum nogc = 1;
24     
25     auto keyValueTuple(String)(String key, String value)
26     {
27         alias KeyValueTuple = Tuple!(String, "key", String, "value");
28         return KeyValueTuple(key, value);
29     }
30 }
31 
/// Return the slice of s that remains after skipping leading spaces and tabs.
private @nogc @safe auto simpleStripLeft(inout(char)[] s) pure nothrow
{
    size_t start = 0;
    for (; start < s.length; ++start) {
        if (s[start] != ' ' && s[start] != '\t') {
            break;
        }
    }
    return s[start..$];
}
45 
/// Return the slice of s that remains after dropping trailing spaces and tabs.
private @nogc @safe auto simpleStripRight(inout(char)[] s) pure nothrow
{
    size_t end = s.length;
    while (end > 0 && (s[end-1] == ' ' || s[end-1] == '\t')) {
        --end;
    }
    return s[0..end];
}
60 
61 
/**
 * Test whether the string s represents a comment.
 * A comment is a line whose first character after leading spaces and tabs is '#'.
 */
@nogc @safe bool isComment(const(char)[] s) pure nothrow
{
    const stripped = s.simpleStripLeft;
    return stripped.length != 0 && stripped[0] == '#';
}

///
unittest
{
    assert( isComment("# Comment"));
    assert( isComment("   # Comment"));
    assert(!isComment("Not comment"));
    assert(!isComment(""));
}
79 
/**
 * Test whether the string s represents a group header.
 * Trailing spaces and tabs are ignored; leading whitespace is not.
 * Note: "[]" is not considered a valid group header.
 */
@nogc @safe bool isGroupHeader(const(char)[] s) pure nothrow
{
    const header = s.simpleStripRight;
    if (header.length <= 2) {
        // Too short to hold a non-empty name between the brackets.
        return false;
    }
    return header[0] == '[' && header[$-1] == ']';
}

///
unittest
{
    assert( isGroupHeader("[Group]"));
    assert( isGroupHeader("[Group]    "));
    assert(!isGroupHeader("[]"));
    assert(!isGroupHeader("[Group"));
    assert(!isGroupHeader("Group]"));
}
99 
/**
 * Retrieve the group name from a header entry.
 * Returns: the text between the enclosing brackets, or an empty string if the entry is not a group header.
 */
@nogc @safe auto parseGroupHeader(inout(char)[] s) pure nothrow
{
    auto header = s.simpleStripRight;
    // The slice comes first in the conditional so the deduced return type stays inout(char)[].
    return isGroupHeader(header) ? header[1..$-1] : null;
}

///
unittest
{
    assert(parseGroupHeader("[Group name]") == "Group name");
    assert(parseGroupHeader("NotGroupName") == string.init);
    
    assert(parseGroupHeader("[Group name]".dup) == "Group name".dup);
}
123 
/**
 * Parse an entry of the kind Key=Value into a pair of Key and Value.
 * The entry is split at the first '='; neither part is stripped of whitespace.
 * Returns: tuple of key and value strings, or tuple of empty strings if it is not a key-value entry.
 * Note: this function does not check whether the parsed key is a valid key.
 */
@nogc @trusted auto parseKeyValue(String)(String s) pure nothrow if (isSomeString!String && is(ElementEncodingType!String : char))
{
    auto parts = s.findSplit("=");
    // A key-value entry needs both a non-empty key and the '=' separator itself.
    const isKeyValue = parts[0].length != 0 && parts[1].length != 0;
    if (!isKeyValue) {
        return keyValueTuple(String.init, String.init);
    }
    return keyValueTuple(parts[0], parts[2]);
}

///
unittest
{
    assert(parseKeyValue("Key=Value") == tuple("Key", "Value"));
    assert(parseKeyValue("Key=") == tuple("Key", string.init));
    assert(parseKeyValue("=Value") == tuple(string.init, string.init));
    assert(parseKeyValue("NotKeyValue") == tuple(string.init, string.init));
    
    assert(parseKeyValue("Key=Value".dup) == tuple("Key".dup, "Value".dup));
}
151 
/**
 * Test whether the string is a valid key in terms of the Desktop File Specification. Not actually used in inilike.file.IniLikeFile, but can be used in derivatives.
 * Only the characters A-Za-z0-9- may be used in key names. See $(LINK2 http://standards.freedesktop.org/desktop-entry-spec/latest/ar01s02.html, Basic format of the file)
 * Note: this function automatically separates the key from its locale. It does not check the validity of the locale itself.
 */
@nogc @safe bool isValidKey(String)(String key) pure nothrow if (isSomeString!String && is(ElementEncodingType!String : char)) {
    @nogc @safe static bool isValidKeyChar(ElementType!String c) pure nothrow {
        if (c == '-') {
            return true;
        }
        const isLower = c >= 'a' && c <= 'z';
        const isUpper = c >= 'A' && c <= 'Z';
        const isDigit = c >= '0' && c <= '9';
        return isLower || isUpper || isDigit;
    }
    
    // Only the part in front of an optional "[locale]" suffix is validated.
    auto name = separateFromLocale(key)[0];
    if (name.length == 0) {
        return false;
    }
    foreach (c; name) {
        if (!isValidKeyChar(c)) {
            return false;
        }
    }
    return true;
}

///
unittest
{
    assert(isValidKey("Generic-Name"));
    assert(isValidKey("Generic-Name[ru_RU]"));
    assert(!isValidKey("Name$"));
    assert(!isValidKey(""));
    assert(!isValidKey("[ru_RU]"));
}
184 
/**
 * Test whether the entry value represents true.
 * Only the exact strings "true" and "1" are accepted.
 */
@nogc @safe bool isTrue(const(char)[] value) pure nothrow {
    if (value == "true") {
        return true;
    }
    return value == "1";
}

///
unittest 
{
    assert(isTrue("true"));
    assert(isTrue("1"));
    assert(!isTrue("not boolean"));
}
199 
/**
 * Test whether the entry value represents false.
 * Only the exact strings "false" and "0" are accepted.
 */
@nogc @safe bool isFalse(const(char)[] value) pure nothrow {
    if (value == "false") {
        return true;
    }
    return value == "0";
}

///
unittest 
{
    assert(isFalse("false"));
    assert(isFalse("0"));
    assert(!isFalse("not boolean"));
}
214 
/**
 * Check if the entry value can be interpreted as a boolean value,
 * i.e. it is accepted by either isTrue or isFalse.
 * See_Also: isTrue, isFalse
 */
@nogc @safe bool isBoolean(const(char)[] value) pure nothrow {
    if (isTrue(value)) {
        return true;
    }
    return isFalse(value);
}

///
unittest 
{
    assert(isBoolean("true"));
    assert(isBoolean("1"));
    assert(isBoolean("false"));
    assert(isBoolean("0"));
    assert(!isBoolean("not boolean"));
}
232 
/**
 * Convert bool to string. Can be used to set boolean values.
 * Returns: "true" for true and "false" for false.
 */
@nogc @safe string boolToString(bool b) nothrow pure {
    if (b) {
        return "true";
    }
    return "false";
}

///
unittest
{
    assert(boolToString(false) == "false");
    assert(boolToString(true) == "true");
}
246 
/**
 * Make a locale name based on language, country, encoding and modifier.
 * Empty components are left out together with their separator.
 * Returns: locale name in the form lang_COUNTRY.ENCODING@MODIFIER
 * See_Also: parseLocaleName
 */
@safe String makeLocaleName(String)(
    String lang, String country = null, 
    String encoding = null, 
    String modifier = null) pure
if (isSomeString!String && is(ElementEncodingType!String : char))
{
    // Concatenation yields a fresh array, so the result never aliases lang.
    String name = lang ~ String.init;
    if (country.length) {
        name ~= '_';
        name ~= country;
    }
    if (encoding.length) {
        name ~= '.';
        name ~= encoding;
    }
    if (modifier.length) {
        name ~= '@';
        name ~= modifier;
    }
    return name;
}

///
unittest
{
    assert(makeLocaleName("ru", "RU") == "ru_RU");
    assert(makeLocaleName("ru", "RU", "UTF-8") == "ru_RU.UTF-8");
    assert(makeLocaleName("ru", "RU", "UTF-8", "mod") == "ru_RU.UTF-8@mod");
    assert(makeLocaleName("ru", string.init, string.init, "mod") == "ru@mod");
    
    assert(makeLocaleName("ru".dup, (char[]).init, (char[]).init, "mod".dup) == "ru@mod".dup);
}
273 
/**
 * Parse a locale name into the tuple of 4 values corresponding to language, country, encoding and modifier.
 * Components that are absent from the locale name are returned as empty strings.
 * Returns: Tuple!(String, "lang", String, "country", String, "encoding", String, "modifier")
 * See_Also: makeLocaleName
 */
@nogc @trusted auto parseLocaleName(String)(String locale) pure nothrow if (isSomeString!String && is(ElementEncodingType!String : char))
{
    alias LocaleTuple = Tuple!(String, "lang", String, "country", String, "encoding", String, "modifier");
    
    // Peel the components off from the right: "@modifier" first, then ".encoding", then "_country".
    auto byModifier = findSplit(locale, "@");
    auto byEncoding = findSplit(byModifier[0], ".");
    auto byCountry = findSplit(byEncoding[0], "_");
    
    return LocaleTuple(byCountry[0], byCountry[2], byEncoding[2], byModifier[2]);
}

///
unittest 
{
    assert(parseLocaleName("ru_RU.UTF-8@mod") == tuple("ru", "RU", "UTF-8", "mod"));
    assert(parseLocaleName("ru@mod") == tuple("ru", string.init, string.init, "mod"));
    assert(parseLocaleName("ru_RU") == tuple("ru", "RU", string.init, string.init));
    
    assert(parseLocaleName("ru_RU.UTF-8@mod".dup) == tuple("ru".dup, "RU".dup, "UTF-8".dup, "mod".dup));
}
306 
/**
 * Drop the encoding part from a locale (it's not used in constructing localized keys).
 * Returns: Locale string with the encoding part dropped, or the original string if no encoding was present.
 */
@safe String dropEncodingPart(String)(String locale) pure nothrow if (isSomeString!String && is(ElementEncodingType!String : char))
{
    auto parts = parseLocaleName(locale);
    if (parts.encoding.length == 0) {
        // Nothing to drop: hand back the very same string instead of rebuilding it.
        return locale;
    }
    return makeLocaleName(parts.lang, parts.country, String.init, parts.modifier);
}

///
unittest
{
    assert("ru_RU.UTF-8".dropEncodingPart() == "ru_RU");
    string locale = "ru_RU";
    assert(locale.dropEncodingPart() is locale);
}
327 
/**
 * Construct a localized key name from key and locale.
 * Returns: localized key in the form key[locale], with the encoding part of the locale dropped if present.
 *  If locale is empty, key is returned as is.
 * See_Also: separateFromLocale
 */
@safe String localizedKey(String)(String key, String locale) pure nothrow if (isSomeString!String && is(ElementEncodingType!String : char))
{
    if (locale.length == 0) {
        return key;
    }
    String localized = key ~ "[".to!String;
    localized ~= locale.dropEncodingPart();
    localized ~= "]".to!String;
    return localized;
}

///
unittest 
{
    string key = "Name";
    assert(localizedKey(key, "") == key);
    assert(localizedKey("Name", "ru_RU") == "Name[ru_RU]");
    assert(localizedKey("Name", "ru_RU.UTF-8") == "Name[ru_RU]");
}
349 
/**
 * Construct a localized key name from key and the separate components of a locale.
 * The locale name is built from lang, country and modifier; no encoding part is ever included.
 * Params:
 *  key = non-localized key name
 *  lang = language part of the locale
 *  country = country part of the locale
 *  modifier = modifier part of the locale
 * Returns: localized key in the form key[lang_COUNTRY@MODIFIER], or key as is if all locale components are empty.
 * See_Also: separateFromLocale, makeLocaleName
 */
@safe String localizedKey(String)(String key, String lang, String country, String modifier = null) pure if (isSomeString!String && is(ElementEncodingType!String : char))
{
    auto locale = makeLocaleName(lang, country, String.init, modifier);
    if (locale.length == 0) {
        // Same rule as the (key, locale) overload: an empty locale must not produce a "key[]" name.
        return key;
    }
    return key ~ "[".to!String ~ locale ~ "]".to!String;
}

///
unittest 
{
    assert(localizedKey("Name", "ru", "RU") == "Name[ru_RU]");
    assert(localizedKey("Name".dup, "ru".dup, "RU".dup) == "Name[ru_RU]".dup);
    assert(localizedKey("Name", "ru", "RU", "jargon") == "Name[ru_RU@jargon]");
    assert(localizedKey("Name", string.init, string.init) == "Name");
}
364 
/** 
 * Separate a key name into the non-localized key and the locale name.
 * A key is treated as localized when it ends with ']' and contains '['; the locale is the text between the first '[' and the final ']'.
 * If the key is not localized, returns the original key and an empty string.
 * Returns: tuple of key and locale name.
 * See_Also: localizedKey
 */
@nogc @trusted auto separateFromLocale(String)(String key) pure nothrow if (isSomeString!String && is(ElementEncodingType!String : char)) {
    String name = key;
    String locale = String.init;
    
    if (key.length != 0 && key[$-1] == ']') {
        auto parts = key.findSplit("[");
        if (parts[1].length != 0) {
            name = parts[0];
            // Everything after the first '[' except the closing ']'.
            locale = parts[2][0..$-1];
        }
    }
    return tuple(name, locale);
}

///
unittest 
{
    assert(separateFromLocale("Name[ru_RU]") == tuple("Name", "ru_RU"));
    assert(separateFromLocale("Name") == tuple("Name", string.init));
    
    char[] mutableString = "Hello".dup;
    assert(separateFromLocale(mutableString) == tuple(mutableString, typeof(mutableString).init));
}
390 
/**
 * Choose the better localized value matching the locale between two localized values. The "goodness" is determined using the algorithm described in $(LINK2 http://standards.freedesktop.org/desktop-entry-spec/latest/ar01s04.html, Localized values for keys).
 *
 * A candidate locale matches only if its language equals the language of the requested locale and
 * it carries no country or modifier that differs from the requested one. The encoding part is ignored.
 * Among matching candidates the preference order is lang_COUNTRY@MODIFIER, lang_COUNTRY, lang@MODIFIER, lang.
 * If both candidates match equally well, the first one is returned.
 * Params:
 *  locale = original locale to match to
 *  firstLocale = first locale
 *  firstValue = first value
 *  secondLocale = second locale
 *  secondValue = second value
 * Returns: Tuple of locale and value of the best alternative among the two, or tuple of empty strings if neither alternative matches the original locale.
 * Note: a non-empty value with an empty locale is considered a better choice than a value with a locale that does not match the original one.
 */
@nogc @trusted auto chooseLocalizedValue(String)(
    String locale, 
    String firstLocale,  String firstValue, 
    String secondLocale, String secondValue) pure nothrow
    if (isSomeString!String && is(ElementEncodingType!String : char))
{   
    alias Locale = typeof(parseLocaleName(locale));
    
    // Rate how well candidate serves the requested locale; 0 means it must not be used at all.
    @nogc @safe static int matchScore(ref const(Locale) requested, ref const(Locale) candidate) pure nothrow {
        if (requested.lang != candidate.lang) {
            return 0;
        }
        // A country or modifier on the candidate that the requested locale does not share
        // makes it a translation for a different locale (e.g. ru_UA is not a match for ru_RU).
        if (candidate.country.length && candidate.country != requested.country) {
            return 0;
        }
        if (candidate.modifier.length && candidate.modifier != requested.modifier) {
            return 0;
        }
        // Country outweighs modifier so that lang_COUNTRY ranks above lang@MODIFIER.
        return 1 + ((requested.country == candidate.country) ? 2 : 0)
                 + ((requested.modifier == candidate.modifier) ? 1 : 0);
    }
    
    const lt = parseLocaleName(locale);
    const lt1 = parseLocaleName(firstLocale);
    const lt2 = parseLocaleName(secondLocale);
    
    const int score1 = matchScore(lt, lt1);
    const int score2 = matchScore(lt, lt2);
    
    if (score1 == 0 && score2 == 0) {
        // Neither candidate matches: fall back to a non-empty unlocalized value if there is one.
        if (firstLocale.empty && !firstValue.empty) {
            return tuple(firstLocale, firstValue);
        } else if (secondLocale.empty && !secondValue.empty) {
            return tuple(secondLocale, secondValue);
        } else {
            return tuple(String.init, String.init);
        }
    }
    
    if (score1 >= score2) {
        return tuple(firstLocale, firstValue);
    } else {
        return tuple(secondLocale, secondValue);
    }
}

///
unittest
{
    string locale = "ru_RU.UTF-8@jargon";
    assert(chooseLocalizedValue(string.init, "ru_RU", "Программист", "ru@jargon", "Кодер") == tuple(string.init, string.init));
    assert(chooseLocalizedValue(locale, "fr_FR", "Programmeur", string.init, "Programmer") == tuple(string.init, "Programmer"));
    assert(chooseLocalizedValue(locale, string.init, "Programmer", "de_DE", "Programmierer") == tuple(string.init, "Programmer"));
    assert(chooseLocalizedValue(locale, "fr_FR", "Programmeur", "de_DE", "Programmierer") == tuple(string.init, string.init));
    
    assert(chooseLocalizedValue(string.init, string.init, "Value", string.init, string.init) == tuple(string.init, "Value"));
    assert(chooseLocalizedValue(locale, string.init, "Value", string.init, string.init) == tuple(string.init, "Value"));
    assert(chooseLocalizedValue(locale, string.init, string.init, string.init, "Value") == tuple(string.init, "Value"));
    
    assert(chooseLocalizedValue(locale, "ru_RU", "Программист", "ru@jargon", "Кодер") == tuple("ru_RU", "Программист"));
    assert(chooseLocalizedValue(locale, "ru_RU", "Программист", "ru_RU@jargon", "Кодер") == tuple("ru_RU@jargon", "Кодер"));
    
    assert(chooseLocalizedValue(locale, "ru", "Разработчик", "ru_RU", "Программист") == tuple("ru_RU", "Программист"));
    
    // A candidate for another country or with a foreign modifier is not a match, even if the language is the same.
    assert(chooseLocalizedValue("ru_RU", "ru_UA", "Програміст", string.init, "Programmer") == tuple(string.init, "Programmer"));
    assert(chooseLocalizedValue("ru", "ru_RU", "Программист", string.init, "Programmer") == tuple(string.init, "Programmer"));
    assert(chooseLocalizedValue("ru_RU", "ru@jargon", "Кодер", "ru", "Разработчик") == tuple("ru", "Разработчик"));
}
456 
/**
 * Check if the value needs to be escaped, i.e. contains a newline or a carriage return.
 * This function is currently tolerant to single slashes and tabs.
 * Returns: true if the value needs to be escaped, false otherwise.
 */
@nogc @safe bool needEscaping(String)(String value) nothrow pure if (isSomeString!String && is(ElementEncodingType!String : char))
{
    foreach (c; value) {
        if (c == '\n' || c == '\r') {
            return true;
        }
    }
    return false;
}

///
unittest
{
    assert("new\nline".needEscaping);
    assert(!`i have \ slash`.needEscaping);
    assert("i like\rcarriage\rreturns".needEscaping);
    assert(!"just a text".needEscaping);
}
480 
/**
 * Escape a string by replacing special symbols with escape sequences. 
 * These symbols are: '\\' (backslash), '\n' (newline), '\r' (carriage return) and '\t' (tab).
 * Returns: Escaped string.
 * See_Also: unescapeValue
 */
@trusted String escapeValue(String)(String value) pure if (isSomeString!String && is(ElementEncodingType!String : char)) {
    // Backslashes go first, so the backslashes introduced by the later steps are not doubled.
    auto escaped = value.replace("\\", `\\`.to!String);
    escaped = escaped.replace("\n", `\n`.to!String);
    escaped = escaped.replace("\r", `\r`.to!String);
    escaped = escaped.replace("\t", `\t`.to!String);
    return escaped;
}

///
unittest 
{
    assert("a\\next\nline\top".escapeValue() == `a\\next\nline\top`); // notice how the string on the right is raw.
    assert("a\\next\nline\top".dup.escapeValue() == `a\\next\nline\top`.dup);
}
497 
498 
/**
 * Unescape value. If value does not need unescaping this function returns the original value.
 * A backslash followed by a character that is not listed in pairs, as well as a trailing backslash, is kept unchanged.
 * Params:
 *  value = string to unescape
 *  pairs = pairs of escaped characters and their unescaped forms.
 * Returns: Unescaped string, or value itself if it contains no backslash.
 */
@trusted inout(char)[] doUnescape(inout(char)[] value, in Tuple!(char, char)[] pairs) nothrow pure {
    // Skip ahead to the first backslash; when there is none the input is returned without allocating.
    size_t pos = 0;
    while (pos < value.length && value[pos] != '\\') {
        pos++;
    }
    if (pos == value.length) {
        return value;
    }
    
    auto result = appender!(typeof(value))();
    result.put(value[0..pos]);
    
    while (pos < value.length) {
        if (value[pos] == '\\' && pos+1 < value.length) {
            const char escaped = value[pos+1];
            bool replaced = false;
            foreach (pair; pairs) {
                if (pair[0] == escaped) {
                    result.put(pair[1]);
                    replaced = true;
                    break;
                }
            }
            if (replaced) {
                // Both the backslash and the escaped character are consumed.
                pos += 2;
                continue;
            }
        }
        result.put(value[pos]);
        pos++;
    }
    return result.data;
}

unittest
{
    enum Tuple!(char, char)[] pairs = [tuple('\\', '\\')];
    static assert(is(typeof(doUnescape("", pairs)) == string));
    static assert(is(typeof(doUnescape("".dup, pairs)) == char[]));
}
543 
544 
/**
 * Unescape a string. You should unescape values returned by the library before displaying them, unless you want to keep them as is (e.g., to allow the user to edit values in escaped form).
 * The recognized escape sequences are `\s` (space), `\n` (newline), `\r` (carriage return), `\t` (tab) and `\\` (backslash).
 * Returns: Unescaped string.
 * See_Also: escapeValue
 */
@safe inout(char)[] unescapeValue(inout(char)[] value) nothrow pure
{
    // Character following the backslash -> character it stands for.
    static immutable Tuple!(char, char)[] escapeTable = [
        Tuple!(char, char)('s', ' '),
        Tuple!(char, char)('n', '\n'),
        Tuple!(char, char)('r', '\r'),
        Tuple!(char, char)('t', '\t'),
        Tuple!(char, char)('\\', '\\')
    ];
    return doUnescape(value, escapeTable);
}

///
unittest 
{
    assert(`a\\next\nline\top`.unescapeValue() == "a\\next\nline\top"); // notice how the string on the left is raw.
    assert(`\\next\nline\top`.unescapeValue() == "\\next\nline\top");
    string value = `nounescape`;
    assert(value.unescapeValue() is value); //original is returned.
    assert(`a\\next\nline\top`.dup.unescapeValue() == "a\\next\nline\top".dup);
}