Unicode in mapgen (#38149)

CleverRaven · Apr 2, 2020 · d432807 · d432807
1 parent 7b18364
commit d432807
Show file tree

Hide file tree

Showing 8 changed files with 151 additions and 74 deletions.
diff --git a/data/json/mapgen/nested/house_nested.json b/data/json/mapgen/nested/house_nested.json
@@ -467,7 +467,7 @@
       "mapgensize": [ 4, 4 ],
       "rotation": [ 0, 3 ],
       "rows": [
-        " CR ",
+        " C▤ ",
         "O   ",
         " EE ",
         " EE "
@@ -476,7 +476,7 @@
       "items": {
         "O": [ { "item": "SUS_dresser_mens", "chance": 50 }, { "item": "SUS_dresser_womens", "chance": 50, "repeat": [ 1, 2 ] } ],
         "E": { "item": "bed", "chance": 40, "repeat": [ 1, 2 ] },
-        "R": { "item": "homebooks", "chance": 10, "repeat": [ 1, 2 ] }
+        "▤": { "item": "homebooks", "chance": 10, "repeat": [ 1, 2 ] }
       }
     }
   },
@@ -492,13 +492,13 @@
         " EE ",
         " EE ",
         "O   ",
-        " CR "
+        " C▤ "
       ],
       "palettes": [ "house_w_nest_palette" ],
       "items": {
         "O": [ { "item": "SUS_dresser_mens", "chance": 50 }, { "item": "SUS_dresser_womens", "chance": 50, "repeat": [ 1, 2 ] } ],
         "E": { "item": "bed", "chance": 40, "repeat": [ 1, 2 ] },
-        "R": { "item": "homebooks", "chance": 10, "repeat": [ 1, 2 ] }
+        "▤": { "item": "homebooks", "chance": 10, "repeat": [ 1, 2 ] }
       }
     }
   },
@@ -513,14 +513,14 @@
       "rows": [
         "    ",
         "C EE",
-        "R EE",
+        "▤ EE",
         " O  "
       ],
       "palettes": [ "house_w_nest_palette" ],
       "items": {
         "O": [ { "item": "SUS_dresser_mens", "chance": 50 }, { "item": "SUS_dresser_womens", "chance": 50, "repeat": [ 1, 2 ] } ],
         "E": { "item": "bed", "chance": 40, "repeat": [ 1, 2 ] },
-        "R": { "item": "homebooks", "chance": 10, "repeat": [ 1, 2 ] }
+        "▤": { "item": "homebooks", "chance": 10, "repeat": [ 1, 2 ] }
       }
     }
   },
@@ -535,14 +535,14 @@
       "rows": [
         "    ",
         "EE C",
-        "EE R",
+        "EE ▤",
         "  O "
       ],
       "palettes": [ "house_w_nest_palette" ],
       "items": {
         "O": [ { "item": "SUS_dresser_mens", "chance": 50 }, { "item": "SUS_dresser_womens", "chance": 50, "repeat": [ 1, 2 ] } ],
         "E": { "item": "bed", "chance": 40, "repeat": [ 1, 2 ] },
-        "R": { "item": "homebooks", "chance": 10, "repeat": [ 1, 2 ] }
+        "▤": { "item": "homebooks", "chance": 10, "repeat": [ 1, 2 ] }
       }
     }
   },
@@ -704,7 +704,7 @@
         "     ",
         "EE  I",
         "L  AI",
-        "y   R",
+        "y   ▤",
         "OCy  "
       ],
       "palettes": [ "house_w_nest_palette" ],
@@ -713,7 +713,7 @@
         "E": { "item": "bed", "chance": 40, "repeat": [ 1, 2 ] },
         "I": { "item": "SUS_desks_bedroom_unisex", "chance": 40, "repeat": [ 1, 2 ] },
         "L": { "item": "homebooks", "chance": 10, "repeat": [ 1, 2 ] },
-        "R": { "item": "homebooks", "chance": 30, "repeat": [ 1, 2 ] }
+        "▤": { "item": "homebooks", "chance": 30, "repeat": [ 1, 2 ] }
       }
     }
   },
@@ -729,8 +729,8 @@
         "  OO ",
         "IB  L",
         "I  EE",
-        "y   R",
-        "RCa  "
+        "y   ▤",
+        "▤Ca  "
       ],
       "palettes": [ "house_w_nest_palette" ],
       "items": {
@@ -739,7 +739,7 @@
         "I": { "item": "SUS_desks_bedroom_unisex", "chance": 40, "repeat": [ 1, 2 ] },
         "L": { "item": "homebooks", "chance": 10, "repeat": [ 1, 2 ] },
         "a": { "item": "unisex_coat_rack", "chance": 100, "repeat": [ 1, 2 ] },
-        "R": { "item": "homebooks", "chance": 30, "repeat": [ 1, 2 ] }
+        "▤": { "item": "homebooks", "chance": 30, "repeat": [ 1, 2 ] }
       }
     }
   },
@@ -1023,8 +1023,8 @@
       "rotation": [ 0, 3 ],
       "rows": [
         " @@p ",
-        "    R",
-        " pp R",
+        "    ▤",
+        " pp ▤",
         "     ",
         " xxx "
       ],
@@ -1118,11 +1118,11 @@
       "mapgensize": [ 2, 2 ],
       "rotation": [ 0, 3 ],
       "rows": [
-        "RR",
+        "▤▤",
         "C "
       ],
       "palettes": [ "house_w_nest_palette" ],
-      "items": { "R": [ { "item": "homebooks", "chance": 30 } ] }
+      "items": { "▤": [ { "item": "homebooks", "chance": 30 } ] }
     }
   },
   {
@@ -1135,10 +1135,10 @@
       "rotation": [ 0, 3 ],
       "rows": [
         "C ",
-        "R "
+        "▤ "
       ],
       "palettes": [ "house_w_nest_palette" ],
-      "items": { "R": [ { "item": "homebooks", "chance": 30 } ] }
+      "items": { "▤": [ { "item": "homebooks", "chance": 30 } ] }
     }
   },
   {
@@ -1151,10 +1151,10 @@
       "rotation": [ 0, 3 ],
       "rows": [
         " C",
-        "HR"
+        "H▤"
       ],
       "palettes": [ "house_w_nest_palette" ],
-      "items": { "R": [ { "item": "homebooks", "chance": 30 } ] }
+      "items": { "▤": [ { "item": "homebooks", "chance": 30 } ] }
     }
   },
   {
@@ -1230,12 +1230,12 @@
       "mapgensize": [ 3, 3 ],
       "rotation": [ 0, 3 ],
       "rows": [
-        "R H",
-        "RC ",
-        "R  "
+        "▤ H",
+        "▤C ",
+        "▤  "
       ],
       "palettes": [ "house_w_nest_palette" ],
-      "items": { "R": [ { "item": "homebooks", "chance": 30 } ] }
+      "items": { "▤": [ { "item": "homebooks", "chance": 30 } ] }
     }
   },
   {

diff --git a/data/json/mapgen_palettes/house_w_palette.json b/data/json/mapgen_palettes/house_w_palette.json
@@ -22,7 +22,7 @@
       "O": "f_dresser",
       "P": "f_locker",
       "Q": "f_rack",
-      "R": "f_bookcase",
+      "▤": "f_bookcase",
       "S": [ [ "f_filing_cabinet", 80 ], [ "f_shredder", 20 ] ],
       "U": "f_utility_shelf",
       "V": "f_glass_cabinet",

diff --git a/doc/MAPGEN.md b/doc/MAPGEN.md
@@ -197,9 +197,11 @@ Example: "fill_ter": "t_grass"
 *required if "fill_ter" is unset*
 > Value: ([array]): blocks of 24 rows of blocks of 24 character lines. Each character is defined by "terrain" and optionally "furniture" or other entries below
 
+Other parts can be linked with this map, for example one can place things like a gaspump (with gasoline) or a toilet (with water) or items from an item group or fields at the square given by a character.
+
 Any character used here must have some definition elsewhere to indicate its purpose.  Failing to do so is an error which will be caught by running the tests.  The tests will run automatically when you make a pull request for adding new maps to the game.  If you have defined `fill_ter` or you are writing nested mapgen, then there are a couple of exceptions.  The space and period characters (` ` and `.`) are permitted to have no definition and be used for 'background' in the `rows`.
 
-Other parts can be linked with this map, for example one can place things like a gaspump (with gasoline) or a toilet (with water) or items from an item group or fields at the square given by a character.
+As keys, you can use any Unicode characters which are not double-width.  This includes for example most European alphabets but not Chinese characters.  If you intend to take advantage of this, ensure that your editor is saving the file with a UTF-8 encoding.  Accents are acceptable, even when using [combining characters](https://en.wikipedia.org/wiki/Combining_character).  No normalization is performed; comparison is done at the raw byte (code unit) level.  Therefore, there are literally an infinite number of mapgen key characters available.  Please don't abuse this by using distinct characters that are visually indistinguishable, or which are so rare as to be unlikely to render correctly for other developers.
 
 Example:
 

diff --git a/src/catacharset.cpp b/src/catacharset.cpp
@@ -479,6 +479,26 @@ std::u32string utf8_to_utf32( const std::string &str )
     return ret;
 }
 
+// Split a UTF-8 string into a list of displayed glyphs.
+//
+// The input is walked one code point at a time.  A code point whose display
+// width (as reported by mk_wcwidth) is positive starts a new glyph; any code
+// point with a width of zero or less (e.g. a combining mark) is appended to
+// the glyph currently being built.
+//
+// Edge cases that follow directly from the loop below:
+//  - An empty input returns a vector holding one empty string, because the
+//    final push_back is unconditional.
+//  - Code points of non-positive width at the very start of the string are
+//    emitted as an element of their own, ahead of the first positive-width
+//    code point.
+//
+// @param s  UTF-8 encoded text to split.
+// @return   The glyphs in order; concatenating them reproduces the bytes
+//           that were consumed from s.
+std::vector<std::string> utf8_display_split( const std::string &s )
+{
+    std::vector<std::string> result;
+    // Bytes of the glyph being accumulated (base code point + trailing marks).
+    std::string current_glyph;
+    const char *pos = s.c_str();
+    // NOTE(review): size_t is narrowed to int here, matching the int* that
+    // UTF8_getch takes below.
+    int len = s.length();
+    while( len > 0 ) {
+        // Remember where this code point starts so its raw bytes can be copied.
+        const char *old_pos = pos;
+        // UTF8_getch is expected to advance pos past the decoded code point and
+        // reduce len accordingly - TODO confirm against its definition.
+        const uint32_t ch = UTF8_getch( &pos, &len );
+        const int width = mk_wcwidth( ch );
+        // A visible code point closes the previous glyph (if there is one)
+        // before starting its own.
+        if( width > 0 && !current_glyph.empty() ) {
+            result.push_back( current_glyph );
+            current_glyph.clear();
+        }
+        // Append the undecoded bytes [old_pos, pos) of this code point.
+        current_glyph += std::string( old_pos, pos );
+    }
+    // Flush the last glyph; this also runs for empty input.
+    result.push_back( current_glyph );
+    return result;
+}
+
 int center_text_pos( const char *text, int start_pos, int end_pos )
 {
     int full_screen = end_pos - start_pos + 1;

diff --git a/src/catacharset.h b/src/catacharset.h
@@ -5,6 +5,7 @@
 #include <cstddef>
 #include <cstdint>
 #include <string>
+#include <vector>
 
 #define ANY_LENGTH 5
 #define NULL_UNICODE 0x0000
@@ -56,11 +57,15 @@ std::string utf8_to_native( const std::string &str );
 std::string utf32_to_utf8( const std::u32string &str );
 std::u32string utf8_to_utf32( const std::string &str );
 
+// Split the given UTF-8 string into displayed characters.  Each element of the returned
+// vector contains one 'regular' codepoint (one with a positive display width) and all
+// subsequent zero-width codepoints such as combining characters.  Zero-width codepoints
+// at the very start of the string form an element of their own, and an empty input
+// yields a vector containing a single empty string.
+std::vector<std::string> utf8_display_split( const std::string & );
+
 /**
  * UTF8-Wrapper over std::string.
  * It looks and feels like a std::string, but uses code points counts
  * as index, not bytes.
- * A multi-byte Unicode character might by represented
+ * A multi-byte Unicode character might be represented
  * as 3 bytes in UTF8, this class will see these 3 bytes as 1 character.
  * It will never separate them. It will however split between code points
  * which might be problematic when containing combination characters.