import * as React from 'react'
  /* @jsx mdx */
import { mdx } from '@mdx-js/react';
/* @jsxRuntime classic */

/* @jsx mdx */

// Front matter for this post: title, publication date, layout name, draft
// flag, route path, category, tags, and a one-line description.
export const _frontmatter = {
  title: "Terminal support for emoji",
  date: "2021-01-21T13:00:00.000Z",
  layout: "post",
  draft: false,
  path: "/posts/emoji-in-the-terminal/",
  category: "Software",
  tags: [
    "emoji",
    "terminals",
    "unicode",
    "python",
  ],
  description: "Terminals and emoji don't play nicely together.",
};
// Props spread onto the layout element on every render; holds only the front matter.
const layoutProps = { _frontmatter };
// Element type used as the root of the rendered document.
const MDXLayout = 'wrapper';
/**
 * Renders the body of the "Terminal support for emoji" post.
 *
 * The whole post (headings, paragraphs, Python code samples, a styled quote
 * box, and two responsive images) is emitted as children of `MDXLayout`.
 *
 * @param {object} props - Props supplied by the caller.
 * @param {object} [props.components] - Passed straight through to the layout
 *   element as its `components` prop.
 * @param {...*} props.props - Every remaining prop is forwarded unchanged to
 *   the layout element.
 * @returns {JSX.Element} The layout element wrapping the post content.
 */
export default function MDXContent({
  components,
  ...props
}) {
  // Spread order matters: caller-supplied props override same-named keys from
  // layoutProps, and the explicit `components` / `mdxType` attributes win last.
  return <MDXLayout {...layoutProps} {...props} components={components} mdxType="MDXLayout">
    <h2>{`Some terminals don't understand them`}</h2>
    <p>{`If your terminal doesn't treat multi-codepoint emoji correctly (Hyper, for example), it will essentially just iterate through each codepoint and render it as is:`}</p>
    <pre><code parentName="pre" {...{
        "className": "language-python"
      }}>{`>>> print("\\U0001F468\\u200D\\U0001F467\\u200D\\U0001F466")
👨👧👦
`}</code></pre>
    <p>{`This is problematic for terminal-based applications because two identical strings can display with different widths depending on the environment.`}</p>
    <p>{`If a user is running Hyper, three emoji are rendered (👨👧👦).`}</p>
    <p>{`If they're on iTerm2, a single emoji is rendered (👨‍👧‍👦).`}</p>
    <h2>{`Terminals that do understand them don't know how wide they are`}</h2>
    <p>{`Even worse, terminal emulators themselves often don't know how much visual space an emoji will take up.
Graphical emoji are typically treated as "East Asian Wide" characters, which take up 2 "cell-widths" in the terminal.
In other words, they take up the same width as two ASCII characters:`}</p>
    <pre><code parentName="pre" {...{
        "className": "language-python"
      }}>{`>>> print("👨\\n12")  # Emoji is same width as 2 ASCII chars
👨                   # In other words it has "cell width" of 2
12                   # (results may vary in browsers :))
`}</code></pre>
    <p>{`You might wonder:`}</p>
    <div style={{
      // Inline styles for the quoted-question callout box.
      "padding": "1.6em",
      "margin": "1em",
      "backgroundColor": "#33333f",
      "color": "#f0f0f0",
      "fontFamily": "serif",
      "fontSize": "1.4rem"
    }}>
"Can't you just detect the width of an emoji by writing it to the terminal and checking how many columns the cursor has moved forward by?"
      <br></br>
      <div style={{
        // Right-aligns the attribution line under the quote.
        "textAlign": "right",
        "width": "100%"
      }}>
    <em>— You, maybe</em>
      </div>
    </div>
    <p>{`Unfortunately not. Most terminals incorrectly regard graphical Emoji Presentation Sequences as having cell width of 1.
For example, iTerm2 considers the "rosette" 🏵️ emoji to have width 1, so when it
writes the emoji it only progresses the cursor forward 1 column. This means any text that comes after it will overlap with the emoji:`}</p>
    <p><span parentName="p" {...{
        // Responsive image wrapper: centered block capped at 650px wide.
        "className": "gatsby-resp-image-wrapper",
        "style": {
          "position": "relative",
          "display": "block",
          "marginLeft": "auto",
          "marginRight": "auto",
          "maxWidth": "650px"
        }
      }}>{`
      `}<a parentName="span" {...{
          // Links to the widest rendition listed in srcSet (896w); opens in a new tab.
          "className": "gatsby-resp-image-link",
          "href": "/static/e18918a65b3c7345f1509cdb2751a7a0/4c42d/rosette_overlap.png",
          "style": {
            "display": "block"
          },
          "target": "_blank",
          "rel": "noopener"
        }}>{`
    `}<span parentName="a" {...{
            // Inline base64 PNG used as this element's background image.
            // NOTE(review): paddingBottom presumably reserves the image's aspect ratio — confirm.
            "className": "gatsby-resp-image-background-image",
            "style": {
              "paddingBottom": "35.58282208588957%",
              "position": "relative",
              "bottom": "0",
              "left": "0",
              "backgroundImage": "url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAHCAYAAAAIy204AAAACXBIWXMAABYlAAAWJQFJUiTwAAAAm0lEQVQoz63OzQ6CMBAE4NrZdOBmQPxBlKgHglefx6fw/TOmQKKJNVw4fNntJp1dd21rZcVZXDcKVaXQ9QqXm1g34vE0iO/QduKuFcuDGIJIJrnX86F732m7b2SkfJ4LWSYECuTY80ucwwtAktuUheBXMoMMkHk/1h9+ApnZX87HZDMBNtQ4xAyb/iQDx+azFTMXzHFLhCQuXM4bhVmWuXDYOZUAAAAASUVORK5CYII=')",
              "backgroundSize": "cover",
              "display": "block"
            }
          }}></span>{`
  `}<img parentName="a" {...{
            // Absolutely positioned to fill the wrapper; offers four width
            // candidates via srcSet and requests lazy loading / async decoding.
            "className": "gatsby-resp-image-image",
            "alt": "Rosette overlapping with text",
            "title": "Rosette overlapping with text",
            "src": "/static/e18918a65b3c7345f1509cdb2751a7a0/a6d36/rosette_overlap.png",
            "srcSet": ["/static/e18918a65b3c7345f1509cdb2751a7a0/222b7/rosette_overlap.png 163w", "/static/e18918a65b3c7345f1509cdb2751a7a0/ff46a/rosette_overlap.png 325w", "/static/e18918a65b3c7345f1509cdb2751a7a0/a6d36/rosette_overlap.png 650w", "/static/e18918a65b3c7345f1509cdb2751a7a0/4c42d/rosette_overlap.png 896w"],
            "sizes": "(max-width: 650px) 100vw, 650px",
            "style": {
              "width": "100%",
              "height": "100%",
              "margin": "0",
              "verticalAlign": "middle",
              "position": "absolute",
              "top": "0",
              "left": "0"
            },
            "loading": "lazy",
            "decoding": "async"
          }}></img>{`
  `}</a>{`
    `}</span></p>
    <p>{`This issue affects every terminal I've tested: Visual Studio Code, iTerm2, Alacritty, and Hyper.`}</p>
    <h2>{`Why does this happen?`}</h2>
    <p>{`A single character can take up more than one cell, and the `}<a parentName="p" {...{
        "href": "https://www.unicode.org/Public/13.0.0/ucd/EastAsianWidth.txt"
      }}>{`East Asian Width Unicode database`}</a>{` is generally used by terminals to
determine how "wide" the character will be. East Asian Wide characters (such as Chinese, Japanese, and Korean ideographs) take up two cell widths in a terminal.`}</p>
    <p>{`This approach comes from the `}<inlineCode parentName="p">{`wcwidth`}</inlineCode>{` utility, and the comment at the top of the `}<a parentName="p" {...{
        "href": "https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c"
      }}>{`C source file`}</a>{` provides further insight into the difficulties faced here.`}</p>
    <p>{`The clue to why some emoji are rendered incorrectly can be found via the `}<inlineCode parentName="p">{`unicodedata`}</inlineCode>{` module. Let's use it to query the "East Asian Width" property of two codepoints in the Unicode database.`}</p>
    <pre><code parentName="pre" {...{
        "className": "language-python"
      }}>{`>>> unicodedata.east_asian_width("\\U0001F4A3")  # Bomb emoji 💣
'W'  # 'W' means 'East Asian Wide', terminals think of this as cell_width=2

>>> unicodedata.east_asian_width("\\U0001F6E5")  # Motorboat emoji 🛥
'N'  # 'N' means 'Neutral', terminals think of this as cell_width=1
`}</code></pre>
    <p>{`Here's how these emoji render in a terminal:`}</p>
    <p><span parentName="p" {...{
        // Second responsive image; same wrapper / link / background / img structure as the first.
        "className": "gatsby-resp-image-wrapper",
        "style": {
          "position": "relative",
          "display": "block",
          "marginLeft": "auto",
          "marginRight": "auto",
          "maxWidth": "650px"
        }
      }}>{`
      `}<a parentName="span" {...{
          // Links to the widest rendition listed in srcSet (780w); opens in a new tab.
          "className": "gatsby-resp-image-link",
          "href": "/static/b593b0f95b3fa2fb2e0fa441a0d8b344/a1792/bomb_boat.png",
          "style": {
            "display": "block"
          },
          "target": "_blank",
          "rel": "noopener"
        }}>{`
    `}<span parentName="a" {...{
            "className": "gatsby-resp-image-background-image",
            "style": {
              "paddingBottom": "29.447852760736193%",
              "position": "relative",
              "bottom": "0",
              "left": "0",
              "backgroundImage": "url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAGCAYAAADDl76dAAAACXBIWXMAABYlAAAWJQFJUiTwAAABFElEQVQY02WQS0sDQRAGZ+brxLg7s++Yp3kaE0UU9ODF//+3SnZFc/BQ0NBQdLUbFyOccxStcZMCfhSwaCiKvMyoMmNViftazAuxKMW8FFUmitseo4gZkjAz3On9keVuwellzvY8w+IYVRqITaK8Nc53Gtg24tiJQydmhWijqHIjpXQVnk8XHg57umbBcrZmMp4gJ8wZ3nlcEM7Zf/wv/d4Psl7qfOaH5Nh4RtHhxwFLhpKIVU6dGZta7FqxLDXM60rUuSizHqNM+fXC/euR+WbJ09uK1bHFTQJWGypFXkeKifEwFafpzx/75H0rpkk0+Y8wxngVfnx+cbk807V3NHVH8AELNtDPYciwf/jwiwgh/CV/A52Gh16JL9KDAAAAAElFTkSuQmCC')",
              "backgroundSize": "cover",
              "display": "block"
            }
          }}></span>{`
  `}<img parentName="a" {...{
            "className": "gatsby-resp-image-image",
            "alt": "Rendering of the bomb and motorboat emojis in a terminal",
            "title": "Rendering of the bomb and motorboat emojis in a terminal",
            "src": "/static/b593b0f95b3fa2fb2e0fa441a0d8b344/a6d36/bomb_boat.png",
            "srcSet": ["/static/b593b0f95b3fa2fb2e0fa441a0d8b344/222b7/bomb_boat.png 163w", "/static/b593b0f95b3fa2fb2e0fa441a0d8b344/ff46a/bomb_boat.png 325w", "/static/b593b0f95b3fa2fb2e0fa441a0d8b344/a6d36/bomb_boat.png 650w", "/static/b593b0f95b3fa2fb2e0fa441a0d8b344/a1792/bomb_boat.png 780w"],
            "sizes": "(max-width: 650px) 100vw, 650px",
            "style": {
              "width": "100%",
              "height": "100%",
              "margin": "0",
              "verticalAlign": "middle",
              "position": "absolute",
              "top": "0",
              "left": "0"
            },
            "loading": "lazy",
            "decoding": "async"
          }}></img>{`
  `}</a>{`
    `}</span></p>
    <p>{`All terminal emulators I tested consider codepoints with an "East Asian Width" of `}<inlineCode parentName="p">{`N`}</inlineCode>{` to have cell width of 1.
This is incorrect in the case of `}<a parentName="p" {...{
        "href": "https://unicode.org/emoji/charts/emoji-variants.html"
      }}>{`Emoji Presentation Sequences`}</a>{` -`}<br parentName="p"></br>{`
`}<a parentName="p" {...{
        "href": "http://www.unicode.org/reports/tr11/tr11-38.html#Recommendations"
      }}>{`Unicode recommends`}</a>{` they should be always treated as "East Asian Wide" (`}<inlineCode parentName="p">{`W`}</inlineCode>{`).`}</p>
    <p>{`In summary: a massive headache, avoid if possible.`}</p>

    </MDXLayout>;
}
;
// Static marker on the component function.
// NOTE(review): presumably read by MDX tooling to recognise compiled MDX documents — confirm.
MDXContent.isMDXComponent = true;
      