text.js 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534
  1. import {namespaces} from "d3";
  2. import {create} from "../context.js";
  3. import {nonempty} from "../defined.js";
  4. import {formatDefault} from "../format.js";
  5. import {Mark} from "../mark.js";
  6. import {
  7. indexOf,
  8. identity,
  9. string,
  10. maybeNumberChannel,
  11. maybeTuple,
  12. numberChannel,
  13. isNumeric,
  14. isTemporal,
  15. keyword,
  16. maybeFrameAnchor,
  17. isTextual,
  18. isIterable
  19. } from "../options.js";
  20. import {
  21. applyChannelStyles,
  22. applyDirectStyles,
  23. applyIndirectStyles,
  24. applyAttr,
  25. applyTransform,
  26. impliedString,
  27. applyFrameAnchor
  28. } from "../style.js";
  29. import {template} from "../template.js";
  30. import {maybeIntervalMidX, maybeIntervalMidY} from "../transforms/interval.js";
  // Default options for the text mark, passed to the Mark constructor as its
  // defaults argument: an "text" ARIA label, and a 3px round-joined stroke with
  // paint-order "stroke".
  const defaults = {
    ariaLabel: "text",
    strokeLinejoin: "round",
    strokeWidth: 3,
    paintOrder: "stroke"
  };

  // U+00AD SOFT HYPHEN: an optional break opportunity recognized by lineBreaks;
  // lineWrap appends a visible "-" when a line is broken immediately after one.
  const softHyphen = "\u00ad";
  // The text mark: renders one SVG <text> element per index inside a <g>.
  export class Text extends Mark {
    // Reads the text-specific options, declares the optional x, y, fontSize,
    // rotate and text channels on the Mark, and stores the constant options on
    // the instance. Throws if lineWidth is not a non-negative number.
    constructor(data, options = {}) {
      const {
        x,
        y,
        // Textual iterable data labels itself; otherwise the index is the label.
        text = isIterable(data) && isTextual(data) ? identity : indexOf,
        frameAnchor,
        // The default anchors follow the frame anchor's horizontal and vertical parts.
        textAnchor = /right$/i.test(frameAnchor) ? "end" : /left$/i.test(frameAnchor) ? "start" : "middle",
        lineAnchor = /^top/i.test(frameAnchor) ? "top" : /^bottom/i.test(frameAnchor) ? "bottom" : "middle",
        lineHeight = 1,
        lineWidth = Infinity,
        textOverflow,
        monospace,
        fontFamily = monospace ? "ui-monospace, monospace" : undefined,
        fontSize,
        fontStyle,
        fontVariant,
        fontWeight,
        rotate
      } = options;

      // Each of these options is either a per-datum channel or a constant.
      const [rotateChannel, rotateConstant] = maybeNumberChannel(rotate, 0);
      const [fontSizeChannel, fontSizeConstant] = maybeFontSizeChannel(fontSize);

      const channels = {
        x: {value: x, scale: "x", optional: true},
        y: {value: y, scale: "y", optional: true},
        fontSize: {value: fontSizeChannel, optional: true},
        rotate: {value: numberChannel(rotateChannel), optional: true},
        text: {value: text, filter: nonempty, optional: true}
      };
      super(data, channels, options, defaults);

      this.rotate = rotateConstant;
      this.textAnchor = impliedString(textAnchor, "middle");
      this.lineAnchor = keyword(lineAnchor, "lineAnchor", ["top", "middle", "bottom"]);
      this.lineHeight = +lineHeight;
      this.lineWidth = +lineWidth;
      this.textOverflow = maybeTextOverflow(textOverflow);
      this.monospace = !!monospace;
      this.fontFamily = string(fontFamily);
      this.fontSize = fontSizeConstant;
      this.fontStyle = string(fontStyle);
      this.fontVariant = string(fontVariant);
      this.fontWeight = string(fontWeight);
      this.frameAnchor = maybeFrameAnchor(frameAnchor);
      // Written as a negated >= so that NaN is rejected too.
      if (!(this.lineWidth >= 0)) throw new Error(`invalid lineWidth: ${lineWidth}`);
      this.splitLines = splitter(this);
      this.clipLine = clipper(this);
    }

    // Builds the <g> of <text> elements for the given index and returns its node.
    render(index, scales, channels, dimensions, context) {
      const {x: xScale, y: yScale} = scales;
      const {x: X, y: Y, rotate: R, text: T, title: TL, fontSize: FS} = channels;
      const {rotate} = this;
      const [cx, cy] = applyFrameAnchor(this, dimensions);

      // Per-datum position when a channel exists, else the frame anchor.
      const tx = X ? (i) => X[i] : cx;
      const ty = Y ? (i) => Y[i] : cy;
      // Per-datum rotation, else the constant rotation if non-zero, else none.
      const rotation = R ? (i) => ` rotate(${R[i]})` : rotate ? ` rotate(${rotate})` : ``;
      const fontSizeOf = FS && ((i) => FS[i]);

      const group = create("svg:g", context)
        .call(applyIndirectStyles, this, dimensions, context)
        .call(applyIndirectTextStyles, this, T, dimensions)
        .call(applyTransform, this, {x: X && xScale, y: Y && yScale});

      group
        .selectAll()
        .data(index)
        .enter()
        .append("text")
        .call(applyDirectStyles, this)
        .call(applyMultilineText, this, T, TL)
        .attr("transform", template`translate(${tx},${ty})${rotation}`)
        .call(applyAttr, "font-size", fontSizeOf)
        .call(applyChannelStyles, this, channels);

      return group.node();
    }
  }
  // Validates the textOverflow option. Returns null when the option is nullish;
  // otherwise returns the validated keyword, with the shorthands "clip" and
  // "ellipsis" expanded to "clip-end" and "ellipsis-end".
  export function maybeTextOverflow(textOverflow) {
    if (textOverflow == null) return null;
    const validated = keyword(textOverflow, "textOverflow", [
      "clip", // shorthand for clip-end
      "ellipsis", // … ellipsis-end
      "clip-start",
      "clip-end",
      "ellipsis-start",
      "ellipsis-middle",
      "ellipsis-end"
    ]);
    return validated.replace(/^(clip|ellipsis)$/, "$1-end");
  }
  // Fills each <text> element of the selection with the formatted, split and
  // clipped text for its datum. Does nothing when there is no text channel T.
  //
  // - A single line is set as the element's text content, with a "y" offset (in
  //   em) derived from the line anchor.
  // - Multiple lines become one <tspan> per non-empty line; empty lines are
  //   skipped and folded into the next tspan's "dy".
  // - When text overflow handling is enabled and there is no title channel TL, a
  //   <title> holding the full formatted text is appended if the first rendered
  //   line differs from it.
  function applyMultilineText(selection, mark, T, TL) {
    if (!T) return;
    const {lineAnchor, lineHeight, textOverflow, splitLines, clipLine} = mark;
    selection.each(function (i) {
      // The display string for this datum; splitLines requires a string.
      const text = formatDefault(T[i]) ?? "";
      const lines = splitLines(text).map(clipLine);
      const n = lines.length;
      // Offset of the first line, in multiples of the line height.
      const y = lineAnchor === "top" ? 0.71 : lineAnchor === "bottom" ? 1 - n : (164 - n * 100) / 200;
      if (n > 1) {
        let m = 0; // number of lines since the last emitted tspan
        for (let k = 0; k < n; ++k) {
          ++m;
          if (!lines[k]) continue;
          const tspan = this.ownerDocument.createElementNS(namespaces.svg, "tspan");
          tspan.setAttribute("x", 0);
          // The first emitted tspan is positioned absolutely; later ones are
          // positioned relative to the previous tspan.
          if (k === m - 1) tspan.setAttribute("y", `${(y + k) * lineHeight}em`);
          else tspan.setAttribute("dy", `${m * lineHeight}em`);
          tspan.textContent = lines[k];
          this.appendChild(tspan);
          m = 0;
        }
      } else {
        if (y) this.setAttribute("y", `${y * lineHeight}em`);
        this.textContent = lines[0];
      }
      // Compare against the formatted string rather than the raw value: a number
      // or Date never strictly equals its formatted string, which previously
      // produced a title for every such datum even when nothing was clipped.
      if (textOverflow && !TL && lines[0] !== text) {
        const title = this.ownerDocument.createElementNS(namespaces.svg, "title");
        title.textContent = text;
        this.appendChild(title);
      }
    });
  }
  // Creates a Text mark. Unless a frame anchor is specified, the x and y
  // options are passed through maybeTuple first.
  export function text(data, {x, y, ...options} = {}) {
    const [px, py] = options.frameAnchor === undefined ? maybeTuple(x, y) : [x, y];
    return new Text(data, {...options, x: px, y: py});
  }
  // Creates a Text mark whose x channel defaults to identity; the merged
  // options are passed through maybeIntervalMidY.
  export function textX(data, {x = identity, ...options} = {}) {
    const merged = {...options, x};
    return new Text(data, maybeIntervalMidY(merged));
  }
  // Creates a Text mark whose y channel defaults to identity; the merged
  // options are passed through maybeIntervalMidX.
  export function textY(data, {y = identity, ...options} = {}) {
    const merged = {...options, y};
    return new Text(data, maybeIntervalMidX(merged));
  }
  // Applies the mark's constant text styles as attributes on the selection.
  // When the mark does not specify a font variant, one is inferred from the
  // text channel T.
  export function applyIndirectTextStyles(selection, mark, T) {
    const {textAnchor, fontFamily, fontSize, fontStyle, fontVariant, fontWeight} = mark;
    const attributes = [
      ["text-anchor", textAnchor],
      ["font-family", fontFamily],
      ["font-size", fontSize],
      ["font-style", fontStyle],
      ["font-variant", fontVariant === undefined ? inferFontVariant(T) : fontVariant],
      ["font-weight", fontWeight]
    ];
    for (const [name, value] of attributes) applyAttr(selection, name, value);
  }
  // Returns "tabular-nums" when the text channel T exists and is numeric or
  // temporal; otherwise returns undefined.
  function inferFontVariant(T) {
    if (T && (isNumeric(T) || isTemporal(T))) return "tabular-nums";
    return undefined;
  }
  // https://developer.mozilla.org/en-US/docs/Web/CSS/font-size
  const fontSizes = new Set([
    // global keywords
    "inherit",
    "initial",
    "revert",
    "unset",
    // absolute keywords
    "xx-small",
    "x-small",
    "small",
    "medium",
    "large",
    "x-large",
    "xx-large",
    "xxx-large",
    // relative keywords
    "larger",
    "smaller"
  ]);

  // The font size may be expressed as a constant in the following forms:
  // - number in pixels
  // - string keyword: see above
  // - string <length>: e.g., "12px"
  // - string <percentage>: e.g., "80%"
  // Anything else is assumed to be a channel definition.
  //
  // Returns a [channel, constant] pair in which exactly one side is meaningful.
  // A recognized constant string is returned trimmed and lowercased. A string
  // that is not a recognized constant is returned unmodified as the channel
  // definition: it may be a case-sensitive field name, so normalizing it would
  // make the channel refer to a different field.
  function maybeFontSizeChannel(fontSize) {
    if (fontSize == null || typeof fontSize === "number") return [undefined, fontSize];
    if (typeof fontSize !== "string") return [fontSize, undefined];
    const normalized = fontSize.trim().toLowerCase();
    return fontSizes.has(normalized) || /^[+-]?\d*\.?\d+(e[+-]?\d+)?(\w*|%)$/.test(normalized)
      ? [undefined, normalized]
      : [fontSize, undefined];
  }
  // This is a greedy algorithm for line wrapping. It would be better to use the
  // Knuth–Plass line breaking algorithm (but that would be much more complex).
  // https://en.wikipedia.org/wiki/Line_wrap_and_word_wrap
  //
  // Returns the lines of input, broken at the opportunities reported by
  // lineBreaks so that each line's width (as measured by widthof over an index
  // range of input) does not exceed maxWidth where possible.
  function lineWrap(input, maxWidth, widthof) {
    const wrapped = [];
    // Start of the current line; undefined until its first word is seen. This
    // isn’t the previous line’s end because spaces between lines are skipped.
    let start;
    // End of the last word accepted onto the current line.
    let end = 0;
    for (const [wordStart, wordEnd, required] of lineBreaks(input)) {
      if (start === undefined) start = wordStart;

      // A non-empty line that would overflow with this word is broken at the
      // previous word end; a trailing soft hyphen becomes a visible hyphen.
      const overflows = end > start && widthof(input, start, wordEnd) > maxWidth;
      if (overflows) {
        const hyphen = input[end - 1] === softHyphen ? "-" : "";
        wrapped.push(input.slice(start, end) + hyphen);
        start = wordStart;
      }

      if (required) {
        // A required break (a newline, or the end of input) ends the line here.
        wrapped.push(input.slice(start, wordEnd));
        start = undefined;
      } else {
        // Otherwise the current line grows to include the new word.
        end = wordEnd;
      }
    }
    return wrapped;
  }
  // This is a rudimentary (and U.S.-centric) algorithm for finding opportunities
  // to break lines between words. A better and far more comprehensive approach
  // would be to use the official Unicode Line Breaking Algorithm.
  // https://unicode.org/reports/tr14/
  //
  // Yields [wordStart, wordEnd, required] tuples of indexes into input, where
  // required is true for a mandatory break (a newline, or the end of input).
  function* lineBreaks(input) {
    const n = input.length;
    let start = 0; // start of the current word
    let pos = 0; // scan position
    while (pos < n) {
      const ch = input[pos];
      if (ch === softHyphen || ch === "-") {
        // A hyphen (soft or hard) belongs to the word that it ends.
        ++pos;
        yield [start, pos, false];
        start = pos;
      } else if (ch === " ") {
        yield [start, pos, false];
        do ++pos;
        while (input[pos] === " "); // skip multiple spaces
        start = pos;
      } else if (ch === "\r" || ch === "\n") {
        yield [start, pos, true];
        // A carriage return followed by a line feed is a single break.
        pos += ch === "\r" && input[pos + 1] === "\n" ? 2 : 1;
        start = pos;
      } else {
        ++pos;
      }
    }
    yield [start, pos, true];
  }
  // Computed as round(measureText(text).width * 10) at 10px system-ui. For
  // characters that are not represented in this map, we’d ideally want to use a
  // weighted average of what we expect to see. But since we don’t really know
  // what that is, using “e” seems reasonable.
  const defaultWidthMap = {
    // lowercase letters
    a: 56, b: 63, c: 57, d: 63, e: 58, f: 37, g: 62, h: 60, i: 26,
    j: 26, k: 55, l: 26, m: 88, n: 60, o: 60, p: 62, q: 62, r: 39,
    s: 54, t: 38, u: 60, v: 55, w: 79, x: 54, y: 55, z: 55,
    // uppercase letters
    A: 69, B: 67, C: 73, D: 74, E: 61, F: 58, G: 76, H: 75, I: 28,
    J: 55, K: 67, L: 58, M: 89, N: 75, O: 78, P: 65, Q: 78, R: 67,
    S: 65, T: 65, U: 75, V: 69, W: 98, X: 69, Y: 67, Z: 67,
    // digits
    0: 64, 1: 48, 2: 62, 3: 64, 4: 66, 5: 63, 6: 65, 7: 58, 8: 65, 9: 65,
    // space and punctuation
    " ": 29, "!": 32, '"': 49, "'": 31, "(": 39, ")": 39,
    ",": 31, "-": 48, ".": 31, "/": 32, ":": 31, ";": 31, "?": 52,
    // typographic quotes and the ellipsis
    "‘": 31, "’": 31, "“": 47, "”": 47, "…": 82
  };
  // This is a rudimentary (and U.S.-centric) algorithm for measuring the width of
  // a string based on a technique of Gregor Aisch; it assumes that individual
  // characters are laid out independently and does not implement the Unicode
  // grapheme cluster breaking algorithm. It does understand code points, though:
  // characters missing from the width map are measured as 120 if pictographic
  // (e.g., emoji) and as a lowercase e otherwise. TODO Optimize this by noting
  // that we often re-measure characters that were previously measured?
  // http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries
  // https://exploringjs.com/impatient-js/ch_strings.html#atoms-of-text
  //
  // Returns the estimated width of text.slice(start, end), in the units of the
  // default width map.
  export function defaultWidth(text, start = 0, end = text.length) {
    let total = 0;
    let i = start;
    while (i < end) {
      const known = defaultWidthMap[text[i]];
      total += known ?? (isPictographic(text, i) ? 120 : defaultWidthMap.e);
      i = readCharacter(text, i);
    }
    return total;
  }
  // Even for monospaced text, we can’t assume that the number of UTF-16 code
  // units (i.e., the length of a string) corresponds to the number of visible
  // characters; we still have to count graphemes. And note that pictographic
  // characters such as emojis are typically not monospaced!
  //
  // Returns the estimated width of text.slice(start, end): 63 per character, or
  // 126 for a pictographic character.
  export function monospaceWidth(text, start = 0, end = text.length) {
    let total = 0;
    let i = start;
    while (i < end) {
      total += isPictographic(text, i) ? 126 : 63;
      i = readCharacter(text, i);
    }
    return total;
  }
  // Returns a function that splits a string into lines. When text overflow
  // handling is enabled, or the line width is infinite, lines are split only at
  // newlines (\r\n, \r or \n); otherwise they are also wrapped to the line
  // width, which is scaled by 100 to match the units of the width functions.
  export function splitter({monospace, lineWidth, textOverflow}) {
    const wraps = textOverflow == null && lineWidth != Infinity;
    if (!wraps) return (text) => text.split(/\r\n?|\n/g);
    const maxWidth = lineWidth * 100;
    const widthof = monospace ? monospaceWidth : defaultWidth;
    return (text) => lineWrap(text, maxWidth, widthof);
  }
  // The character appended (or prepended, or inserted) by the ellipsis-* text
  // overflow modes.
  export const ellipsis = "…";

  // Returns a function that clips a single line of text to the line width
  // according to the text overflow mode. When there is no overflow mode, or the
  // line width is infinite, the returned function leaves text unchanged. The line
  // width is scaled by 100 to match the units of the width functions.
  export function clipper({monospace, lineWidth, textOverflow}) {
    if (textOverflow == null || lineWidth == Infinity) return (text) => text;
    const widthof = monospace ? monospaceWidth : defaultWidth;
    const maxWidth = lineWidth * 100;
    // Binds a clipping strategy and its suffix to this mark's width settings.
    const clipWith = (clip, suffix) => (text) => clip(text, maxWidth, widthof, suffix);
    if (textOverflow === "clip-start") return clipWith(clipStart, "");
    if (textOverflow === "clip-end") return clipWith(clipEnd, "");
    if (textOverflow === "ellipsis-start") return clipWith(clipStart, ellipsis);
    if (textOverflow === "ellipsis-middle") return clipWith(clipMiddle, ellipsis);
    if (textOverflow === "ellipsis-end") return clipWith(clipEnd, ellipsis);
  }
  // Cuts the given text to the given width, using the specified widthof function;
  // the returned [index, error] guarantees text.slice(0, index) fits within the
  // specified width with the given error. If the text fits naturally within the
  // given width, returns [-1, 0]. If the text needs cutting, the given inset
  // specifies how much space (in the same units as width and widthof) to reserve
  // for a possible ellipsis character.
  export function cut(text, width, widthof, inset) {
    const starts = []; // start indexes of the characters accepted so far
    const n = text.length;
    let used = 0; // width of the accepted characters
    let i = 0;
    while (i < n) {
      let j = readCharacter(text, i); // end of the character starting at i
      const l = widthof(text, i, j);
      if (used + l > width) {
        // This character does not fit: reserve the inset, then give back
        // accepted characters until the reserved space fits as well.
        used += inset;
        while (used > width && i > 0) {
          j = i;
          i = starts.pop();
          used -= widthof(text, i, j);
        }
        return [i, width - used];
      }
      used += l;
      starts.push(i);
      i = j;
    }
    return [-1, 0];
  }
  // Clips the end of the given text so that it fits within the given width,
  // appending the given ellipsis (which may be the empty string) when anything
  // was removed. Leading and trailing whitespace is ignored.
  export function clipEnd(text, width, widthof, ellipsis) {
    const trimmed = text.trim();
    const reserve = widthof(ellipsis); // space to keep free for the ellipsis
    const [index] = cut(trimmed, width, widthof, reserve);
    if (index < 0) return trimmed; // fits as-is
    return trimmed.slice(0, index).trimEnd() + ellipsis;
  }
  // Clips the middle of the given text so that it fits within the given width,
  // joining the kept head and tail with the given ellipsis. Leading and trailing
  // whitespace is ignored. Returns the text unchanged if it already fits.
  export function clipMiddle(text, width, widthof, ellipsis) {
    const trimmed = text.trim();
    const total = widthof(trimmed);
    if (total <= width) return trimmed;
    const reserve = widthof(ellipsis) / 2; // each half reserves half the ellipsis
    const [headEnd, slack] = cut(trimmed, width / 2, widthof, reserve);
    // Space left unused by the head is given to the tail.
    const [tailCut] = cut(trimmed, total - width / 2 - slack + reserve, widthof, -reserve); // TODO read spaces?
    if (tailCut < 0) return ellipsis;
    const head = trimmed.slice(0, headEnd).trimEnd();
    const tail = trimmed.slice(readCharacter(trimmed, tailCut)).trimStart();
    return head + ellipsis + tail;
  }
  // Clips the start of the given text so that it fits within the given width,
  // prepending the given ellipsis (which may be the empty string). Leading and
  // trailing whitespace is ignored. Returns the text unchanged if it already
  // fits.
  export function clipStart(text, width, widthof, ellipsis) {
    const trimmed = text.trim();
    const total = widthof(trimmed);
    if (total <= width) return trimmed;
    const reserve = widthof(ellipsis);
    // Find how much of the start must go to make room for the rest plus ellipsis.
    const [tailCut] = cut(trimmed, total - width + reserve, widthof, -reserve); // TODO read spaces?
    if (tailCut < 0) return ellipsis;
    const tail = trimmed.slice(readCharacter(trimmed, tailCut)).trimStart();
    return ellipsis + tail;
  }
  // Sticky, Unicode-aware patterns: each is matched exactly at its lastIndex,
  // which the callers set before testing. reCombiner matches a run of combining
  // marks and emoji modifiers; rePictographic matches one pictographic code point.
  const reCombiner = /[\p{Combining_Mark}\p{Emoji_Modifier}]+/uy;
  const rePictographic = /\p{Extended_Pictographic}/uy;

  // Reads a single “character” element from the given text starting at the given
  // index, returning the index after the read character. Ideally, this implements
  // the Unicode text segmentation algorithm and understands grapheme cluster
  // boundaries, etc., but in practice this is only smart enough to detect UTF-16
  // surrogate pairs, combining marks, and zero-width joiner (zwj) sequences such
  // as emoji skin color modifiers. https://unicode.org/reports/tr29/
  export function readCharacter(text, i) {
    for (;;) {
      // One code point: two code units for a surrogate pair, else one.
      i += isSurrogatePair(text, i) ? 2 : 1;
      // A successful sticky match leaves lastIndex just past the combiners.
      if (isCombiner(text, i)) i = reCombiner.lastIndex;
      if (!isZeroWidthJoiner(text, i)) return i;
      // A joiner glues the next code point onto this character; keep reading.
      ++i;
    }
  }
  // We avoid more expensive regex tests involving Unicode property classes by
  // first checking for the common case of 7-bit ASCII characters. Returns false
  // when the index is out of range (charCodeAt yields NaN).
  function isAscii(text, i) {
    const code = text.charCodeAt(i);
    return code < 0x80;
  }
  // Returns true if the code unit at index i is a high surrogate that is
  // immediately followed by a low surrogate.
  function isSurrogatePair(text, i) {
    const hi = text.charCodeAt(i);
    const isHigh = hi >= 0xd800 && hi < 0xdc00;
    if (!isHigh) return false;
    const lo = text.charCodeAt(i + 1);
    return lo >= 0xdc00 && lo < 0xe000;
  }
  // Returns true if the code unit at index i is U+200D ZERO WIDTH JOINER.
  function isZeroWidthJoiner(text, i) {
    const code = text.charCodeAt(i);
    return code === 0x200d;
  }
  // Returns true if a run of combining marks or emoji modifiers starts at index
  // i. On success, the sticky reCombiner.lastIndex is left just past the run.
  function isCombiner(text, i) {
    if (isAscii(text, i)) return false; // fast path; skips the regex
    reCombiner.lastIndex = i;
    return reCombiner.test(text);
  }
  // Returns true if the code point starting at index i is pictographic (matches
  // the Extended_Pictographic Unicode property), e.g. an emoji.
  function isPictographic(text, i) {
    if (isAscii(text, i)) return false; // fast path; skips the regex
    rePictographic.lastIndex = i;
    return rePictographic.test(text);
  }