Truncation by display width

I would appreciate any advice related to truncation methods.

Some emojis consist of several characters (code points) and many bytes, which makes them long in terms of character or byte count. However, their display width is only two columns. For this reason, I am looking for a method that truncates strings based on their display width.

Is there already a good method for this purpose, or should I develop a new one?

Inspired by String#truncate() and String#truncate_bytes() in Active Support Core Extensions, I am developing the following methods. However, if there is already a better method, I will adopt it.

I would appreciate your comments.

# coding: utf-8
require 'minitest/unit'
require 'unicode/display_width'

module TruncateBy

  # Returns the number of characters of +string+ to keep so that it fits in
  # <tt>truncate_to</tt>, or nil if +string+ is not longer than <tt>truncate_to</tt>:
  #
  #   TruncateBy.truncation_size('12345678', 7) {|grapheme| grapheme.size}
  #   # => 7
  #   TruncateBy.truncation_size('12345678', 8) {|grapheme| grapheme.size}
  #   # => nil
  #
  # The unit of return values is the number of characters and
  # the unit of <tt>truncate_to</tt> must be the same as the unit of what the block returns:
  #
  #   '👍🏽👍🏽👍🏽'.bytesize
  #   # => 24
  #   TruncateBy.truncation_size('👍🏽👍🏽👍🏽', 16) {|grapheme| grapheme.bytesize}
  #   # => 4 # the bytesize of the sum of the first 4 characters is 16.
  #   '👍🏽👍🏽👍🏽'[0, 4]
  #   # => "👍🏽👍🏽"
  #   '👍🏽👍🏽'.bytesize
  #   # => 16 # equal to truncate_to
  #
  # Pass <tt>:omission_size</tt> to make room for an omission string:
  #
  #   TruncateBy.truncation_size('12345678', 7, omission_size: 3) {|grapheme| grapheme.size}
  #   # => 4
  #
  # The unit of <tt>:omission_size</tt> must be the same as the unit of what the block returns.
  #
  # Not to break grapheme clusters, a grapheme cluster is given to the block each time and
  # the size of string after truncation can be less than <tt>truncate_to</tt>.
  #
  #   TruncateBy.truncation_size('👍🏽👍🏽👍🏽', 20) {|grapheme| grapheme.bytesize}
  #   # => 4
  #   '👍🏽👍🏽👍🏽'[0, 4]
  #   # => "👍🏽👍🏽" # string after truncation
  #   '👍🏽👍🏽'.bytesize
  #   # => 16 # less than truncate_to, which is 20
  def truncation_size(string, truncate_to, omission_size: 0)
    room = truncate_to - omission_size # budget for the kept text itself
    size = 0                           # running total in the block's unit
    size_chr = 0                       # characters that fit within +room+

    string.each_grapheme_cluster do |grapheme|
      size += yield(grapheme)
      if size <= room
        size_chr += grapheme.size
      elsif size > truncate_to
        # Only now is it certain that +string+ does not fit as a whole.
        return size_chr
      end
    end

    # The whole string fits in +truncate_to+: nothing to truncate.
    nil
  end

  # Truncates a given +string+ to length <tt>truncate_to</tt> if +string+ is longer than <tt>truncate_to</tt>,
  # and the length is counted based on given block:
  #
  #   TruncateBy.truncate_by('Once upon a time in a world far far away', 27) {|grapheme| grapheme.size}
  #   # => "Once upon a time in a worl…"
  #
  # Not to break grapheme clusters, a grapheme cluster is given to the block each time and a string returned
  # can be shorter than the possible maximum length designated by <tt>truncate_to</tt>.
  #
  #   '👍🏽👍🏽👍🏽'.size
  #   # => 6
  #   TruncateBy.truncate_by('👍🏽👍🏽👍🏽', 4) {|grapheme| grapheme.size}
  #   # => "👍🏽…"
  #   "👍🏽…".size
  #   # => 3 # shorter than 4
  #
  # The tail will be the <tt>:omission</tt> string (defaults to "…").
  # Its size is the sum of what the block returns for each of its grapheme clusters.
  #
  #   TruncateBy.truncate_by('Once upon a time in a world far far away', 27, omission: '->') {|grapheme| grapheme.size}
  #   # => "Once upon a time in a wor->"
  #
  #   TruncateBy.truncate_by('Once upon a time in a world far far away', 27, omission: nil) {|grapheme| grapheme.size}
  #   # => "Once upon a time in a world"
  #
  # Raises +ArgumentError+ when the size of <tt>:omission</tt> exceeds <tt>truncate_to</tt>.
  #
  # Pass a string or regexp <tt>:separator</tt> to truncate +string+ at a natural break:
  #
  #   TruncateBy.truncate_by('Once upon a time in a world far far away', 27, separator: ' ') {|grapheme| grapheme.size}
  #   # => "Once upon a time in a…"
  #
  #   TruncateBy.truncate_by('Once upon a time in a world far far away', 27, separator: /\s/) {|grapheme| grapheme.size}
  #   # => "Once upon a time in a…"
  #
  # Always returns a new string; +string+ itself is never modified.
  def truncate_by(string, truncate_to, omission: "…", separator: nil, &block)
    omission ||= ""
    # Measure the omission one grapheme cluster at a time, the same way
    # +string+ is measured, so the block only ever sees single clusters.
    omission_size = omission.each_grapheme_cluster.sum { |grapheme| yield(grapheme) }

    if omission_size > truncate_to
      # Report the size in the block's own unit, which need not be bytes.
      raise ArgumentError, "Omission #{omission.inspect} is #{omission_size}, larger than the truncation length of #{truncate_to}"
    end

    size_chr = TruncateBy.truncation_size(string, truncate_to,
                                          omission_size: omission_size,
                                          &block)
    return string.dup unless size_chr

    # Prefer the last separator at or before the cut position, if there is one.
    stop = (separator && string.rindex(separator, size_chr)) || size_chr
    +"#{string[0, stop]}#{omission}"
  end

  module_function :truncation_size, :truncate_by
end

# Exercises TruncateBy.truncate_by with three different size units:
# characters, bytes and display width.
class TmpTest < Minitest::Test

  # Thumbs-up emoji with a skin-tone modifier (U+1F44D U+1F3FD): 2 code points
  # and 8 bytes in UTF-8. Written with escapes so the fixture survives any
  # transcoding of this file.
  THUMBS_UP = "\u{1F44D}\u{1F3FD}".freeze

  # Truncates +string+ to at most +truncate_to+ characters.
  # Extra keyword arguments are forwarded to TruncateBy.truncate_by.
  def truncate_by_char(string, truncate_to, **kwargs)
    if string.size <= truncate_to
      string.dup
    else
      TruncateBy.truncate_by(string, truncate_to, **kwargs) do |grapheme|
        grapheme.size
      end
    end
  end

  def test_truncate_by_char
    assert_equal "Hello World!", truncate_by_char("Hello World!", 12)
    assert_equal "Hello World…", truncate_by_char("Hello World!!", 12)
    assert_equal "Hello W[...]", truncate_by_char("Hello World!!", 12, omission: "[...]")
    assert_equal "Hello…", truncate_by_char("Hello World!!", 12, separator: " ")
    assert_equal "Hello…", truncate_by_char("Hello World!!", 12, separator: /\s/)
    assert_equal "Hello  …", truncate_by_char("Hello   World!!", 12, separator: " ")
    assert_equal "Hello…",  truncate_by_char("Hello   World!!", 12, separator: /(?<!\s)\s+/)
    assert_equal "…",  truncate_by_char(" HelloWorld!!", 12, separator: " ")
    assert_equal "…",  truncate_by_char(" HelloWorld!!", 12, separator: /(?<!\s)\s+/)

    assert_equal 6, (THUMBS_UP * 3).size
    assert_equal THUMBS_UP * 3, truncate_by_char(THUMBS_UP * 3, 6)
    assert_equal "#{THUMBS_UP * 2}…", truncate_by_char(THUMBS_UP * 3, 5)
  end


  # Truncates +string+ to at most +truncate_to+ bytes.
  # Extra keyword arguments are forwarded to TruncateBy.truncate_by.
  def truncate_by_byte(string, truncate_to, **kwargs)
    if string.bytesize <= truncate_to
      string.dup
    else
      TruncateBy.truncate_by(string, truncate_to, **kwargs) do |grapheme|
        grapheme.bytesize
      end
    end
  end

  def test_truncate_by_byte
    assert_equal 24, (THUMBS_UP * 3).bytesize
    assert_equal THUMBS_UP * 3, truncate_by_byte(THUMBS_UP * 3, 24)
    assert_equal "#{THUMBS_UP * 2}…", truncate_by_byte(THUMBS_UP * 3, 23)
  end


  # Truncates by display width as computed by Unicode::DisplayWidth.
  # The optional <tt>:display_width</tt> keyword is a hash of options for
  # Unicode::DisplayWidth.new (defaults to <tt>{ambiguous: 2, emoji: true}</tt>);
  # all other arguments are forwarded to TruncateBy.truncate_by.
  def truncate_by_display_width(*args, **kwargs)
    display_width_opt =
      kwargs.delete(:display_width) || {ambiguous: 2,
                                        emoji: true}
    display_width =
      Unicode::DisplayWidth.new(**display_width_opt)
    TruncateBy.truncate_by(*args, **kwargs) do |grapheme|
      display_width.of grapheme
    end
  end

  def test_truncate_by_display_width
    # These expectations rely on each emoji being measured as two columns wide.
    assert_equal THUMBS_UP * 3, truncate_by_display_width(THUMBS_UP * 3, 6, omission: nil)
    assert_equal THUMBS_UP * 2, truncate_by_display_width(THUMBS_UP * 3, 5, omission: nil)
  end
end