Extended maintenance of Ruby 1.9.3 ended on February 23, 2015. Read more

In Files

  • iconv/iconv.c

Iconv

Summary

Ruby extension for charset conversion.

Abstract

Iconv is a wrapper class for the UNIX 95 iconv() function family, which translates strings between various encoding systems.

See Open Group's on-line documents for more details.

Which coding systems are available is platform-dependent.

Examples

  1. Simple conversion between two charsets.

    converted_text = Iconv.conv('iso-8859-15', 'utf-8', text)
    
  2. Instantiate a new Iconv and use method #iconv.

    cd = Iconv.new(to, from)
    begin
      input.each { |s| output << cd.iconv(s) }
      output << cd.iconv(nil)                   # Don't forget this!
    ensure
      cd.close
    end
    
  3. Invoke ::open with a block.

    Iconv.open(to, from) do |cd|
      input.each { |s| output << cd.iconv(s) }
      output << cd.iconv(nil)
    end
    
  4. Shorthand for (3).

    Iconv.iconv(to, from, *input.to_a)
    

Attentions

Even if some implementation-dependent extensions are useful, DON'T USE those extensions in libraries and scripts that are to be widely distributed. If you want to use those features, use String#encode.

Public Class Methods

charset_map click to toggle source

Returns the map from canonical name to system dependent name.

 
               /*
                * Iconv.charset_map
                *
                * Accessor that hands back the file-level `charset_map` object
                * unchanged.  Takes no arguments.
                */
               static VALUE
charset_map_get(void)
{
    return charset_map;
}
            
conv(to, from, str) click to toggle source

Shorthand for

Iconv.iconv(to, from, str).join

See ::iconv.

 
               /*
                * Iconv.conv(to, from, str)
                *
                * One-shot conversion of a single string.  Fills in an
                * iconv_env_t that describes a one-element argument list (+str+),
                * uses rb_str_append as the accumulator callback and an empty
                * string as the accumulator, then opens a descriptor with
                * iconv_create() and runs iconv_s_convert under rb_ensure so that
                * iconv_free is called on the descriptor afterwards.
                *
                * Returns whatever rb_ensure returns for iconv_s_convert.
                */
               static VALUE
iconv_s_conv(VALUE self, VALUE to, VALUE from, VALUE str)
{
    struct iconv_env_t arg;

    /* exactly one input string, accumulated by appending to a String */
    arg.append = rb_str_append;
    arg.argv = &str;
    arg.argc = 1;

    /* the accumulator is allocated before the descriptor is opened */
    arg.ret = rb_str_new(0, 0);
    arg.cd = iconv_create(to, from, NULL, &arg.toidx);

    return rb_ensure(iconv_s_convert, (VALUE)&arg,
                     iconv_free, ICONV2VALUE(arg.cd));
}
            
ctlmethods => array click to toggle source

Returns available iconvctl() method list.

 
               /*
                * Iconv.ctlmethods => array
                *
                * Builds a fresh array of symbols, one per iconvctl()-backed
                * method.  A symbol is pushed only when the corresponding ICONV_*
                * request macro is defined at compile time, so the array may be
                * empty.
                */
               static VALUE
iconv_s_ctlmethods(VALUE klass)
{
    VALUE methods = rb_ary_new();

#ifdef ICONV_TRIVIALP
    rb_ary_push(methods, ID2SYM(rb_intern("trivial?")));
#endif
#ifdef ICONV_GET_TRANSLITERATE
    rb_ary_push(methods, ID2SYM(rb_intern("transliterate?")));
#endif
#ifdef ICONV_SET_TRANSLITERATE
    rb_ary_push(methods, ID2SYM(rb_intern("transliterate=")));
#endif
#ifdef ICONV_GET_DISCARD_ILSEQ
    rb_ary_push(methods, ID2SYM(rb_intern("discard_ilseq?")));
#endif
#ifdef ICONV_SET_DISCARD_ILSEQ
    rb_ary_push(methods, ID2SYM(rb_intern("discard_ilseq=")));
#endif

    return methods;
}
            
iconv(to, from, *strs) click to toggle source

Shorthand for

Iconv.open(to, from) { |cd|
  (strs + [nil]).collect { |s| cd.iconv(s) }
}

Parameters

to, from

see ::new

strs

strings to be converted

Exceptions

Exceptions thrown by ::new, ::open and #iconv.

 
               /*
                * Iconv.iconv(to, from, *strs)
                *
                * argv[0] and argv[1] are the `to' and `from' encoding names; any
                * remaining arguments are the strings to convert.  Raises
                * ArgumentError when fewer than two arguments are given.
                *
                * The remaining arguments are described in an iconv_env_t whose
                * accumulator is an array (rb_ary_push / rb_ary_new2), and
                * iconv_s_convert is run under rb_ensure so that iconv_free is
                * called on the descriptor afterwards.
                */
               static VALUE
iconv_s_iconv(int argc, VALUE *argv, VALUE self)
{
    struct iconv_env_t arg;

    /* needs `to' and `from' arguments at least */
    if (argc < 2) {
        rb_raise(rb_eArgError, "wrong number of arguments (%d for %d)", argc, 2);
    }

    /* from here on argc counts only the strings to convert */
    argc -= 2;
    arg.argc = argc;
    arg.argv = &argv[2];
    arg.append = rb_ary_push;
    arg.ret = rb_ary_new2(argc);
    arg.cd = iconv_create(argv[0], argv[1], NULL, &arg.toidx);

    return rb_ensure(iconv_s_convert, (VALUE)&arg,
                     iconv_free, ICONV2VALUE(arg.cd));
}
            
list {|*aliases| ... } click to toggle source

Iterates over each set of aliases.

 
               /*
                * Iconv.list {|*aliases| ... }
                *
                * Enumerates the encoding names known to the iconv implementation.
                *
                * HAVE_ICONVLIST:
                *   Calls iconvlist() with the list_iconv callback and a two-slot
                *   scratch array.  args[1] is 0 when a block is given, otherwise
                *   a new array that is returned as the result.  args[0] is read
                *   back as an int; a non-zero value is re-raised with
                *   rb_jump_tag().  (list_iconv is defined elsewhere; presumably
                *   it stores the rb_protect state there -- confirm against its
                *   definition.)
                *
                * HAVE___ICONV_FREE_LIST:
                *   Copies the names from __iconv_get_list() into a Ruby array,
                *   frees the C list, then either returns the array (no block) or
                *   yields each element.
                *
                * Returns nil when a block was given, when __iconv_get_list()
                * reports failure, or when neither facility is available.
                */
               static VALUE
iconv_s_list(void)
{
#ifdef HAVE_ICONVLIST
    int state;
    VALUE args[2];

    /*
     * args[0] is inspected below as the jump state.  Clear it first so the
     * value is well defined even if the callback never stores into it
     * (e.g. it is never invoked); otherwise an indeterminate stack value
     * could be handed to rb_jump_tag().
     */
    args[0] = 0;
    args[1] = rb_block_given_p() ? 0 : rb_ary_new();
    iconvlist(list_iconv, args);
    state = *(int *)args;
    if (state) rb_jump_tag(state);
    if (args[1]) return args[1];
#elif defined(HAVE___ICONV_FREE_LIST)
    char **list;
    size_t sz, i;
    VALUE ary;

    if (__iconv_get_list(&list, &sz)) return Qnil;

    /* copy every name into Ruby strings before releasing the C list */
    ary = rb_ary_new2(sz);
    for (i = 0; i < sz; i++) {
        rb_ary_push(ary, rb_str_new2(list[i]));
    }
    __iconv_free_list(list, sz);

    if (!rb_block_given_p())
        return ary;
    for (i = 0; i < RARRAY_LEN(ary); i++) {
        rb_yield(RARRAY_PTR(ary)[i]);
    }
#endif
    return Qnil;
}
            
new(to, from, [options]) click to toggle source

Creates new code converter from a coding-system designated with from to another one designated with to.

Parameters

to

encoding name for destination

from

encoding name for source

options

options for converter

Exceptions

TypeError

if to or from isn't a String

InvalidEncoding

if the designated converter couldn't be found

SystemCallError

if iconv_open(3) fails

 
               /*
                * Iconv.new(to, from, [options])
                *
                * (Re)initializes +self+ with a converter created by iconv_create()
                * for the given +to+ / +from+ pair and optional +options+.
                * Returns +self+.
                */
               static VALUE
iconv_initialize(int argc, VALUE *argv, VALUE self)
{
    VALUE to, from, options;
    struct rb_iconv_opt_t opt;
    int idx;

    /* "21": two required arguments (to, from) and one optional (options) */
    rb_scan_args(argc, argv, "21", &to, &from, &options);
    get_iconv_opt(&opt, options);
    /* release whatever descriptor check_iconv() reports for this object */
    iconv_free(check_iconv(self));
    /*
     * Clear the slot before creating the replacement.  NOTE(review):
     * presumably this keeps the object from still pointing at the
     * just-freed descriptor if iconv_create() raises -- confirm.
     */
    DATA_PTR(self) = NULL;
    DATA_PTR(self) = (void *)ICONV2VALUE(iconv_create(to, from, &opt, &idx));
    /* a non-negative idx from iconv_create() is recorded on the object */
    if (idx >= 0) ENCODING_SET(self, idx);
    return self;
}
            
open(to, from) { |iconv| ... } click to toggle source

Equivalent to ::new, except that when it is called with a block, it yields the new instance to the block, closes the instance afterwards, and returns the value returned by the block.

 
               /*
                * Iconv.open(to, from, [options]) { |iconv| ... }
                *
                * Creates a descriptor with iconv_create(), wraps it in a new
                * object of the receiving class (ICONV_FREE is registered as the
                * free function) and records the encoding index when it is
                * non-negative.
                *
                * Without a block the wrapped object is returned.  With a block
                * the object is yielded under rb_ensure, with iconv_finish
                * registered as the ensure function, and the result of rb_ensure
                * is returned.
                */
               static VALUE
iconv_s_open(int argc, VALUE *argv, VALUE self)
{
    VALUE to, from, options, cd;
    struct rb_iconv_opt_t opt;
    int idx;

    rb_scan_args(argc, argv, "21", &to, &from, &options);
    get_iconv_opt(&opt, options);
    cd = ICONV2VALUE(iconv_create(to, from, &opt, &idx));

    /* from here on `self' is the new instance, not the class */
    self = Data_Wrap_Struct(self, NULL, ICONV_FREE, (void *)cd);
    if (idx >= 0) {
        ENCODING_SET(self, idx);
    }

    if (!rb_block_given_p()) {
        return self;
    }
    return rb_ensure(rb_yield, self, (VALUE(*)())iconv_finish, self);
}
            

Public Instance Methods

close() click to toggle source

Finishes conversion.

After calling this, calling #iconv will cause an exception, but multiple calls of close are guaranteed to end successfully.

Returns a string containing the byte sequence to change the output buffer to its initial shift state.

 
               /*
                * cd.close
                *
                * Returns nil when check_iconv() reports no descriptor for +self+.
                * Otherwise runs iconv_init_state on +self+ under rb_ensure with
                * iconv_free registered for the descriptor, and returns the
                * result of that call.
                */
               static VALUE
iconv_finish(VALUE self)
{
    VALUE cd = check_iconv(self);

    return cd ? rb_ensure(iconv_init_state, self, iconv_free, cd)
              : Qnil;
}
            
conv(str...) click to toggle source

Equivalent to

iconv(nil, str..., nil).join
 
               /*
                * cd.conv(str...)
                *
                * Starts with the result of an iconv_convert() call on nil, then
                * converts each argument in order and appends every non-empty
                * piece to that result.  When at least one argument was given, a
                * final iconv_convert() call on nil is made and its output
                * appended as well.  Returns the accumulated string.
                */
               static VALUE
iconv_conv(int argc, VALUE *argv, VALUE self)
{
    iconv_t cd = VALUE2ICONV(check_iconv(self));
    VALUE result, chunk;
    int toidx = ENCODING_GET(self);
    int i;

    result = iconv_convert(cd, Qnil, 0, 0, toidx, NULL);
    if (argc <= 0) {
        return result;
    }

    for (i = 0; i < argc; i++) {
        chunk = iconv_convert(cd, argv[i], 0, -1, toidx, NULL);
        if (RSTRING_LEN(chunk) != 0) {
            rb_str_buf_append(result, chunk);
        }
    }

    /* trailing nil conversion after the last input string */
    chunk = iconv_convert(cd, Qnil, 0, 0, toidx, NULL);
    if (RSTRING_LEN(chunk) != 0) {
        rb_str_buf_append(result, chunk);
    }

    return result;
}
            
discard_ilseq = flag click to toggle source

Sets discard_ilseq flag.

 
               /*
                * cd.discard_ilseq = flag
                *
                * Reduces +discard_ilseq+ to a C truth value with RTEST and issues
                * the ICONV_SET_DISCARD_ILSEQ request through iconv_ctl.
                * Returns +self+.
                */
               static VALUE
iconv_set_discard_ilseq(VALUE self, VALUE discard_ilseq)
{
    /* iconv_ctl is defined elsewhere; it is handed the local by name */
    int flag = RTEST(discard_ilseq);

    iconv_ctl(self, ICONV_SET_DISCARD_ILSEQ, flag);

    return self;
}
            
discard_ilseq? click to toggle source

Returns discard_ilseq flag.

 
               /*
                * cd.discard_ilseq?
                *
                * Issues the ICONV_GET_DISCARD_ILSEQ request through iconv_ctl
                * with a local that starts at 0, and maps the resulting value to
                * true (non-zero) or false (zero).
                */
               static VALUE
iconv_get_discard_ilseq(VALUE self)
{
    /* iconv_ctl is defined elsewhere; presumably it fills in `flag' */
    int flag = 0;

    iconv_ctl(self, ICONV_GET_DISCARD_ILSEQ, flag);

    return flag ? Qtrue : Qfalse;
}
            
iconv(str, start=0, length=-1) click to toggle source

Converts string and returns the result.

  • If str is a String, converts str[start, length] and returns the converted string.

  • If str is nil, places converter itself into initial shift state and just returns a string containing the byte sequence to change the output buffer to its initial shift state.

  • Otherwise, raises an exception.

Parameters

str

string to be converted, or nil

start

starting offset

length

conversion length; nil or -1 means the whole string from start

Exceptions

  • IconvIllegalSequence

  • IconvInvalidCharacter

  • IconvOutOfRange

Examples

See the Iconv documentation.

 
               /*
                * cd.iconv(str, start=0, length=-1)
                *
                * Converts str[start, length] with this converter and returns the
                * result of iconv_convert().
                *
                * Arguments (after +str+, which may be a String or nil):
                *   - with exactly two arguments, the second may be a Range, which
                *     rb_range_beg_len() resolves into start/length;
                *   - otherwise the second and third arguments are start and
                *     length; a negative start is offset by the string length,
                *     and a nil length is treated as -1.
                *
                * start and length are given in the units reported by
                * rb_str_length(); positive values are translated with
                * rb_enc_nth() into pointer differences within the string's
                * buffer before iconv_convert() is called.
                */
               static VALUE
iconv_iconv(int argc, VALUE *argv, VALUE self)
{
    VALUE str, n1, n2;
    VALUE cd = check_iconv(self);
    long start = 0, length = 0, slen = 0;

    rb_scan_args(argc, argv, "12", &str, &n1, &n2);
    if (!NIL_P(str)) {
        VALUE n = rb_str_length(StringValue(str));
        slen = NUM2LONG(n);
    }
    /* try the Range form first (two arguments only), else start/length */
    if (argc != 2 || !RTEST(rb_range_beg_len(n1, &start, &length, slen, 0))) {
        if (NIL_P(n1) || ((start = NUM2LONG(n1)) < 0 ? (start += slen) >= 0 : start < slen)) {
            length = NIL_P(n2) ? -1 : NUM2LONG(n2);
        }
    }
    /*
     * Translate positive start/length through the string's encoding.
     * This must be skipped for a nil +str+: a call such as
     * iconv(nil, nil, 5) reaches here with length > 0, and rb_enc_get /
     * RSTRING_PTR / RSTRING_LEN must not be applied to nil.
     * NOTE(review): iconv_convert() is defined elsewhere; presumably it
     * does not use start/length when str is nil -- confirm.
     */
    if (!NIL_P(str) && (start > 0 || length > 0)) {
        rb_encoding *enc = rb_enc_get(str);
        const char *s = RSTRING_PTR(str), *e = s + RSTRING_LEN(str);
        const char *ps = s;
        if (start > 0) {
            start = (ps = rb_enc_nth(s, e, start, enc)) - s;
        }
        if (length > 0) {
            /* measured from ps, i.e. from the already-translated start */
            length = rb_enc_nth(ps, e, length, enc) - ps;
        }
    }

    return iconv_convert(VALUE2ICONV(cd), str, start, length, ENCODING_GET(self), NULL);
}
            
transliterate = flag click to toggle source

Sets transliterate flag.

 
               /*
                * cd.transliterate = flag
                *
                * Reduces +transliterate+ to a C truth value with RTEST and issues
                * the ICONV_SET_TRANSLITERATE request through iconv_ctl.
                * Returns +self+.
                */
               static VALUE
iconv_set_transliterate(VALUE self, VALUE transliterate)
{
    /* iconv_ctl is defined elsewhere; it is handed the local by name */
    int flag = RTEST(transliterate);

    iconv_ctl(self, ICONV_SET_TRANSLITERATE, flag);

    return self;
}
            
transliterate? click to toggle source

Returns transliterate flag.

 
               /*
                * cd.transliterate?
                *
                * Issues the ICONV_GET_TRANSLITERATE request through iconv_ctl
                * with a local that starts at 0, and maps the resulting value to
                * true (non-zero) or false (zero).
                */
               static VALUE
iconv_get_transliterate(VALUE self)
{
    /* iconv_ctl is defined elsewhere; presumably it fills in `flag' */
    int flag = 0;

    iconv_ctl(self, ICONV_GET_TRANSLITERATE, flag);

    return flag ? Qtrue : Qfalse;
}
            
trivial? click to toggle source

Returns trivial flag.

 
               /*
                * cd.trivial?
                *
                * Issues the ICONV_TRIVIALP request through iconv_ctl with a local
                * that starts at 0, and maps the resulting value to true
                * (non-zero) or false (zero).
                */
               static VALUE
iconv_trivialp(VALUE self)
{
    /* iconv_ctl is defined elsewhere; presumably it fills in `flag' */
    int flag = 0;

    iconv_ctl(self, ICONV_TRIVIALP, flag);

    return flag ? Qtrue : Qfalse;
}
            

Commenting is here to help enhance the documentation. For example, code samples, or clarification of the documentation.

If you have questions about Ruby or the documentation, please post to one of the Ruby mailing lists. You will get better, faster help that way.

If you wish to post a correction of the docs, please do so, but also file a bug report so that it can be corrected for the next release. Thank you.

If you want to help improve the Ruby documentation, please visit Documenting-ruby.org.