module URLEscape
Public Class Methods
escape(p1)
click to toggle source
/*
 * URL-escape the bytes of `str` and return the result as a new string.
 *
 * Each input byte is handled on its own:
 *   - a byte below 0x80 that valid_literal() accepts is copied unchanged;
 *   - a space is replaced by the first byte of `plus`;
 *   - any other byte, including every byte >= 0x80, is replaced by the
 *     3-byte hex_table entry for that byte.
 *
 * Because non-ASCII bytes are always escaped one at a time, no UTF-8
 * decoding is needed: the escaped form of a multi-byte sequence is simply
 * the concatenation of the escaped forms of its bytes. The previous
 * implementation tried to recognise 2- and 3-byte sequences, but its 3-byte
 * branch consumed the third byte without validating it, so an ASCII byte
 * following 0xE0 0x80..0xBF was hex-escaped instead of going through the
 * literal/space handling above.
 *
 * self - receiver (unused).
 * str  - value to escape; converted in place with StringValue().
 *
 * Returns a new string built with rb_str_buf_new()/rb_str_buf_cat().
 */
static VALUE escape(VALUE self, VALUE str)
{
    const char *buf;
    long len;   /* RSTRING_LEN() yields a long; an int would truncate it */
    long i;
    VALUE outstr;

    StringValue(str);
    buf = RSTRING_PTR(str);
    len = RSTRING_LEN(str);
    outstr = rb_str_buf_new(len);

    for (i = 0; i < len; i++) {
        /* Work on the unsigned value so it can index hex_table safely. */
        const unsigned char byte = (unsigned char)buf[i];

        if (byte >= 0x80) {
            /* Non-ASCII (UTF-8 lead/continuation, Latin-1, ...): always escape. */
            rb_str_buf_cat(outstr, hex_table[byte], 3);
        } else if (valid_literal(byte)) {
            rb_str_buf_cat(outstr, buf + i, 1);
        } else if (byte == ' ') {
            /*
             * NOTE(review): only one byte of `plus` is appended; `plus` is
             * defined elsewhere, so confirm it is a single-character
             * replacement.
             */
            rb_str_buf_cat(outstr, plus, 1);
        } else {
            /* ASCII, but not allowed to appear literally. */
            rb_str_buf_cat(outstr, hex_table[byte], 3);
        }
    }

    return outstr;
}
unescape(p1)
click to toggle source
/*
 * Reverse URL escaping on `str` and return the result as a new string.
 *
 * Scanning left to right:
 *   - '%' followed by two characters that hex() accepts is replaced by the
 *     single byte (high << 4) + low;
 *   - '+' is replaced by the first byte of `space`;
 *   - every other byte, including a '%' that does not start a complete,
 *     valid escape, is copied unchanged.
 *
 * self - receiver (unused).
 * str  - value to unescape; converted in place with StringValue().
 *
 * Returns a new string built with rb_str_buf_new()/rb_str_buf_cat().
 */
static VALUE unescape(VALUE self, VALUE str)
{
    const char *buf;
    const char *bufend;
    VALUE outstr;

    StringValue(str);
    buf = RSTRING_PTR(str);
    bufend = buf + RSTRING_LEN(str);
    outstr = rb_str_buf_new(RSTRING_LEN(str));

    while (buf < bufend) {
        /*
         * An escape needs three bytes ('%' plus two digits) inside the
         * string. The earlier test `buf + 2 <= bufend` let buf[2] be read
         * when it was one past the end of the contents.
         */
        if (buf[0] == '%' && bufend - buf >= 3) {
            /*
             * Keep the results in int: plain char may be unsigned, which
             * would make the `>= 0` validity test below always true.
             * hex() is defined elsewhere; judging by that test it presumably
             * returns a negative value for a non-hex character - confirm.
             */
            const int high = hex(buf[1]);
            const int low = hex(buf[2]);

            if (high >= 0 && low >= 0) {
                const char byte = (char)(low + (high << 4));
                rb_str_buf_cat(outstr, &byte, 1);
                buf += 3;
                continue;
            }
        }

        if (buf[0] == '+') {
            rb_str_buf_cat(outstr, space, 1);
        } else {
            rb_str_buf_cat(outstr, buf, 1);
        }
        buf++;
    }

    return outstr;
}