| /* |
| * Copyright (C) 2005, 2007, 2008 Apple Inc. All rights reserved. |
| * Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com) |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of |
| * its contributors may be used to endorse or promote products derived |
| * from this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY |
| * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
| * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY |
| * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
| * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
| * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
| * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #import "WebNSURLExtras.h" |
| |
| #import "WebKitNSStringExtras.h" |
| #import "WebLocalizableStrings.h" |
| #import "WebNSDataExtras.h" |
| #import "WebNSObjectExtras.h" |
| #import "WebSystemInterface.h" |
| #import <Foundation/NSURLRequest.h> |
| #import <WebCore/KURL.h> |
| #import <WebCore/LoaderNSURLExtras.h> |
| #import <WebKitSystemInterface.h> |
| #import <wtf/Assertions.h> |
| #import <unicode/uchar.h> |
| #import <unicode/uidna.h> |
| #import <unicode/uscript.h> |
| |
| using namespace WebCore; |
| using namespace WTF; |
| |
// Callback invoked once for each host-name range found in a URL string.
// |context| is passed through to the callback untouched.
typedef void (* StringRangeApplierFunction)(NSString *string, NSRange range, void *context);

// Needs to be big enough to hold an IDN-encoded name.
// For host names bigger than this, we won't do IDN encoding, which is almost certainly OK.
#define HOST_NAME_BUFFER_LENGTH 2048

// Size of the first-try buffer handed to CFURLGetBytes; URLs that do not fit
// are fetched again into a heap allocation of the exact size.
#define URL_BYTES_BUFFER_LENGTH 2048

// pthread_once control guarding the one-time load of the IDN script white list.
static pthread_once_t IDNScriptWhiteListFileRead = PTHREAD_ONCE_INIT;
// Bit set with one bit per ICU script code (word = code / 32, bit = code % 32);
// a set bit means the script was named in the white list file.
static uint32_t IDNScriptWhiteList[(USCRIPT_CODE_LIMIT + 31) / 32];
| |
// Returns YES when |charCode| should be treated as an unsafe, lookalike character.
static inline BOOL isLookalikeCharacter(int charCode)
{
    // FIXME: Move this code down into WebCore so it can be shared with other platforms.

    // Individually listed unsafe code points. These come from Mozilla's blacklist
    // (http://kb.mozillazine.org/Network.IDN.blacklist_chars), restricted to the
    // characters that ICU can encode.
    static const int blacklistedCodePoints[] = {
        0x01C3, /* LATIN LETTER RETROFLEX CLICK */
        0x0337, /* COMBINING SHORT SOLIDUS OVERLAY */
        0x0338, /* COMBINING LONG SOLIDUS OVERLAY */
        0x05B4, /* HEBREW POINT HIRIQ */
        0x05BC, /* HEBREW POINT DAGESH OR MAPIQ */
        0x05C3, /* HEBREW PUNCTUATION SOF PASUQ */
        0x05F4, /* HEBREW PUNCTUATION GERSHAYIM */
        0x0660, /* ARABIC INDIC DIGIT ZERO */
        0x06D4, /* ARABIC FULL STOP */
        0x06F0, /* EXTENDED ARABIC INDIC DIGIT ZERO */
        0x2027, /* HYPHENATION POINT */
        0x2039, /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */
        0x203A, /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */
        0x2044, /* FRACTION SLASH */
        0x2215, /* DIVISION SLASH */
        0x23ae, /* INTEGRAL EXTENSION */
        0x2571, /* BOX DRAWINGS LIGHT DIAGONAL UPPER RIGHT TO LOWER LEFT */
        0x29F8, /* BIG SOLIDUS */
        0x29f6, /* SOLIDUS WITH OVERBAR */
        0x2AFB, /* TRIPLE SOLIDUS BINARY RELATION */
        0x2AFD, /* DOUBLE SOLIDUS OPERATOR */
        0x3008, /* LEFT ANGLE BRACKET */
        0x3014, /* LEFT TORTOISE SHELL BRACKET */
        0x3015, /* RIGHT TORTOISE SHELL BRACKET */
        0x3033, /* VERTICAL KANA REPEAT MARK UPPER HALF */
        0x321D, /* PARENTHESIZED KOREAN CHARACTER OJEON */
        0x321E, /* PARENTHESIZED KOREAN CHARACTER O HU */
        0x33DF, /* SQUARE A OVER M */
        0xFE14, /* PRESENTATION FORM FOR VERTICAL SEMICOLON */
        0xFE15, /* PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK */
        0xFE3F, /* PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET */
        0xFE5D, /* SMALL LEFT TORTOISE SHELL BRACKET */
        0xFE5E, /* SMALL RIGHT TORTOISE SHELL BRACKET */
    };

    // Category checks first: any non-printable character, any character considered
    // as whitespace that isn't already converted to a space by ICU, and any
    // ignorable character is unsafe.
    if (!u_isprint(charCode))
        return YES;
    if (u_isUWhiteSpace(charCode))
        return YES;
    if (u_hasBinaryProperty(charCode, UCHAR_DEFAULT_IGNORABLE_CODE_POINT))
        return YES;

    // Otherwise the character is unsafe only if it is explicitly listed.
    const size_t blacklistCount = sizeof(blacklistedCodePoints) / sizeof(blacklistedCodePoints[0]);
    for (size_t index = 0; index < blacklistCount; ++index) {
        if (blacklistedCodePoints[index] == charCode)
            return YES;
    }
    return NO;
}
| |
// Returns the uppercase ASCII hex digit ('0'-'9', 'A'-'F') for a nibble value in [0, 15].
// Any other value is a caller error: it is logged and '0' is returned.
static char hexDigit(int i)
{
    // The upper bound is 15, not 16: 16 would otherwise map to 'G', which is not a hex digit.
    if (i < 0 || i > 15) {
        LOG_ERROR("illegal hex digit");
        return '0';
    }
    return static_cast<char>(i < 10 ? '0' + i : 'A' + (i - 10));
}
| |
// Returns YES when |c| is an ASCII hexadecimal digit in either case.
static BOOL isHexDigit(char c)
{
    if ('0' <= c && c <= '9')
        return YES;
    if ('A' <= c && c <= 'F')
        return YES;
    return 'a' <= c && c <= 'f';
}
| |
// Returns the numeric value (0-15) of the ASCII hex digit |c|.
// A non-hex character is logged as an error and yields 0.
static int hexDigitValue(char c)
{
    if ('0' <= c && c <= '9')
        return c - '0';
    if ('A' <= c && c <= 'F')
        return 10 + (c - 'A');
    if ('a' <= c && c <= 'f')
        return 10 + (c - 'a');

    LOG_ERROR("illegal hex digit");
    return 0;
}
| |
// Scans a mailto: URL string and calls |f| once for each host-name range found,
// passing |context| through unchanged. Scanning stops at the first '?' found
// outside a quoted string, or at the end of the string.
static void applyHostNameFunctionToMailToURLString(NSString *string, StringRangeApplierFunction f, void *context)
{
    // In a mailto: URL, host names come after a '@' character and end with a '>' or ',' or '?' character.
    // Skip quoted strings so that characters in them don't confuse us.
    // When we find a '?' character, we are past the part of the URL that contains host names.

    // The character sets are created on first use and kept for later calls
    // (CFRetain balances the autoreleased factory result).
    static NSCharacterSet *hostNameOrStringStartCharacters;
    if (hostNameOrStringStartCharacters == nil) {
        hostNameOrStringStartCharacters = [NSCharacterSet characterSetWithCharactersInString:@"\"@?"];
        CFRetain(hostNameOrStringStartCharacters);
    }
    static NSCharacterSet *hostNameEndCharacters;
    if (hostNameEndCharacters == nil) {
        hostNameEndCharacters = [NSCharacterSet characterSetWithCharactersInString:@">,?"];
        CFRetain(hostNameEndCharacters);
    }
    static NSCharacterSet *quotedStringCharacters;
    if (quotedStringCharacters == nil) {
        quotedStringCharacters = [NSCharacterSet characterSetWithCharactersInString:@"\"\\"];
        CFRetain(quotedStringCharacters);
    }

    // |remaining| always covers the not-yet-scanned tail of the string.
    unsigned stringLength = [string length];
    NSRange remaining = NSMakeRange(0, stringLength);

    while (1) {
        // Find start of host name or of quoted string.
        NSRange hostNameOrStringStart = [string rangeOfCharacterFromSet:hostNameOrStringStartCharacters options:0 range:remaining];
        if (hostNameOrStringStart.location == NSNotFound) {
            return;
        }
        unichar c = [string characterAtIndex:hostNameOrStringStart.location];
        // Advance past the character that was just found.
        remaining.location = NSMaxRange(hostNameOrStringStart);
        remaining.length = stringLength - remaining.location;

        // A '?' ends the part of the URL that can contain host names.
        if (c == '?') {
            return;
        }

        if (c == '@') {
            // Find end of host name.
            unsigned hostNameStart = remaining.location;
            NSRange hostNameEnd = [string rangeOfCharacterFromSet:hostNameEndCharacters options:0 range:remaining];
            BOOL done;
            if (hostNameEnd.location == NSNotFound) {
                // No terminator: the host name runs to the end of the string.
                hostNameEnd.location = stringLength;
                done = YES;
            } else {
                // Resume scanning at the terminator itself (it is not skipped),
                // so a terminating '?' is seen by the next iteration.
                remaining.location = hostNameEnd.location;
                remaining.length = stringLength - remaining.location;
                done = NO;
            }

            // Process host name range.
            f(string, NSMakeRange(hostNameStart, hostNameEnd.location - hostNameStart), context);

            if (done) {
                return;
            }
        } else {
            // Skip quoted string.
            ASSERT(c == '"');
            while (1) {
                NSRange escapedCharacterOrStringEnd = [string rangeOfCharacterFromSet:quotedStringCharacters options:0 range:remaining];
                if (escapedCharacterOrStringEnd.location == NSNotFound) {
                    // Unterminated quoted string: nothing more to find.
                    return;
                }
                c = [string characterAtIndex:escapedCharacterOrStringEnd.location];
                remaining.location = NSMaxRange(escapedCharacterOrStringEnd);
                remaining.length = stringLength - remaining.location;

                // If we are the end of the string, then break from the string loop back to the host name loop.
                if (c == '"') {
                    break;
                }

                // Skip escaped character.
                ASSERT(c == '\\');
                if (remaining.length == 0) {
                    // Backslash was the last character; there is nothing to skip.
                    return;
                }
                remaining.location += 1;
                remaining.length -= 1;
            }
        }
    }
}
| |
// Locates the host name(s) in |string| and calls |f| for each range found,
// passing |context| through. mailto: URLs are delegated to the mailto scanner;
// every other URL is handled as a hierarchical "scheme://authority..." URL.
static void applyHostNameFunctionToURLString(NSString *string, StringRangeApplierFunction f, void *context)
{
    // Find hostnames. Too bad we can't use any real URL-parsing code to do this,
    // but we have to do it before doing all the %-escaping, and this is the only
    // code we have that parses mailto URLs anyway.

    // Maybe we should implement this using a character buffer instead?

    if ([string _webkit_hasCaseInsensitivePrefix:@"mailto:"]) {
        applyHostNameFunctionToMailToURLString(string, f, context);
        return;
    }

    // Find the host name in a hierarchical URL.
    // It comes after a "://" sequence, with scheme characters preceding.
    // It ends with the end of the string or a ":", "/", "?" or "#".
    // If there is a "@" character, the host part is just the part after the "@".
    NSRange schemeSeparator = [string rangeOfString:@"://"];
    if (schemeSeparator.location == NSNotFound)
        return;

    // Everything before the "://" must consist of valid scheme characters.
    static NSCharacterSet *nonSchemeCharacters;
    if (!nonSchemeCharacters) {
        NSCharacterSet *schemeCharacters = [NSCharacterSet characterSetWithCharactersInString:@"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-."];
        nonSchemeCharacters = [schemeCharacters invertedSet];
        CFRetain(nonSchemeCharacters);
    }
    NSRange schemeRange = NSMakeRange(0, schemeSeparator.location);
    NSRange firstInvalidSchemeCharacter = [string rangeOfCharacterFromSet:nonSchemeCharacters options:0 range:schemeRange];
    if (firstInvalidSchemeCharacter.location != NSNotFound)
        return;

    unsigned totalLength = [string length];

    static NSCharacterSet *hostTerminators;
    if (!hostTerminators) {
        hostTerminators = [NSCharacterSet characterSetWithCharactersInString:@":/?#"];
        CFRetain(hostTerminators);
    }

    // The authority starts right after the separator.
    unsigned authorityStart = NSMaxRange(schemeSeparator);

    // The host name ends at the first terminator, or at the end of the string.
    NSRange authorityTail = NSMakeRange(authorityStart, totalLength - authorityStart);
    NSRange terminator = [string rangeOfCharacterFromSet:hostTerminators options:0 range:authorityTail];
    unsigned hostNameEnd = totalLength;
    if (terminator.location != NSNotFound)
        hostNameEnd = terminator.location;

    // A "@" before the terminator marks the end of user info; the host follows it.
    NSRange beforeTerminator = NSMakeRange(authorityStart, hostNameEnd - authorityStart);
    NSRange atSign = [string rangeOfString:@"@" options:0 range:beforeTerminator];
    unsigned hostNameStart = authorityStart;
    if (atSign.location != NSNotFound)
        hostNameStart = NSMaxRange(atSign);

    f(string, NSMakeRange(hostNameStart, hostNameEnd - hostNameStart), context);
}
| |
| @implementation NSURL (WebNSURLExtras) |
| |
// Range-applier helper: appends |range| (boxed in an NSValue) to the array that
// |context| points at when the host name in that range needs encoding (|encode|
// is YES) or decoding (|encode| is NO). |context| is an NSMutableArray ** whose
// array is created on first use and is owned by the caller afterwards.
static void collectRangesThatNeedMapping(NSString *string, NSRange range, void *context, BOOL encode)
{
    BOOL needsMapping;
    if (encode)
        needsMapping = [string _web_hostNameNeedsEncodingWithRange:range];
    else
        needsMapping = [string _web_hostNameNeedsDecodingWithRange:range];
    if (!needsMapping)
        return;

    NSMutableArray **collectedRanges = (NSMutableArray **)context;
    if (!*collectedRanges)
        *collectedRanges = [[NSMutableArray alloc] init];

    NSValue *boxedRange = [NSValue valueWithRange:range];
    [*collectedRanges addObject:boxedRange];
}
| |
// StringRangeApplierFunction that records ranges whose host name needs encoding.
static void collectRangesThatNeedEncoding(NSString *string, NSRange range, void *context)
{
    const BOOL encode = YES;
    collectRangesThatNeedMapping(string, range, context, encode);
}
| |
// StringRangeApplierFunction that records ranges whose host name needs decoding.
static void collectRangesThatNeedDecoding(NSString *string, NSRange range, void *context)
{
    const BOOL encode = NO;
    collectRangesThatNeedMapping(string, range, context, encode);
}
| |
// Returns |string| with every host name that needs it encoded (|encode| is YES)
// or decoded (|encode| is NO). The input string itself is returned when nothing
// has to change.
static NSString *mapHostNames(NSString *string, BOOL encode)
{
    // Generally, we want to optimize for the case where there is one host name that does not need mapping.

    // A pure-ASCII string never needs encoding.
    if (encode && [string canBeConvertedToEncoding:NSASCIIStringEncoding])
        return string;

    // Make a list of ranges that actually need mapping.
    NSMutableArray *rangesToMap = nil;
    StringRangeApplierFunction collector = collectRangesThatNeedDecoding;
    if (encode)
        collector = collectRangesThatNeedEncoding;
    applyHostNameFunctionToURLString(string, collector, &rangesToMap);
    if (!rangesToMap)
        return string;

    // Do the mapping, last range first, so that replacing a range never shifts
    // the ranges that are still waiting to be processed.
    NSMutableString *mapped = [string mutableCopy];
    for (unsigned remaining = [rangesToMap count]; remaining > 0; --remaining) {
        NSRange hostNameRange = [[rangesToMap objectAtIndex:remaining - 1] rangeValue];
        NSString *replacement;
        if (encode)
            replacement = [string _web_encodeHostNameWithRange:hostNameRange];
        else
            replacement = [string _web_decodeHostNameWithRange:hostNameRange];
        [mapped replaceCharactersInRange:hostNameRange withString:replacement];
    }
    [rangesToMap release];
    return [mapped autorelease];
}
| |
// Builds a URL from text typed by the user: whitespace is trimmed, host names
// are encoded, and every UTF-8 byte that is <= 0x20 or >= 0x7f is %-escaped
// before the bytes are resolved against |URL|. Returns nil for a nil string.
+ (NSURL *)_web_URLWithUserTypedString:(NSString *)string relativeToURL:(NSURL *)URL
{
    if (!string)
        return nil;

    NSString *trimmed = [string _webkit_stringByTrimmingWhitespace];
    NSString *hostEncoded = mapHostNames(trimmed, YES);

    NSData *utf8Data = [hostEncoded dataUsingEncoding:NSUTF8StringEncoding];
    ASSERT(utf8Data);

    const UInt8 *source = static_cast<const UInt8 *>([utf8Data bytes]);
    int sourceLength = [utf8Data length];
    if (!sourceLength)
        return [NSURL URLWithString:@""];

    // Worst case every byte becomes a three-byte "%XX" escape.
    char *escaped = static_cast<char *>(malloc(sourceLength * 3));
    char *cursor = escaped;
    for (int index = 0; index < sourceLength; ++index) {
        UInt8 byte = source[index];
        if (byte > 0x20 && byte < 0x7f) {
            *cursor++ = byte;
            continue;
        }
        *cursor++ = '%';
        *cursor++ = hexDigit(byte >> 4);
        *cursor++ = hexDigit(byte & 0xf);
    }

    // The NSData adopts the malloc'ed buffer.
    NSData *escapedData = [NSData dataWithBytesNoCopy:escaped length:cursor - escaped];
    return [self _web_URLWithData:escapedData relativeToURL:URL];
}
| |
// Convenience form of +_web_URLWithUserTypedString:relativeToURL: with no base URL.
+ (NSURL *)_web_URLWithUserTypedString:(NSString *)string
{
    NSURL *noBaseURL = nil;
    return [self _web_URLWithUserTypedString:string relativeToURL:noBaseURL];
}
| |
// Convenience form of +_web_URLWithDataAsString:relativeToURL: with no base URL.
// Returns nil for a nil string.
+ (NSURL *)_web_URLWithDataAsString:(NSString *)string
{
    return string ? [self _web_URLWithDataAsString:string relativeToURL:nil] : nil;
}
| |
// Trims whitespace from |string|, converts it to ISO Latin 1 bytes and builds a
// URL from those bytes relative to |baseURL|. Returns nil for a nil string.
+ (NSURL *)_web_URLWithDataAsString:(NSString *)string relativeToURL:(NSURL *)baseURL
{
    if (!string)
        return nil;

    NSString *trimmed = [string _webkit_stringByTrimmingWhitespace];
    NSData *latin1Bytes = [trimmed dataUsingEncoding:NSISOLatin1StringEncoding];
    return [self _web_URLWithData:latin1Bytes relativeToURL:baseURL];
}
| |
// Convenience form of +_web_URLWithData:relativeToURL: with no base URL.
+ (NSURL *)_web_URLWithData:(NSData *)data
{
    NSURL *noBaseURL = nil;
    return [NSURL _web_URLWithData:data relativeToURL:noBaseURL];
}
| |
// Creates a URL from raw URL bytes, resolved against |baseURL|. The bytes are
// interpreted as UTF-8 first and as ISO Latin 1 if that fails. Nil data gives
// nil; empty data gives the empty URL.
+ (NSURL *)_web_URLWithData:(NSData *)data relativeToURL:(NSURL *)baseURL
{
    if (!data)
        return nil;

    size_t byteCount = [data length];
    if (!byteCount)
        return [NSURL URLWithString:@""];

    // work around <rdar://4470771>: CFURLCreateAbsoluteURLWithBytes(.., TRUE) doesn't remove non-path components.
    NSURL *strippedBaseURL = [baseURL _webkit_URLByRemovingResourceSpecifier];
    CFURLRef cfBaseURL = (CFURLRef)strippedBaseURL;

    const UInt8 *urlBytes = static_cast<const UInt8*>([data bytes]);
    // NOTE: We use UTF-8 here since this encoding is used when computing strings when returning URL components
    // (e.g calls to NSURL -path). However, this function is not tolerant of illegal UTF-8 sequences, which
    // could either be a malformed string or bytes in a different encoding, like shift-jis, so we fall back
    // onto using ISO Latin 1 in those cases.
    NSURL *url = WebCFAutorelease(CFURLCreateAbsoluteURLWithBytes(NULL, urlBytes, byteCount, kCFStringEncodingUTF8, cfBaseURL, YES));
    if (url)
        return url;

    url = WebCFAutorelease(CFURLCreateAbsoluteURLWithBytes(NULL, urlBytes, byteCount, kCFStringEncodingISOLatin1, cfBaseURL, YES));
    return url;
}
| |
// Returns the bytes of this URL as reported by CFURLGetBytes. When the URL has
// a base URL, the bytes are first resolved against it and the bytes of the
// resulting URL are returned instead.
- (NSData *)_web_originalData
{
    CFURLRef cfURL = (CFURLRef)self;

    // Try a fixed-size buffer first; CFURLGetBytes reports -1 when it is too small.
    UInt8 *urlBytes = (UInt8 *)malloc(URL_BYTES_BUFFER_LENGTH);
    CFIndex byteCount = CFURLGetBytes(cfURL, urlBytes, URL_BYTES_BUFFER_LENGTH);
    if (byteCount == -1) {
        // Ask for the required size, grow the buffer, and fetch again.
        CFIndex requiredLength = CFURLGetBytes(cfURL, NULL, 0);
        urlBytes = (UInt8 *)realloc(urlBytes, requiredLength);
        byteCount = CFURLGetBytes(cfURL, urlBytes, requiredLength);
        ASSERT(byteCount == requiredLength);
    }

    // The buffer is adopted by the NSData.
    NSData *urlData = [NSData dataWithBytesNoCopy:urlBytes length:byteCount freeWhenDone:YES];

    NSURL *baseURL = (NSURL *)CFURLGetBaseURL(cfURL);
    if (!baseURL)
        return urlData;

    NSURL *resolvedURL = [NSURL _web_URLWithData:urlData relativeToURL:baseURL];
    return [resolvedURL _web_originalData];
}
| |
// Returns the URL's original bytes decoded as an ISO Latin 1 string.
- (NSString *)_web_originalDataAsString
{
    NSData *originalBytes = [self _web_originalData];
    NSString *decoded = [[NSString alloc] initWithData:originalBytes encoding:NSISOLatin1StringEncoding];
    return [decoded autorelease];
}
| |
// Returns a new string (the caller owns the reference) in which every lookalike
// character of |string| is replaced by the %-escaped bytes of its UTF-8
// encoding. All other characters are copied through unchanged.
static CFStringRef createStringWithEscapedUnsafeCharacters(CFStringRef string)
{
    CFIndex sourceLength = CFStringGetLength(string);
    Vector<UChar, 2048> source(sourceLength);
    CFStringGetCharacters(string, CFRangeMake(0, sourceLength), source.data());

    Vector<UChar, 2048> escaped;

    // U16_NEXT advances |position| by one code point per iteration.
    for (CFIndex position = 0; position < sourceLength; ) {
        UChar32 codePoint;
        U16_NEXT(source, position, sourceLength, codePoint);

        if (!isLookalikeCharacter(codePoint)) {
            // Safe character: re-encode as UTF-16 and copy through.
            UChar utf16Units[2];
            CFIndex unitCount = 0;
            UBool appendFailed = false;
            U16_APPEND(utf16Units, unitCount, 2, codePoint, appendFailed);
            ASSERT(!appendFailed);
            for (CFIndex unit = 0; unit < unitCount; ++unit)
                escaped.append(utf16Units[unit]);
            continue;
        }

        // Unsafe character: emit "%XX" for each byte of its UTF-8 encoding.
        uint8_t utf8Bytes[4];
        CFIndex utf8Count = 0;
        UBool appendFailed = false;
        U8_APPEND(utf8Bytes, utf8Count, 4, codePoint, appendFailed);
        ASSERT(!appendFailed);

        for (CFIndex byteIndex = 0; byteIndex < utf8Count; ++byteIndex) {
            uint8_t byte = utf8Bytes[byteIndex];
            escaped.append('%');
            escaped.append(hexDigit(byte >> 4));
            escaped.append(hexDigit(byte & 0xf));
        }
    }

    return CFStringCreateWithCharacters(NULL, escaped.data(), escaped.size());
}
| |
// Returns a string form of this URL meant for display:
//  - %-escapes for bytes above 0x7f are unescaped when the result is valid
//    UTF-8; otherwise those bytes are escaped again,
//  - host names are decoded if an "xn--" label was seen in the URL bytes,
//  - lookalike characters are %-escaped.
- (NSString *)_web_userVisibleString
{
    NSData *data = [self _web_originalData];
    const unsigned char *before = static_cast<const unsigned char*>([data bytes]);
    int length = [data length];

    bool needsHostNameDecoding = false;

    int bufferLength = (length * 3) + 1;
    char *after = static_cast<char *>(malloc(bufferLength)); // large enough to %-escape every character
    char *q = after;
    for (int i = 0; i < length; i++) {
        unsigned char c = before[i];
        // unescape escape sequences that indicate bytes greater than 0x7f
        if (c == '%' && i + 2 < length && isHexDigit(before[i + 1]) && isHexDigit(before[i + 2])) {
            unsigned char u = (hexDigitValue(before[i + 1]) << 4) | hexDigitValue(before[i + 2]);
            if (u > 0x7f) {
                // unescape
                *q++ = u;
            } else {
                // do not unescape
                *q++ = before[i];
                *q++ = before[i + 1];
                *q++ = before[i + 2];
            }
            i += 2;
        } else {
            *q++ = c;

            // Check for "xn--" in an efficient, non-case-sensitive, way.
            // The look-back is bounded by the number of bytes actually written
            // to |after|, not by the input index: an unescaped "%XX" consumes
            // three input bytes but writes only one, so the input index can be
            // >= 3 while fewer than four output bytes exist (e.g. "%FF-").
            if (c == '-' && !needsHostNameDecoding && q - after >= 4
                && (q[-4] | 0x20) == 'x' && (q[-3] | 0x20) == 'n' && q[-2] == '-')
                needsHostNameDecoding = true;
        }
    }
    *q = '\0';

    // Check string to see if it can be converted to display using UTF-8
    NSString *result = [NSString stringWithUTF8String:after];
    if (!result) {
        // Could not convert to UTF-8.
        // Convert characters greater than 0x7f to escape sequences.
        // Shift current string to the end of the buffer
        // then we will copy back bytes to the start of the buffer
        // as we convert.
        int afterlength = q - after;
        char *source = after + bufferLength - afterlength - 1;
        memmove(source, after, afterlength + 1); // copies trailing '\0'
        char *destination = after;
        while (*source) {
            unsigned char c = *source;
            if (c > 0x7f) {
                *destination++ = '%';
                *destination++ = hexDigit(c >> 4);
                *destination++ = hexDigit(c & 0xf);
            } else
                *destination++ = *source;
            source++;
        }
        *destination = '\0';
        result = [NSString stringWithUTF8String:after];
    }

    free(after);

    // Decode host names when an "xn--" label was seen; otherwise run the encode pass.
    result = mapHostNames(result, !needsHostNameDecoding);
    return WebCFAutorelease(createStringWithEscapedUnsafeCharacters((CFStringRef)result));
}
| |
// Returns YES when the URL has no bytes. A URL with a base URL is measured
// through -_web_originalData; one without is measured with CFURLGetBytes directly.
- (BOOL)_web_isEmpty
{
    CFURLRef cfURL = (CFURLRef)self;
    if (CFURLGetBaseURL(cfURL))
        return [[self _web_originalData] length] == 0;
    return CFURLGetBytes(cfURL, NULL, 0) == 0;
}
| |
// Returns the URL's original bytes as a NUL-terminated C string. The pointer
// refers to the storage of an autoreleased NSMutableData, so it is only valid
// for as long as that object is alive.
- (const char *)_web_URLCString
{
    NSMutableData *terminatedBytes = [NSMutableData data];
    NSData *originalBytes = [self _web_originalData];
    [terminatedBytes appendData:originalBytes];
    [terminatedBytes appendBytes:"\0" length:1];
    return (const char *)[terminatedBytes bytes];
}
| |
// Returns the URL of the canonical request that the protocol class for this
// URL produces, or self when no protocol class is found for the request.
- (NSURL *)_webkit_canonicalize
{
    NSURLRequest *request = [[NSURLRequest alloc] initWithURL:self];
    NSURL *canonicalURL = self;

    Class protocolClass = WKNSURLProtocolClassForRequest(request);
    if (protocolClass) {
        // This applies NSURL's concept of canonicalization, but not KURL's concept. It would
        // make sense to apply both, but when we tried that it caused a performance degradation
        // (see 5315926). It might make sense to apply only the KURL concept and not the NSURL
        // concept, but it's too risky to make that change for WebKit 3.0.
        NSURL *newURL = [[protocolClass canonicalRequestForRequest:request] URL];
        // Keep the URL alive past the release of |request| below.
        canonicalURL = [[newURL retain] autorelease];
    }

    [request release];
    return canonicalURL;
}
| |
// Plain record holding the individual pieces of a URL.
// NOTE(review): no use of this type is visible in this part of the file —
// confirm it is still needed.
typedef struct {
    NSString *scheme;
    NSString *user;
    NSString *password;
    NSString *host;
    CFIndex port; // kCFNotFound means ignore/omit
    NSString *path;
    NSString *query;
    NSString *fragment;
} WebKitURLComponents;
| |
// Returns a URL made from this URL's bytes truncated one byte before the start
// of |component| (presumably dropping the separator in front of it — confirm).
// Returns self when the component is absent or the truncated URL cannot be created.
- (NSURL *)_webkit_URLByRemovingComponent:(CFURLComponentType)component
{
    CFURLRef cfURL = (CFURLRef)self;

    // Check to see if the component exists before decomposing the URL.
    CFRange componentRange = CFURLGetByteRangeForComponent(cfURL, component, NULL);
    if (componentRange.location == kCFNotFound)
        return self;

    // Use the stack buffer when the URL fits; otherwise allocate the exact size.
    UInt8 stackBytes[2048];
    UInt8 *urlBytes = stackBytes;
    CFIndex byteCount = CFURLGetBytes(cfURL, stackBytes, 2048);
    if (byteCount == -1) {
        byteCount = CFURLGetBytes(cfURL, NULL, 0);
        urlBytes = static_cast<UInt8*>(malloc(byteCount));
        CFURLGetBytes(cfURL, urlBytes, byteCount);
    }

    // Try UTF-8 first, then ISO Latin 1.
    CFIndex truncatedLength = componentRange.location - 1;
    NSURL *truncatedURL = (NSURL *)CFMakeCollectable(CFURLCreateWithBytes(NULL, urlBytes, truncatedLength, kCFStringEncodingUTF8, NULL));
    if (!truncatedURL)
        truncatedURL = (NSURL *)CFMakeCollectable(CFURLCreateWithBytes(NULL, urlBytes, truncatedLength, kCFStringEncodingISOLatin1, NULL));

    if (urlBytes != stackBytes)
        free(urlBytes);

    if (!truncatedURL)
        return self;
    return [truncatedURL autorelease];
}
| |
// Returns this URL with its fragment component removed.
- (NSURL *)_webkit_URLByRemovingFragment
{
    const CFURLComponentType componentToRemove = kCFURLComponentFragment;
    return [self _webkit_URLByRemovingComponent:componentToRemove];
}
| |
// Returns this URL with its resource specifier component removed.
- (NSURL *)_webkit_URLByRemovingResourceSpecifier
{
    const CFURLComponentType componentToRemove = kCFURLComponentResourceSpecifier;
    return [self _webkit_URLByRemovingComponent:componentToRemove];
}
| |
// Returns whether the URL's original string form is a javascript: URL.
- (BOOL)_webkit_isJavaScriptURL
{
    NSString *urlString = [self _web_originalDataAsString];
    return [urlString _webkit_isJavaScriptURL];
}
| |
// Returns the script carried by a javascript: URL, based on -absoluteString;
// the string-level helper decides what is returned for other URLs.
- (NSString *)_webkit_scriptIfJavaScriptURL
{
    NSString *urlString = [self absoluteString];
    return [urlString _webkit_scriptIfJavaScriptURL];
}
| |
// Returns whether the URL's original string form is a file: URL.
- (BOOL)_webkit_isFileURL
{
    NSString *urlString = [self _web_originalDataAsString];
    return [urlString _webkit_isFileURL];
}
| |
// Returns whether the URL's original string form is an FTP directory URL.
- (BOOL)_webkit_isFTPDirectoryURL
{
    NSString *urlString = [self _web_originalDataAsString];
    return [urlString _webkit_isFTPDirectoryURL];
}
| |
// Returns YES for about: URLs (case-insensitive prefix match) and for empty URLs.
- (BOOL)_webkit_shouldLoadAsEmptyDocument
{
    NSString *urlString = [self _web_originalDataAsString];
    if ([urlString _webkit_hasCaseInsensitivePrefix:@"about:"])
        return YES;
    return [self _web_isEmpty];
}
| |
// Returns a URL equal to this one with the scheme converted to ASCII lower
// case. Returns self when there is no scheme or it is already lower case.
- (NSURL *)_web_URLWithLowercasedScheme
{
    // The scheme's byte range is the function's return value, and that is what
    // reports kCFNotFound for a missing component. The optional third argument
    // receives the range including separators, so it is not requested here.
    CFRange range = CFURLGetByteRangeForComponent((CFURLRef)self, kCFURLComponentScheme, NULL);
    if (range.location == kCFNotFound)
        return self;

    // Use the stack buffer when the URL fits; otherwise allocate the exact size.
    UInt8 static_buffer[URL_BYTES_BUFFER_LENGTH];
    UInt8 *buffer = static_buffer;
    CFIndex bytesFilled = CFURLGetBytes((CFURLRef)self, buffer, URL_BYTES_BUFFER_LENGTH);
    if (bytesFilled == -1) {
        CFIndex bytesToAllocate = CFURLGetBytes((CFURLRef)self, NULL, 0);
        buffer = static_cast<UInt8 *>(malloc(bytesToAllocate));
        bytesFilled = CFURLGetBytes((CFURLRef)self, buffer, bytesToAllocate);
        ASSERT(bytesFilled == bytesToAllocate);
    }

    // Lower-case the scheme bytes in place, remembering whether anything changed.
    BOOL changed = NO;
    for (CFIndex i = 0; i < range.length; ++i) {
        char c = buffer[range.location + i];
        char lower = toASCIILower(c);
        if (c != lower) {
            buffer[range.location + i] = lower;
            changed = YES;
        }
    }

    NSURL *result = self;
    if (changed) {
        // Build a new URL from the modified bytes. As in +_web_URLWithData:relativeToURL:,
        // fall back to ISO Latin 1 when the bytes are not valid UTF-8.
        result = (NSURL *)WebCFAutorelease(CFURLCreateAbsoluteURLWithBytes(NULL, buffer, bytesFilled, kCFStringEncodingUTF8, nil, YES));
        if (!result)
            result = (NSURL *)WebCFAutorelease(CFURLCreateAbsoluteURLWithBytes(NULL, buffer, bytesFilled, kCFStringEncodingISOLatin1, nil, YES));
    }

    if (buffer != static_buffer)
        free(buffer);

    return result;
}
| |
| |
// Returns YES when the query component's range including separators is exactly
// one byte long, i.e. the URL has a bare "?" with nothing after it.
-(BOOL)_web_hasQuestionMarkOnlyQueryString
{
    CFRange queryWithSeparators;
    CFURLGetByteRangeForComponent((CFURLRef)self, kCFURLComponentQuery, &queryWithSeparators);

    if (queryWithSeparators.location == kCFNotFound)
        return NO;
    return queryWithSeparators.length == 1 ? YES : NO;
}
| |
// Returns the characters that follow the scheme's ':' separator (for example
// the "//" of "http://"), encoded as ISO Latin 1, or nil when the URL has no
// scheme or the computed range does not fit in -absoluteString.
// NOTE(review): the range is computed from CFURL byte offsets but applied to
// -absoluteString character indices — confirm this is acceptable for relative
// and non-ASCII URLs.
-(NSData *)_web_schemeSeparatorWithoutColon
{
    CFRange rangeWithSeparators;
    CFRange range = CFURLGetByteRangeForComponent((CFURLRef)self, kCFURLComponentScheme, &rangeWithSeparators);

    // A missing scheme is reported through the returned range; the
    // "including separators" range is checked too as a precaution.
    if (range.location == kCFNotFound || rangeWithSeparators.location == kCFNotFound)
        return nil;

    // Skip the scheme and its ':'; what is left of the separators is the result.
    // Reject a negative length explicitly: converted to NSUInteger it would
    // become huge and could wrap the bounds check below.
    CFIndex separatorLength = rangeWithSeparators.length - range.length - 1;
    if (separatorLength < 0)
        return nil;

    NSString *absoluteString = [self absoluteString];
    NSRange separatorsRange = NSMakeRange(range.location + range.length + 1, separatorLength);
    if (NSMaxRange(separatorsRange) > [absoluteString length])
        return nil;

    NSString *slashes = [absoluteString substringWithRange:separatorsRange];
    return [slashes dataUsingEncoding:NSISOLatin1StringEncoding];
}
| |
| #define completeURL (CFURLComponentType)-1 |
| |
// Returns the bytes of one URL component (or of the whole URL when
// |componentType| is completeURL), with every byte <= 0x20 or >= 0x7f
// %-escaped. A query component with content, or a question-mark-only query,
// is prefixed with '?'. Returns nil when the URL does not have the component.
-(NSData *)_web_dataForURLComponentType:(CFURLComponentType)componentType
{
    // const so that the array below has a constant bound rather than being a
    // variable-length array.
    static const int URLComponentTypeBufferLength = 2048;

    // Use the stack buffer when the URL fits; otherwise allocate the exact size.
    UInt8 staticAllBytesBuffer[URLComponentTypeBufferLength];
    UInt8 *allBytesBuffer = staticAllBytesBuffer;

    CFIndex bytesFilled = CFURLGetBytes((CFURLRef)self, allBytesBuffer, URLComponentTypeBufferLength);
    if (bytesFilled == -1) {
        CFIndex bytesToAllocate = CFURLGetBytes((CFURLRef)self, NULL, 0);
        allBytesBuffer = static_cast<UInt8 *>(malloc(bytesToAllocate));
        bytesFilled = CFURLGetBytes((CFURLRef)self, allBytesBuffer, bytesToAllocate);
    }

    CFRange range;
    if (componentType != completeURL)
        range = CFURLGetByteRangeForComponent((CFURLRef)self, componentType, NULL);
    else {
        range.location = 0;
        range.length = bytesFilled;
    }

    // resultData stays nil when the component is missing. There is a single
    // exit path so that a heap-allocated buffer is always freed.
    NSMutableData *resultData = nil;
    if (range.location != kCFNotFound) {
        const unsigned char *bytes = allBytesBuffer + range.location;
        resultData = [NSMutableData data];

        // NOTE: add a leading '?' to non-zero length query strings.
        // NOTE: retain question-mark only query strings.
        if (componentType == kCFURLComponentQuery) {
            if (range.length > 0 || [self _web_hasQuestionMarkOnlyQueryString])
                [resultData appendBytes:"?" length:1];
        }

        for (CFIndex i = 0; i < range.length; i++) {
            unsigned char c = bytes[i];
            if (c <= 0x20 || c >= 0x7f) {
                char escaped[3];
                escaped[0] = '%';
                escaped[1] = hexDigit(c >> 4);
                escaped[2] = hexDigit(c & 0xf);
                [resultData appendBytes:escaped length:3];
            } else
                [resultData appendBytes:&c length:1];
        }
    }

    if (staticAllBytesBuffer != allBytesBuffer)
        free(allBytesBuffer);

    return resultData;
}
| |
// Returns the %-escaped bytes of the URL's scheme component, or nil if there is none.
-(NSData *)_web_schemeData
{
    const CFURLComponentType schemeComponent = kCFURLComponentScheme;
    return [self _web_dataForURLComponentType:schemeComponent];
}
| |
// Returns the %-escaped bytes of the URL's host component. For file URLs a
// host of "localhost" (compared case-insensitively) is reported as nil.
-(NSData *)_web_hostData
{
    NSData *host = [self _web_dataForURLComponentType:kCFURLComponentHost];
    NSData *scheme = [self _web_schemeData];

    // Take off localhost for file
    BOOL isFileScheme = [scheme _web_isCaseInsensitiveEqualToCString:"file"];
    if (isFileScheme && [host _web_isCaseInsensitiveEqualToCString:"localhost"])
        return nil;
    return host;
}
| |
// Returns the URL's host data decoded as a UTF-8 string. When the URL has no
// host data, empty data is decoded instead, giving an empty string.
- (NSString *)_web_hostString
{
    NSData *data = [self _web_hostData];
    if (!data)
        data = [NSData data];
    // Decode |data| itself so that the nil fallback above takes effect and the
    // host data is not computed a second time.
    return [[[NSString alloc] initWithData:data encoding:NSUTF8StringEncoding] autorelease];
}
| |
// Forwards to suggestedFilenameWithMIMEType() with this URL and |MIMEType|.
- (NSString *)_webkit_suggestedFilenameWithMIMEType:(NSString *)MIMEType
{
    NSString *suggestedFilename = suggestedFilenameWithMIMEType(self, MIMEType);
    return suggestedFilename;
}
| |
| @end |
| |
| @implementation NSString (WebNSURLExtras) |
| |
// Returns NO when the string contains something that -_web_userVisibleString
// would change: a byte <= 0x20 or equal to 0x7f, a %-escape whose value is
// above 0x7f, or an "xn--" sequence. Returns YES otherwise.
- (BOOL)_web_isUserVisibleURL
{
    // Get the UTF-8 bytes, preferring a stack buffer for short strings.
    char stackBuffer[1024];
    const char *utf8 = stackBuffer;
    if (!CFStringGetCString((CFStringRef)self, stackBuffer, 1023, kCFStringEncodingUTF8))
        utf8 = [self UTF8String];

    int byteCount = strlen(utf8);

    for (int index = 0; index < byteCount; index++) {
        unsigned char current = utf8[index];

        // control characters, space, and delete
        if (current <= 0x20 || current == 0x7f)
            return NO;

        if (current == '%' && (index + 1 < byteCount && isHexDigit(utf8[index + 1])) && index + 2 < byteCount && isHexDigit(utf8[index + 2])) {
            unsigned char decoded = (hexDigitValue(utf8[index + 1]) << 4) | hexDigitValue(utf8[index + 2]);
            if (decoded > 0x7f)
                return NO;
            // Skip the two hex digits of this escape.
            index += 2;
            continue;
        }

        // Check for "xn--" in an efficient, non-case-sensitive, way.
        if (current == '-' && index >= 3 && (utf8[index - 3] | 0x20) == 'x' && (utf8[index - 2] | 0x20) == 'n' && utf8[index - 1] == '-')
            return NO;
    }

    return YES;
}
| |
| |
// Returns whether the string starts with "javascript:", ignoring case.
- (BOOL)_webkit_isJavaScriptURL
{
    NSString *schemePrefix = @"javascript:";
    return [self _webkit_hasCaseInsensitivePrefix:schemePrefix];
}
| |
// Returns whether the string starts with "file:", ignoring case.
- (BOOL)_webkit_isFileURL
{
    NSRange prefixRange = [self rangeOfString:@"file:" options:(NSCaseInsensitiveSearch | NSAnchoredSearch)];
    return prefixRange.location != NSNotFound;
}
| |
// Returns the result of decodeURLEscapeSequences() applied to this string.
- (NSString *)_webkit_stringByReplacingValidPercentEscapes
{
    NSString *decoded = decodeURLEscapeSequences(self);
    return decoded;
}
| |
// For a javascript: URL string, returns everything after the "javascript:"
// prefix with valid %-escapes replaced; returns nil for any other string.
- (NSString *)_webkit_scriptIfJavaScriptURL
{
    if (![self _webkit_isJavaScriptURL])
        return nil;

    // 11 is the length of "javascript:".
    NSString *escapedScript = [self substringFromIndex:11];
    return [escapedScript _webkit_stringByReplacingValidPercentEscapes];
}
| |
// Returns whether the string is at least five characters long, ends with '/',
// and starts with "ftp:" (ignoring case).
- (BOOL)_webkit_isFTPDirectoryURL
{
    int characterCount = [self length];
    if (characterCount < 5) // 5 is length of "ftp:/"
        return NO;

    unichar finalCharacter = [self characterAtIndex:characterCount - 1];
    return finalCharacter == '/' && [self _webkit_hasCaseInsensitivePrefix:@"ftp:"];
}
| |
| |
// Reads one IDN script white list file and ORs a bit into IDNScriptWhiteList for
// every script name it contains.
//
// The file is read a word at a time; words are separated by whitespace, and a
// '#' starts a comment that runs to the end of the line. Only the first 32
// characters of a word are used for the lookup. Words that
// u_getPropertyValueEnum() does not map to a script code in the range
// [0, USCRIPT_CODE_LIMIT) are ignored.
//
// Returns NO if filename is nil or the file cannot be opened, YES otherwise.
static BOOL readIDNScriptWhiteListFile(NSString *filename)
{
    if (!filename)
        return NO;

    FILE *file = fopen([filename fileSystemRepresentation], "r");
    if (!file)
        return NO;

    while (1) {
        // Skip a comment if present.
        int result = fscanf(file, " #%*[^\n\r]%*[\n\r]");
        if (result == EOF)
            break;

        // Read a script name if present. The second conversion discards the
        // remainder of a word that is longer than the buffer.
        char word[33];
        result = fscanf(file, " %32[^# \t\n\r]%*[^# \t\n\r] ", word);
        if (result == EOF)
            break;

        if (result == 1) {
            // Got a word, map it to a script code and set that script's bit.
            int32_t script = u_getPropertyValueEnum(UCHAR_SCRIPT, word);
            if (script >= 0 && script < USCRIPT_CODE_LIMIT) {
                size_t index = script / 32;
                // Shift an unsigned 1: script % 32 can be 31, and shifting a
                // signed 1 into the sign bit is undefined behavior.
                uint32_t mask = (uint32_t)1 << (script % 32);
                IDNScriptWhiteList[index] |= mask;
            }
        }
    }

    fclose(file);
    return YES;
}
| |
// Loads the IDN script white list. Each Library directory returned by
// NSSearchPathForDirectoriesInDomains() is tried in order for
// "IDNScriptWhiteList.txt"; the first file read successfully ends the search.
// If none can be read, the IDNScriptWhiteList.txt resource of the
// com.apple.WebKit bundle is tried instead.
static void readIDNScriptWhiteList(void)
{
    // Look for a white list in the Library directories first.
    NSArray *libraryDirectories = NSSearchPathForDirectoriesInDomains(NSLibraryDirectory, NSAllDomainsMask, YES);
    NSEnumerator *directoryEnumerator = [libraryDirectories objectEnumerator];
    NSString *directory;
    while ((directory = [directoryEnumerator nextObject])) {
        NSString *candidatePath = [directory stringByAppendingPathComponent:@"IDNScriptWhiteList.txt"];
        if (readIDNScriptWhiteListFile(candidatePath))
            return;
    }

    // Nothing usable in any Library directory; use the list shipped in the bundle.
    NSBundle *webKitBundle = [NSBundle bundleWithIdentifier:@"com.apple.WebKit"];
    NSString *bundledPath = [webKitBundle pathForResource:@"IDNScriptWhiteList" ofType:@"txt"];
    readIDNScriptWhiteListFile(bundledPath);
}
| |
// Returns YES only if every code point in the UTF-16 buffer belongs to a script
// whose bit is set in IDNScriptWhiteList and is not rejected by
// isLookalikeCharacter(). An empty buffer yields YES.
//
// The white list is loaded on first use through pthread_once(). Any ICU error,
// a negative script code, or a script code at or beyond USCRIPT_CODE_LIMIT
// makes the function return NO.
static BOOL allCharactersInIDNScriptWhiteList(const UChar *buffer, int32_t length)
{
    pthread_once(&IDNScriptWhiteListFileRead, readIDNScriptWhiteList);

    int32_t i = 0;
    while (i < length) {
        UChar32 c;
        // U16_NEXT advances i past the code point it reads. The trailing
        // semicolon is harmless where the macro is a brace block and is needed
        // where it is a do/while statement.
        U16_NEXT(buffer, i, length, c);

        UErrorCode error = U_ZERO_ERROR;
        UScriptCode script = uscript_getScript(c, &error);
        if (error != U_ZERO_ERROR) {
            LOG_ERROR("got ICU error while trying to look at scripts: %d", error);
            return NO;
        }
        if (script < 0) {
            LOG_ERROR("got negative number for script code from ICU: %d", script);
            return NO;
        }
        if (script >= USCRIPT_CODE_LIMIT)
            return NO;

        size_t index = script / 32;
        // Shift an unsigned 1: script % 32 can be 31, and shifting a signed 1
        // into the sign bit is undefined behavior.
        uint32_t mask = (uint32_t)1 << (script % 32);
        if (!(IDNScriptWhiteList[index] & mask))
            return NO;

        if (isLookalikeCharacter(c))
            return NO;
    }
    return YES;
}
| |
// Runs the host name found at `range` in the receiver through ICU's IDNA
// conversion: uidna_IDNToASCII when encode is YES, uidna_IDNToUnicode when NO.
//
// A nil result means no mapping is necessary (or none could be produced).
// With makeString NO, a non-nil result is always self and merely signals that
// mapping is necessary. With makeString YES, a non-nil result is the mapped string.
- (NSString *)_web_mapHostNameWithRange:(NSRange)range encode:(BOOL)encode makeString:(BOOL)makeString
{
    // Host names that do not fit in the stack buffers are left alone.
    if (range.length > HOST_NAME_BUFFER_LENGTH)
        return nil;

    if ([self length] == 0)
        return nil;

    UChar sourceBuffer[HOST_NAME_BUFFER_LENGTH];
    UChar destinationBuffer[HOST_NAME_BUFFER_LENGTH];

    // When encoding, percent escapes inside the host are replaced first; if that
    // succeeds, the unescaped substring becomes the conversion input.
    NSString *hostSource = self;
    if (encode) {
        NSRange percentRange = [self rangeOfString:@"%" options:NSLiteralSearch range:range];
        if (percentRange.location != NSNotFound) {
            NSString *escapedHost = [self substringWithRange:range];
            NSString *unescapedHost = WebCFAutorelease(CFURLCreateStringByReplacingPercentEscapes(NULL, (CFStringRef)escapedHost, CFSTR("")));
            if (unescapedHost != nil) {
                hostSource = unescapedHost;
                range = NSMakeRange(0, [hostSource length]);
            }
        }
    }

    int sourceLength = range.length;
    [hostSource getCharacters:sourceBuffer range:range];

    UErrorCode status = U_ZERO_ERROR;
    int32_t convertedLength;
    if (encode)
        convertedLength = uidna_IDNToASCII(sourceBuffer, sourceLength, destinationBuffer, HOST_NAME_BUFFER_LENGTH, UIDNA_ALLOW_UNASSIGNED, NULL, &status);
    else
        convertedLength = uidna_IDNToUnicode(sourceBuffer, sourceLength, destinationBuffer, HOST_NAME_BUFFER_LENGTH, UIDNA_ALLOW_UNASSIGNED, NULL, &status);
    if (status != U_ZERO_ERROR)
        return nil;

    // Conversion produced exactly the input: nothing to map.
    BOOL unchanged = convertedLength == sourceLength && memcmp(sourceBuffer, destinationBuffer, sourceLength * sizeof(UChar)) == 0;
    if (unchanged)
        return nil;

    // A decoded name is only used if all of its characters pass the script white list.
    if (!encode && !allCharactersInIDNScriptWhiteList(destinationBuffer, convertedLength))
        return nil;

    if (!makeString)
        return (NSString *)self;
    return (NSString *)[NSString stringWithCharacters:destinationBuffer length:convertedLength];
}
| |
// Returns YES when _web_mapHostNameWithRange:encode:makeString:, asked to decode
// the host name at `range` without building a string, returns non-nil.
- (BOOL)_web_hostNameNeedsDecodingWithRange:(NSRange)range
{
    NSString *mappingIndicator = [self _web_mapHostNameWithRange:range encode:NO makeString:NO];
    return mappingIndicator != nil;
}
| |
// Returns YES when _web_mapHostNameWithRange:encode:makeString:, asked to encode
// the host name at `range` without building a string, returns non-nil.
- (BOOL)_web_hostNameNeedsEncodingWithRange:(NSRange)range
{
    NSString *mappingIndicator = [self _web_mapHostNameWithRange:range encode:YES makeString:NO];
    return mappingIndicator != nil;
}
| |
// Returns the decoded form of the host name at `range`, or nil when
// _web_mapHostNameWithRange:encode:makeString: produces no mapping.
- (NSString *)_web_decodeHostNameWithRange:(NSRange)range
{
    NSString *decodedHost = [self _web_mapHostNameWithRange:range encode:NO makeString:YES];
    return decodedHost;
}
| |
// Returns the encoded form of the host name at `range`, or nil when
// _web_mapHostNameWithRange:encode:makeString: produces no mapping.
- (NSString *)_web_encodeHostNameWithRange:(NSRange)range
{
    NSString *encodedHost = [self _web_mapHostNameWithRange:range encode:YES makeString:YES];
    return encodedHost;
}
| |
// Treats the whole receiver as a host name and returns its decoded form, or the
// receiver itself when _web_mapHostNameWithRange:encode:makeString: returns nil.
- (NSString *)_web_decodeHostName
{
    NSRange wholeString = NSMakeRange(0, [self length]);
    NSString *decodedHost = [self _web_mapHostNameWithRange:wholeString encode:NO makeString:YES];
    if (decodedHost != nil)
        return decodedHost;
    return self;
}
| |
// Treats the whole receiver as a host name and returns its encoded form, or the
// receiver itself when _web_mapHostNameWithRange:encode:makeString: returns nil.
- (NSString *)_web_encodeHostName
{
    NSRange wholeString = NSMakeRange(0, [self length]);
    NSString *encodedHost = [self _web_mapHostNameWithRange:wholeString encode:YES makeString:YES];
    if (encodedHost != nil)
        return encodedHost;
    return self;
}
| |
// Returns the range of the URL scheme at the start of the receiver: everything
// before the first ':' provided that part is non-empty and consists only of
// ASCII letters, digits, '+', '.' and '-'. Returns {NSNotFound, 0} otherwise.
- (NSRange)_webkit_rangeOfURLScheme
{
    NSRange noScheme = NSMakeRange(NSNotFound, 0);

    NSRange colon = [self rangeOfString:@":"];
    if (colon.location == NSNotFound || colon.location == 0)
        return noScheme;

    static NSCharacterSet *nonSchemeCharacters = nil;
    if (nonSchemeCharacters == nil) {
        // Building this set is very expensive (the original author measured
        // 10-15 msec on a 2x1.2GHz machine). If not cached it swamps everything
        // else when adding items to the autocomplete DB. It is questionable
        // whether the character set needs to be enforced here at all.
        NSCharacterSet *schemeCharacters = [NSCharacterSet characterSetWithCharactersInString:@"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+.-"];
        nonSchemeCharacters = [[schemeCharacters invertedSet] retain];
    }

    NSRange candidate = NSMakeRange(0, colon.location);
    NSRange firstIllegal = [self rangeOfCharacterFromSet:nonSchemeCharacters options:0 range:candidate];
    if (firstIllegal.location != NSNotFound)
        return noScheme;
    return candidate;
}
| |
// Returns YES when the receiver, after _webkit_stringByTrimmingWhitespace, has a
// URL scheme according to _webkit_rangeOfURLScheme.
- (BOOL)_webkit_looksLikeAbsoluteURL
{
    // Trim whitespace because _web_URLWithString allows whitespace.
    NSString *trimmed = [self _webkit_stringByTrimmingWhitespace];
    NSRange schemeRange = [trimmed _webkit_rangeOfURLScheme];
    return schemeRange.location != NSNotFound;
}
| |
// Returns everything after the first '#' in the receiver (possibly the empty
// string), or nil when the receiver contains no '#'.
- (NSString *)_webkit_URLFragment
{
    NSRange hashMark = [self rangeOfString:@"#" options:NSLiteralSearch];
    if (hashMark.location != NSNotFound)
        return [self substringFromIndex:hashMark.location + 1];
    return nil;
}
| |
| @end |