blob: 9dd655ebba84a69ad9f99012ca759f4bfaa74cba [file] [log] [blame]
/*
* Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
* Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public License
* along with this library; see the file COPYING.LIB. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#include "config.h"
#include "AtomicString.h"
#include "StringHash.h"
#include <wtf/HashSet.h>
#include <wtf/Threading.h>
#include <wtf/WTFThreadData.h>
#include <wtf/unicode/UTF8.h>
namespace WTF {
// Make the names declared in the Unicode namespace usable unqualified in this file.
using namespace Unicode;
// Build-time guard: compilation fails if AtomicString and String ever differ in size.
COMPILE_ASSERT(sizeof(AtomicString) == sizeof(String), atomic_string_and_string_must_be_same_size);
class AtomicStringTable {
public:
static AtomicStringTable* create()
{
AtomicStringTable* table = new AtomicStringTable;
WTFThreadData& data = wtfThreadData();
data.m_atomicStringTable = table;
data.m_atomicStringTableDestructor = AtomicStringTable::destroy;
return table;
}
HashSet<StringImpl*>& table()
{
return m_table;
}
private:
static void destroy(AtomicStringTable* table)
{
HashSet<StringImpl*>::iterator end = table->m_table.end();
for (HashSet<StringImpl*>::iterator iter = table->m_table.begin(); iter != end; ++iter)
(*iter)->setIsAtomic(false);
delete table;
}
HashSet<StringImpl*> m_table;
};
// Returns the set of atomic strings registered with the current
// WTFThreadData, creating the table on first use.
static inline HashSet<StringImpl*>& stringTable()
{
    // The table is created lazily for now; ideally it would be constructed
    // in WTFThreadData's constructor once that becomes possible.
    AtomicStringTable* threadTable = wtfThreadData().atomicStringTable();
    if (LIKELY(threadTable))
        return threadTable->table();
    return AtomicStringTable::create()->table();
}
// Looks |value| up in the string table through HashTranslator, inserting a
// translated StringImpl when no equal entry exists, and returns the entry.
template<typename T, typename HashTranslator>
static inline PassRefPtr<StringImpl> addToStringTable(const T& value)
{
    pair<HashSet<StringImpl*>::iterator, bool> insertion = stringTable().add<T, HashTranslator>(value);

    // The boolean half of the pair is true only when the entry was newly
    // translated; such an entry has to be adopted rather than ref'ed.
    const bool isNewEntry = insertion.second;
    if (!isNewEntry)
        return *insertion.first;
    return adoptRef(*insertion.first);
}
// Hash translator that lets a NUL-terminated C string be looked up in, or
// inserted into, the HashSet<StringImpl*> without building a StringImpl first.
struct CStringTranslator {
    // Hashes the NUL-terminated string |c|.
    static unsigned hash(const char* c)
    {
        return StringHasher::computeHash(c);
    }

    // Returns true when |r| holds exactly the characters of the NUL-terminated
    // string |s|, each byte of |s| being read as an unsigned char.
    static bool equal(StringImpl* r, const char* s)
    {
        unsigned length = r->length();
        const UChar* d = r->characters();
        for (unsigned i = 0; i != length; ++i) {
            unsigned char c = s[i];
            // Reaching the terminator before |length| characters means |s| is
            // shorter than |r|. Stop here: without this check an embedded
            // U+0000 in |r| would match the terminator and the loop would keep
            // reading past the end of |s|.
            if (!c)
                return false;
            if (d[i] != c)
                return false;
        }
        // Equal only if |s| ends exactly where |r| does.
        return !s[length];
    }

    // Fills the table slot |location| with a new StringImpl created from |c|,
    // records the precomputed |hash| on it and flags it as atomic. The
    // reference is leaked into the slot.
    static void translate(StringImpl*& location, const char* const& c, unsigned hash)
    {
        location = StringImpl::create(c).leakRef();
        location->setHash(hash);
        location->setIsAtomic(true);
    }
};
// Compares an AtomicString with a C string. An AtomicString with no impl (or
// an impl with no characters) compares equal only to a null pointer.
bool operator==(const AtomicString& a, const char* b)
{
    StringImpl* impl = a.impl();
    const bool lhsIsNull = !impl || !impl->characters();
    const bool rhsIsNull = !b;

    // When either side is null the result is decided without looking at
    // characters: equal only if both sides are null.
    if (lhsIsNull || rhsIsNull)
        return lhsIsNull && rhsIsNull;

    return CStringTranslator::equal(impl, b);
}
// Returns the atomic StringImpl for the NUL-terminated string |c|.
// A null pointer yields a null result; an empty string yields
// StringImpl::empty() without consulting the table.
PassRefPtr<StringImpl> AtomicString::add(const char* c)
{
    if (!c)
        return 0;

    const bool isEmpty = !*c;
    if (isEmpty)
        return StringImpl::empty();

    return addToStringTable<const char*, CStringTranslator>(c);
}
// Lookup key pairing a UChar pointer with a character count; consumed by
// UCharBufferTranslator. Brace-initialized positionally, so field order matters.
struct UCharBuffer {
const UChar* s; // First character of the buffer.
unsigned length; // Number of UChars at |s|.
};
// Returns true when |string| consists of exactly the |length| UChars found at
// |characters|.
static inline bool equal(StringImpl* string, const UChar* characters, unsigned length)
{
    if (string->length() != length)
        return false;

    // FIXME: perhaps we should have a more abstract macro that indicates when
    // going 4 bytes at a time is unsafe
#if CPU(ARM) || CPU(SH4) || CPU(MIPS) || CPU(SPARC)
    // Compare one UChar at a time on these CPUs.
    const UChar* lhs = string->characters();
    for (unsigned index = 0; index < length; ++index) {
        if (lhs[index] != characters[index])
            return false;
    }
    return true;
#else
    /* Do it 4-bytes-at-a-time on architectures where it's safe */
    const uint32_t* lhsPairs = reinterpret_cast<const uint32_t*>(string->characters());
    const uint32_t* rhsPairs = reinterpret_cast<const uint32_t*>(characters);

    const unsigned pairCount = length >> 1;
    for (unsigned index = 0; index < pairCount; ++index) {
        if (lhsPairs[index] != rhsPairs[index])
            return false;
    }

    // An even length has been fully compared by the loop above.
    if (!(length & 1))
        return true;

    // Odd length: one trailing 16-bit unit remains after the last full pair.
    const uint16_t lhsTail = *reinterpret_cast<const uint16_t*>(lhsPairs + pairCount);
    const uint16_t rhsTail = *reinterpret_cast<const uint16_t*>(rhsPairs + pairCount);
    return lhsTail == rhsTail;
#endif
}
// Compares an AtomicString with a vector of UChars. An AtomicString without
// an impl never compares equal.
bool operator==(const AtomicString& string, const Vector<UChar>& vector)
{
    StringImpl* impl = string.impl();
    if (!impl)
        return false;
    return equal(impl, vector.data(), vector.size());
}
struct UCharBufferTranslator {
static unsigned hash(const UCharBuffer& buf)
{
return StringHasher::computeHash(buf.s, buf.length);
}
static bool equal(StringImpl* const& str, const UCharBuffer& buf)
{
return WTF::equal(str, buf.s, buf.length);
}
static void translate(StringImpl*& location, const UCharBuffer& buf, unsigned hash)
{
location = StringImpl::create(buf.s, buf.length).leakRef();
location->setHash(hash);
location->setIsAtomic(true);
}
};
// Lookup key carrying a caller-supplied hash alongside the characters it was
// computed from; consumed by HashAndCharactersTranslator. Brace-initialized
// positionally, so field order matters.
struct HashAndCharacters {
unsigned hash; // Hash of the characters below (asserted by the translator).
const UChar* characters; // First character of the buffer.
unsigned length; // Number of UChars at |characters|.
};
struct HashAndCharactersTranslator {
static unsigned hash(const HashAndCharacters& buffer)
{
ASSERT(buffer.hash == StringHasher::computeHash(buffer.characters, buffer.length));
return buffer.hash;
}
static bool equal(StringImpl* const& string, const HashAndCharacters& buffer)
{
return WTF::equal(string, buffer.characters, buffer.length);
}
static void translate(StringImpl*& location, const HashAndCharacters& buffer, unsigned hash)
{
location = StringImpl::create(buffer.characters, buffer.length).leakRef();
location->setHash(hash);
location->setIsAtomic(true);
}
};
// Lookup key for UTF-8 input; consumed by HashAndUTF8CharactersTranslator and
// filled in by AtomicString::fromUTF8Internal().
struct HashAndUTF8Characters {
unsigned hash; // Hash returned by calculateStringHashAndLengthFromUTF8().
const char* characters; // Start of the UTF-8 input.
unsigned length; // Extent of the UTF-8 input: |characters + length| is used as its end.
unsigned utf16Length; // Number of UChars the string occupies once converted to UTF-16.
};
// Hash translator that lets UTF-8 input be looked up in, or inserted into,
// the HashSet<StringImpl*>; the UTF-16 string is only built on insertion.
struct HashAndUTF8CharactersTranslator {
    // Returns the hash carried by |buffer|; nothing is recomputed here.
    static unsigned hash(const HashAndUTF8Characters& buffer)
    {
        return buffer.hash;
    }

    // True when the stored UTF-16 string matches the UTF-8 input in |buffer|.
    static bool equal(StringImpl* const& string, const HashAndUTF8Characters& buffer)
    {
        if (buffer.utf16Length != string->length())
            return false;

        const UChar* stringCharacters = string->characters();

        // If buffer contains only ASCII characters UTF-8 and UTF16 length are the same.
        // Differing lengths therefore mean non-ASCII input, which needs the
        // full UTF-16/UTF-8 comparison.
        if (buffer.utf16Length != buffer.length)
            return equalUTF16WithUTF8(stringCharacters, stringCharacters + string->length(), buffer.characters, buffer.characters + buffer.length);

        // Pure ASCII: compare the code units directly.
        for (unsigned i = 0; i < buffer.length; ++i) {
            ASSERT(isASCII(buffer.characters[i]));
            if (stringCharacters[i] != buffer.characters[i])
                return false;
        }
        return true;
    }

    // Allocates a StringImpl of |buffer.utf16Length| UChars, converts the
    // UTF-8 input into it, stores |hash| on it and flags it as atomic.
    static void translate(StringImpl*& location, const HashAndUTF8Characters& buffer, unsigned hash)
    {
        UChar* target;
        // leakRef() matches how the other translators in this file hand the
        // new reference over to the table slot.
        location = StringImpl::createUninitialized(buffer.utf16Length, target).leakRef();

        const char* source = buffer.characters;
        // A conversion failure is only reported through the assertion; the
        // string is still inserted afterwards.
        if (convertUTF8ToUTF16(&source, source + buffer.length, &target, target + buffer.utf16Length) != conversionOK)
            ASSERT_NOT_REACHED();

        location->setHash(hash);
        location->setIsAtomic(true);
    }
};
// Returns the atomic StringImpl for the |length| UChars at |s|.
// A null pointer yields a null result; a zero length yields
// StringImpl::empty() without consulting the table.
PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length)
{
    if (!s)
        return 0;
    if (length == 0)
        return StringImpl::empty();

    UCharBuffer key = { s, length };
    return addToStringTable<UCharBuffer, UCharBufferTranslator>(key);
}
// Returns the atomic StringImpl for the |length| UChars at |s|, reusing the
// caller-supplied |existingHash| instead of hashing the characters again.
// Both |s| and |existingHash| are asserted to be non-zero.
PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length, unsigned existingHash)
{
    ASSERT(s);
    ASSERT(existingHash);

    if (length == 0)
        return StringImpl::empty();

    HashAndCharacters key = { existingHash, s, length };
    return addToStringTable<HashAndCharacters, HashAndCharactersTranslator>(key);
}
// Returns the atomic StringImpl for the zero-terminated UChar string |s|.
// A null pointer yields a null result; an empty string yields
// StringImpl::empty() without consulting the table.
PassRefPtr<StringImpl> AtomicString::add(const UChar* s)
{
    if (!s)
        return 0;

    // Count in unsigned: UCharBuffer::length is unsigned, so a signed counter
    // would be a narrowing conversion in the brace initializer below.
    unsigned length = 0;
    while (s[length] != UChar(0))
        ++length;

    if (!length)
        return StringImpl::empty();

    UCharBuffer buffer = { s, length };
    return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer);
}
// Returns the atomic counterpart of |r|. Null and already-atomic strings are
// returned unchanged, and an empty string maps to StringImpl::empty().
PassRefPtr<StringImpl> AtomicString::addSlowCase(StringImpl* r)
{
    if (!r)
        return r;
    if (r->isAtomic())
        return r;
    if (r->length() == 0)
        return StringImpl::empty();

    // add() hands back the entry now in the table: either |r| itself or an
    // equal string that was already there.
    StringImpl* tableEntry = *stringTable().add(r).first;

    // Only flag |r| as atomic when it is the pointer the table holds.
    if (tableEntry == r)
        r->setIsAtomic(true);
    return tableEntry;
}
// Looks up the atomic string matching the |length| UChars at |s| without
// inserting anything. |existingHash| is the caller-supplied hash of those
// characters. Returns 0 when no matching entry is in the table; a zero
// length returns StringImpl::empty().
AtomicStringImpl* AtomicString::find(const UChar* s, unsigned length, unsigned existingHash)
{
    ASSERT(s);
    ASSERT(existingHash);

    if (!length)
        return static_cast<AtomicStringImpl*>(StringImpl::empty());

    HashAndCharacters buffer = { existingHash, s, length };

    // Fetch the table once and use it for both the lookup and the end()
    // comparison, rather than calling stringTable() twice.
    HashSet<StringImpl*>& table = stringTable();
    HashSet<StringImpl*>::iterator iterator = table.find<HashAndCharacters, HashAndCharactersTranslator>(buffer);
    if (iterator == table.end())
        return 0;
    return static_cast<AtomicStringImpl*>(*iterator);
}
// Removes |r| from the string table.
void AtomicString::remove(StringImpl* r)
{
    HashSet<StringImpl*>& table = stringTable();
    table.remove(r);
}
// Returns the lowercased form of this string. When there is no impl, or when
// StringImpl::lower() hands back the same impl, this string is returned as is.
AtomicString AtomicString::lower() const
{
    // Note: This is a hot function in the Dromaeo benchmark.
    StringImpl* original = this->impl();
    if (UNLIKELY(!original))
        return *this;

    RefPtr<StringImpl> lowered = original->lower();

    // Getting the same impl back means nothing changed, so no new
    // AtomicString needs to be built.
    if (LIKELY(lowered == original))
        return *this;

    return AtomicString(lowered);
}
// Builds an AtomicString from the UTF-8 bytes in [charactersStart, charactersEnd).
// Returns nullAtom when calculateStringHashAndLengthFromUTF8() reports a zero hash.
AtomicString AtomicString::fromUTF8Internal(const char* charactersStart, const char* charactersEnd)
{
    HashAndUTF8Characters key;
    key.characters = charactersStart;
    // The helper returns the hash and fills in both length fields of the key.
    key.hash = calculateStringHashAndLengthFromUTF8(charactersStart, charactersEnd, key.length, key.utf16Length);

    if (!key.hash)
        return nullAtom;

    AtomicString result;
    result.m_string = addToStringTable<HashAndUTF8Characters, HashAndUTF8CharactersTranslator>(key);
    return result;
}
} // namespace WTF