/*
 * Copyright (C) 2005, 2006, 2007 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1.  Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 * 2.  Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 * 3.  Neither the name of Apple Inc. ("Apple") nor the names of
 *     its contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#import "WebHTMLRepresentation.h"

#import "DOMElementInternal.h"
#import "DOMNodeInternal.h"
#import "DOMRangeInternal.h"
#import "WebArchive.h"
#import "WebBasePluginPackage.h"
#import "WebDataSourceInternal.h"
#import "WebDocumentPrivate.h"
#import "WebFrameInternal.h"
#import "WebKitNSStringExtras.h"
#import "WebKitStatisticsPrivate.h"
#import "WebNSObjectExtras.h"
#import "WebTypesInternal.h"
#import "WebView.h"
#import <Foundation/NSURLResponse.h>
#import <WebCore/Document.h>
#import <WebCore/DocumentLoader.h>
#import <WebCore/Editor.h>
#import <WebCore/Frame.h>
#import <WebCore/FrameLoader.h>
#import <WebCore/FrameLoaderClient.h>
#import <WebCore/HTMLConverter.h>
#import <WebCore/HTMLFormControlElement.h>
#import <WebCore/HTMLFormElement.h>
#import <WebCore/HTMLInputElement.h>
#import <WebCore/HTMLNames.h>
#import <WebCore/HTMLTableCellElement.h>
#import <WebCore/MIMETypeRegistry.h>
#import <WebCore/NodeTraversal.h>
#import <WebCore/Range.h>
#import <WebCore/RenderElement.h>
#import <WebCore/TextResourceDecoder.h>
#import <WebKitLegacy/DOMHTMLInputElement.h>
#import <yarr/RegularExpression.h>
#import <wtf/Assertions.h>
#import <wtf/StdLibExtras.h>
#import <wtf/text/StringBuilder.h>

using namespace WebCore;
using namespace HTMLNames;
using JSC::Yarr::RegularExpression;

// Private state of a WebHTMLRepresentation. The fields are public ivars that the
// representation reads and writes directly through _private->.
@interface WebHTMLRepresentationPrivate : NSObject {
@public
    // Assigned (not retained) by -setDataSource:.
    WebDataSource *dataSource;

    // Set once the response has been forwarded to the manual loader.
    BOOL hasSentResponseToPlugin;
    // Set once this representation has been counted in WebHTMLRepresentationCount.
    BOOL includedInWebKitStatistics;

    // Both assigned (not retained) by -_redirectDataToManualLoader:forPluginView:.
    id <WebPluginManualLoader> manualLoader;
    NSView *pluginView;
}
@end

@implementation WebHTMLRepresentationPrivate
@end

@implementation WebHTMLRepresentation

// Returns a new (+1 retained) mutable array holding every string of |set| as an NSString.
static NSMutableArray *newArrayWithStrings(const HashSet<String>& set) NS_RETURNS_RETAINED;
static NSMutableArray *newArrayWithStrings(const HashSet<String>& set)
{
    NSMutableArray *array = [[NSMutableArray alloc] initWithCapacity:set.size()];
    HashSet<String>::const_iterator end = set.end();
    for (HashSet<String>::const_iterator it = set.begin(); it != end; ++it)
        [array addObject:(NSString *)(*it)];
    return array;
}

// Returns a new (+1 retained) mutable array with the objects of |first| followed by those of |second|.
static NSMutableArray *newArrayByConcatenatingArrays(NSArray *first, NSArray *second) NS_RETURNS_RETAINED;
static NSMutableArray *newArrayByConcatenatingArrays(NSArray *first, NSArray *second)
{
    NSMutableArray *result = [first mutableCopy];
    [result addObjectsFromArray:second];
    return result;
}

// The four class methods below each build their array once, on first call, and keep
// returning the same never-released instance from a function-local static.

// Non-image MIME types followed by image MIME types.
+ (NSArray *)supportedMIMETypes
{
    static __unsafe_unretained NSArray *staticSupportedMIMETypes = newArrayByConcatenatingArrays([self supportedNonImageMIMETypes], [self supportedImageMIMETypes]);
    return staticSupportedMIMETypes;
}

// MIME types reported by MIMETypeRegistry::getSupportedNonImageMIMETypes().
+ (NSArray *)supportedNonImageMIMETypes
{
    static __unsafe_unretained NSArray *staticSupportedNonImageMIMETypes = newArrayWithStrings(MIMETypeRegistry::getSupportedNonImageMIMETypes());
    return staticSupportedNonImageMIMETypes;
}

// MIME types reported by MIMETypeRegistry::getSupportedImageMIMETypes().
+ (NSArray *)supportedImageMIMETypes
{
    static __unsafe_unretained NSArray *staticSupportedImageMIMETypes = newArrayWithStrings(MIMETypeRegistry::getSupportedImageMIMETypes());
    return staticSupportedImageMIMETypes;
}

// MIME types reported by MIMETypeRegistry::getUnsupportedTextMIMETypes().
+ (NSArray *)unsupportedTextMIMETypes
{
    static __unsafe_unretained NSArray *staticUnsupportedTextMIMETypes = newArrayWithStrings(MIMETypeRegistry::getUnsupportedTextMIMETypes());
    return staticUnsupportedTextMIMETypes;
}

// Creates the representation together with its private state object.
- (id)init
{
    self = [super init];
    if (!self)
        return nil;

    _private = [[WebHTMLRepresentationPrivate alloc] init];

    return self;
}

// Undoes the statistics increment made in -setDataSource: (if any) and releases the private state.
- (void)dealloc
{
    if (_private && _private->includedInWebKitStatistics)
        --WebHTMLRepresentationCount;

    [_private release];

    [super dealloc];
}

// Counterpart of -dealloc that only undoes the statistics increment.
- (void)finalize
{
    if (_private && _private->includedInWebKitStatistics)
        --WebHTMLRepresentationCount;

    [super finalize];
}

// Once a plug-in view is set, -receivedData:, -receivedError: and -finishedLoading...
// forward to |manualLoader| for that view. Neither argument is retained.
- (void)_redirectDataToManualLoader:(id<WebPluginManualLoader>)manualLoader forPluginView:(NSView *)pluginView
{
    _private->manualLoader = manualLoader;
    _private->pluginView = pluginView;
}

// Records the data source (without retaining it) and counts this representation in
// WebHTMLRepresentationCount, at most once, when the data source's frame asks for that.
- (void)setDataSource:(WebDataSource *)dataSource
{
    _private->dataSource = dataSource;

    if (!_private->includedInWebKitStatistics && [[dataSource webFrame] _isIncludedInWebKitStatistics]) {
        _private->includedInWebKitStatistics = YES;
        ++WebHTMLRepresentationCount;
    }
}

// YES when the data source's response MIME type is application/x-webarchive (case-insensitive).
- (BOOL)_isDisplayingWebArchive
{
    return [[_private->dataSource _responseMIMEType] _webkit_isCaseInsensitiveEqualToString:@"application/x-webarchive"];
}

// Commits incoming data to the frame, or hands it to the manual loader when a plug-in view is set.
- (void)receivedData:(NSData *)data withDataSource:(WebDataSource *)dataSource
{
    WebFrame *webFrame = [dataSource webFrame];
    if (!webFrame)
        return;

    if (!_private->pluginView)
        [webFrame _commitData:data];

    // If the document is a stand-alone media document, now is the right time to cancel the WebKit load.
    // The core frame, its document and its document loader are each checked before use so that
    // a missing object skips the cancellation instead of dereferencing a null pointer.
    Frame* coreFrame = core(webFrame);
    if (coreFrame && coreFrame->document() && coreFrame->document()->isMediaDocument()) {
        DocumentLoader* documentLoader = coreFrame->loader().documentLoader();
        if (documentLoader)
            documentLoader->cancelMainResourceLoad(coreFrame->loader().client().pluginWillHandleLoadError(documentLoader->response()));
    }

    if (_private->pluginView) {
        // The manual loader gets the response exactly once, before the first chunk of data.
        if (!_private->hasSentResponseToPlugin) {
            [_private->manualLoader pluginView:_private->pluginView receivedResponse:[dataSource response]];
            _private->hasSentResponseToPlugin = YES;
        }

        [_private->manualLoader pluginView:_private->pluginView receivedData:data];
    }
}

// Forwards a load error to the manual loader when a plug-in view is set; otherwise does nothing.
- (void)receivedError:(NSError *)error withDataSource:(WebDataSource *)dataSource
{
    if (_private->pluginView) {
        [_private->manualLoader pluginView:_private->pluginView receivedError:error];
    }
}

// Notifies the manual loader when a plug-in view is set. Otherwise, when the frame is the
// main frame of an editable web view, applies the editing style to the body element.
- (void)finishedLoadingWithDataSource:(WebDataSource *)dataSource
{
    WebFrame* webFrame = [dataSource webFrame];

    if (_private->pluginView) {
        [_private->manualLoader pluginViewFinishedLoading:_private->pluginView];
        return;
    }

    if (!webFrame)
        return;
    WebView *webView = [webFrame webView];
    if ([webView mainFrame] == webFrame && [webView isEditable])
        core(webFrame)->editor().applyEditingStyleToBodyElement();
}

// Delegates to the data source's frame.
- (BOOL)canProvideDocumentSource
{
    return [[_private->dataSource webFrame] _canProvideDocumentSource];
}

// Delegates to the data source's frame.
- (BOOL)canSaveAsWebArchive
{
    return [[_private->dataSource webFrame] _canSaveAsWebArchive];
}

// Returns the document source as a string. For a web archive this is the parsed archive data
// read as UTF-8; otherwise it is the data source's bytes decoded with the document's decoder.
// Returns nil when the frame, document, decoder or data is missing.
- (NSString *)documentSource
{
    if ([self _isDisplayingWebArchive]) {
        // A missing document loader is treated the same as missing archive data.
        SharedBuffer *parsedArchiveData = 0;
        if (DocumentLoader* documentLoader = [_private->dataSource _documentLoader])
            parsedArchiveData = documentLoader->parsedArchiveData();
        NSString *result = [[NSString alloc] initWithData:parsedArchiveData ? parsedArchiveData->createNSData().get() : nil encoding:NSUTF8StringEncoding];
        return [result autorelease];
    }

    Frame* coreFrame = core([_private->dataSource webFrame]);
    if (!coreFrame)
        return nil;
    Document* document = coreFrame->document();
    if (!document)
        return nil;
    TextResourceDecoder* decoder = document->decoder();
    if (!decoder)
        return nil;
    NSData *data = [_private->dataSource data];
    if (!data)
        return nil;
    return decoder->encoding().decode(reinterpret_cast<const char*>([data bytes]), [data length]);
}

// Returns the document loader's title, or nil when it is empty or there is no document loader.
- (NSString *)title
{
    // Messaging a nil data source yields a null loader, so check before dereferencing.
    DocumentLoader* documentLoader = [_private->dataSource _documentLoader];
    if (!documentLoader)
        return nil;
    return nsStringNilIfEmpty(documentLoader->title().string());
}

// Returns the DOM document of the data source's frame.
- (DOMDocument *)DOMDocument
{
    return [[_private->dataSource webFrame] DOMDocument];
}

#if !PLATFORM(IOS)
- (NSAttributedString *)attributedText
{
    // FIXME: Implement
    return nil;
}

// Returns the editing attributed string for the range from (startNode, startOffset) to
// (endNode, endOffset). Returns nil when startNode is nil, because the range is created
// in the start node's document.
- (NSAttributedString *)attributedStringFrom:(DOMNode *)startNode startOffset:(int)startOffset to:(DOMNode *)endNode endOffset:(int)endOffset
{
    Node* start = core(startNode);
    if (!start)
        return nil;
    return editingAttributedStringFromRange(*Range::create(start->document(), start, startOffset, core(endNode), endOffset));
}
#endif

// Returns the core form element behind |element|, or 0 when it is nil or not a <form>.
static HTMLFormElement* formElementFromDOMElement(DOMElement *element)
{
    Element* node = core(element);
    return node && node->hasTagName(formTag) ? static_cast<HTMLFormElement*>(node) : 0;
}

// Returns the first element associated with |form| whose name equals |name|, or nil.
- (DOMElement *)elementWithName:(NSString *)name inForm:(DOMElement *)form
{
    HTMLFormElement* formElement = formElementFromDOMElement(form);
    if (!formElement)
        return nil;
    const Vector<FormAssociatedElement*>& elements = formElement->associatedElements();
    AtomicString targetName = name;
    for (unsigned i = 0; i < elements.size(); i++) {
        FormAssociatedElement& element = *elements[i];
        if (element.name() == targetName)
            return kit(&element.asHTMLElement());
    }
    return nil;
}

// Returns the core input element behind |element|, or 0 when it is nil or not an <input>.
static HTMLInputElement* inputElementFromDOMElement(DOMElement* element)
{
    Element* node = core(element);
    return node && isHTMLInputElement(node) ? toHTMLInputElement(node) : 0;
}

// YES for an input element that is a non-password text field and should autocomplete.
- (BOOL)elementDoesAutoComplete:(DOMElement *)element
{
    HTMLInputElement* inputElement = inputElementFromDOMElement(element);
    return inputElement
        && inputElement->isTextField()
        && !inputElement->isPasswordField()
        && inputElement->shouldAutocomplete();
}

// YES for an input element that is a password field.
- (BOOL)elementIsPassword:(DOMElement *)element
{
    HTMLInputElement* inputElement = inputElementFromDOMElement(element);
    return inputElement && inputElement->isPasswordField();
}

// Returns the form of an input element, or nil when |element| is not an input element.
- (DOMElement *)formForElement:(DOMElement *)element
{
    HTMLInputElement* inputElement = inputElementFromDOMElement(element);
    if (!inputElement)
        return nil;
    return kit(inputElement->form());
}

// Returns the current form of the frame's selection, or nil when there is no core frame.
- (DOMElement *)currentForm
{
    Frame* coreFrame = core([_private->dataSource webFrame]);
    if (!coreFrame)
        return nil;
    return kit(coreFrame->selection().currentForm());
}

// Returns the enumeratable elements associated with |form|. Returns nil when |form| is not
// a form element or when it has no enumeratable elements.
- (NSArray *)controlsInForm:(DOMElement *)form
{
    HTMLFormElement* formElement = formElementFromDOMElement(form);
    if (!formElement)
        return nil;
    NSMutableArray *results = nil;
    const Vector<FormAssociatedElement*>& elements = formElement->associatedElements();
    for (unsigned i = 0; i < elements.size(); i++) {
        if (elements[i]->isEnumeratable()) { // Skip option elements, other duds
            DOMElement *element = kit(&elements[i]->asHTMLElement());
            if (!results)
                results = [NSMutableArray arrayWithObject:element];
            else
                [results addObject:element];
        }
    }
    return results;
}

// Either get cached regexp or build one that matches any of the labels.
// The regexp we build is of the form: (STR1|STR2|STRN)
// The returned pointer is owned by the cache and is deleted when its entry is evicted,
// so callers must not delete it or hold on to it across later calls.
static RegularExpression* regExpForLabels(NSArray *labels)
{
    // All the ObjC calls in this method are simple array and string
    // calls which we can assume do not raise exceptions

    // Parallel arrays that we use to cache regExps. In practice the number of expressions
    // that the app will use is equal to the number of locales used in searching.
    static const unsigned int regExpCacheSize = 4;
    static NSMutableArray* regExpLabels = nil;
    DEPRECATED_DEFINE_STATIC_LOCAL(Vector<RegularExpression*>, regExps, ());
    DEPRECATED_DEFINE_STATIC_LOCAL(RegularExpression, wordRegExp, ("\\w", TextCaseSensitive));

    RegularExpression* result;
    if (!regExpLabels)
        regExpLabels = [[NSMutableArray alloc] initWithCapacity:regExpCacheSize];
    CFIndex cacheHit = [regExpLabels indexOfObject:labels];
    if (cacheHit != NSNotFound)
        result = regExps.at(cacheHit);
    else {
        StringBuilder pattern;
        pattern.append('(');
        unsigned numLabels = [labels count];
        unsigned i;
        for (i = 0; i < numLabels; i++) {
            String label = [labels objectAtIndex:i];

            bool startsWithWordChar = false;
            bool endsWithWordChar = false;
            if (label.length() != 0) {
                startsWithWordChar = wordRegExp.match(label.substring(0, 1)) >= 0;
                endsWithWordChar = wordRegExp.match(label.substring(label.length() - 1, 1)) >= 0;
            }

            if (i != 0)
                pattern.append('|');
            // Search for word boundaries only if label starts/ends with "word characters".
            // If we always searched for word boundaries, this wouldn't work for languages
            // such as Japanese.
            if (startsWithWordChar)
                pattern.appendLiteral("\\b");
            pattern.append(label);
            if (endsWithWordChar)
                pattern.appendLiteral("\\b");
        }
        pattern.append(')');
        result = new RegularExpression(pattern.toString(), TextCaseInsensitive);
    }

    // add regexp to the cache, making sure it is at the front for LRU ordering
    if (cacheHit != 0) {
        if (cacheHit != NSNotFound) {
            // remove from old spot
            [regExpLabels removeObjectAtIndex:cacheHit];
            regExps.remove(cacheHit);
        }
        // add to start
        [regExpLabels insertObject:labels atIndex:0];
        regExps.insert(0, result);
        // trim if too big
        if ([regExpLabels count] > regExpCacheSize) {
            [regExpLabels removeObjectAtIndex:regExpCacheSize];
            RegularExpression* last = regExps.last();
            regExps.removeLast();
            delete last;
        }
    }
    return result;
}

// Walks backwards through the node tree from |element| looking for text that matches one of
// |labels|. Returns the matched text, or nil when nothing matched.
// |resultDistance| (optional) is set to notFound up front; on a text match it receives the
// number of characters searched before the matching node, and it is also handed to the
// cell-above search. |resultIsInCellAbove| (optional) is set to true only when the result
// came from the cell-above search.
static NSString* searchForLabelsBeforeElement(Frame* frame, NSArray* labels, Element* element, size_t* resultDistance, bool* resultIsInCellAbove)
{
    RegularExpression* regExp = regExpForLabels(labels);
    // We stop searching after we've seen this many chars
    const unsigned int charsSearchedThreshold = 500;
    // This is the absolute max we search. We allow a little more slop than
    // charsSearchedThreshold, to make it more likely that we'll search whole nodes.
    const unsigned int maxCharsSearched = 600;
    // If the starting element is within a table, the cell that contains it
    HTMLTableCellElement* startingTableCell = 0;
    bool searchedCellAbove = false;

    if (resultDistance)
        *resultDistance = notFound;
    if (resultIsInCellAbove)
        *resultIsInCellAbove = false;

    // walk backwards in the node tree, until another element, or form, or end of tree
    unsigned lengthSearched = 0;
    Node* n;
    for (n = NodeTraversal::previous(element);
        n && lengthSearched < charsSearchedThreshold;
        n = NodeTraversal::previous(n)) {
        if (n->hasTagName(formTag)
            || (n->isHTMLElement() && toElement(n)->isFormControlElement())) {
            // We hit another form element or the start of the form - bail out
            break;
        } else if (n->hasTagName(tdTag) && !startingTableCell) {
            startingTableCell = static_cast<HTMLTableCellElement*>(n);
        } else if (n->hasTagName(trTag) && startingTableCell) {
            // Without a frame the cell-above search cannot run; treat that as "no match".
            NSString* result = frame ? frame->searchForLabelsAboveCell(*regExp, startingTableCell, resultDistance) : nil;
            if (result && [result length] > 0) {
                if (resultIsInCellAbove)
                    *resultIsInCellAbove = true;
                return result;
            }
            searchedCellAbove = true;
        } else if (n->isTextNode() && n->renderer() && n->renderer()->style().visibility() == VISIBLE) {
            // For each text chunk, run the regexp
            String nodeString = n->nodeValue();
            // add 100 for slop, to make it more likely that we'll search whole nodes
            if (lengthSearched + nodeString.length() > maxCharsSearched)
                nodeString = nodeString.right(charsSearchedThreshold - lengthSearched);
            int pos = regExp->searchRev(nodeString);
            if (pos >= 0) {
                if (resultDistance)
                    *resultDistance = lengthSearched;
                return nodeString.substring(pos, regExp->matchedLength());
            }
            lengthSearched += nodeString.length();
        }
    }

    // If we started in a cell, but bailed because we found the start of the form or the
    // previous element, we still might need to search the row above us for a label.
    if (startingTableCell && !searchedCellAbove) {
        NSString* result = frame ? frame->searchForLabelsAboveCell(*regExp, startingTableCell, resultDistance) : nil;
        if (result && [result length] > 0) {
            if (resultIsInCellAbove)
                *resultIsInCellAbove = true;
            return result;
        }
    }

    return nil;
}

// Returns the longest substring of |stringToMatch| that matches one of |labels| (the last
// one found when several share that length), or nil when there is no match or the string
// is empty. Digits and underscores are replaced with spaces before matching.
static NSString *matchLabelsAgainstString(NSArray *labels, const String& stringToMatch)
{
    if (stringToMatch.isEmpty())
        return nil;

    String mutableStringToMatch = stringToMatch;

    // Make numbers and _'s in field names behave like word boundaries, e.g., "address2"
    replace(mutableStringToMatch, RegularExpression("\\d", TextCaseSensitive), " ");
    mutableStringToMatch.replace('_', ' ');

    RegularExpression* regExp = regExpForLabels(labels);
    // Use the largest match we can find in the whole string
    int pos;
    int length;
    int bestPos = -1;
    int bestLength = -1;
    int start = 0;
    do {
        pos = regExp->match(mutableStringToMatch, start);
        if (pos != -1) {
            length = regExp->matchedLength();
            if (length >= bestLength) {
                bestPos = pos;
                bestLength = length;
            }
            start = pos + 1;
        }
    } while (pos != -1);

    if (bestPos != -1)
        return mutableStringToMatch.substring(bestPos, bestLength);
    return nil;
}

// Matches |labels| against the element's name attribute and, failing that, its id attribute.
// Returns nil for a null element.
static NSString* matchLabelsAgainstElement(NSArray* labels, Element* element)
{
    // A nil DOMElement arrives here as a null Element; there are no attributes to inspect.
    if (!element)
        return nil;

    // Match against the name element, then against the id element if no match is found for the name element.
    // See 7538330 for one popular site that benefits from the id element check.
    String resultFromNameAttribute = matchLabelsAgainstString(labels, element->getAttribute(nameAttr));
    if (!resultFromNameAttribute.isEmpty())
        return resultFromNameAttribute;

    return matchLabelsAgainstString(labels, element->getAttribute(idAttr));
}


// Convenience form of the four-argument variant that discards both out-parameters.
- (NSString *)searchForLabels:(NSArray *)labels beforeElement:(DOMElement *)element
{
    return [self searchForLabels:labels beforeElement:element resultDistance:0 resultIsInCellAbove:0];
}

// Searches backwards from |element| for text matching one of |labels|. |outDistance| and
// |outIsInCellAbove| are optional; a distance of notFound is reported as NSNotFound.
- (NSString *)searchForLabels:(NSArray *)labels beforeElement:(DOMElement *)element resultDistance:(NSUInteger*)outDistance resultIsInCellAbove:(BOOL*)outIsInCellAbove
{
    // Both locals are always written by searchForLabelsBeforeElement because non-null pointers are passed.
    size_t distance;
    bool isInCellAbove;

    NSString *result = searchForLabelsBeforeElement(core([_private->dataSource webFrame]), labels, core(element), &distance, &isInCellAbove);

    if (outDistance) {
        if (distance == notFound)
            *outDistance = NSNotFound;
        else
            *outDistance = distance;
    }

    if (outIsInCellAbove)
        *outIsInCellAbove = isInCellAbove;

    return result;
}

// Matches |labels| against the name and id attributes of |element|; nil when nothing matches.
- (NSString *)matchLabels:(NSArray *)labels againstElement:(DOMElement *)element
{
    return matchLabelsAgainstElement(labels, core(element));
}

@end