1/*
2 * Copyright (C) 2005, 2006, 2007 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1.  Redistributions of source code must retain the above copyright
9 *     notice, this list of conditions and the following disclaimer.
10 * 2.  Redistributions in binary form must reproduce the above copyright
11 *     notice, this list of conditions and the following disclaimer in the
12 *     documentation and/or other materials provided with the distribution.
13 * 3.  Neither the name of Apple Inc. ("Apple") nor the names of
14 *     its contributors may be used to endorse or promote products derived
15 *     from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#import "WebHTMLRepresentation.h"
30
31#import "DOMElementInternal.h"
32#import "DOMNodeInternal.h"
33#import "DOMRangeInternal.h"
34#import "WebArchive.h"
35#import "WebBasePluginPackage.h"
36#import "WebDataSourceInternal.h"
37#import "WebDocumentPrivate.h"
38#import "WebFrameInternal.h"
39#import "WebKitNSStringExtras.h"
40#import "WebKitStatisticsPrivate.h"
41#import "WebNSObjectExtras.h"
42#import "WebTypesInternal.h"
43#import "WebView.h"
44#import <Foundation/NSURLResponse.h>
45#import <WebCore/Document.h>
46#import <WebCore/DocumentLoader.h>
47#import <WebCore/Editor.h>
48#import <WebCore/Frame.h>
49#import <WebCore/FrameLoader.h>
50#import <WebCore/FrameLoaderClient.h>
51#import <WebCore/HTMLConverter.h>
52#import <WebCore/HTMLFormControlElement.h>
53#import <WebCore/HTMLFormElement.h>
54#import <WebCore/HTMLInputElement.h>
55#import <WebCore/HTMLNames.h>
56#import <WebCore/HTMLTableCellElement.h>
57#import <WebCore/MIMETypeRegistry.h>
58#import <WebCore/NodeTraversal.h>
59#import <WebCore/Range.h>
60#import <WebCore/RenderElement.h>
61#import <WebCore/TextResourceDecoder.h>
62#import <WebKitLegacy/DOMHTMLInputElement.h>
63#import <yarr/RegularExpression.h>
64#import <wtf/Assertions.h>
65#import <wtf/StdLibExtras.h>
66#import <wtf/text/StringBuilder.h>
67
68using namespace WebCore;
69using namespace HTMLNames;
70using JSC::Yarr::RegularExpression;
71
// Holds the instance state of WebHTMLRepresentation. All ivars are @public and are read
// and written directly by WebHTMLRepresentation through its _private pointer.
@interface WebHTMLRepresentationPrivate : NSObject {
@public
    // Data source last handed to -setDataSource:. Stored by plain assignment; nothing in
    // this file retains it.
    WebDataSource *dataSource;

    // Set to YES after the response has been forwarded to the manual loader once.
    BOOL hasSentResponseToPlugin;
    // Set to YES once this representation has been added to WebHTMLRepresentationCount,
    // so that teardown knows to decrement the counter again.
    BOOL includedInWebKitStatistics;

    // Both set by -_redirectDataToManualLoader:forPluginView:, by plain assignment.
    // While pluginView is non-nil, incoming data, errors and completion are forwarded to
    // manualLoader.
    id <WebPluginManualLoader> manualLoader;
    NSView *pluginView;
}
@end

@implementation WebHTMLRepresentationPrivate
@end
86
87@implementation WebHTMLRepresentation
88
// Creates a mutable array containing each string of |set| bridged to NSString.
// The array is returned retained (NS_RETURNS_RETAINED); the caller owns it.
static NSMutableArray *newArrayWithStrings(const HashSet<String>& set) NS_RETURNS_RETAINED;
static NSMutableArray *newArrayWithStrings(const HashSet<String>& set)
{
    NSMutableArray *strings = [[NSMutableArray alloc] initWithCapacity:set.size()];

    HashSet<String>::const_iterator current = set.begin();
    const HashSet<String>::const_iterator last = set.end();
    while (current != last) {
        [strings addObject:(NSString *)(*current)];
        ++current;
    }

    return strings;
}
98
// Creates a mutable array holding the objects of |first| followed by the objects of |second|.
// Either argument may be nil, in which case it contributes no objects.
// The array is returned retained (NS_RETURNS_RETAINED); the caller owns it.
static NSMutableArray *newArrayByConcatenatingArrays(NSArray *first, NSArray *second) NS_RETURNS_RETAINED;
static NSMutableArray *newArrayByConcatenatingArrays(NSArray *first, NSArray *second)
{
    // Sending -mutableCopy to nil yields nil, and every later message to that nil result
    // is a no-op, so a nil |first| used to discard |second| too. Start from an empty
    // array instead.
    NSMutableArray *result = first ? [first mutableCopy] : [[NSMutableArray alloc] init];
    if (second)
        [result addObjectsFromArray:second];
    return result;
}
106
// Returns the non-image MIME types followed by the image MIME types.
// The combined array is built by the static initializer on first use and then reused.
+ (NSArray *)supportedMIMETypes
{
    static __unsafe_unretained NSArray *allSupportedTypes = newArrayByConcatenatingArrays([self supportedNonImageMIMETypes], [self supportedImageMIMETypes]);

    return allSupportedTypes;
}
112
// Returns the strings of MIMETypeRegistry::getSupportedNonImageMIMETypes() as an array.
// The array is built by the static initializer on first use and then reused.
+ (NSArray *)supportedNonImageMIMETypes
{
    static __unsafe_unretained NSArray *nonImageTypes = newArrayWithStrings(MIMETypeRegistry::getSupportedNonImageMIMETypes());

    return nonImageTypes;
}
118
// Returns the strings of MIMETypeRegistry::getSupportedImageMIMETypes() as an array.
// The array is built by the static initializer on first use and then reused.
+ (NSArray *)supportedImageMIMETypes
{
    static __unsafe_unretained NSArray *imageTypes = newArrayWithStrings(MIMETypeRegistry::getSupportedImageMIMETypes());

    return imageTypes;
}
124
// Returns the strings of MIMETypeRegistry::getUnsupportedTextMIMETypes() as an array.
// The array is built by the static initializer on first use and then reused.
+ (NSArray *)unsupportedTextMIMETypes
{
    static __unsafe_unretained NSArray *unsupportedTextTypes = newArrayWithStrings(MIMETypeRegistry::getUnsupportedTextMIMETypes());

    return unsupportedTextTypes;
}
130
// Initializes the representation and allocates its private state object.
- (id)init
{
    self = [super init];
    if (self)
        _private = [[WebHTMLRepresentationPrivate alloc] init];

    return self;
}
141
// Undoes the statistics bookkeeping done in -setDataSource: and releases the private state.
- (void)dealloc
{
    BOOL wasCounted = _private && _private->includedInWebKitStatistics;
    if (wasCounted)
        --WebHTMLRepresentationCount;

    [_private release];
    [super dealloc];
}
151
// Performs the same statistics bookkeeping as -dealloc; nothing is released here.
- (void)finalize
{
    if (_private) {
        if (_private->includedInWebKitStatistics)
            --WebHTMLRepresentationCount;
    }

    [super finalize];
}
159
// Records the plug-in view and the manual loader that should receive this representation's
// response, data, errors and completion from now on. Neither object is retained here.
- (void)_redirectDataToManualLoader:(id<WebPluginManualLoader>)manualLoader forPluginView:(NSView *)pluginView
{
    WebHTMLRepresentationPrivate *state = _private;
    state->manualLoader = manualLoader;
    state->pluginView = pluginView;
}
165
// Remembers |dataSource| (without retaining it) and, the first time its frame reports that
// it is included in WebKit statistics, counts this representation in WebHTMLRepresentationCount.
- (void)setDataSource:(WebDataSource *)dataSource
{
    _private->dataSource = dataSource;

    // Only ever count a given representation once.
    if (_private->includedInWebKitStatistics)
        return;
    if (![[dataSource webFrame] _isIncludedInWebKitStatistics])
        return;

    _private->includedInWebKitStatistics = YES;
    ++WebHTMLRepresentationCount;
}
175
// Returns whether the data source's response MIME type is "application/x-webarchive",
// compared case-insensitively.
- (BOOL)_isDisplayingWebArchive
{
    NSString *responseMIMEType = [_private->dataSource _responseMIMEType];
    return [responseMIMEType _webkit_isCaseInsensitiveEqualToString:@"application/x-webarchive"];
}
180
// Handles a chunk of main-resource data.
//
// Without a plug-in view the data is committed to the data source's frame. With a plug-in
// view the data is forwarded to the manual loader instead, preceded once by the response.
// In both cases the main resource load is cancelled when the frame's document is a
// stand-alone media document. Does nothing when the data source has no frame.
- (void)receivedData:(NSData *)data withDataSource:(WebDataSource *)dataSource
{
    WebFrame *webFrame = [dataSource webFrame];
    if (!webFrame)
        return;

    if (!_private->pluginView)
        [webFrame _commitData:data];

    // If the document is a stand-alone media document, now is the right time to cancel the WebKit load.
    // The core frame, its document and its document loader are each checked before use
    // rather than dereferenced unconditionally.
    Frame* coreFrame = core(webFrame);
    Document* document = coreFrame ? coreFrame->document() : 0;
    if (document && document->isMediaDocument()) {
        DocumentLoader* documentLoader = coreFrame->loader().documentLoader();
        if (documentLoader)
            documentLoader->cancelMainResourceLoad(coreFrame->loader().client().pluginWillHandleLoadError(documentLoader->response()));
    }

    if (_private->pluginView) {
        // The manual loader must see the response exactly once, before any data.
        if (!_private->hasSentResponseToPlugin) {
            [_private->manualLoader pluginView:_private->pluginView receivedResponse:[dataSource response]];
            _private->hasSentResponseToPlugin = YES;
        }

        [_private->manualLoader pluginView:_private->pluginView receivedData:data];
    }
}
204
// Forwards a load error to the manual loader when a plug-in view is installed;
// otherwise does nothing. |dataSource| is not used.
- (void)receivedError:(NSError *)error withDataSource:(WebDataSource *)dataSource
{
    if (!_private->pluginView)
        return;

    [_private->manualLoader pluginView:_private->pluginView receivedError:error];
}
211
// Called when loading finishes. With a plug-in view installed, only the manual loader is
// notified. Otherwise, when the data source's frame is the main frame of an editable
// WebView, the editing style is applied to the body element.
- (void)finishedLoadingWithDataSource:(WebDataSource *)dataSource
{
    WebFrame *frame = [dataSource webFrame];

    NSView *pluginView = _private->pluginView;
    if (pluginView) {
        [_private->manualLoader pluginViewFinishedLoading:pluginView];
        return;
    }

    if (!frame)
        return;

    WebView *webView = [frame webView];
    BOOL isEditableMainFrame = [webView mainFrame] == frame && [webView isEditable];
    if (isEditableMainFrame)
        core(frame)->editor().applyEditingStyleToBodyElement();
}
227
// Asks the data source's frame whether document source can be provided.
- (BOOL)canProvideDocumentSource
{
    WebFrame *frame = [_private->dataSource webFrame];
    return [frame _canProvideDocumentSource];
}
232
// Asks the data source's frame whether it can be saved as a web archive.
- (BOOL)canSaveAsWebArchive
{
    WebFrame *frame = [_private->dataSource webFrame];
    return [frame _canSaveAsWebArchive];
}
237
// Returns the document's source text.
//
// When a web archive is being displayed, the parsed archive data is interpreted as UTF-8.
// Otherwise the data source's raw bytes are decoded with the encoding of the document's
// text decoder. Returns nil when the frame, document, decoder or data is unavailable.
- (NSString *)documentSource
{
    WebDataSource *dataSource = _private->dataSource;

    if ([self _isDisplayingWebArchive]) {
        SharedBuffer *archiveBuffer = [dataSource _documentLoader]->parsedArchiveData();
        // createNSData() is called inside the message expression so that the object it
        // returns stays alive for the whole -initWithData:encoding: call.
        NSString *archiveSource = [[[NSString alloc] initWithData:(archiveBuffer ? archiveBuffer->createNSData().get() : nil) encoding:NSUTF8StringEncoding] autorelease];
        return archiveSource;
    }

    Frame* coreFrame = core([dataSource webFrame]);
    Document* document = coreFrame ? coreFrame->document() : 0;
    TextResourceDecoder* decoder = document ? document->decoder() : 0;
    if (!decoder)
        return nil;

    NSData *rawData = [dataSource data];
    if (!rawData)
        return nil;

    const char* bytes = reinterpret_cast<const char*>([rawData bytes]);
    return decoder->encoding().decode(bytes, [rawData length]);
}
260
// Returns the document loader's title string, or nil when that string is empty.
- (NSString *)title
{
    DocumentLoader* documentLoader = [_private->dataSource _documentLoader];
    return nsStringNilIfEmpty(documentLoader->title().string());
}
265
// Returns the DOM document of the data source's frame.
- (DOMDocument *)DOMDocument
{
    WebFrame *frame = [_private->dataSource webFrame];
    return [frame DOMDocument];
}
270
271#if !PLATFORM(IOS)
// Not implemented: always returns nil.
- (NSAttributedString *)attributedText
{
    // FIXME: Implement
    return nil;
}
277
// Returns the attributed string for the range running from (startNode, startOffset) to
// (endNode, endOffset), created in the start node's document.
// Returns nil when either node is nil.
- (NSAttributedString *)attributedStringFrom:(DOMNode *)startNode startOffset:(int)startOffset to:(DOMNode *)endNode endOffset:(int)endOffset
{
    Node* start = core(startNode);
    Node* end = core(endNode);
    // The start node is dereferenced to find the document, so a nil node would be a
    // null dereference rather than an empty result.
    if (!start || !end)
        return nil;

    return editingAttributedStringFromRange(*Range::create(start->document(), start, startOffset, end, endOffset));
}
282#endif
283
// Returns the HTMLFormElement behind |element|, or 0 when |element| is nil or is not a
// <form> element.
static HTMLFormElement* formElementFromDOMElement(DOMElement *element)
{
    Element* coreElement = core(element);
    if (!coreElement || !coreElement->hasTagName(formTag))
        return 0;

    return static_cast<HTMLFormElement*>(coreElement);
}
289
// Returns the first element associated with |form| whose name equals |name|.
// Returns nil when |form| is not a form element or no associated element has that name.
- (DOMElement *)elementWithName:(NSString *)name inForm:(DOMElement *)form
{
    HTMLFormElement* formElement = formElementFromDOMElement(form);
    if (!formElement)
        return nil;

    const Vector<FormAssociatedElement*>& associatedElements = formElement->associatedElements();
    AtomicString wantedName = name;

    for (unsigned index = 0; index < associatedElements.size(); ++index) {
        FormAssociatedElement& candidate = *associatedElements[index];
        if (!(candidate.name() == wantedName))
            continue;
        return kit(&candidate.asHTMLElement());
    }

    return nil;
}
304
// Returns the HTMLInputElement behind |element|, or 0 when |element| is nil or is not an
// input element.
static HTMLInputElement* inputElementFromDOMElement(DOMElement* element)
{
    Element* coreElement = core(element);
    if (!coreElement || !isHTMLInputElement(coreElement))
        return 0;

    return toHTMLInputElement(coreElement);
}
310
// Returns YES only for an input element that is a text field, is not a password field,
// and reports that it should autocomplete.
- (BOOL)elementDoesAutoComplete:(DOMElement *)element
{
    HTMLInputElement* input = inputElementFromDOMElement(element);
    if (!input)
        return NO;
    if (!input->isTextField() || input->isPasswordField())
        return NO;

    return input->shouldAutocomplete();
}
319
// Returns YES when |element| is an input element that is a password field.
- (BOOL)elementIsPassword:(DOMElement *)element
{
    HTMLInputElement* input = inputElementFromDOMElement(element);
    if (!input)
        return NO;

    return input->isPasswordField();
}
325
// Returns the form of |element| when it is an input element; nil for any other element.
- (DOMElement *)formForElement:(DOMElement *)element
{
    HTMLInputElement* input = inputElementFromDOMElement(element);
    if (!input)
        return nil;

    return kit(input->form());
}
331
// Returns the form reported by the frame selection's currentForm(), or nil when the data
// source has no core frame.
- (DOMElement *)currentForm
{
    // The data source may have no frame (other methods here check for that), so the core
    // frame must be checked before it is dereferenced.
    Frame* coreFrame = core([_private->dataSource webFrame]);
    if (!coreFrame)
        return nil;

    return kit(coreFrame->selection().currentForm());
}
336
// Returns the enumeratable elements associated with |form|, in association order.
// Returns nil when |form| is not a form element or when it has no enumeratable elements.
- (NSArray *)controlsInForm:(DOMElement *)form
{
    HTMLFormElement* formElement = formElementFromDOMElement(form);
    if (!formElement)
        return nil;

    // Created lazily so that a form without enumeratable controls yields nil, not an empty array.
    NSMutableArray *controls = nil;
    const Vector<FormAssociatedElement*>& associatedElements = formElement->associatedElements();
    for (unsigned index = 0; index < associatedElements.size(); ++index) {
        FormAssociatedElement* associated = associatedElements[index];
        if (!associated->isEnumeratable())
            continue; // Skip option elements, other duds

        DOMElement *control = kit(&associated->asHTMLElement());
        if (controls)
            [controls addObject:control];
        else
            controls = [NSMutableArray arrayWithObject:control];
    }

    return controls;
}
355
// Either get cached regexp or build one that matches any of the labels.
// The regexp we build is of the form:  (STR1|STR2|STRN)
//
// The returned expression is case-insensitive and is owned by the cache: callers must not
// delete it. Because a later call may evict and delete the least recently used entry, the
// pointer should not be kept across calls that use different label arrays.
//
// NOTE(review): labels are appended to the pattern verbatim, so regular expression
// metacharacters inside a label are interpreted as pattern syntax - confirm callers only
// pass plain-text labels.
static RegularExpression* regExpForLabels(NSArray *labels)
{
    // All the ObjC calls in this method are simple array and string
    // calls which we can assume do not raise exceptions

    // Parallel arrays that we use to cache regExps: regExpLabels[i] is the label array that
    // regExps[i] was built from. In practice the number of expressions that the app will
    // use is equal to the number of locales used in searching.
    static const unsigned int regExpCacheSize = 4;
    static NSMutableArray* regExpLabels = nil;
    DEPRECATED_DEFINE_STATIC_LOCAL(Vector<RegularExpression*>, regExps, ());
    // Matches a single "word character"; used to decide where \b anchors make sense.
    DEPRECATED_DEFINE_STATIC_LOCAL(RegularExpression, wordRegExp, ("\\w", TextCaseSensitive));

    RegularExpression* result;
    if (!regExpLabels)
        regExpLabels = [[NSMutableArray alloc] initWithCapacity:regExpCacheSize];
    // Looks the label array up with -indexOfObject:, i.e. by equality rather than identity.
    CFIndex cacheHit = [regExpLabels indexOfObject:labels];
    if (cacheHit != NSNotFound)
        result = regExps.at(cacheHit);
    else {
        StringBuilder pattern;
        pattern.append('(');
        unsigned numLabels = [labels count];
        unsigned i;
        for (i = 0; i < numLabels; i++) {
            String label = [labels objectAtIndex:i];

            bool startsWithWordChar = false;
            bool endsWithWordChar = false;
            if (label.length() != 0) {
                startsWithWordChar = wordRegExp.match(label.substring(0, 1)) >= 0;
                endsWithWordChar = wordRegExp.match(label.substring(label.length() - 1, 1)) >= 0;
            }

            if (i != 0)
                pattern.append('|');
            // Search for word boundaries only if label starts/ends with "word characters".
            // If we always searched for word boundaries, this wouldn't work for languages
            // such as Japanese.
            if (startsWithWordChar)
                pattern.appendLiteral("\\b");
            pattern.append(label);
            if (endsWithWordChar)
                pattern.appendLiteral("\\b");
        }
        pattern.append(')');
        result = new RegularExpression(pattern.toString(), TextCaseInsensitive);
    }

    // add regexp to the cache, making sure it is at the front for LRU ordering
    // (a hit at index 0 is already at the front, so nothing needs to move)
    if (cacheHit != 0) {
        if (cacheHit != NSNotFound) {
            // remove from old spot
            [regExpLabels removeObjectAtIndex:cacheHit];
            regExps.remove(cacheHit);
        }
        // add to start
        [regExpLabels insertObject:labels atIndex:0];
        regExps.insert(0, result);
        // trim if too big: drop the entry at the back, which is the least recently used one,
        // and free its expression
        if ([regExpLabels count] > regExpCacheSize) {
            [regExpLabels removeObjectAtIndex:regExpCacheSize];
            RegularExpression* last = regExps.last();
            regExps.removeLast();
            delete last;
        }
    }
    return result;
}
426
// Searches the text that precedes |element| in document order for any of |labels|.
//
// Walks backwards from |element| with NodeTraversal::previous() and runs the labels regexp
// over each visible text node, stopping at a <form> element, at another form control, or
// once charsSearchedThreshold characters have been searched. When the walk passes through a
// table cell, the cell above it is also searched via Frame::searchForLabelsAboveCell().
//
// Returns the matched text, or nil when nothing matched.
// |resultDistance| (optional) is reset to notFound, then set to the number of characters
// searched before the matching text node; for a cell-above match it is handed on to
// searchForLabelsAboveCell().
// |resultIsInCellAbove| (optional) is set to true only when the result came from the cell above.
static NSString* searchForLabelsBeforeElement(Frame* frame, NSArray* labels, Element* element, size_t* resultDistance, bool* resultIsInCellAbove)
{
    RegularExpression* regExp = regExpForLabels(labels);
    // We stop searching after we've seen this many chars
    const unsigned int charsSearchedThreshold = 500;
    // This is the absolute max we search.  We allow a little more slop than
    // charsSearchedThreshold, to make it more likely that we'll search whole nodes.
    const unsigned int maxCharsSearched = 600;
    // If the starting element is within a table, the cell that contains it
    HTMLTableCellElement* startingTableCell = 0;
    bool searchedCellAbove = false;

    // Give the out-parameters their "nothing found" values up front.
    if (resultDistance)
        *resultDistance = notFound;
    if (resultIsInCellAbove)
        *resultIsInCellAbove = false;

    // walk backwards in the node tree, until another element, or form, or end of tree
    unsigned lengthSearched = 0;
    Node* n;
    for (n = NodeTraversal::previous(element);
         n && lengthSearched < charsSearchedThreshold;
         n = NodeTraversal::previous(n))
    {
        if (n->hasTagName(formTag)
            || (n->isHTMLElement() && toElement(n)->isFormControlElement()))
        {
            // We hit another form element or the start of the form - bail out
            break;
        } else if (n->hasTagName(tdTag) && !startingTableCell) {
            // Remember the first <td> met while walking backwards.
            startingTableCell = static_cast<HTMLTableCellElement*>(n);
        } else if (n->hasTagName(trTag) && startingTableCell) {
            // Reached a <tr> after recording a cell: try the cell above before going on.
            NSString* result = frame->searchForLabelsAboveCell(*regExp, startingTableCell, resultDistance);
            if (result && [result length] > 0) {
                if (resultIsInCellAbove)
                    *resultIsInCellAbove = true;
                return result;
            }
            searchedCellAbove = true;
        } else if (n->isTextNode() && n->renderer() && n->renderer()->style().visibility() == VISIBLE) {
            // For each text chunk, run the regexp
            String nodeString = n->nodeValue();
            // maxCharsSearched allows 100 chars of slop over the threshold, to make it more
            // likely that we'll search whole nodes. A node that would exceed even that is
            // cut down to its trailing part that still fits within the threshold.
            if (lengthSearched + nodeString.length() > maxCharsSearched)
                nodeString = nodeString.right(charsSearchedThreshold - lengthSearched);
            // Search from the end of the text backwards.
            int pos = regExp->searchRev(nodeString);
            if (pos >= 0) {
                if (resultDistance)
                    *resultDistance = lengthSearched;
                return nodeString.substring(pos, regExp->matchedLength());
            }
            lengthSearched += nodeString.length();
        }
    }

    // If we started in a cell, but bailed because we found the start of the form or the
    // previous element, we still might need to search the row above us for a label.
    if (startingTableCell && !searchedCellAbove) {
        NSString* result = frame->searchForLabelsAboveCell(*regExp, startingTableCell, resultDistance);
        if (result && [result length] > 0) {
            if (resultIsInCellAbove)
                *resultIsInCellAbove = true;
            return result;
        }
    }

    return nil;
}
495
// Returns the longest part of |stringToMatch| that matches any of |labels|, or nil when the
// string is empty or nothing matches. Digits and underscores in the string are replaced by
// spaces before matching, so the returned text comes from that normalized copy.
static NSString *matchLabelsAgainstString(NSArray *labels, const String& stringToMatch)
{
    if (stringToMatch.isEmpty())
        return nil;

    // Make numbers and _'s in field names behave like word boundaries, e.g., "address2"
    String candidate = stringToMatch;
    replace(candidate, RegularExpression("\\d", TextCaseSensitive), " ");
    candidate.replace('_', ' ');

    RegularExpression* labelsRegExp = regExpForLabels(labels);

    // Try every match position in turn and keep the longest match; when two matches have
    // the same length, the later one wins.
    int longestPos = -1;
    int longestLength = -1;
    int searchFrom = 0;
    for (;;) {
        int matchPos = labelsRegExp->match(candidate, searchFrom);
        if (matchPos == -1)
            break;

        int matchLength = labelsRegExp->matchedLength();
        if (matchLength >= longestLength) {
            longestPos = matchPos;
            longestLength = matchLength;
        }
        searchFrom = matchPos + 1;
    }

    if (longestPos == -1)
        return nil;
    return candidate.substring(longestPos, longestLength);
}
530
// Matches |labels| against the element's name attribute and, only when that produces no
// match, against its id attribute.
// See 7538330 for one popular site that benefits from the id attribute check.
static NSString* matchLabelsAgainstElement(NSArray* labels, Element* element)
{
    String nameMatch = matchLabelsAgainstString(labels, element->getAttribute(nameAttr));
    if (nameMatch.isEmpty())
        return matchLabelsAgainstString(labels, element->getAttribute(idAttr));

    return nameMatch;
}
541
542
// Convenience form of -searchForLabels:beforeElement:resultDistance:resultIsInCellAbove:
// for callers that want only the matched text and neither out-parameter.
- (NSString *)searchForLabels:(NSArray *)labels beforeElement:(DOMElement *)element
{
    NSString *matchedLabel = [self searchForLabels:labels beforeElement:element resultDistance:NULL resultIsInCellAbove:NULL];
    return matchedLabel;
}
547
// Searches the text preceding |element| for any of |labels| and returns the matched text,
// or nil when nothing matched.
//
// |outDistance| (optional) receives the distance reported by the search, or NSNotFound
// when the search reported none.
// |outIsInCellAbove| (optional) receives whether the match came from the table cell above.
// When the data source has no core frame or |element| is nil, nothing is searched: nil is
// returned and the out-parameters get their "nothing found" values.
- (NSString *)searchForLabels:(NSArray *)labels beforeElement:(DOMElement *)element resultDistance:(NSUInteger*)outDistance resultIsInCellAbove:(BOOL*)outIsInCellAbove
{
    Frame* coreFrame = core([_private->dataSource webFrame]);
    Element* coreElement = core(element);
    if (!coreFrame || !coreElement) {
        // The search helper dereferences both pointers, so report "nothing found" here
        // with the same values a failed search produces.
        if (outDistance)
            *outDistance = NSNotFound;
        if (outIsInCellAbove)
            *outIsInCellAbove = NO;
        return nil;
    }

    size_t distance = notFound;
    bool isInCellAbove = false;

    NSString *result = searchForLabelsBeforeElement(coreFrame, labels, coreElement, &distance, &isInCellAbove);

    if (outDistance) {
        // Translate the internal "not found" marker into the Cocoa one.
        if (distance == notFound)
            *outDistance = NSNotFound;
        else
            *outDistance = distance;
    }

    if (outIsInCellAbove)
        *outIsInCellAbove = isInCellAbove;

    return result;
}
567
// Matches |labels| against the name attribute of |element|, falling back to its id
// attribute. Returns the matched text, or nil when nothing matches or |element| is nil.
- (NSString *)matchLabels:(NSArray *)labels againstElement:(DOMElement *)element
{
    // The matching helper reads attributes through the element pointer, so a nil element
    // must be rejected before calling it.
    Element* coreElement = core(element);
    if (!coreElement)
        return nil;

    return matchLabelsAgainstElement(labels, coreElement);
}
572
573@end
574