且构网

分享程序员开发的那些事...
且构网 - 分享程序员编程开发的那些事

使用NSInputStream流式传输NSXMLParser

更新时间:2022-02-03 22:19:12

-[NSXMLParser initWithStream:] 是 NSXMLParser 目前唯一能够对数据进行流式解析的接口。将它与以增量方式提供数据的异步 NSURLConnection 对接比较麻烦,因为 NSXMLParser 采用阻塞的、基于“拉取”(pull)的方式从 NSInputStream 读取数据。也就是说,-[NSXMLParser parse] 在处理 NSInputStream 时会执行类似下面的操作:

-[NSXMLParser initWithStream:] is the only interface to NSXMLParser that currently performs a streaming parse of the data. Hooking it up to an asynchronous NSURLConnection that's providing data incrementally is unwieldy because NSXMLParser takes a blocking, "pull"-based approach to reading from the NSInputStream. That is, -[NSXMLParser parse] does something like the following when dealing with an NSInputStream:

// Illustrative sketch of the parser's read loop as described in the text above;
// not NSXMLParser's actual source.
while (1) {
    // Synchronous "pull": ask the stream for up to maxLength bytes.
    NSInteger length = [stream read:buffer maxLength:maxLength];
    // A result of 0 ends the loop.
    if (!length)
        break;

    // Parse data …
}

为了以增量方式向该解析器提供数据,需要一个自定义的 NSInputStream 子类,把在后台队列或 runloop 上通过 NSURLConnectionDelegate 回调接收到的数据,转交给 NSXMLParser 正在等待的 -read:maxLength: 调用。

In order to incrementally provide data to this parser a custom NSInputStream subclass is needed that funnels data received by the NSURLConnectionDelegate calls on a background queue or runloop over to the -read:maxLength: call that NSXMLParser is waiting on.

概念验证实现如下:

#include <Foundation/Foundation.h>

/// An NSInputStream that serves the body of a URL download.
///
/// NSURLConnection delegate callbacks (delivered on a private operation queue)
/// append each received chunk to `bufferedData`; `-read:maxLength:` blocks the
/// calling thread until a chunk is available or the download has ended.
/// This class is written for manual reference counting (no ARC).
@interface ReceivedDataStream : NSInputStream <NSURLConnectionDataDelegate>
/// Connection performing the download; started by -open, cancelled by -close.
@property (retain) NSURLConnection *connection;
/// FIFO of NSData chunks received but not yet read; guarded by @synchronized (self).
@property (retain) NSMutableArray *bufferedData;
/// YES once the connection finished or failed, or the stream was closed.
@property (assign, getter=isFinished) BOOL finished;
/// Signalled whenever a chunk is queued or the stream finishes, to wake a blocked reader.
@property (retain) dispatch_semaphore_t semaphore;
@end

@interface ReceivedDataStream ()
/// Error reported by the connection if the download failed; exposed via -streamError.
@property (retain) NSError *connectionError;
@end

@implementation ReceivedDataStream

/// Creates a stream that will download `url` once it is opened.
/// @param url The URL whose response body the stream serves.
/// @return The initialized stream, or nil if the superclass initializer fails.
- (instancetype)initWithContentsOfURL:(NSURL *)url
{
    if (!(self = [super init]))
        return nil;

    NSURLRequest *request = [NSURLRequest requestWithURL:url];
    self.connection = [[[NSURLConnection alloc] initWithRequest:request delegate:self startImmediately:NO] autorelease];
    // Deliver delegate callbacks on a background queue so they keep arriving
    // while the reading thread is blocked inside -read:maxLength:.
    self.connection.delegateQueue = [[[NSOperationQueue alloc] init] autorelease];
    self.bufferedData = [NSMutableArray array];

    // dispatch_semaphore_create returns an owned (+1) object and the retain
    // property takes its own reference, so give up ours to avoid a leak.
    dispatch_semaphore_t semaphore = dispatch_semaphore_create(0);
    self.semaphore = semaphore;
    dispatch_release(semaphore);

    return self;
}

- (void)dealloc
{
    self.connection = nil;
    self.bufferedData = nil;
    self.semaphore = nil;
    self.connectionError = nil;

    [super dealloc];
}

/// YES if at least one received chunk is waiting to be read.
- (BOOL)hasBufferedData
{
    @synchronized (self) { return self.bufferedData.count > 0; }
}

#pragma mark - NSInputStream overrides

/// Starts the download.
- (void)open
{
    NSLog(@"open");
    [self.connection start];
}

/// Cancels the download and releases any reader blocked in -read:maxLength:.
- (void)close
{
    NSLog(@"close");
    [self.connection cancel];
    // A cancelled connection sends no further delegate messages, so the
    // stream has to be marked finished here or a blocked reader never wakes.
    self.finished = YES;
    dispatch_semaphore_signal(self.semaphore);
}

/// The error that terminated the download, or nil if none occurred.
- (NSError *)streamError
{
    return self.connectionError;
}

/// Copies up to `maxLength` bytes of downloaded data into `buffer`, blocking
/// until data is available or the download has ended.
/// @param buffer Destination for the bytes; must hold at least `maxLength` bytes.
/// @param maxLength Maximum number of bytes to copy.
/// @return The number of bytes copied, 0 at the end of the stream, or -1 if
///         the download failed (see -streamError).
- (NSInteger)read:(uint8_t *)buffer maxLength:(NSUInteger)maxLength
{
    NSLog(@"read:%p maxLength:%lu", buffer, (unsigned long)maxLength);

    // Wait in a loop: the semaphore is signalled once per received chunk even
    // when no reader is waiting, so a wait can return on a stale signal with
    // nothing new to read. Re-check the real condition after every wake-up.
    while (!self.isFinished && !self.hasBufferedData)
        dispatch_semaphore_wait(self.semaphore, DISPATCH_TIME_FOREVER);

    NSData *data = nil;
    @synchronized (self) {
        if (self.bufferedData.count > 0) {
            data = [[self.bufferedData[0] retain] autorelease];
            [self.bufferedData removeObjectAtIndex:0];
            if (data.length > maxLength) {
                // The caller's buffer is too small for the whole chunk; put
                // the unread tail back at the front of the queue.
                NSData *remainingData = [data subdataWithRange:NSMakeRange(maxLength, data.length - maxLength)];
                [self.bufferedData insertObject:remainingData atIndex:0];
            }
        }
    }

    // Nothing left to hand out: the stream ended, either normally or with an error.
    if (!data)
        return self.connectionError ? -1 : 0;

    NSUInteger copiedLength = MIN([data length], maxLength);
    memcpy(buffer, [data bytes], copiedLength);
    return copiedLength;
}


#pragma mark - NSURLConnectionDelegate methods

/// Queues a received chunk and wakes the reader.
- (void)connection:(NSURLConnection *)connection didReceiveData:(NSData *)data
{
    NSLog(@"connection:%@ didReceiveData:…", connection);
    // An empty chunk would make -read:maxLength: return 0, which readers
    // interpret as the end of the stream.
    if (data.length == 0)
        return;

    @synchronized (self) {
        [self.bufferedData addObject:data];
    }
    dispatch_semaphore_signal(self.semaphore);
}

/// Marks the stream as finished and wakes the reader.
- (void)connectionDidFinishLoading:(NSURLConnection *)connection
{
    NSLog(@"connectionDidFinishLoading:%@", connection);
    self.finished = YES;
    dispatch_semaphore_signal(self.semaphore);
}

/// Records the failure, marks the stream as finished and wakes the reader so
/// that it does not block forever on a download that will never complete.
- (void)connection:(NSURLConnection *)connection didFailWithError:(NSError *)error
{
    NSLog(@"connection:%@ didFailWithError:%@", connection, error);
    // Store the error before setting the flag so a reader that observes
    // `finished` also observes the error.
    self.connectionError = error;
    self.finished = YES;
    dispatch_semaphore_signal(self.semaphore);
}

@end

/// Minimal NSXMLParserDelegate for the demo: logs parser events and stops the
/// current run loop once parsing has ended, whether it succeeded or failed.
@interface ParserDelegate : NSObject <NSXMLParserDelegate>
@end

@implementation ParserDelegate

/// Logs every element start tag together with its namespace and attributes.
- (void)parser:(NSXMLParser *)parser didStartElement:(NSString *)elementName namespaceURI:(NSString *)namespaceURI qualifiedName:(NSString *)qualifiedName attributes:(NSDictionary *)attributeDict
{
    NSLog(@"parser:%@ didStartElement:%@ namespaceURI:%@ qualifiedName:%@ attributes:%@", parser, elementName, namespaceURI, qualifiedName, attributeDict);
}

/// Logs the end of the document and stops the run loop this method runs on.
- (void)parserDidEndDocument:(NSXMLParser *)parser
{
    NSLog(@"parserDidEndDocument:%@", parser);
    CFRunLoopStop(CFRunLoopGetCurrent());
}

/// Logs a fatal parse error and stops the run loop. Without this, a failed
/// parse never reaches -parserDidEndDocument: and nothing stops the run loop.
- (void)parser:(NSXMLParser *)parser parseErrorOccurred:(NSError *)parseError
{
    NSLog(@"parser:%@ parseErrorOccurred:%@", parser, parseError);
    CFRunLoopStop(CFRunLoopGetCurrent());
}

@end


/// Demo entry point: streams an XML document from the network through
/// ReceivedDataStream into NSXMLParser and logs the parser events.
/// @return 0 if the document was parsed without error, 1 otherwise.
int main(int argc, char **argv)
{
    int status = 0;

    @autoreleasepool {

        NSURL *url = [NSURL URLWithString:@"http://www.iana.org/assignments/service-names-port-numbers/service-names-port-numbers.xml"];
        // This file uses manual reference counting: balance each alloc with an
        // autorelease so the objects are released when the pool drains.
        ReceivedDataStream *stream = [[[ReceivedDataStream alloc] initWithContentsOfURL:url] autorelease];
        NSXMLParser *parser = [[[NSXMLParser alloc] initWithStream:stream] autorelease];
        parser.delegate = [[[ParserDelegate alloc] init] autorelease];

        // Schedule the (blocking) parse to run from inside the run loop, so the
        // delegate's CFRunLoopStop call applies to the loop started below.
        [parser performSelector:@selector(parse) withObject:nil afterDelay:0.0];

        CFRunLoopRun();

        // Report a failed parse through the process exit status.
        if (parser.parserError)
            status = 1;

    }
    return status;
}