API

类型

主要类型

/**
 * A plain object used as a string-keyed map.
 *
 * @typeParam T - Type of every value stored in the map; defaults to `any`.
 */
type Dictionary<T = any> = Record<string, T>

/**
 * Top-level configuration object. Every member is optional.
 */
type Config = {
    /** Upper bound on nesting depth — presumably for nested/recursive fields; TODO confirm against the implementation. */
    maxNestedLevel?: number;
    /** Map from field name to the query describing that field. */
    fields?: Field;
    /** Settings handed to the exporter. */
    export?: ExportConfig;
}

/**
 * Export settings.
 */
type ExportConfig = {
    /** Required name — presumably the name of a registered exporter; TODO confirm. */
    name: string;
}

/**
 * A set of fields: each key is a field name and each value is the
 * {@link Query} describing that field.
 */
type Field = { [name: string]: Query }

/**
 * Description of a single field. One of:
 *
 * - a string (presumably a typed selector string such as `css:a[href] @href`;
 *   TODO confirm the accepted formats),
 * - a {@link CallbackSelector} function, or
 * - an options object whose members are all optional.
 */
type Query =
    | string
    | CallbackSelector
    | {
        /** Selector type used for `area` (assumed; verify against the implementation). */
        areaType?: string;
        /** Selector for the area that `selector` operates on (assumed; verify). */
        area?: Selector;
        /** Selector type used for `selector` (assumed; verify). */
        type?: string;
        /** Selector that picks the field's value. */
        selector?: Selector;
        /** Nested field definitions. */
        fields?: Field;
        /** Presumably marks the field as multi-valued; TODO confirm. */
        repeated?: boolean;
        /** Presumably applies this query recursively; TODO confirm. */
        recursive?: boolean;
        /** Function that receives a field value and returns a value. */
        callback?: (field: any) => any;
    }

/**
 * A selector written as a function: receives the content and returns either
 * a single result or an array of results.
 *
 * @typeParam I - Type of the content passed in; defaults to `any`.
 * @typeParam O - Type of one result; defaults to `any`.
 */
type CallbackSelector<I = any, O = any> = (content: I) => (O | O[])

/**
 * Anything usable as a selector: a string, a regular expression, or a
 * {@link CallbackSelector} function.
 */
type Selector =
    | string
    | RegExp
    | CallbackSelector

UrlMatch

/** URL pattern given as a string (the name suggests an exact/absolute match — TODO confirm). */
type UrlAbsoluteMatch = string

/** URL pattern given as a regular expression. */
type UrlRegexMatch = RegExp

/** URL pattern given as a predicate that receives the URL and returns a boolean. */
type UrlCallbackMatch = (url: string) => boolean

/** Any supported URL pattern form. */
type UrlMatch =
    | UrlAbsoluteMatch
    | UrlRegexMatch
    | UrlCallbackMatch

方法

Extractor<I, O>

| 方法名 | 说明 |
| --- | --- |
| `registerExtractor(name: string, extractor: IExtractor): void` | 注册一个提取器 |
| `registerExporter(name: string, exporter: IExporter): void` | 注册一个导出器 |
| `findExtractor(name: string): IExtractor<any, I, O>` | 从已注册的提取器列表中获取指定名字的提取器 |
| `findExporter(name: string): IExporter<O>` | 从已注册的导出器列表中获取指定名字的导出器 |
| `findUrls(content: string): string[]` | 使用 `css:a[href] @href` 选择器从 content 中提取出所有的 url |
| `isUrlMatch(url: string, patterns: UrlMatch[]): boolean` | 判断指定的 url 是否满足任一 UrlMatch 模式 |
| `parseSelector(selector: string): { type: string, selector: string }` | 解析字符串形式的 TypedSelector |
| `fetchSingleField(type: string, selector: any, content: I): O` | 使用指定的选择器提取单个字段 |
| `fetchRepeatedField(type: string, selector: any, content: I): O[]` | 使用指定的选择器提取单个重复字段 |
| `extract(content: I, fields?: Field): O` | 从 content 中提取多个字段 |
| `export(data: O, name?: string): void` | 使用指定的或默认导出器导出数据 |

IExtractor<T, I, O>

| 方法名 | 说明 |
| --- | --- |
| `extractOne(selector: T, content: I): O` | 提取单个字段 |
| `extractAll(selector: T, content: I): O[]` | 提取单个重复字段 |

Util

| 方法名 | 说明 |
| --- | --- |
| `simpleCloneDeep<T>(data: T): T` | 深拷贝（仅支持基本可序列化类型） |
| `isEmpty(obj: object \| any[]): boolean` | 判断 obj 是否为空数组或空对象 |

IExporter<T>

| 方法名 | 说明 |
| --- | --- |
| `export(data: T, config: ExportConfig): void` | 导出数据 |