【发布时间】:2016-01-24 04:00:56
【问题描述】:
在《Node.js the Right Way》一书中有一个解析 RDF 文件的示例,但我无法让这个示例按照书中描述的那样工作。
RDF 解析器是这样的:
"use strict";
const fs = require("fs");
const cheerio = require("cheerio");
module.exports = function(filename, callback) {
fs.readFile(filename, function(err, data) {
if (err) {
return callback(err);
}
let $ = cheerio.load(data.toString());
let collect = function(index, elem) {
return $(elem).text();
};
callback(null, {
_id: $('pgterms\\:ebook').attr('rdf:about').replace('ebooks/', ''),
title: $('dcterms\\:title').text(),
authors: $('pgterms\\:agent pgterms\\:name').map(collect),
subjects: $('[rdf\\:resource$="/LCSH"] ~ rdf\\:value').map(collect)
});
});
};
输入的RDF文件是这样的:
<rdf:RDF>
<pgterms:ebook rdf:about="ebooks/132">
<dcterms:subject>
<rdf:Description>
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/LCSH"/>
<rdf:value>Military art and science -- Early works to 1800</rdf:value>
<rdf:value>War -- Early works to 1800</rdf:value>
</rdf:Description>
</dcterms:subject>
<dcterms:title>The Art of War</dcterms:title>
</pgterms:ebook>
<pgterms:agent rdf:about="2009/agents/4349">
<pgterms:name>Sunzi (6th cent. BC)</pgterms:name>
</pgterms:agent>
<pgterms:agent rdf:about="2009/agents/5101">
<pgterms:name>Giles, Lionel</pgterms:name>
</pgterms:agent>
</rdf:RDF>
我相信解析器的输出应该是这样的:
{
"_id": "132",
"title": "The Art of War",
"authors": [
"Sunzi (6th cent. BC)",
"Giles, Lionel"
],
"subjects": [
"Military art and science -- Early works to 1800",
"War -- Early works to 1800"
]
}
代码可以运行并解析 RDF 文件,但输出中多了很多我不熟悉的额外内容(例如 options、_root、prevObject 等属性),而且 subjects 中没有任何条目。我不知道是我没有正确使用示例代码,还是书中的源代码存在错误。
我用来运行解析器的命令是这样的:
node -e 'require("./lib/rdf-parser.js")("test/pg132.rdf", console.log)'
我得到以下输出而不是上面的预期输出:
null { _id: '132',
title: 'The Art of War',
authors:
{ '0': 'Sunzi (6th cent. BC)',
'1': 'Giles, Lionel',
options:
{ withDomLvl1: true,
normalizeWhitespace: false,
xmlMode: false,
decodeEntities: true },
_root: { '0': [Object], options: [Object], length: 1, _root: [Circular] },
length: 2,
prevObject:
{ '0': [Object],
'1': [Object],
options: [Object],
_root: [Object],
length: 2,
prevObject: [Object] } },
subjects:
{ options:
{ withDomLvl1: true,
normalizeWhitespace: false,
xmlMode: false,
decodeEntities: true },
_root: { '0': [Object], options: [Object], length: 1, _root: [Circular] },
length: 0,
prevObject:
{ options: [Object],
_root: [Object],
length: 0,
prevObject: [Object] } } }
问题出在哪里?
【问题讨论】:
标签: json xml node.js parsing rdf