【发布时间】:2019-09-12 17:53:28
【问题描述】:
这是我第一次尝试 spaCy。我有一份 spaCy 训练数据,格式如下。
[
{
"id":0,
"paragraphs":[
{
"sentences":[
{
"tokens":[
{
"orth":"Michael",
"tag":"-",
"ner":"U-PER"
}
]
}
]
}
]
},
{
"id":0,
"paragraphs":[
{
"sentences":[
{
"tokens":[
{
"orth":"Irwin",
"tag":"-",
"ner":"U-PER"
}
]
}
]
}
]
},
{
"id":0,
"paragraphs":[
{
"sentences":[
{
"tokens":[
{
"orth":"Jordan",
"tag":"-",
"ner":"U-PER"
}
]
}
]
}
]
},
{
"id":0,
"paragraphs":[
{
"sentences":[
{
"tokens":[
{
"orth":"is",
"tag":"-",
"ner":"O"
}
]
}
]
}
]
},
{
"id":0,
"paragraphs":[
{
"sentences":[
{
"tokens":[
{
"orth":"an",
"tag":"-",
"ner":"O"
}
]
}
]
}
]
},
{
"id":0,
"paragraphs":[
{
"sentences":[
{
"tokens":[
{
"orth":"American",
"tag":"-",
"ner":"O"
}
]
}
]
}
]
},
{
"id":0,
"paragraphs":[
{
"sentences":[
{
"tokens":[
{
"orth":"scientist",
"tag":"-",
"ner":"O"
}
]
}
]
}
]
},
{
"id":0,
"paragraphs":[
{
"sentences":[
{
"tokens":[
{
"orth":"Professor",
"tag":"-",
"ner":"O"
}
]
}
]
}
]
},
{
"id":0,
"paragraphs":[
{
"sentences":[
{
"tokens":[
{
"orth":"at",
"tag":"-",
"ner":"O"
}
]
}
]
}
]
},
{
"id":0,
"paragraphs":[
{
"sentences":[
{
"tokens":[
{
"orth":"the",
"tag":"-",
"ner":"O"
}
]
}
]
}
]
},
{
"id":0,
"paragraphs":[
{
"sentences":[
{
"tokens":[
{
"orth":"University",
"tag":"-",
"ner":"U-ORG"
}
]
}
]
}
]
},
{
"id":0,
"paragraphs":[
{
"sentences":[
{
"tokens":[
{
"orth":"of",
"tag":"-",
"ner":"U-ORG"
}
]
}
]
}
]
},
{
"id":0,
"paragraphs":[
{
"sentences":[
{
"tokens":[
{
"orth":"California",
"tag":"-",
"ner":"U-ORG"
}
]
}
]
}
]
},
{
"id":0,
"paragraphs":[
{
"sentences":[
{
"tokens":[
{
"orth":"Berkeley",
"tag":"-",
"ner":"U-LOC"
}
]
}
]
}
]
},
{
"id":0,
"paragraphs":[
{
"sentences":[
{
"tokens":[
{
"orth":"and",
"tag":"-",
"ner":"O"
}
]
}
]
}
]
},
{
"id":0,
"paragraphs":[
{
"sentences":[
{
"tokens":[
{
"orth":"a",
"tag":"-",
"ner":"O"
}
]
}
]
}
]
},
{
"id":0,
"paragraphs":[
{
"sentences":[
{
"tokens":[
{
"orth":"researcher",
"tag":"-",
"ner":"O"
}
]
}
]
}
]
},
{
"id":0,
"paragraphs":[
{
"sentences":[
{
"tokens":[
{
"orth":"in",
"tag":"-",
"ner":"O"
}
]
}
]
}
]
},
{
"id":0,
"paragraphs":[
{
"sentences":[
{
"tokens":[
{
"orth":"machine",
"tag":"-",
"ner":"O"
}
]
}
]
}
]
},
{
"id":0,
"paragraphs":[
{
"sentences":[
{
"tokens":[
{
"orth":"learning",
"tag":"-",
"ner":"O"
}
]
}
]
}
]
},
{
"id":0,
"paragraphs":[
{
"sentences":[
{
"tokens":[
{
"orth":"statistics",
"tag":"-",
"ner":"O"
}
]
}
]
}
]
},
{
"id":0,
"paragraphs":[
{
"sentences":[
{
"tokens":[
{
"orth":"and",
"tag":"-",
"ner":"O"
}
]
}
]
}
]
},
{
"id":0,
"paragraphs":[
{
"sentences":[
{
"tokens":[
{
"orth":"artificial",
"tag":"-",
"ner":"O"
}
]
}
]
}
]
},
{
"id":0,
"paragraphs":[
{
"sentences":[
{
"tokens":[
{
"orth":"intelligence",
"tag":"-",
"ner":"O"
}
]
}
]
}
]
},
{
"id":0,
"paragraphs":[
{
"sentences":[
{
"tokens":[
{
"orth":"",
"tag":"",
"ner":"O"
}
]
}
]
}
]
}
]
到目前为止,我看到的所有训练 spacy 模型 (https://spacy.io/usage/training#spacy-train-cli) 的示例都适用于以下类型的输入
有人可以举个例子,说明如何使用第一种格式的输入来训练 spaCy 模型吗?
【问题讨论】:
标签: python-3.x spacy