sift
Extract inline citations from a Wikipedia corpus.
Example output (a single extracted document, pretty-printed as JSON):
print json.dumps(docs.take(1)[-1], indent=2, sort_keys=True)
{
"_id": "en.wikipedia.org/wiki/Tonje_Sagstuen",
"citations": [
{
"offset": 179,
"title": "\"1992 Summer Olympics \u2013 Barcelona, Spain \u2013 Handball\" \"databaseOlympics.com\" (Retrieved on February 12, 2008)",
"type": "simple",
"url": "www.databaseolympics.com/games/gamessport.htm?g=23&sp=HAN"
},
{
"accessdate": "13 December 2013",
"date": "7 June 2012",
"first": "Mina",
"language": "Norwegian",
"last": "Watz",
"offset": 343,
"title": "Ny ansvarlig redakt\u00f8r i OA",
"type": "news"
}
],
"links": [
{
"start": 0,
"stop": 14,
"target": "en.wikipedia.org/wiki/Tonje_Sagstuen"
},
{
"start": 42,
"stop": 51,
"target": "en.wikipedia.org/wiki/L\u00f8renskog"
},
{
"start": 58,
"stop": 67,
"target": "en.wikipedia.org/wiki/Norway"
},
{
"start": 68,
"stop": 81,
"target": "en.wikipedia.org/wiki/Team_handball"
},
{
"start": 145,
"stop": 165,
"target": "en.wikipedia.org/wiki/1992_Summer_Olympics"
},
{
"start": 169,
"stop": 178,
"target": "en.wikipedia.org/wiki/Barcelona"
},
{
"start": 321,
"stop": 341,
"target": "en.wikipedia.org/wiki/Oppland_Arbeiderblad"
}
],
"text": "Tonje Sagstuen (born November 17, 1971 in L\u00f8renskog) is a Norwegian team handball player and Olympic medalist. She received silver medals at the 1992 Summer Olympics in Barcelona. \nTonje Sagstuen played 217 games for the national team during her career, scoring 593 goals.\n\nIn 2012 she became the new editor-in-chief of \"Oppland Arbeiderblad\".\n\nReferences"
}