ir_datasets: WikIR
A suite of IR benchmarks in multiple languages built from Wikipedia.
A small version of WikIR for English.
Language: en
Example
import ir_datasets
dataset = ir_datasets.load('wikir/en1k')
for doc in dataset.docs_iter():
doc # namedtuple<doc_id, text>
Test set of wikir/en1k. Scoreddocs are the provided BM25 run.
Language: en
Example
import ir_datasets
dataset = ir_datasets.load('wikir/en1k/test')
for query in dataset.queries_iter():
query # namedtuple<query_id, text>
Language: en
Example
import ir_datasets
dataset = ir_datasets.load('wikir/en1k/test')
for doc in dataset.docs_iter():
doc # namedtuple<doc_id, text>
Relevance levels
Rel. | Definition |
---|---|
0 | Otherwise |
1 | There is a link to the article with the query as its title in the first sentence |
2 | Query is the article title |
Example
import ir_datasets
dataset = ir_datasets.load('wikir/en1k/test')
for qrel in dataset.qrels_iter():
qrel # namedtuple<query_id, doc_id, relevance, iteration>
Example
import ir_datasets
dataset = ir_datasets.load('wikir/en1k/test')
for scoreddoc in dataset.scoreddocs_iter():
scoreddoc # namedtuple<query_id, doc_id, score>
Training set of wikir/en1k. Scoreddocs are the provided BM25 run.
Language: en
Example
import ir_datasets
dataset = ir_datasets.load('wikir/en1k/training')
for query in dataset.queries_iter():
query # namedtuple<query_id, text>
Language: en
Example
import ir_datasets
dataset = ir_datasets.load('wikir/en1k/training')
for doc in dataset.docs_iter():
doc # namedtuple<doc_id, text>
Relevance levels
Rel. | Definition |
---|---|
0 | Otherwise |
1 | There is a link to the article with the query as its title in the first sentence |
2 | Query is the article title |
Example
import ir_datasets
dataset = ir_datasets.load('wikir/en1k/training')
for qrel in dataset.qrels_iter():
qrel # namedtuple<query_id, doc_id, relevance, iteration>
Example
import ir_datasets
dataset = ir_datasets.load('wikir/en1k/training')
for scoreddoc in dataset.scoreddocs_iter():
scoreddoc # namedtuple<query_id, doc_id, score>
Validation set of wikir/en1k. Scoreddocs are the provided BM25 run.
Language: en
Example
import ir_datasets
dataset = ir_datasets.load('wikir/en1k/validation')
for query in dataset.queries_iter():
query # namedtuple<query_id, text>
Language: en
Example
import ir_datasets
dataset = ir_datasets.load('wikir/en1k/validation')
for doc in dataset.docs_iter():
doc # namedtuple<doc_id, text>
Relevance levels
Rel. | Definition |
---|---|
0 | Otherwise |
1 | There is a link to the article with the query as its title in the first sentence |
2 | Query is the article title |
Example
import ir_datasets
dataset = ir_datasets.load('wikir/en1k/validation')
for qrel in dataset.qrels_iter():
qrel # namedtuple<query_id, doc_id, relevance, iteration>
Example
import ir_datasets
dataset = ir_datasets.load('wikir/en1k/validation')
for scoreddoc in dataset.scoreddocs_iter():
scoreddoc # namedtuple<query_id, doc_id, score>
WikIR for English.
Language: en
Example
import ir_datasets
dataset = ir_datasets.load('wikir/en59k')
for doc in dataset.docs_iter():
doc # namedtuple<doc_id, text>
Test set of wikir/en59k. Scoreddocs are the provided BM25 run.
Language: en
Example
import ir_datasets
dataset = ir_datasets.load('wikir/en59k/test')
for query in dataset.queries_iter():
query # namedtuple<query_id, text>
Language: en
Example
import ir_datasets
dataset = ir_datasets.load('wikir/en59k/test')
for doc in dataset.docs_iter():
doc # namedtuple<doc_id, text>
Relevance levels
Rel. | Definition |
---|---|
0 | Otherwise |
1 | There is a link to the article with the query as its title in the first sentence |
2 | Query is the article title |
Example
import ir_datasets
dataset = ir_datasets.load('wikir/en59k/test')
for qrel in dataset.qrels_iter():
qrel # namedtuple<query_id, doc_id, relevance, iteration>
Example
import ir_datasets
dataset = ir_datasets.load('wikir/en59k/test')
for scoreddoc in dataset.scoreddocs_iter():
scoreddoc # namedtuple<query_id, doc_id, score>
Training set of wikir/en59k. Scoreddocs are the provided BM25 run.
Language: en
Example
import ir_datasets
dataset = ir_datasets.load('wikir/en59k/training')
for query in dataset.queries_iter():
query # namedtuple<query_id, text>
Language: en
Example
import ir_datasets
dataset = ir_datasets.load('wikir/en59k/training')
for doc in dataset.docs_iter():
doc # namedtuple<doc_id, text>
Relevance levels
Rel. | Definition |
---|---|
0 | Otherwise |
1 | There is a link to the article with the query as its title in the first sentence |
2 | Query is the article title |
Example
import ir_datasets
dataset = ir_datasets.load('wikir/en59k/training')
for qrel in dataset.qrels_iter():
qrel # namedtuple<query_id, doc_id, relevance, iteration>
Example
import ir_datasets
dataset = ir_datasets.load('wikir/en59k/training')
for scoreddoc in dataset.scoreddocs_iter():
scoreddoc # namedtuple<query_id, doc_id, score>
Validation set of wikir/en59k. Scoreddocs are the provided BM25 run.
Language: en
Example
import ir_datasets
dataset = ir_datasets.load('wikir/en59k/validation')
for query in dataset.queries_iter():
query # namedtuple<query_id, text>
Language: en
Example
import ir_datasets
dataset = ir_datasets.load('wikir/en59k/validation')
for doc in dataset.docs_iter():
doc # namedtuple<doc_id, text>
Relevance levels
Rel. | Definition |
---|---|
0 | Otherwise |
1 | There is a link to the article with the query as its title in the first sentence |
2 | Query is the article title |
Example
import ir_datasets
dataset = ir_datasets.load('wikir/en59k/validation')
for qrel in dataset.qrels_iter():
qrel # namedtuple<query_id, doc_id, relevance, iteration>
Example
import ir_datasets
dataset = ir_datasets.load('wikir/en59k/validation')
for scoreddoc in dataset.scoreddocs_iter():
scoreddoc # namedtuple<query_id, doc_id, score>
WikIR for Spanish.
Language: es
Example
import ir_datasets
dataset = ir_datasets.load('wikir/es13k')
for doc in dataset.docs_iter():
doc # namedtuple<doc_id, text>
Test set of wikir/es13k. Scoreddocs are the provided BM25 run.
Language: es
Example
import ir_datasets
dataset = ir_datasets.load('wikir/es13k/test')
for query in dataset.queries_iter():
query # namedtuple<query_id, text>
Language: es
Example
import ir_datasets
dataset = ir_datasets.load('wikir/es13k/test')
for doc in dataset.docs_iter():
doc # namedtuple<doc_id, text>
Relevance levels
Rel. | Definition |
---|---|
0 | Otherwise |
1 | There is a link to the article with the query as its title in the first sentence |
2 | Query is the article title |
Example
import ir_datasets
dataset = ir_datasets.load('wikir/es13k/test')
for qrel in dataset.qrels_iter():
qrel # namedtuple<query_id, doc_id, relevance, iteration>
Example
import ir_datasets
dataset = ir_datasets.load('wikir/es13k/test')
for scoreddoc in dataset.scoreddocs_iter():
scoreddoc # namedtuple<query_id, doc_id, score>
Training set of wikir/es13k. Scoreddocs are the provided BM25 run.
Language: es
Example
import ir_datasets
dataset = ir_datasets.load('wikir/es13k/training')
for query in dataset.queries_iter():
query # namedtuple<query_id, text>
Language: es
Example
import ir_datasets
dataset = ir_datasets.load('wikir/es13k/training')
for doc in dataset.docs_iter():
doc # namedtuple<doc_id, text>
Relevance levels
Rel. | Definition |
---|---|
0 | Otherwise |
1 | There is a link to the article with the query as its title in the first sentence |
2 | Query is the article title |
Example
import ir_datasets
dataset = ir_datasets.load('wikir/es13k/training')
for qrel in dataset.qrels_iter():
qrel # namedtuple<query_id, doc_id, relevance, iteration>
Example
import ir_datasets
dataset = ir_datasets.load('wikir/es13k/training')
for scoreddoc in dataset.scoreddocs_iter():
scoreddoc # namedtuple<query_id, doc_id, score>
Validation set of wikir/es13k. Scoreddocs are the provided BM25 run.
Language: es
Example
import ir_datasets
dataset = ir_datasets.load('wikir/es13k/validation')
for query in dataset.queries_iter():
query # namedtuple<query_id, text>
Language: es
Example
import ir_datasets
dataset = ir_datasets.load('wikir/es13k/validation')
for doc in dataset.docs_iter():
doc # namedtuple<doc_id, text>
Relevance levels
Rel. | Definition |
---|---|
0 | Otherwise |
1 | There is a link to the article with the query as its title in the first sentence |
2 | Query is the article title |
Example
import ir_datasets
dataset = ir_datasets.load('wikir/es13k/validation')
for qrel in dataset.qrels_iter():
qrel # namedtuple<query_id, doc_id, relevance, iteration>
Example
import ir_datasets
dataset = ir_datasets.load('wikir/es13k/validation')
for scoreddoc in dataset.scoreddocs_iter():
scoreddoc # namedtuple<query_id, doc_id, score>
WikIR for French.
Language: fr
Example
import ir_datasets
dataset = ir_datasets.load('wikir/fr14k')
for doc in dataset.docs_iter():
doc # namedtuple<doc_id, text>
Test set of wikir/fr14k. Scoreddocs are the provided BM25 run.
Language: fr
Example
import ir_datasets
dataset = ir_datasets.load('wikir/fr14k/test')
for query in dataset.queries_iter():
query # namedtuple<query_id, text>
Language: fr
Example
import ir_datasets
dataset = ir_datasets.load('wikir/fr14k/test')
for doc in dataset.docs_iter():
doc # namedtuple<doc_id, text>
Relevance levels
Rel. | Definition |
---|---|
0 | Otherwise |
1 | There is a link to the article with the query as its title in the first sentence |
2 | Query is the article title |
Example
import ir_datasets
dataset = ir_datasets.load('wikir/fr14k/test')
for qrel in dataset.qrels_iter():
qrel # namedtuple<query_id, doc_id, relevance, iteration>
Example
import ir_datasets
dataset = ir_datasets.load('wikir/fr14k/test')
for scoreddoc in dataset.scoreddocs_iter():
scoreddoc # namedtuple<query_id, doc_id, score>
Training set of wikir/fr14k. Scoreddocs are the provided BM25 run.
Language: fr
Example
import ir_datasets
dataset = ir_datasets.load('wikir/fr14k/training')
for query in dataset.queries_iter():
query # namedtuple<query_id, text>
Language: fr
Example
import ir_datasets
dataset = ir_datasets.load('wikir/fr14k/training')
for doc in dataset.docs_iter():
doc # namedtuple<doc_id, text>
Relevance levels
Rel. | Definition |
---|---|
0 | Otherwise |
1 | There is a link to the article with the query as its title in the first sentence |
2 | Query is the article title |
Example
import ir_datasets
dataset = ir_datasets.load('wikir/fr14k/training')
for qrel in dataset.qrels_iter():
qrel # namedtuple<query_id, doc_id, relevance, iteration>
Example
import ir_datasets
dataset = ir_datasets.load('wikir/fr14k/training')
for scoreddoc in dataset.scoreddocs_iter():
scoreddoc # namedtuple<query_id, doc_id, score>
Validation set of wikir/fr14k. Scoreddocs are the provided BM25 run.
Language: fr
Example
import ir_datasets
dataset = ir_datasets.load('wikir/fr14k/validation')
for query in dataset.queries_iter():
query # namedtuple<query_id, text>
Language: fr
Example
import ir_datasets
dataset = ir_datasets.load('wikir/fr14k/validation')
for doc in dataset.docs_iter():
doc # namedtuple<doc_id, text>
Relevance levels
Rel. | Definition |
---|---|
0 | Otherwise |
1 | There is a link to the article with the query as its title in the first sentence |
2 | Query is the article title |
Example
import ir_datasets
dataset = ir_datasets.load('wikir/fr14k/validation')
for qrel in dataset.qrels_iter():
qrel # namedtuple<query_id, doc_id, relevance, iteration>
Example
import ir_datasets
dataset = ir_datasets.load('wikir/fr14k/validation')
for scoreddoc in dataset.scoreddocs_iter():
scoreddoc # namedtuple<query_id, doc_id, score>
WikIR for Italian.
Language: it
Example
import ir_datasets
dataset = ir_datasets.load('wikir/it16k')
for doc in dataset.docs_iter():
doc # namedtuple<doc_id, text>
Test set of wikir/it16k. Scoreddocs are the provided BM25 run.
Language: it
Example
import ir_datasets
dataset = ir_datasets.load('wikir/it16k/test')
for query in dataset.queries_iter():
query # namedtuple<query_id, text>
Language: it
Example
import ir_datasets
dataset = ir_datasets.load('wikir/it16k/test')
for doc in dataset.docs_iter():
doc # namedtuple<doc_id, text>
Relevance levels
Rel. | Definition |
---|---|
0 | Otherwise |
1 | There is a link to the article with the query as its title in the first sentence |
2 | Query is the article title |
Example
import ir_datasets
dataset = ir_datasets.load('wikir/it16k/test')
for qrel in dataset.qrels_iter():
qrel # namedtuple<query_id, doc_id, relevance, iteration>
Example
import ir_datasets
dataset = ir_datasets.load('wikir/it16k/test')
for scoreddoc in dataset.scoreddocs_iter():
scoreddoc # namedtuple<query_id, doc_id, score>
Training set of wikir/it16k. Scoreddocs are the provided BM25 run.
Language: it
Example
import ir_datasets
dataset = ir_datasets.load('wikir/it16k/training')
for query in dataset.queries_iter():
query # namedtuple<query_id, text>
Language: it
Example
import ir_datasets
dataset = ir_datasets.load('wikir/it16k/training')
for doc in dataset.docs_iter():
doc # namedtuple<doc_id, text>
Relevance levels
Rel. | Definition |
---|---|
0 | Otherwise |
1 | There is a link to the article with the query as its title in the first sentence |
2 | Query is the article title |
Example
import ir_datasets
dataset = ir_datasets.load('wikir/it16k/training')
for qrel in dataset.qrels_iter():
qrel # namedtuple<query_id, doc_id, relevance, iteration>
Example
import ir_datasets
dataset = ir_datasets.load('wikir/it16k/training')
for scoreddoc in dataset.scoreddocs_iter():
scoreddoc # namedtuple<query_id, doc_id, score>
Validation set of wikir/it16k. Scoreddocs are the provided BM25 run.
Language: it
Example
import ir_datasets
dataset = ir_datasets.load('wikir/it16k/validation')
for query in dataset.queries_iter():
query # namedtuple<query_id, text>
Language: it
Example
import ir_datasets
dataset = ir_datasets.load('wikir/it16k/validation')
for doc in dataset.docs_iter():
doc # namedtuple<doc_id, text>
Relevance levels
Rel. | Definition |
---|---|
0 | Otherwise |
1 | There is a link to the article with the query as its title in the first sentence |
2 | Query is the article title |
Example
import ir_datasets
dataset = ir_datasets.load('wikir/it16k/validation')
for qrel in dataset.qrels_iter():
qrel # namedtuple<query_id, doc_id, relevance, iteration>
Example
import ir_datasets
dataset = ir_datasets.load('wikir/it16k/validation')
for scoreddoc in dataset.scoreddocs_iter():
scoreddoc # namedtuple<query_id, doc_id, score>