ir_datasets: CodeSearchNet
A benchmark for semantic code search.
Language: multiple/other/unknown
Example
import ir_datasets
dataset = ir_datasets.load('codesearchnet')
for doc in dataset.docs_iter():
    doc # namedtuple<doc_id, repo, path, func_name, code, language>
Official challenge set, with keyword queries and deep relevance assessments.
Language: multiple/other/unknown
Example
import ir_datasets
dataset = ir_datasets.load('codesearchnet/challenge')
for query in dataset.queries_iter():
    query # namedtuple<query_id, text>
Language: multiple/other/unknown
Example
import ir_datasets
dataset = ir_datasets.load('codesearchnet/challenge')
for doc in dataset.docs_iter():
    doc # namedtuple<doc_id, repo, path, func_name, code, language>
Relevance levels
Rel. | Definition |
---|---|
0 | Irrelevant |
1 | Weak Match |
2 | Strong Match |
3 | Exact Match |
Example
import ir_datasets
dataset = ir_datasets.load('codesearchnet/challenge')
for qrel in dataset.qrels_iter():
    qrel # namedtuple<query_id, doc_id, relevance, note>
Official test set, using queries inferred from docstrings.
Language: en
Example
import ir_datasets
dataset = ir_datasets.load('codesearchnet/test')
for query in dataset.queries_iter():
    query # namedtuple<query_id, text>
Language: multiple/other/unknown
Example
import ir_datasets
dataset = ir_datasets.load('codesearchnet/test')
for doc in dataset.docs_iter():
    doc # namedtuple<doc_id, repo, path, func_name, code, language>
Relevance levels
Rel. | Definition |
---|---|
1 | Matches docstring |
Example
import ir_datasets
dataset = ir_datasets.load('codesearchnet/test')
for qrel in dataset.qrels_iter():
    qrel # namedtuple<query_id, doc_id, relevance, iteration>
Official train set, using queries inferred from docstrings.
Language: en
Example
import ir_datasets
dataset = ir_datasets.load('codesearchnet/train')
for query in dataset.queries_iter():
    query # namedtuple<query_id, text>
Language: multiple/other/unknown
Example
import ir_datasets
dataset = ir_datasets.load('codesearchnet/train')
for doc in dataset.docs_iter():
    doc # namedtuple<doc_id, repo, path, func_name, code, language>
Relevance levels
Rel. | Definition |
---|---|
1 | Matches docstring |
Example
import ir_datasets
dataset = ir_datasets.load('codesearchnet/train')
for qrel in dataset.qrels_iter():
    qrel # namedtuple<query_id, doc_id, relevance, iteration>
Official validation set, using queries inferred from docstrings.
Language: en
Example
import ir_datasets
dataset = ir_datasets.load('codesearchnet/valid')
for query in dataset.queries_iter():
    query # namedtuple<query_id, text>
Language: multiple/other/unknown
Example
import ir_datasets
dataset = ir_datasets.load('codesearchnet/valid')
for doc in dataset.docs_iter():
    doc # namedtuple<doc_id, repo, path, func_name, code, language>
Relevance levels
Rel. | Definition |
---|---|
1 | Matches docstring |
Example
import ir_datasets
dataset = ir_datasets.load('codesearchnet/valid')
for qrel in dataset.qrels_iter():
    qrel # namedtuple<query_id, doc_id, relevance, iteration>