```python
doi_regexp = re.compile(
    r"(doi:\s*|(?:https?://)?(?:dx\.)?doi\.org/)?(10\.\d+(\.\d+)*/.+)$",
    flags=re.I,
)
"""Match a DOI, optionally prefixed by ``doi:`` or a doi.org resolver URL.

Group 1 is the optional prefix and group 2 the bare DOI, i.e.
``10.<digits>[.<digits>...]/<suffix>``.  Dots are escaped so that they only
match a literal ``.``; whitespace after ``doi:`` and additional dotted
registrant components are both optional.  Matching is case-insensitive.

See http://en.wikipedia.org/wiki/Digital_object_identifier.
"""
handle_regexp = re.compile(
    r"(hdl:\s*|(?:https?://)?hdl\.handle\.net/)?"
    r"([^/.]+(\.[^/.]+)*/.*)$",
    flags=re.I,
)
"""Match a Handle, optionally prefixed by ``hdl:`` or an hdl.handle.net URL.

Group 1 is the optional prefix and group 2 the handle itself: one or more
dot-separated naming-authority segments, a ``/``, and a local name of any
length.  Matching is case-insensitive.

See http://handle.net/rfc/rfc3651.html.
<Handle> = <NamingAuthority> "/" <LocalName>
<NamingAuthority> = *(<NamingAuthority> ".") <NAsegment>
<NAsegment> = Any UTF8 char except "/" and "."
<LocalName> = Any UTF8 char
"""
arxiv_post_2007_regexp = re.compile(
    r"(arxiv:)?(\d{4})\.(\d{4,5})(v\d+)?$",
    flags=re.I,
)
"""Match a new-style arXiv identifier such as ``arXiv:1310.2345v2``.

Groups: optional ``arxiv:`` prefix, the four-digit first part, the four- or
five-digit sequence number, and the optional ``vN`` version.  The separator
must be a literal dot.  Matching is case-insensitive.

See http://arxiv.org/help/arxiv_identifier and
http://arxiv.org/help/arxiv_identifier_for_services.
"""
arxiv_pre_2007_regexp = re.compile(
    r"(arxiv:)?([a-z-]+)(\.[a-z]{2})?(/\d{4})(\d+)(v\d+)?$",
    flags=re.I,
)
"""Match an old-style arXiv identifier such as ``math.GT/0309136v1``.

Groups: optional ``arxiv:`` prefix, the archive name, the optional
``.XX`` two-letter subject class (introduced by a literal dot), ``/`` plus
four digits, the remaining digits, and the optional ``vN`` version.
Matching is case-insensitive.

See http://arxiv.org/help/arxiv_identifier and
http://arxiv.org/help/arxiv_identifier_for_services.
"""
arxiv_post_2007_with_class_regexp = re.compile(
    r"(arxiv:)?(?:[a-z-]+)(?:\.[a-z]{2})?/(\d{4})\.(\d{4,5})(v\d+)?$",
    flags=re.I,
)
"""Match a new-style arXiv ID that carries an old-style class specification.

Example: ``arXiv:hep-th/1310.2345v3``.  The archive name and optional
``.XX`` subject class are matched but not captured; the captured groups are
the optional ``arxiv:`` prefix, the four-digit first part, the four- or
five-digit sequence number and the optional ``vN`` version.  Both dots must
be literal dots.

Technically malformed, however appears in real data.
"""
hal_regexp = re.compile(
    r"(hal:|HAL:)?"
    r"([a-z]{3}[a-z]*-|(sic|mem|ijn)_)"
    r"\d{8}"
    r"(v\d+)?$"
)
"""HAL identifier regular expression.

Accepts an optional ``hal:``/``HAL:`` prefix, then either three or more
lowercase letters followed by ``-`` or one of the old-form ``sic_``,
``mem_``, ``ijn_`` markers, then eight digits and an optional ``vN`` version.
"""
ads_regexp = re.compile(
    r"(ads:|ADS:)?"
    r"(\d{4}[A-Za-z]\S{13}[A-Za-z.:])$"
)
"""ADS bibcode regular expression.

Optional ``ads:``/``ADS:`` prefix followed by a 19-character code: four
digits, one letter, thirteen non-whitespace characters and a final letter,
``.`` or ``:``.

See http://adsabs.harvard.edu/abs_doc/help_pages/data.html
"""
pmcid_regexp = re.compile(
    r"PMC\d+$",
    flags=re.IGNORECASE,
)
"""PubMed Central ID: ``PMC`` followed by one or more digits (any case)."""
pmid_regexp = re.compile(
    r"(pmid:|https?://pubmed\.ncbi\.nlm\.nih\.gov/)?(\d+)/?$",
    flags=re.I,
)
"""PubMed ID regular expression.

Group 1 is the optional ``pmid:`` prefix or PubMed URL, group 2 the numeric
identifier; a single trailing ``/`` is tolerated.  The dots in the host name
are escaped so that only the literal PubMed host is accepted.  Matching is
case-insensitive.
"""
ark_suffix_regexp = re.compile(
    r"ark:/"
    r"[0-9bcdfghjkmnpqrstvwxz]+"
    r"/.+$"
)
"""ARK suffix regular expression.

Matches ``ark:/``, a non-empty segment made of digits and the consonants
``bcdfghjkmnpqrstvwxz``, a ``/`` and a non-empty remainder.

See http://en.wikipedia.org/wiki/Archival_Resource_Key and
https://confluence.ucop.edu/display/Curation/ARK.
"""
lsid_regexp = re.compile(
    r"urn:lsid:"
    r"[^:]+"
    r"(:[^:]+){2,3}$",
    flags=re.IGNORECASE,
)
"""LSID regular expression.

Matches ``urn:lsid:`` followed by three or four non-empty, colon-separated
components (case-insensitive).

See http://en.wikipedia.org/wiki/LSID.
"""
# HTTP and HTTPS forms of the orcid.org base URL.
orcid_urls = [
    "http://orcid.org/",
    "https://orcid.org/",
]
gnd_regexp = re.compile(
    r"(gnd:|GND:)?("
    + "|".join(
        (
            r"(1|10)\d{7}[0-9X]",
            r"[47]\d{6}-\d",
            r"[1-9]\d{0,7}-[0-9X]",
            r"3\d{7}[0-9X]",
        )
    )
    + r")"
)
"""GND identifier regular expression.

Optional ``gnd:``/``GND:`` prefix followed by one of four accepted number
layouts (the alternatives listed above).  The pattern has no end anchor, so
``match`` only constrains the beginning of the string.

See https://www.wikidata.org/wiki/Property:P227.
"""
# Base URL to which a GND identifier is appended.
gnd_resolver_url = "http://d-nb.info/gnd/"
sra_regexp = re.compile(
    r"[SED]R"
    r"[APRSXZ]"
    r"\d+$"
)
"""Sequence Read Archive accession regular expression.

Matches ``S``, ``E`` or ``D``, then ``R``, then one of ``A P R S X Z``,
followed by one or more digits.

See
https://www.ncbi.nlm.nih.gov/books/NBK56913/#search.what_do_the_different_sra_accessi
"""
bioproject_regexp = re.compile(
    r"PRJ"
    r"(NA|EA|EB|DB)"
    r"\d+$"
)
"""BioProject accession regular expression.

Matches ``PRJ``, one of ``NA``, ``EA``, ``EB`` or ``DB``, and one or more
digits.

See https://www.ddbj.nig.ac.jp/bioproject/faq-e.html#project-accession
https://www.ebi.ac.uk/ena/submit/project-format
https://www.ncbi.nlm.nih.gov/bioproject/docs/faq/#under-what-circumstances-is-it-n
"""
biosample_regexp = re.compile(
    r"SAM"
    r"(N|EA|D)"
    r"\d+$"
)
"""BioSample accession regular expression.

Matches ``SAM``, one of ``N``, ``EA`` or ``D``, and one or more digits.

See https://www.ddbj.nig.ac.jp/biosample/faq-e.html
https://ena-docs.readthedocs.io/en/latest/submit/samples/programmatic.html#accession-numbers-in-the-receipt-xml
https://www.ncbi.nlm.nih.gov/biosample/docs/submission/faq/
"""
ensembl_regexp = re.compile(
    "(" + "|".join(ENSEMBL_PREFIXES) + ")"
    + r"(E|FM|G|GT|P|R|T)\d{11}$"
)
"""Ensembl stable ID regular expression.

Built from ``ENSEMBL_PREFIXES``: one of those prefixes, a feature-type code
(``E``, ``FM``, ``G``, ``GT``, ``P``, ``R`` or ``T``) and exactly eleven
digits.

See https://asia.ensembl.org/info/genome/stable_ids/prefixes.html
"""
uniprot_regexp = re.compile(
    r"(?:"
    # 6- or 10-character accessions starting with A-N or R-Z.
    r"([A-NR-Z][0-9](?:[A-Z][A-Z0-9]{2}[0-9]){1,2})|"
    # 6-character accessions starting with O, P or Q.
    r"([OPQ][0-9][A-Z0-9]{3}[0-9])"
    # The optional ".N" version and the end anchor apply to both forms.
    r")(\.\d+)?$"
)
"""UniProt accession regular expression.

Group 1 captures an accession of the ``[A-NR-Z]`` form, group 2 one of the
``[OPQ]`` form, and group 3 the optional ``.N`` version suffix.  The two
forms are wrapped in a non-capturing group so that the version suffix and
the end-of-string anchor cover both alternatives.

See https://www.uniprot.org/help/accession_numbers
"""
refseq_regexp = re.compile(
    r"((AC|NC|NG|NT|NW|NM|NR|XM|XR|AP|NP|YP|XP|WP)_|"
    r"NZ_[A-Z]{4})\d+(\.\d+)?$"
)
"""RefSeq accession regular expression.

Matches either a two-letter prefix followed by ``_`` (e.g. ``NM_000546.5``)
or ``NZ_`` plus four uppercase letters (e.g. ``NZ_ABCD01000001.1``), then
digits and an optional ``.N`` version.  Group 1 is the full prefix, group 2
the two-letter code (``None`` for the ``NZ_`` form) and group 3 the version.

See https://academic.oup.com/nar/article/44/D1/D733/2502674 (Table 1)
"""
genome_regexp = re.compile(r"GC[AF]_\d+\.\d+$")
"""GenBank or RefSeq genome assembly accession.

Matches ``GCA_`` or ``GCF_``, digits, a literal dot and a numeric version
(e.g. ``GCA_000001405.15``).

See https://www.ebi.ac.uk/ena/browse/genome-assembly-database
"""
geo_regexp = re.compile(
    r"G(PL|SM|SE|DS)"
    r"\d+$"
)
"""Gene Expression Omnibus (GEO) accession regular expression.

Matches ``GPL``, ``GSM``, ``GSE`` or ``GDS`` followed by one or more digits.

See https://www.ncbi.nlm.nih.gov/geo/info/overview.html#org
"""
arrayexpress_array_regexp = re.compile(
    "A-(" + "|".join(ARRAYEXPRESS_CODES) + r")-\d+$"
)
"""ArrayExpress array accession regular expression.

Matches ``A-<code>-<digits>`` where ``<code>`` is one of
``ARRAYEXPRESS_CODES``.

See https://www.ebi.ac.uk/arrayexpress/help/accession_codes.html
"""
arrayexpress_experiment_regexp = re.compile(
    "E-(" + "|".join(ARRAYEXPRESS_CODES) + r")-\d+$"
)
"""ArrayExpress experiment accession regular expression.

Matches ``E-<code>-<digits>`` where ``<code>`` is one of
``ARRAYEXPRESS_CODES``.

See https://www.ebi.ac.uk/arrayexpress/help/accession_codes.html
"""
ascl_regexp = re.compile(r"^ascl:[0-9]{4}\.[0-9]{3,4}$", flags=re.I)
"""ASCL regular expression.

Matches ``ascl:`` followed by four digits, a literal dot and three or four
digits (e.g. ``ascl:1801.012``); case-insensitive.
"""
swh_regexp = re.compile(
    r"swh:1:"
    r"(cnt|dir|rel|rev|snp):"
    r"[0-9a-f]{40}"
    r"(;(origin|visit|anchor|path|lines)=\S+)*$"
)
"""Software Heritage identifier regular expression.

Matches ``swh:1:``, an object type (``cnt``, ``dir``, ``rel``, ``rev`` or
``snp``), ``:``, forty lowercase hexadecimal characters and any number of
``;<qualifier>=<value>`` parts, where the qualifier is one of ``origin``,
``visit``, ``anchor``, ``path`` or ``lines``.
"""
ror_regexp = re.compile(
    r"(?:https?://)?(?:ror\.org/)?(0\w{6}\d{2})$",
    flags=re.I,
)
"""ROR identifier regular expression.

Accepts an optional scheme and an optional ``ror.org/`` host (with a literal
dot), and captures the identifier in group 1: ``0``, six word characters and
two digits.  Matching is case-insensitive.

See https://ror.org/facts/#core-components.
"""
```