- Jul 2022
-
-
doi_regexp = re.compile(
    r"(doi:\s*|(?:https?://)?(?:dx\.)?doi\.org/)?(10\.\d+(\.\d+)*/.+)$",
    flags=re.I,
)
"""Match a DOI, optionally prefixed by ``doi:`` or a ``(dx.)doi.org`` URL.

Group 1 is the optional prefix and group 2 is the bare DOI
(``10.<registrant>[.<sub>...]/<suffix>``). Matching is case-insensitive.

See http://en.wikipedia.org/wiki/Digital_object_identifier.
"""
handle_regexp = re.compile(
    r"(hdl:\s*|(?:https?://)?hdl\.handle\.net/)?"
    r"([^/.]+(\.[^/.]+)*/.*)$",
    flags=re.I,
)
"""Match a Handle, optionally prefixed by ``hdl:`` or an hdl.handle.net URL.

Group 1 is the optional prefix and group 2 is the handle itself.

See http://handle.net/rfc/rfc3651.html.

<Handle> = <NamingAuthority> "/" <LocalName>
<NamingAuthority> = *(<NamingAuthority> ".") <NAsegment>
<NAsegment> = Any UTF8 char except "/" and "."
<LocalName> = Any UTF8 char
"""
arxiv_post_2007_regexp = re.compile(
    r"(arxiv:)?(\d{4})\.(\d{4,5})(v\d+)?$",
    flags=re.I,
)
"""Match a new-style (post-2007) arXiv identifier such as ``1501.00001v2``.

Groups: 1 optional ``arxiv:`` prefix, 2 the four leading digits,
3 the 4-5 digit sequence number, 4 optional version suffix.

See http://arxiv.org/help/arxiv_identifier and
http://arxiv.org/help/arxiv_identifier_for_services.
"""
arxiv_pre_2007_regexp = re.compile(
    r"(arxiv:)?([a-z-]+)(\.[a-z]{2})?(/\d{4})(\d+)(v\d+)?$",
    flags=re.I,
)
"""Match an old-style (pre-2007) arXiv identifier such as ``hep-th/9901001``.

Groups: 1 optional ``arxiv:`` prefix, 2 archive name, 3 optional
two-letter subject class (with its leading dot), 4 slash plus four digits,
5 remaining digits, 6 optional version suffix.

See http://arxiv.org/help/arxiv_identifier and
http://arxiv.org/help/arxiv_identifier_for_services.
"""
arxiv_post_2007_with_class_regexp = re.compile(
    r"(arxiv:)?(?:[a-z-]+)(?:\.[a-z]{2})?/(\d{4})\.(\d{4,5})(v\d+)?$",
    flags=re.I,
)
"""Match a new-style arXiv ID carrying an old-style class specification.

Technically malformed, however appears in real data
(e.g. ``hep-th/1501.00001``). The class part is not captured.
Groups: 1 optional ``arxiv:`` prefix, 2 the four leading digits,
3 the sequence number, 4 optional version suffix.
"""
hal_regexp = re.compile(
    r"(hal:|HAL:)?([a-z]{3}[a-z]*-|(sic|mem|ijn)_)\d{8}(v\d+)?$"
)
"""Match HAL identifiers (``sic``, ``mem`` and ``ijn`` are the old identifier form).

An optional ``hal:``/``HAL:`` prefix is accepted; the rest of the pattern
is case-sensitive. An optional ``v<N>`` version suffix may follow the
eight digits.
"""
ads_regexp = re.compile(
    r"(ads:|ADS:)?(\d{4}[A-Za-z]\S{13}[A-Za-z.:])$"
)
"""Match an ADS bibcode, optionally prefixed by ``ads:``/``ADS:``.

Group 2 is the 19-character bibcode: four digits, a letter, thirteen
non-space characters and a final letter, dot or colon.

See http://adsabs.harvard.edu/abs_doc/help_pages/data.html
"""
pmcid_regexp = re.compile(r"PMC\d+$", flags=re.I)
"""PubMed Central ID regular expression.

Matches ``PMC`` (case-insensitive) followed by one or more digits.
"""
pmid_regexp = re.compile(
    r"(pmid:|https?://pubmed\.ncbi\.nlm\.nih\.gov/)?(\d+)/?$",
    flags=re.I,
)
"""PubMed ID regular expression.

Group 1 is the optional ``pmid:`` or PubMed URL prefix, group 2 the
numeric identifier. A single trailing slash is tolerated.
"""
ark_suffix_regexp = re.compile(
    r"ark:/[0-9bcdfghjkmnpqrstvwxz]+/.+$"
)
"""Match the ``ark:/<NAAN>/<name>`` part of an Archival Resource Key.

The NAAN segment is restricted to digits and the consonants listed in
the character class.

See http://en.wikipedia.org/wiki/Archival_Resource_Key and
https://confluence.ucop.edu/display/Curation/ARK.
"""
lsid_regexp = re.compile(
    r"urn:lsid:[^:]+(:[^:]+){2,3}$",
    flags=re.I,
)
"""Match a Life Science Identifier URN.

After ``urn:lsid:<authority>`` the pattern requires two or three further
colon-separated, non-empty components. Matching is case-insensitive.

See http://en.wikipedia.org/wiki/LSID.
"""
#: Base URLs (HTTP and HTTPS variants) of the orcid.org site.
orcid_urls = [
    "http://orcid.org/",
    "https://orcid.org/",
]
gnd_regexp = re.compile(
    r"(gnd:|GND:)?("
    r"(1|10)\d{7}[0-9X]|"
    r"[47]\d{6}-\d|"
    r"[1-9]\d{0,7}-[0-9X]|"
    r"3\d{7}[0-9X]"
    r")"
)
"""Match a GND identifier, optionally prefixed by ``gnd:``/``GND:``.

Group 2 is the identifier itself; the alternatives are tried in the
order written. The pattern has no trailing ``$`` anchor, so ``match``
accepts trailing characters after a valid identifier.

See https://www.wikidata.org/wiki/Property:P227.
"""
#: Base URL of the d-nb.info GND resolver.
#: NOTE(review): presumably the identifier is appended to this — confirm with callers.
gnd_resolver_url = "http://d-nb.info/gnd/"
sra_regexp = re.compile(r"[SED]R[APRSXZ]\d+$")
"""Sequence Read Archive regular expression.

Matches one of ``S``/``E``/``D``, then ``R``, then one of
``A``/``P``/``R``/``S``/``X``/``Z``, then one or more digits.

See
https://www.ncbi.nlm.nih.gov/books/NBK56913/#search.what_do_the_different_sra_accessi
"""
bioproject_regexp = re.compile(r"PRJ(NA|EA|EB|DB)\d+$")
"""BioProject regular expression.

Matches ``PRJ``, one of the codes ``NA``/``EA``/``EB``/``DB`` (group 1),
then one or more digits.

See
https://www.ddbj.nig.ac.jp/bioproject/faq-e.html#project-accession
https://www.ebi.ac.uk/ena/submit/project-format
https://www.ncbi.nlm.nih.gov/bioproject/docs/faq/#under-what-circumstances-is-it-n
"""
biosample_regexp = re.compile(r"SAM(N|EA|D)\d+$")
"""BioSample regular expression.

Matches ``SAM``, one of the codes ``N``/``EA``/``D`` (group 1), then one
or more digits.

See
https://www.ddbj.nig.ac.jp/biosample/faq-e.html
https://ena-docs.readthedocs.io/en/latest/submit/samples/programmatic.html#accession-numbers-in-the-receipt-xml
https://www.ncbi.nlm.nih.gov/biosample/docs/submission/faq/
"""
ensembl_regexp = re.compile(
    r"({prefixes})(E|FM|G|GT|P|R|T)\d{{11}}$".format(
        prefixes="|".join(ENSEMBL_PREFIXES)
    )
)
"""Ensembl regular expression.

Built from ``ENSEMBL_PREFIXES`` (defined elsewhere in this module): any
one of those prefixes (group 1), a feature-type code (group 2), then
exactly eleven digits. The prefixes are joined verbatim into the
alternation, so they are interpreted as regex fragments.

See https://asia.ensembl.org/info/genome/stable_ids/prefixes.html
"""
uniprot_regexp = re.compile(
    # The outer non-capturing group makes the version suffix and the ``$``
    # anchor apply to both accession forms, not just the second one.
    r"(?:"
    r"([A-NR-Z][0-9](?:[A-Z][A-Z0-9]{2}[0-9]){1,2})|"
    r"([OPQ][0-9][A-Z0-9]{3}[0-9])"
    r")(\.\d+)?$"
)
"""UniProt regular expression.

Group 1 captures accessions starting with ``A-N``/``R-Z`` (6 or 10
characters), group 2 captures accessions starting with ``O``/``P``/``Q``
(6 characters), and group 3 is an optional ``.<digits>`` suffix.

See https://www.uniprot.org/help/accession_numbers
"""
refseq_regexp = re.compile(
    r"((AC|NC|NG|NT|NW|NM|NR|XM|XR|AP|NP|YP|XP|WP)_|"
    r"NZ_[A-Z]{4})\d+(\.\d+)?$"
)
"""RefSeq regular expression.

Matches a two-letter RefSeq prefix plus underscore (e.g. ``NM_``), or
``NZ_`` followed by four uppercase letters, then digits and an optional
``.<version>`` suffix (group 3).

See https://academic.oup.com/nar/article/44/D1/D733/2502674 (Table 1)
"""
genome_regexp = re.compile(r"GC[AF]_\d+\.\d+$")
"""GenBank or RefSeq genome assembly accession.

Matches ``GCA_`` or ``GCF_``, digits, a literal dot and a numeric
version (e.g. ``GCF_000001405.39``).

See https://www.ebi.ac.uk/ena/browse/genome-assembly-database
"""
geo_regexp = re.compile(r"G(PL|SM|SE|DS)\d+$")
"""Gene Expression Omnibus (GEO) accession.

Matches ``G``, one of ``PL``/``SM``/``SE``/``DS`` (group 1), then one or
more digits.

See https://www.ncbi.nlm.nih.gov/geo/info/overview.html#org
"""
arrayexpress_array_regexp = re.compile(
    r"A-({codes})-\d+$".format(codes="|".join(ARRAYEXPRESS_CODES))
)
"""ArrayExpress array accession.

Built from ``ARRAYEXPRESS_CODES`` (defined elsewhere in this module):
``A-``, one of those codes (group 1), ``-``, then one or more digits.

See https://www.ebi.ac.uk/arrayexpress/help/accession_codes.html
"""
arrayexpress_experiment_regexp = re.compile(
    r"E-({codes})-\d+$".format(codes="|".join(ARRAYEXPRESS_CODES))
)
"""ArrayExpress experiment accession.

Built from ``ARRAYEXPRESS_CODES`` (defined elsewhere in this module):
``E-``, one of those codes (group 1), ``-``, then one or more digits.

See https://www.ebi.ac.uk/arrayexpress/help/accession_codes.html
"""
ascl_regexp = re.compile(
    r"^ascl:[0-9]{4}\.[0-9]{3,4}$",
    flags=re.I,
)
"""ASCL regular expression.

Matches ``ascl:`` (case-insensitive), four digits, a literal dot, then
three or four digits.
"""
swh_regexp = re.compile(
    r"swh:1:(cnt|dir|rel|rev|snp):[0-9a-f]{40}"
    r"(;(origin|visit|anchor|path|lines)=\S+)*$"
)
"""Matches Software Heritage identifiers.

The core part is ``swh:1:<type>:<40 lowercase hex digits>`` with the
object type in group 1; any number of ``;<qualifier>=<value>`` parts
may follow.
"""
ror_regexp = re.compile(
    r"(?:https?://)?(?:ror\.org/)?(0\w{6}\d{2})$",
    flags=re.I,
)
"""Match a ROR identifier, optionally given as a ``ror.org`` URL.

Group 1 is the bare identifier: ``0``, six word characters and two
digits.

See https://ror.org/facts/#core-components.
"""
-