49 Matching Annotations
  1. Last 7 days
  2. Feb 2023
  3. Jan 2023
  4. Nov 2022
  5. Sep 2022
    1. ^([^\x00-\x20\x7f"'%<>\\^`{|}]|%[0-9A-Fa-f]{2}|{[+#./;?&=,!@|]?((\w|%[0-9A-Fa-f]{2})(\.?(\w|%[0-9A-Fa-f]{2}))*(:[1-9]\d{0,3}|\*)?)(,((\w|%[0-9A-Fa-f]{2})(\.?(\w|%[0-9A-Fa-f]{2}))*(:[1-9]\d{0,3}|\*)?))*})*$ \ \_____________/ \\____________/ \\__________________/ \__________________/ /\________________/ / \_________________________________________________________________/ // \ pct-encoded / \ operator \\ varchar varchar / modifier-level4 / varspec // \______________________________________/ \ \\________________________________________/ / // literals \ \ varname / // \ \_________________________________________________________/ // \ \ varspec // \ \____________________________________________________________________________________________________________________________// \ variable-list / \_________________________________________________________________________________________________________________________________________/ expression
  6. Aug 2022
  7. Jul 2022
    1. ```python doi_regexp = re.compile( r"(doi:\s|(?:https?://)?(?:dx.)?doi.org/)?(10.\d+(.\d+)/.+)$", flags=re.I ) """See http://en.wikipedia.org/wiki/Digital_object_identifier."""

      handle_regexp = re.compile( r"(hdl:\s|(?:https?://)?hdl.handle.net/)?" r"([^/.]+(.[^/.]+)/.)$", flags=re.I ) """See http://handle.net/rfc/rfc3651.html. <Handle> = <NamingAuthority> "/" <LocalName> <NamingAuthority> = (<NamingAuthority> ".") <NAsegment> <NAsegment> = Any UTF8 char except "/" and "." <LocalName> = Any UTF8 char """

      arxiv_post_2007_regexp = re.compile(r"(arxiv:)?(\d{4}).(\d{4,5})(v\d+)?$", flags=re.I) """See http://arxiv.org/help/arxiv_identifier and http://arxiv.org/help/arxiv_identifier_for_services."""

      arxiv_pre_2007_regexp = re.compile( r"(arxiv:)?([a-z-]+)(.[a-z]{2})?(/\d{4})(\d+)(v\d+)?$", flags=re.I ) """See http://arxiv.org/help/arxiv_identifier and http://arxiv.org/help/arxiv_identifier_for_services."""

      arxiv_post_2007_with_class_regexp = re.compile( r"(arxiv:)?(?:[a-z-]+)(?:.[a-z]{2})?/(\d{4}).(\d{4,5})(v\d+)?$", flags=re.I ) """Matches new style arXiv ID, with an old-style class specification; technically malformed, however appears in real data."""

      hal_regexp = re.compile(r"(hal:|HAL:)?([a-z]{3}[a-z]*-|(sic|mem|ijn)_)\d{8}(v\d+)?$") """Matches HAL identifiers (sic mem and ijn are old identifiers form)."""

      ads_regexp = re.compile(r"(ads:|ADS:)?(\d{4}[A-Za-z]\S{13}[A-Za-z.:])$") """See http://adsabs.harvard.edu/abs_doc/help_pages/data.html"""

      pmcid_regexp = re.compile(r"PMC\d+$", flags=re.I) """PubMed Central ID regular expression."""

      pmid_regexp = re.compile( r"(pmid:|https?://pubmed.ncbi.nlm.nih.gov/)?(\d+)/?$", flags=re.I ) """PubMed ID regular expression."""

      ark_suffix_regexp = re.compile(r"ark:/[0-9bcdfghjkmnpqrstvwxz]+/.+$") """See http://en.wikipedia.org/wiki/Archival_Resource_Key and https://confluence.ucop.edu/display/Curation/ARK."""

      lsid_regexp = re.compile(r"urn:lsid:[^:]+(:[^:]+){2,3}$", flags=re.I) """See http://en.wikipedia.org/wiki/LSID."""

      orcid_urls = ["http://orcid.org/", "https://orcid.org/"]

      gnd_regexp = re.compile( r"(gnd:|GND:)?(" r"(1|10)\d{7}[0-9X]|" r"[47]\d{6}-\d|" r"[1-9]\d{0,7}-[0-9X]|" r"3\d{7}[0-9X]" r")" ) """See https://www.wikidata.org/wiki/Property:P227."""

      gnd_resolver_url = "http://d-nb.info/gnd/"

      sra_regexp = re.compile(r"[SED]R[APRSXZ]\d+$") """Sequence Read Archive regular expression. See https://www.ncbi.nlm.nih.gov/books/NBK56913/#search.what_do_the_different_sra_accessi """

      bioproject_regexp = re.compile(r"PRJ(NA|EA|EB|DB)\d+$") """BioProject regular expression. See https://www.ddbj.nig.ac.jp/bioproject/faq-e.html#project-accession https://www.ebi.ac.uk/ena/submit/project-format https://www.ncbi.nlm.nih.gov/bioproject/docs/faq/#under-what-circumstances-is-it-n """

      biosample_regexp = re.compile(r"SAM(N|EA|D)\d+$") """BioSample regular expression. See https://www.ddbj.nig.ac.jp/biosample/faq-e.html https://ena-docs.readthedocs.io/en/latest/submit/samples/programmatic.html#accession-numbers-in-the-receipt-xml https://www.ncbi.nlm.nih.gov/biosample/docs/submission/faq/ """

      ensembl_regexp = re.compile( r"({prefixes})(E|FM|G|GT|P|R|T)\d{{11}}$".format( prefixes="|".join(ENSEMBL_PREFIXES) ) ) """Ensembl regular expression. See https://asia.ensembl.org/info/genome/stable_ids/prefixes.html """

      uniprot_regexp = re.compile( r"([A-NR-Z]0-9{1,2})|" r"([OPQ][0-9][A-Z0-9]{3}[0-9])(.\d+)?$" ) """UniProt regular expression. See https://www.uniprot.org/help/accession_numbers """

      refseq_regexp = re.compile( r"((AC|NC|NG|NT|NW|NM|NR|XM|XR|AP|NP|YP|XP|WP)|" r"NZ[A-Z]{4})\d+(.\d+)?$" ) """RefSeq regular expression. See https://academic.oup.com/nar/article/44/D1/D733/2502674 (Table 1) """

      genome_regexp = re.compile(r"GC[AF]_\d+.\d+$") """GenBank or RefSeq genome assembly accession. See https://www.ebi.ac.uk/ena/browse/genome-assembly-database """

      geo_regexp = re.compile(r"G(PL|SM|SE|DS)\d+$") """Gene Expression Omnibus (GEO) accession. See https://www.ncbi.nlm.nih.gov/geo/info/overview.html#org """

      arrayexpress_array_regexp = re.compile( r"A-({codes})-\d+$".format(codes="|".join(ARRAYEXPRESS_CODES)) ) """ArrayExpress array accession. See https://www.ebi.ac.uk/arrayexpress/help/accession_codes.html """

      arrayexpress_experiment_regexp = re.compile( r"E-({codes})-\d+$".format(codes="|".join(ARRAYEXPRESS_CODES)) ) """ArrayExpress array accession. See https://www.ebi.ac.uk/arrayexpress/help/accession_codes.html """

      ascl_regexp = re.compile(r"^ascl:[0-9]{4}.[0-9]{3,4}$", flags=re.I) """ASCL regular expression."""

      swh_regexp = re.compile( r"swh:1:(cnt|dir|rel|rev|snp):[0-9a-f]{40}" r"(;(origin|visit|anchor|path|lines)=\S+)*$" ) """Matches Software Heritage identifiers."""

      ror_regexp = re.compile(r"(?:https?://)?(?:ror.org/)?(0\w{6}\d{2})$", flags=re.I) """See https://ror.org/facts/#core-components.""" ```

  8. May 2022
    1. A solution is to add captures for the preceding and following text: str.replace(/(.*name="\w+)(\d+)(\w+".*)/, "$1!NEW_ID!$3")
      • REFERENCIAR GRUPOS DE CAPTURA
  9. Mar 2022
  10. Jan 2022
    1. Instead of using the /regex\d/g syntax, you can construct a new RegExp object: var replace = "regex\\d"; var re = new RegExp(replace,"g");
      • REGEX
      • crear regex a partir de pattern
      • CUIDADO: \b -> \\b; \s -> \\s en la cadena pattern
    1. Pattern for Local Unique Identifiers Local identifiers in arXiv should match this regular expression:^(\w+(\-\w+)?(\.\w+)?)?\d{4,7}(\.\d+(v\d+)?)?$
      • VALID ONLY for "new" format!!!
      • not valid for hep-th/9108008v1
    2. Pattern for Local Unique Identifiers Local identifiers in arXiv should match this regular expression:^(\w+(\-\w+)?(\.\w+)?)?\d{4,7}(\.\d+(v\d+)?)?$ Example Local Unique Identifier 0807.4956v1   Resolve Pattern for CURIES Compact URIs (CURIEs) constructed from arXiv should match this regular expression:^arxiv:(\w+(\-\w+)?(\.\w+)?)?\d{4,7}(\.\d+(v\d+)?)?$ Example CURIE arxiv:0807.4956v1
      • REGEX ARXIV
  11. Dec 2021
  12. Nov 2021
  13. Oct 2021
  14. May 2020
  15. Feb 2020
    1. A combination of split(), subString(), removePrefix(), removeSuffix() is usually enough.

      Sometimes the following functions are more than enough for your string matching problems

  16. Nov 2019
  17. Oct 2019
    1. Really useful page for generating regexes of IP ranges. Note that they are missing some parentheses in places, though.

  18. Sep 2019
  19. Mar 2019
  20. Nov 2018
    1. /^10.\d{4,9}/[-._;()/:A-Z0-9]+$/i

      Actually, it'd be better to express this as /^10.\d{4,9}/[-._;()/:a-zA-Z0-9]+$/ (adding lowercase letters a-z, instead of using the case insensitivity flag "i") to avoid compatibility issues with certain regex parsers

  21. Oct 2018
  22. Sep 2018
  23. Jun 2017
    1. It will then backtrack from the end until it reaches the first space.

      So greedy is evil, since it'll backtrack from the end?

    2. The faster you can throw out non-matching input, the fewer cycles you waste

      But in the log-analysis field almost every line matches; we just want to use a regex to extract fields from within the line. Does "the longer the better" still matter?

    1. Character classes Possessive quantifiers (and atomic groups) Lazy quantifiers Anchors and boundaries Optimizing regex order
  24. Apr 2017
  25. Nov 2016
    1. Interesting dive into how string slicing with String#substring is implemented in the Dart and V8 VMs and the performance consequences of that. This investigation was prompted by poor performance of a port of less.js lexer to Dart vs. the original JS implementation.

      The article ends with benchmarks showing the cost of trying to match sequences of characters in a lexer using a regex vs. manually.

    2. A person with a bit more insight into RegExp features might come up with the following optimization:

      Neat trick for matching regular expressions within a string starting at a fixed position using pre-ES6 features:

      1. Create a regex with the global flag set which matches pattern|() where () is an irrefutable pattern which is guaranteed to match
      2. Set regex.lastIndex to the position you want to match at
      3. Use regex.exec(str)
    3. match can be easily implemented in any modern JavaScript interpreter that supports sticky RegExp flag introduced in ES6:

      Notes on how to match a regex starting at a given position in a string, making use of the sticky flag introduced in ES6.