osci/preprocess/match_company/match.py (11 lines of code) (raw):
"""Copyright since 2019, EPAM Systems
This file is part of OSCI.
OSCI is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OSCI is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OSCI. If not, see <http://www.gnu.org/licenses/>."""
from typing import Optional
from .company_domain_matcher import CompanyDomainMatcher
import re
# Pattern for a complete email address, anchored at both ends.
# Local part: one or more letters, digits, "_", ".", "+" or "-".
# Named group "domain": a run of letters/digits/hyphens, a literal dot, then
# one or more letters, digits, hyphens or dots.
EMAIL_REGEXP = r'^[a-zA-Z0-9_.+-]+@(?P<domain>[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)$'
def match_company_by_email(email: str) -> Optional[str]:
    """Match an email address to a company via its domain.

    The address is checked against ``EMAIL_REGEXP``; when it matches, the
    captured ``domain`` group is passed to
    ``CompanyDomainMatcher().match_company_by_domain``.

    Args:
        email: Email address to look up.

    Returns:
        The result of ``CompanyDomainMatcher().match_company_by_domain`` for
        the extracted domain, or ``None`` when ``email`` is not a string or
        does not match ``EMAIL_REGEXP``.
    """
    # Missing or non-text values (e.g. ``None``) would make ``re.match`` raise
    # TypeError; treat them the same as an address that does not match.
    if not isinstance(email, str):
        return None

    email_match = re.match(EMAIL_REGEXP, email)
    if email_match is None:
        return None
    return CompanyDomainMatcher().match_company_by_domain(domain=email_match.group('domain'))
# Script entry point.
# NOTE(review): `cli` is not defined or imported anywhere in the visible part
# of this module, so this call would presumably raise NameError as written --
# confirm where `cli` is supposed to come from, or drop this guard.
if __name__ == '__main__':
    cli()