#!/usr/local/bin/python3.11

################################################################################
##                                                                            ##
##  This file is part of NCrystal (see https://mctools.github.io/ncrystal/)   ##
##                                                                            ##
##  Copyright 2015-2023 NCrystal developers                                   ##
##                                                                            ##
##  Licensed under the Apache License, Version 2.0 (the "License");           ##
##  you may not use this file except in compliance with the License.          ##
##  You may obtain a copy of the License at                                   ##
##                                                                            ##
##      http://www.apache.org/licenses/LICENSE-2.0                            ##
##                                                                            ##
##  Unless required by applicable law or agreed to in writing, software       ##
##  distributed under the License is distributed on an "AS IS" BASIS,         ##
##  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  ##
##  See the License for the specific language governing permissions and       ##
##  limitations under the License.                                            ##
##                                                                            ##
################################################################################

"""

Script which can be used to embed the content of .ncmat files (or actually any
ASCII/UTF8 encoded text files) directly into a C++ library. It does so by
reading the .ncmat files and creating C++ code which keeps the contents of the
files in static strings, and registers those strings with NCrystal, using the
original filename as key. Naturally, the generated code must be compiled along
with the rest of the C++ library, and the enclosing function must be invoked.

"""

#NB: Important that we do not try to always import NCrystal here, since the
#script is also used from the standard CMake code to embed the data library.

################################################################################################
####### Common code for all NCrystal cmdline scripts needing to import NCrystal modules ########
# Refuse to run on Python versions older than _minpyversion. The first three
# fields of sys.version_info (major, minor, micro) are compared as a tuple.
import sys
pyversion = sys.version_info[0:3]
_minpyversion=(3,6,0)
if pyversion < _minpyversion:
    # The two 3-tuples are concatenated to fill the six %i placeholders.
    raise SystemExit('Unsupported python version %i.%i.%i detected (needs %i.%i.%i or later).'%(pyversion+_minpyversion))
# os and pathlib are imported under private aliases and then re-bound to their
# usual public names, so both spellings are available at module level.
import os as _os
os = _os
import pathlib as _pathlib
pathlib = _pathlib

def maybeThisIsConda():
    """Heuristically tell whether we seem to be running in a conda environment.

    Returns the (truthy) value of the CONDA_PREFIX environment variable if it
    is set and non-empty. Otherwise returns a boolean telling whether a
    'conda-meta' entry exists directly below sys.base_prefix.
    """
    conda_prefix = os.environ.get('CONDA_PREFIX')
    if conda_prefix:
        return conda_prefix
    conda_meta = os.path.join(sys.base_prefix, 'conda-meta')
    return os.path.exists(conda_meta)

def fixSysPathAndImportNCrystal( *, allowfail = False ):
    """Import the NCrystal Python module and return it.

    Before importing, a file named 'ncrystal-config' is looked for in the
    directory of this script. If one of its first 30 lines starts with
    '#CMAKE_RELPATH_TO_PYMOD:', the remainder of that line is taken as a path
    (relative to the script directory) and, provided it contains
    NCrystal/__init__.py, that directory is put first in sys.path.

    If the import fails, None is returned when allowfail is true, otherwise
    SystemExit is raised with an explanatory message.
    """
    script_dir = pathlib.Path( __file__ ).parent

    def extract_cmake_pymodloc():
        # Returns the module directory advertised by ncrystal-config, or None.
        cfgfile = script_dir / 'ncrystal-config'
        if not cfgfile.exists():
            return None
        marker = '#CMAKE_RELPATH_TO_PYMOD:'
        with cfgfile.open('rt') as fh:
            for lineno, text in enumerate(fh):
                if lineno == 30:
                    #Only the top of the file is searched.
                    break
                if text.startswith(marker):
                    relpath = text[len(marker):].strip()
                    return ( script_dir / relpath ) if relpath else None
        return None

    pymod_dir = extract_cmake_pymodloc()
    hack_syspath = pymod_dir and ( pymod_dir / 'NCrystal' / '__init__.py' ).exists()
    if hack_syspath:
        sys.path.insert( 0, str( pymod_dir.absolute().resolve() ) )

    try:
        import NCrystal
    except ImportError as err:
        if allowfail:
            return None
        errmsg = 'ERROR: Could not import the NCrystal Python module (reason: "%s")'%err
        if maybeThisIsConda():
            errmsg += ' (if using conda it might help to close your terminal and activate your environment again)'
        elif not hack_syspath:
            errmsg += ' (perhaps your PYTHONPATH is misconfigured)'
        raise SystemExit(errmsg)
    else:
        return NCrystal

def ncImportSubModule( mainmodule, submodulename ):
    """Import and return the submodule `submodulename` of module `mainmodule`.

    The fully qualified name is built from mainmodule.__name__, and
    importlib.import_module returns the submodule object itself. This also
    works when `mainmodule` is a nested package (e.g. 'a.b'), in which case
    __import__ would return the top-level package 'a' rather than the
    requested submodule.

    Raises ImportError if the submodule cannot be imported.
    """
    import importlib
    return importlib.import_module( '%s.%s' % ( mainmodule.__name__, submodulename ) )
################################################################################################
import argparse

def parseArgs():
    """Parse and validate the command line (sys.argv) of the script.

    Returns the argparse namespace with a few post-processing steps applied:
    args.files holds the sorted list of resolved input paths (args.FILE is
    reset to None), args.width is capped at 999999, and args.include is
    flattened into a single list. Invalid input is reported via parser.error,
    which ends the program.
    """
    descr="""

Script which can be used to embed the content of .ncmat files (or actually any
ASCII/UTF8 excoded text files) directly into a C++ library. It does so by
reading the .ncmat files and creating C++ code which keeps the contents of the
files in static strings, and registers those strings with NCrystal, using the
original filename as key. Naturally, those file must be compiled along with the
rest of the C++ library, and the enclosing function must be invoked.

"""
    parser = argparse.ArgumentParser(description=descr)
    add = parser.add_argument
    add('FILE', type=str, nargs='+',
        help="""One or more NCMAT (or other text) files. They will be registered with a key equal to their
                        filename (without preceding directory name), which must
                        therefore be unique in the list""")
    add('--full','-f', action='store_true',
        help="""Unless this option is provided, all comments and excess whitespace will be
                        stripped from the file data.""")
    add('--validate','-v', action='store_true',
        help="""If specified input files will be validated by confirming that they can be
                        loaded with NCrystal. For this to work, the NCrystal python module must be
                        available.""")
    add("--name",'-n',default='registerNCMATData',type=str,
        help="""Name of C++ function to create which must be called in order to register the
                        data with NCrystal. If desired, it can contain namespace(s), e.g. a value of
                        "MyNameSpace::myFunction" will create a function "void myFunction()" in the
                        namespace "MyNameSpace.""")
    add("--regfctname",default='NCrystal::registerInMemoryStaticFileData(const std::string&,const char*)',type=str,
        help="""Name of C++ function used to register string objects with NCrystal.""")
    add("--include",nargs='+',type=str,action='append',
        help="""One or more extra include statements for the top of the file. The file
                                NCrystal/NCDefs.hh will always be included by default (prevent this by
                                adding a special entry 'no-ncrystal-includes' to the list).""")
    add("--width",'-w',type=int,default=80,
        help="""Wrap C++ code at this column width. Ignored for text files <~60KB when running with --full.""")
    add('--outfile','-o',type=argparse.FileType('w'),default=sys.stdout,
        help="Name of output file (default: stdout)")

    args = parser.parse_args()

    #The function name must be non-empty and free of spaces:
    if ' ' in args.name or not args.name:
        parser.error('Invalid C++ function name provided to --name')

    #Input files must exist, and be unique both as resolved paths and as
    #bare file names (the latter are used as registration keys):
    resolved_paths = set()
    basenames = set()
    for fname in set(args.FILE):
        candidate = pathlib.Path(fname)
        if not candidate.exists():
            parser.error('File not found: %s'%fname)
        candidate = candidate.resolve().absolute()
        if candidate in resolved_paths:
            parser.error('The same file is specified more than once: %s'%candidate)
        if candidate.name in basenames:
            parser.error('Filenames without directory part is not unique: %s'%fname)
        resolved_paths.add(candidate)
        basenames.add(candidate.name)
    args.files = sorted(resolved_paths)
    args.FILE = None

    #Too large widths are silently capped, too small ones are an error:
    min_width, max_width = 30, 999999
    args.width = min(args.width, max_width)
    if args.width < min_width:
        parser.error('Out of range value of --width (must be at least %i)'%min_width)

    #Flatten args.include, so we get a single list with 3 elements from:
    #        --inc 'foobla.hh' --inc '<vector>' Bla/Bla.hh
    flat_includes = []
    for group in (args.include or []):
        flat_includes.extend(group)
    args.include = flat_includes

    return args

def _ncmat2cpp_fwddeclare(fctname):
    """Return a one-line C++ forward declaration of the void function `fctname`.

    `fctname` may be namespace qualified and may include an argument signature
    in parentheses (an empty one is assumed otherwise). A leading 'NCrystal'
    namespace is replaced with the NCRYSTAL_NAMESPACE macro.
    """
    if '(' not in fctname:
        fctname += '()'
    qualname, signature = fctname.split('(',1)
    *namespaces, justname = qualname.split('::')
    if namespaces and namespaces[0]=='NCrystal':
        namespaces[0]='NCRYSTAL_NAMESPACE'
    return ( ''.join('namespace %s { '%ns for ns in namespaces)
             + 'void %s(%s;'%(justname,signature)
             + ' }'*len(namespaces) )

def _ncmat2cpp_fmtline(line, compact):
    """Convert one line of input text to the body of a C++ string literal.

    In compact mode comments and excess whitespace are stripped (but any
    NCRYSTALMATCFG[...] entry is preserved), and an empty string is returned
    if nothing is left. Backslashes and double quotes are escaped, and an
    escaped newline is appended.
    """
    if compact:
        ncmatcfg = None
        if 'NCRYSTALMATCFG[' in line:
            #special case: preserve NCRYSTALMATCFG
            tail = line.split('NCRYSTALMATCFG[',1)[1]
            assert 'NCRYSTALMATCFG' not in tail, "multiple NCRYSTALMATCFG entries in a single line"
            assert ']' in tail, "NCRYSTALMATCFG[ entry without closing ] bracket"
            ncmatcfg = tail.split(']',1)[0].strip()
            ncmatcfg.encode('utf8')#Just a check
        line = ' '.join(line.split('#',1)[0].split())
        line.encode('ascii')#Just a check
        if ncmatcfg:
            line += '#NCRYSTALMATCFG[%s]'%ncmatcfg
        if not line:
            return ''
    else:
        line.encode('utf8')#Just a check
    #Backslashes must be escaped first, so the ones added for the quotes are
    #not escaped a second time:
    return line.replace('\\','\\\\').replace('"','\\"') + '\\n'

def _ncmat2cpp_as_c_str(strdata):
    """Wrap already escaped data in quotes, using a u8 prefix if not pure ASCII."""
    try:
        strdata.encode('ascii')
    except UnicodeEncodeError:
        pass
    else:
        return '"%s"'%strdata
    try:
        strdata.encode('utf8')
    except UnicodeEncodeError:
        raise SystemExit('Invalid encoding encountered in input (must be ASCII or UTF8)')
    return 'u8"%s"'%strdata

def _ncmat2cpp_split_escaped(data, width):
    """Split escaped string data in chunks of `width` characters.

    A chunk is extended by one character whenever it would otherwise end with
    an odd number of backslashes, since the last backslash then starts an
    escape sequence (like \\n, \\" or \\\\) which must not be broken up.
    """
    chunks = []
    pos = 0
    while pos < len(data):
        end = pos + width
        piece = data[pos:end]
        if ( len(piece) - len(piece.rstrip('\\')) ) % 2:
            end += 1
        chunks.append(data[pos:end])
        pos = end
    return chunks

def _ncmat2cpp_bytes_as_array_rows(raw, indent, width):
    """Format bytes as rows of comma separated integers, ending with ',0};'.

    Each row is prefixed with `indent` and is broken as soon as its data part
    reaches `width` characters. The trailing 0 null-terminates the data.
    """
    rows = []
    current = ''
    for byte in raw:
        current += ( ',' if current else '' ) + str(byte)
        if len(current) >= width:
            rows.append( indent + current + ',' )
            current = ''
    if current:
        rows.append( indent + current + ',0};' )
    elif rows:
        #Last row was just flushed and already ends with a comma.
        rows[-1] = rows[-1][:-1] + ',0};'
    else:
        rows.append( indent + '0};' )
    return rows

def files2cppcode(infiles,outfile,
                  cppfunctionname='registerData',
                  compact=True,
                  compactwidth=140,
                  validatefct=None,
                  extra_includes=None,
                  regfctname='NCrystal::registerInMemoryStaticFileData(const std::string&,const char*)' ):
    """Generate C++ code embedding the content of the text files in `infiles`.

    The generated code defines a function `void cppfunctionname()` which, for
    each input file, passes the file name (without directory part) and the file
    data to the registration function `regfctname`.

    Parameters:
      infiles        : paths of input files (file names must be unique).
      outfile        : object with a write method, or a path to write to.
      cppfunctionname: name of the generated function (may contain namespaces).
      compact        : strip comments and excess whitespace from the data and
                       wrap string literals at `compactwidth`.
      compactwidth   : column width used when wrapping generated data.
      validatefct    : optional callable invoked with each input path.
      extra_includes : optional list of extra includes. NCrystal/NCDefs.hh is
                       included first unless the list contains the special
                       entry 'no-ncrystal-includes'. The list is not modified.
      regfctname     : name and signature of the registration function.

    Status messages are printed on stdout, except when `outfile` is sys.stdout
    in which case they go to stderr so they do not end up in the generated code.

    Raises AssertionError on duplicate file names, empty files or malformed
    NCRYSTALMATCFG entries, and UnicodeEncodeError if non-ASCII characters are
    found outside NCRYSTALMATCFG entries in compact mode.
    """

    if regfctname.startswith('NCrystal::'):
        regfctname = 'NCRYSTAL_NAMESPACE::' + regfctname[len('NCrystal::'):]

    #Keep status messages out of the generated code if it goes to stdout
    #(file=None makes print use the current sys.stdout):
    msgfile = sys.stderr if outfile is sys.stdout else None

    #Work on a copy, so the list of the caller is left untouched:
    includes = list(extra_includes or [])
    if 'no-ncrystal-includes' in includes:
        includes = [ e for e in includes if e!='no-ncrystal-includes' ]
    else:
        includes.insert(0,'NCrystal/NCDefs.hh')

    out=['// Code automatically generated by ncrystal_ncmat2cpp','']
    for inc in includes:
        if inc.startswith('#include'):
            out.append(inc)
        elif inc.startswith('<'):
            out.append('#include %s'%inc)
        else:
            out.append('#include "%s"'%inc)
    out.append('')

    out += [ _ncmat2cpp_fwddeclare(regfctname), '' ]
    if '::' in cppfunctionname:
        out += [ _ncmat2cpp_fwddeclare(cppfunctionname), '' ]

    out+=['void %s()'%cppfunctionname,'{']
    prefix='  '
    dataindent = prefix+'    '
    #Width available for string data on a line holding just indentation and "":
    chunkwidth = max(10,compactwidth-len('    ""'))
    regfct_barename = regfctname.split('(')[0]
    large_files = False
    seen = set()

    for p in ( pathlib.Path(f) for f in infiles ):
        fn=p.name
        print("ncmat2cpp : Processing %s"%fn,file=msgfile)
        assert fn not in seen, "ERROR: Multiple files in input named: %s"%fn
        seen.add(fn)
        if validatefct:
            print("Trying to validate: %s"%fn,file=msgfile)
            validatefct(p)
            print('  -> OK',file=msgfile)

        lines = p.read_text().splitlines()
        assert lines,"file was empty: %s"%fn
        fmtlines = [ _ncmat2cpp_fmtline(line,compact) for line in lines ]

        #string literals have a limit of 65K in the standard. For such large
        #files we must embed contents in const std::array<std::uint8_t, N>
        #arrays.
        is_large = ( sum(len(_ncmat2cpp_as_c_str(s)) for s in fmtlines) > 60000 )

        out+= [prefix+"{"]
        out+= [prefix+"  // File %s%s%s"%(fn,
                                          (' (compact form without comments)' if compact else ''),
                                          (' (too large for string literals)' if is_large else ''))]

        if is_large:
            large_files = True
            #NB: This could be used for non-text-data as well!
            raw_data_bytes = p.read_bytes()
            out += [ prefix+'  static const std::array<std::uint8_t,%i> rawdata {'%(len(raw_data_bytes)+1)]
            out += _ncmat2cpp_bytes_as_array_rows( raw_data_bytes,
                                                   dataindent,
                                                   compactwidth-len(dataindent) )
            out += [ prefix+'  const char * textdata = (const char*)(&rawdata[0]);']
        else:
            out += [ prefix+'  const char * textdata =']
            if compact:
                #Fall back to a single empty literal if nothing was left after
                #stripping, to keep the generated statement valid:
                pieces = _ncmat2cpp_split_escaped(''.join(fmtlines),chunkwidth) or ['']
            else:
                pieces = fmtlines
            out += [ dataindent+_ncmat2cpp_as_c_str(s) for s in pieces ]
            out[-1]+=';'
        out+= [prefix+"  ::%s(\"%s\",textdata);"%(regfct_barename,fn)]
        out+= [prefix+"}"]
    out+= ['}','']

    if large_files:
        out.insert(1,'#include <cstdint>')
        out.insert(1,'#include <array>')

    text = '\n'.join(out)
    if hasattr(outfile,'write'):
        outfile.write(text)
        if hasattr(outfile,'name'):
            print('Wrote: %s'%outfile.name,file=msgfile)
    else:
        with pathlib.Path(outfile).open('wt') as fh:
            fh.write(text)
        print('Wrote: %s'%outfile,file=msgfile)

def main():
    """Entry point: parse the command line and generate the C++ code.

    When --validate was requested, the NCrystal Python module is imported (the
    program ends with an error message if that is not possible) and each input
    file is validated by calling createInfo on it with inelas=sterile appended
    to the cfg-string.
    """
    args = parseArgs()

    validator = None
    if args.validate:
        ncrystal = fixSysPathAndImportNCrystal( allowfail = True )
        if not ncrystal:
            raise SystemExit("ERROR: Could not import the NCrystal Python module (this is required"
                             " when running with --validate). If it is installed, make sure your"
                             " PYTHONPATH is setup correctly.")

        def validator( filename ):
            return ncrystal.createInfo('%s;inelas=sterile'%filename)

    files2cppcode( args.files,
                   outfile = args.outfile,
                   cppfunctionname = args.name,
                   compact = ( not args.full ),
                   compactwidth = args.width,
                   validatefct = validator,
                   extra_includes = args.include,
                   regfctname = args.regfctname )

# Only invoke main() when the file is executed as a script (not on import):
if __name__=='__main__':
    main()
