User:Chorus/genprotosets.py: Difference between revisions

From the Dyson Sphere Program Wiki
(Add script for scraping select Dyson Sphere Program data.)
Tag: 2017 source edit
 
(Include version number, round floats to avoid precision nonsense)
 
(One intermediate revision by the same user not shown)
Line 6: Line 6:
     $DSP \
     $DSP \
     --locale $DSP/Locale/1033 \
     --locale $DSP/Locale/1033 \
    -e Achievement \
     -e Item \
     -e Item \
     -e Recipe \
     -e Recipe \
     -e Tech \
     -e Tech \
     -e Theme \
     -e Theme \
    -e Vein \
     >protosets.json
     >protosets.json
'''
'''
Line 40: Line 42:
     sys.path.append(dlldir)
     sys.path.append(dlldir)
     assembly = clr.AddReference('Assembly-CSharp')
     assembly = clr.AddReference('Assembly-CSharp')
     result = dict()
     result = dict(version=getVersion(gamedir))
     env = UnityPy.load(os.path.join(gamedir, 'DSPGAME_Data'))
     env = UnityPy.load(os.path.join(gamedir, 'DSPGAME_Data'))
     for obj in env.objects:
     for obj in env.objects:
Line 70: Line 72:
                 translate[orig] = match
                 translate[orig] = match
     return translate
     return translate
def getVersion(gamedir):
    '''
    extract latest version from $gamedir/Configs/versions
    '''
    with open(os.path.join(gamedir, 'Configs', 'versions')) as vin:
        lines = (l.strip() for l in vin if l)
        return sorted(lines, key=Version)[-1]
def Version(v):
    return tuple(map(int, v.split('.')))


def allowed(name, enabled, disabled):
def allowed(name, enabled, disabled):
Line 101: Line 114:
         fields = [(f.Name, parserForType(f.FieldType)) for f in fields]
         fields = [(f.Name, parserForType(f.FieldType)) for f in fields]
         if not fields:
         if not fields:
             raise Exception(f'Could not find fields for {ctype.Name}')
             print(f'Could not find fields for {ctype.Name}', file=sys.stderr)
         ret = ObjectParser(fields)
         ret = ObjectParser(fields)
     _cache[ctype.Name] = ret
     _cache[ctype.Name] = ret
Line 114: Line 127:


     def __call__(self, *args, **kwargs):
     def __call__(self, *args, **kwargs):
         res = {}
         return {key: subparse(*args, **kwargs) for key, subparse in self.parsers}
        for key, subparse in self.parsers:
            res[key] = subparse(*args, **kwargs)
        return res


class EnumParser(object):
class EnumParser(object):
Line 191: Line 201:
     parse a 32-bit float from the datastream
     parse a 32-bit float from the datastream
     '''
     '''
     return struct.unpack('<f', reader.read(4))[0]
     raw = struct.unpack('<f', reader.read(4))[0]
    return float(f'{raw:0.7f}')


def doubleParser(reader, translate):
def doubleParser(reader, translate):
Line 198: Line 209:
     '''
     '''
     return struct.unpack('<d', reader.read(8))[0]
     return struct.unpack('<d', reader.read(8))[0]
def byteParser(reader, translate):
    '''
    parse a byte from the datastream aligned to 4 bytes
    '''
    return struct.unpack('<bxxx', reader.read(4))[0]
def nullParser(reader, translate):
    pass


predefined = {
predefined = {
Line 206: Line 226:
     'Single': floatParser,
     'Single': floatParser,
     'Double': doubleParser,
     'Double': doubleParser,
    'Byte': byteParser,
    'PrefabDesc': nullParser,
}
}


Line 224: Line 246:
         '-d', '--disable',
         '-d', '--disable',
         action='append',
         action='append',
        default=['Abnormality', 'Vege', 'Vein'],
         help='Disable certain ProtoSet tables')
         help='Disable certain ProtoSet tables')
     parser.add_argument(
     parser.add_argument(

Latest revision as of 18:29, 26 May 2024

The following code requires Python 3, as well as pythonnet and UnityPy.

#!/usr/bin/env python3
'''
Recommend: genprotosets.py \
    $DSP \
    --locale $DSP/Locale/1033 \
    -e Achievement \
    -e Item \
    -e Recipe \
    -e Tech \
    -e Theme \
    -e Vein \
    >protosets.json
'''

import io
import itertools as it
import json
import operator as op
import os
import struct
import sys

def main(gamedir, locale, indent, enable, disable):
    '''
    Load Dyson Sphere Program assembly, parse assets for MonoBehaviours,
    roll my own parsers from the .NET type definition, then export
    specified ProtoSets as JSON

    gamedir: game installation directory containing DSPGAME_Data
    locale:  mapping of original string -> translated string, or None to
             leave strings untranslated
    indent:  indentation passed to json.dumps (None for compact output)
    enable:  name prefixes of the only tables to export, or None/empty
    disable: name prefixes of tables to skip, or None/empty

    The JSON document is printed to stdout; progress goes to stderr.
    Exits via sys.exit() when a dependency or the data directory is missing.
    '''
    try:
        import clr
    except ImportError:
        sys.exit('please install pythonnet using \"pip install pythonnet [--user]\"')
    try:
        import UnityPy
    except ImportError:
        sys.exit('please install UnityPy using \"pip install UnityPy [--user]\"')
    datadir = os.path.join(gamedir, 'DSPGAME_Data')
    if not os.path.exists(datadir):
        sys.exit('$DSP/DSPGAME_Data directory not found')
    # Options that were not given on the command line arrive as None;
    # normalise them so the helpers below can iterate / look up safely.
    if locale is None:
        locale = {}
    enable = enable or []
    disable = disable or []
    # Make the game's managed assemblies importable through pythonnet.
    sys.path.append(os.path.join(datadir, 'Managed'))
    assembly = clr.AddReference('Assembly-CSharp')
    result = dict(version=getVersion(gamedir))
    env = UnityPy.load(datadir)
    for obj in env.objects:
        if obj.type.name != 'MonoBehaviour':
            continue
        mb = obj.read()
        if not mb.name.endswith('ProtoSet'):
            continue
        print(f'found MonoBehaviour "{mb.name}" in {mb.assets_file.name}', file=sys.stderr)
        if not allowed(mb.name, enable, disable):
            print(f'skipping "{mb.name}"', file=sys.stderr)
            continue
        # Build a parser from the .NET type of the same name and run it
        # over the raw serialized bytes of the MonoBehaviour.
        parser = parserForType(assembly.GetType(mb.name))
        result[mb.name] = parser(io.BytesIO(mb.raw_data), locale)
    print(json.dumps(result, indent=indent))

def loadlocale(localedir):
    '''
    load translations from locale directory

    Every regular file in localedir is read as UTF-16 text. Each non-blank
    line must hold four tab-separated fields; the first is the original
    string and the fourth is its translation (the middle two are ignored).

    Returns a dict mapping original -> translation. When a key occurs more
    than once the later entry wins and a notice is written to stderr.
    Raises ValueError for a non-blank line that does not have four fields.
    '''
    translate = dict()
    # Sorted so that duplicate resolution does not depend on directory order.
    for mapfile in sorted(os.listdir(localedir)):
        mappath = os.path.join(localedir, mapfile)
        if not os.path.isfile(mappath):
            # Sub-directories cannot be opened as translation tables.
            continue
        with open(mappath, 'r', encoding='utf-16') as mapf:
            for line in mapf:
                line = line.strip('\n')
                if not line.strip():
                    # Blank lines (e.g. at end of file) carry no entry.
                    continue
                orig, _empty, _number, match = line.split('\t')
                if orig in translate:
                    print(f'Duplicate locale key "{orig}" = "{match}" (replacing "{translate[orig]}") (from {mappath})', file=sys.stderr)
                translate[orig] = match
    return translate

def getVersion(gamedir):
    '''
    extract latest version from $gamedir/Configs/versions

    The file is read one version string per line; blank lines are ignored.
    Returns the line whose dotted numeric value is greatest.
    Raises IndexError when the file holds no version at all.
    '''
    with open(os.path.join(gamedir, 'Configs', 'versions')) as vin:
        # Strip first, then filter: a raw line still carries its newline and
        # is therefore never falsy, so filtering before stripping is a no-op.
        stripped = (line.strip() for line in vin)
        lines = [line for line in stripped if line]
    return sorted(lines, key=Version)[-1]

def Version(v):
    '''
    sort key for dotted version strings: '0.10.2' -> (0, 10, 2)

    Comparing integer tuples orders 0.10 after 0.9, unlike string
    comparison.
    '''
    return tuple(map(int, v.split('.')))

def allowed(name, enabled, disabled):
    '''
    check if parsing a table is allowed according to enabled/disabled prefixes

    name:     table name to test
    enabled:  iterable of prefixes, or None/empty; when non-empty it is an
              allow-list and `disabled` is ignored
    disabled: iterable of prefixes to reject, or None/empty

    Returns True when the table should be parsed.
    '''
    if enabled:
        return name.startswith(tuple(enabled))
    # An option that was never given arrives as None; treat it as "no
    # prefixes" instead of failing while iterating over it.
    return not name.startswith(tuple(disabled or ()))

def parserForType(ctype, _cache=dict()):
    '''
    recursively construct a parser for a given type

    Types listed in `predefined` use their fixed parser. Arrays and enums
    get an ArrayParser / EnumParser; anything else gets an ObjectParser
    built from its public, serialized instance fields. Constructed parsers
    are remembered in `_cache` under the type's Name.
    '''
    from System.Reflection import BindingFlags
    if ctype.Name in predefined:
        return predefined[ctype.Name]
    if ctype.Name in _cache:
        return _cache[ctype.Name]
    print(f'parser {ctype.Name}', file=sys.stderr)
    if ctype.IsArray:
        ret = ArrayParser(parserForType(ctype.GetElementType()))
    elif ctype.IsEnum:
        ret = EnumParser(ctype)
    else:
        visible = BindingFlags.Public | BindingFlags.Instance
        serialized = [
            member
            for member in ctype.GetFields(visible)
            if not member.IsNotSerialized]
        # Fields are visited in MetadataToken order.
        serialized.sort(key=lambda member: member.MetadataToken)
        fields = []
        for member in serialized:
            fields.append((member.Name, parserForType(member.FieldType)))
        if not fields:
            print(f'Could not find fields for {ctype.Name}', file=sys.stderr)
        ret = ObjectParser(fields)
    _cache[ctype.Name] = ret
    return ret

class ObjectParser(object):
    '''
    parse the fields of custom object

    `parsers` is a sequence of (field name, parser) pairs. Calling the
    instance runs every parser in order with the same arguments and
    returns a dict of field name -> parsed value.
    '''
    def __init__(self, parsers):
        self.parsers = parsers

    def __call__(self, *args, **kwargs):
        record = {}
        for fieldname, fieldparser in self.parsers:
            record[fieldname] = fieldparser(*args, **kwargs)
        return record

class EnumParser(object):
    '''
    parse an Enum value into either a collection of values or a single
    value

    Enums carrying FlagsAttribute yield a list with the name of every
    member for which HasFlag is true. Other enums yield the member name
    when the value is defined, and the raw integer otherwise.
    '''
    def __init__(self, enumtype):
        from System import Attribute, FlagsAttribute
        flagsattr = Attribute.GetCustomAttribute(enumtype, FlagsAttribute)
        self.enumtype = enumtype
        self.isflag = flagsattr is not None

    def __call__(self, reader, translate):
        from System import Enum
        # The value is read from the stream as a 32-bit integer.
        raw = intParser(reader, translate)
        member = Enum.ToObject(self.enumtype, raw)
        if self.isflag:
            names = []
            for candidate in self.enumtype.GetEnumValues():
                if member.HasFlag(candidate):
                    names.append(str(candidate))
            return names
        if Enum.IsDefined(self.enumtype, member):
            return str(member)
        return raw

class ArrayParser(object):
    '''
    parse an array of objects, starting with the length of the array

    The length is a little-endian 32-bit integer; `memberparser` is then
    called that many times to read the elements.
    '''
    def __init__(self, memberparser):
        self.memberparser = memberparser

    def __call__(self, reader, translate):
        (count,) = struct.unpack('<i', reader.read(4))
        items = []
        for _ in range(count):
            items.append(self.memberparser(reader, translate))
        return items

def stringParser(reader, translate):
    '''
    parse a string from the datastream

    make sure it's aligned to 4 bytes when we're done

    reader:    binary stream positioned at the 32-bit length prefix
    translate: mapping of original -> translated string, or None/empty
               to return the string untranslated

    Returns the translated string, or the decoded string itself when no
    translation is known.
    '''
    (length,) = struct.unpack('<i', reader.read(4))
    # Number of pad bytes that bring the payload up to a multiple of four.
    pad = -length % 4
    (payload,) = struct.unpack(f'<{length}s{pad}x', reader.read(length + pad))
    val = payload.decode()
    if not translate:
        # No locale was loaded: nothing to look up.
        return val
    return translate.get(val, val)

def intParser(reader, translate):
    '''
    read four bytes and decode them as a little-endian signed 32-bit integer

    `translate` is accepted for a uniform parser signature and not used.
    '''
    (value,) = struct.unpack('<i', reader.read(4))
    return value

def longParser(reader, translate):
    '''
    read eight bytes and decode them as a little-endian signed 64-bit integer

    `translate` is accepted for a uniform parser signature and not used.
    '''
    (value,) = struct.unpack('<q', reader.read(8))
    return value

def boolParser(reader, translate):
    '''
    read a boolean stored in the first of four bytes; the other three are
    padding

    `translate` is accepted for a uniform parser signature and not used.
    '''
    (flag,) = struct.unpack('<?xxx', reader.read(4))
    return flag

def floatParser(reader, translate):
    '''
    read a little-endian 32-bit float and round it to seven decimal places

    The rounding goes through a fixed-point text rendering of the value.
    `translate` is accepted for a uniform parser signature and not used.
    '''
    (raw,) = struct.unpack('<f', reader.read(4))
    rendered = format(raw, '0.7f')
    return float(rendered)

def doubleParser(reader, translate):
    '''
    read eight bytes and decode them as a little-endian 64-bit double

    `translate` is accepted for a uniform parser signature and not used.
    '''
    (value,) = struct.unpack('<d', reader.read(8))
    return value

def byteParser(reader, translate):
    '''
    parse a byte from the datastream aligned to 4 bytes

    This parser is registered for the .NET `Byte` type, which is unsigned,
    so the value is decoded as 0..255. Four bytes are consumed: the value
    followed by three pad bytes. `translate` is not used.
    '''
    # 'B' is unsigned char; 'b' would turn values >= 128 into negatives.
    (value,) = struct.unpack('<Bxxx', reader.read(4))
    return value

def nullParser(reader, translate):
    '''
    placeholder parser: reads nothing from the datastream and yields None
    '''
    return None

# Fixed parsers, keyed by the type's Name; parserForType consults this
# table before it tries to build a parser of its own.
predefined = dict(
    String=stringParser,
    Int32=intParser,
    Int64=longParser,
    Boolean=boolParser,
    Single=floatParser,
    Double=doubleParser,
    Byte=byteParser,
    PrefabDesc=nullParser,
)

def parse_args(args=None):
    '''
    parse command line options into the keyword arguments of main()

    args: list of argument strings; None means sys.argv[1:]

    Returns a dict with the keys gamedir, locale, indent, enable and
    disable. Options that are not given come back as empty containers
    ({} for locale, [] for enable/disable) rather than None, so the
    consumers can iterate and look up without special-casing.
    '''
    import argparse as ap
    parser = ap.ArgumentParser(
        description=__doc__,
        # The module docstring is a multi-line command example; keep its
        # line breaks instead of letting argparse re-wrap it.
        formatter_class=ap.RawDescriptionHelpFormatter)
    parser.add_argument('gamedir', help='Dyson Sphere Program directory ($DSP)')
    parser.add_argument(
        '-l', '--locale',
        type=loadlocale,
        # Non-string defaults are used as-is (not passed through `type`).
        default={},
        help='Locale to translate strings (probably $DSP/Locale/1033)')
    parser.add_argument(
        '-i', '--indent',
        type=int,
        default=None,
        help='JSON indentation level (default=%(default)s)')
    parser.add_argument(
        '-d', '--disable',
        action='append',
        default=[],
        help='Disable certain ProtoSet tables')
    parser.add_argument(
        '-e', '--enable',
        action='append',
        default=[],
        help='Only enable specific ProtoSet tables')
    return vars(parser.parse_args(args))

# Script entry point: the dict returned by parse_args() is unpacked into
# main()'s keyword arguments.
if __name__ == '__main__':
    main(**parse_args())
🍪 We use cookies to keep session information to provide you a better experience.