Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

| verzija | modul | python | Supek | Padobran |
| --- | --- | --- | --- | --- |
| 2.4.0 | scientific/ray/2.4.0-rayproject | 3.9 | (tick) | (error) |
| 2.10.0 | scientific/ray/v2.10.0 | 3.8 | (error) | (tick) |


Note
titleKorištenje aplikacije na Supeku

Python aplikacije i knjižnice na Supeku su dostavljene u obliku kontejnera i zahtijevaju korištenje wrappera kao što je opisano ispod.

Više informacija o python aplikacijama i kontejnerima na Supeku možete dobiti na sljedećim poveznicama:

...

Primjeri pokretanja na Supeku

Ispod se nalaze neki primjeri funkcionalnosti koje Ray pruža poput:

...

Code Block
languagepy
titlesklearn-automl.py
linenumberstrue
collapsetrue
# sources:
# 1) data - https://archive.ics.uci.edu/dataset/15/breast+cancer+wisconsin+original

import os
import glob
import numpy as np
import pandas as pd
import itertools

import ray
from ray import air, tune
from ray.air import Checkpoint, session
from ray.train.sklearn import SklearnTrainer

from sklearn.datasets import make_classification
from sklearn.model_selection import cross_validate
from sklearn.metrics import mean_squared_error, mean_absolute_error

from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier

def get_estimators(estimators):
    """Yield one instantiated model per hyper-parameter combination.

    ``estimators`` maps a model class (any callable that accepts keyword
    arguments) to a dict of ``{parameter name: list of candidate values}``.
    For each class, every element of the Cartesian product of its candidate
    lists is turned into keyword arguments and passed to the class.
    """
    for factory, grid in estimators.items():
        names = list(grid)
        for combo in itertools.product(*grid.values()):
            # Pair each parameter name with the value chosen for this combo.
            yield factory(**dict(zip(names, combo)))

def cross_validate_config(config, data):
    """Cross-validate one candidate estimator and report its mean F1 score.

    Args:
        config: Trial configuration. ``config['estimator']`` is the estimator
            to evaluate; the optional ``config['n_splits']`` is forwarded as
            the ``cv`` argument of ``cross_validate``.
        data: Dataset exposing ``to_pandas()``; the resulting frame must
            contain the columns ``'id-number'`` and ``'diagnosis'``.

    The mean of the per-fold test scores is reported through
    ``session.report`` under the key ``"f1"``.
    """

    # X, y
    df = data.to_pandas()
    X = df.drop(columns=['id-number', 'diagnosis'])
    # Encode the label as 1 for 'M' and 0 for 'B'.
    y = df['diagnosis'].map({'M': 1, 'B': 0})

    # cv: honour the fold count from the search space instead of ignoring it.
    # A missing key yields None, which keeps scikit-learn's default splitting.
    result = cross_validate(estimator=config['estimator'],
                            X=X,
                            y=y,
                            scoring="f1",
                            cv=config.get('n_splits'),
                            n_jobs=1)

    # output
    results = {"f1": result['test_score'].mean()}
    session.report(results)

def main():
    """Grid-search several scikit-learn classifiers with Ray Tune.

    Reads ``wdbc.data``, splits it with ``test_size=0.2`` and keeps the
    training part, instantiates every estimator/hyper-parameter combination,
    runs ``cross_validate_config`` once per candidate and prints the best
    result (highest ``f1``) together with its configuration.
    """

    # train split; the held-out part is not used further in this example
    dataset = ray.data.read_csv('wdbc.data')
    train, _ = dataset.train_test_split(test_size=0.2)

    # estimator space: model class -> {parameter name: candidate values}
    search_space = {
        SVC: {
            'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
            'C': [1, 4, 16],
        },
        DecisionTreeClassifier: {
            'max_depth': [None, 2, 8, 32],
            'splitter': ['best', 'random'],
        },
        KNeighborsClassifier: {
            'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
            'weights': ['uniform', 'distance'],
        },
        MLPClassifier: {
            'hidden_layer_sizes': [10, 40, 160],
            'activation': ['identity', 'logistic', 'tanh'],
        },
    }
    candidates = list(get_estimators(search_space))

    # grid search: one trial per candidate estimator, maximising f1
    tuner = tune.Tuner(
        trainable=tune.with_parameters(cross_validate_config, data=train),
        param_space={
            "estimator": tune.grid_search(candidates),
            "n_splits": 5,
        },
        tune_config=tune.TuneConfig(metric="f1", mode="max"),
    )

    best = tuner.fit().get_best_result()
    print(best)
    print(best.config)

if __name__ == '__main__':
    # `ray` is already imported at the top of the script, so it is not
    # re-imported here.
    # address='auto' is presumably meant to attach to the Ray cluster started
    # by the launcher wrapper rather than start a new one - confirm against
    # the Ray documentation. NODE_IP_ADDRESS must be present in the
    # environment; a missing variable raises KeyError on this line.
    ray.init(address='auto',
             _node_ip_address=os.environ['NODE_IP_ADDRESS'])
    main()


Primjer pokretanja na Padobranu

Code Block
languagebash
titleskripta_za_pokretanje.pbs
linenumberstrue
collapsetrue
#!/bin/bash

#PBS -q cpu
#PBS -l select=1:ncpus=20:mem=20GB

# environment
module load scientific/ray/v2.10.0


# cd: switch to the directory stored in PBS_O_WORKDIR, falling back to the
# current directory when the variable is unset or empty. The expansion is
# quoted so paths containing spaces survive, and the job aborts if the
# directory cannot be entered instead of running from the wrong place.
cd "${PBS_O_WORKDIR:-.}" || exit 1

# run
ray-run python3 ime_vase_py_skripte.py


Napomene za pokretanje na Supeku

Tip
titleIzvođenje aplikacija putem wrappera

Za ispravno korištenje knjižnice Ray dostavljen je wrapper ray-launcher.sh koji osigurava pravilno zauzimanje dodijeljenih resursa stvaranjem Ray klastera i omogućava pokretanje aplikacija kroz ray.init API.

U skripti posla PBS, potrebno je pozvati skriptu Python na sljedeći način:

Code Block
languagebash
# activate Ray
module load scientific/ray/2.4.0-rayproject

# run the application
ray-launcher.sh moj_program.py

Dok se u skripti Python sučelje Ray inicijalizira na sljedeći način:

Code Block
languagepy
import os
import ray

# Initialise Ray with address='auto' and the node IP taken from the
# NODE_IP_ADDRESS environment variable (KeyError if it is not set).
# NOTE(review): NODE_IP_ADDRESS is presumably exported by ray-launcher.sh - confirm.
ray.init(address='auto',
         _node_ip_address=os.environ['NODE_IP_ADDRESS'])

... Python kod ...


...