Table of Contents
maxLevel2

Description

TensorFlow is a Python library for developing deep-learning applications, with support for GPU acceleration. One of its main features is Keras, an API for rapid development of machine-learning models, which provides modules and functions for every stage of a typical ML pipeline (data preprocessing, model definition, optimization, and validation).
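As an illustration of those pipeline stages, here is a minimal Keras sketch; the data, layer sizes, and file name are made up for the example, everything else is standard tf.keras API.

Code Block
languagepy
titlekeras-pipeline-example.py
linenumberstrue
collapsetrue
import numpy as np
import tensorflow as tf

# made-up data standing in for a real dataset
x = np.random.uniform(size=[1000, 20]).astype("float32")
y = np.random.randint(0, 2, size=[1000, 1])

# preprocessing: a normalization layer adapted to the data
norm = tf.keras.layers.Normalization()
norm.adapt(x)

# model definition
model = tf.keras.Sequential([
    norm,
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dense(1, activation="sigmoid"),
])

# optimization setup
model.compile(optimizer=tf.keras.optimizers.Adam(1e-3),
              loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=["accuracy"])

# training with a validation split
model.fit(x, y, batch_size=32, epochs=2, validation_split=0.2, verbose=2)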

Versions

version    module                              Supek     Padobran
2.10.1     scientific/tensorflow/2.10.1-ngc    (tick)
2.12.0     scientific/tensorflow/2.12.0                  (tick)
2.15.0     scientific/tensorflow/2.15.0                  (tick)
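After loading one of the modules above, the build can be quickly verified from Python (on Supek this runs through the container wrapper described in the note further down, on Padobran directly). This is only a sketch; the module names come from the table, the rest is standard TensorFlow API.

Code Block
languagepy
titlecheck-tensorflow.py
linenumberstrue
collapsetrue
import tensorflow as tf

# report which build the loaded module provides
print("TensorFlow", tf.__version__)

# list the GPUs visible to TensorFlow (empty on CPU-only nodes)
print("GPUs:", tf.config.list_physical_devices('GPU'))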

Documentation

Examples



Note
titleUsing the application on Supek

Python applications and libraries on Supek are delivered as containers and require the use of wrappers, as described below.

More information about Python applications and containers on Supek is available at the following links:

Documentation

Supek

Below are examples of a synthetic benchmark application that measures performance on the ResNet50 model.

The examples are, in order:

  • singlegpu.* - scripts for running on a single GPU
  • multigpu-singlenode.* - scripts for running on multiple GPUs on a single node
  • multigpu-multinode.* - scripts for running on multiple GPUs across multiple nodes

Single GPU

Code Block
languagebash
titlesinglegpu.sh
linenumberstrue
collapsetrue
#!/bin/bash

#PBS -q gpu
#PBS -l select=1:ncpus=8:ngpus=1:mem=10GB
#PBS -o output/
#PBS -e output/

# load the module
module load scientific/tensorflow/2.10.1-ngc

# move to the directory containing the script
cd ${PBS_O_WORKDIR:-""}

# run the script
run-singlenode.sh singlegpu.py
Code Block
languagepy
titlesinglegpu.py
linenumberstrue
collapsetrue
 #!/usr/bin/env python3

# source:
# - https://github.com/leondgarse/Keras_insightface/discussions/17

import sys
import time
import argparse
import numpy as np
import tensorflow as tf

def main():

    # vars
    batch_size = 256
    samples = 256 * 20
    epochs = 10

    # do not allocate all GPU memory
    gpus = tf.config.experimental.list_physical_devices('GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)

    # use fp16 for faster inference
    tf.keras.mixed_precision.set_global_policy('mixed_float16')

    # strategy
    gpus = tf.config.experimental.list_physical_devices('GPU')
    devices = [ gpu.name[-5:] for gpu in gpus ]
    strategy = tf.distribute.OneDeviceStrategy(device=devices[0])

    # dataset
    data = np.random.uniform(size=[samples, 224, 224, 3])
    target = np.random.uniform(size=[samples, 1], low=0, high=999).astype("int64")
    dataset = tf.data.Dataset.from_tensor_slices((data, target))
    dataset = dataset.batch(batch_size*strategy.num_replicas_in_sync)

    # define model
    with strategy.scope():
        model = tf.keras.applications.ResNet50(weights=None)
        loss = tf.keras.losses.SparseCategoricalCrossentropy()
        optimizer = tf.optimizers.SGD(0.01)
        model.compile(optimizer=optimizer, loss=loss)

    # fit
    callbacks = []
    model.fit(dataset,
              callbacks=callbacks,
              epochs=epochs,
              verbose=2)

if __name__ == "__main__":
    main()

Multiple GPUs on a single node

Code Block
languagebash
titlemultigpu-singlenode.sh
linenumberstrue
collapsetrue
 #!/bin/bash

#PBS -q gpu
#PBS -l select=1:ncpus=16:ngpus=2:mem=10GB
#PBS -o output/
#PBS -e output/

# load the module
module load scientific/tensorflow/2.10.1-ngc

# move to the directory containing the script
cd ${PBS_O_WORKDIR:-""}

# run the script
run-singlenode.sh multigpu-singlenode.py
Code Block
languagepy
titlemultigpu-singlenode.py
linenumberstrue
collapsetrue
 #!/usr/bin/env python3

# source:
# - https://github.com/leondgarse/Keras_insightface/discussions/17

import sys
import time
import argparse
import numpy as np
import tensorflow as tf

def main():

    # vars
    batch_size = 256
    samples = 256 * 20
    epochs = 10

    # do not allocate all GPU memory
    gpus = tf.config.experimental.list_physical_devices('GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)

    # use fp16 for faster inference
    tf.keras.mixed_precision.set_global_policy('mixed_float16')

    # strategy
    gpus = tf.config.experimental.list_physical_devices('GPU')
    devices = [ gpu.name[-5:] for gpu in gpus ]
    strategy = tf.distribute.MirroredStrategy(devices=devices)

    # dataset
    data = np.random.uniform(size=[samples, 224, 224, 3])
    target = np.random.uniform(size=[samples, 1], low=0, high=999).astype("int64")
    dataset = tf.data.Dataset.from_tensor_slices((data, target))
    dataset = dataset.batch(batch_size*strategy.num_replicas_in_sync)

    # define model
    with strategy.scope():
        model = tf.keras.applications.ResNet50(weights=None)
        loss = tf.keras.losses.SparseCategoricalCrossentropy()
        optimizer = tf.optimizers.SGD(0.01)
        model.compile(optimizer=optimizer, loss=loss)

    # fit
    callbacks = []
    model.fit(dataset,
              callbacks=callbacks,
              epochs=epochs,
              verbose=2)

if __name__ == "__main__":
    main()
	

Multiple GPUs across multiple nodes

Code Block
languagebash
titlemultigpu-multinode.sh
linenumberstrue
collapsetrue
#!/bin/bash

#PBS -q gpu
#PBS -l select=2:ncpus=8:ngpus=2:mem=10GB
#PBS -l place=scatter
#PBS -o output/
#PBS -e output/

# load the module
module load scientific/tensorflow/2.10.1-ngc

# move to the directory containing the script
cd ${PBS_O_WORKDIR:-""}

# run the script
run-multinode.sh multigpu-multinode.py
 
Code Block
languagepy
titlemultigpu-multinode.py
linenumberstrue
collapsetrue
 #!/usr/bin/env python3

# source:
# - https://github.com/leondgarse/Keras_insightface/discussions/17

import os
import sys
import time
import socket
import argparse
import numpy as np
import tensorflow as tf

def main():

    # vars
    batch_size = 256
    samples = 256*20
    epochs = 10

    # do not allocate all GPU memory
    gpus = tf.config.experimental.list_physical_devices('GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)

    # use fp16 for faster inference
    tf.keras.mixed_precision.set_global_policy('mixed_float16')

    # strategy
    communication_options = tf.distribute.experimental.CommunicationOptions(
        implementation=tf.distribute.experimental.CommunicationImplementation.NCCL)
    strategy = tf.distribute.MultiWorkerMirroredStrategy(
        communication_options=communication_options)

    # dataset
    data = np.random.uniform(size=[samples, 224, 224, 3])
    target = np.random.uniform(size=[samples, 1], low=0, high=999).astype("int64")
    dataset = tf.data.Dataset.from_tensor_slices((data, target))
    dataset = dataset.batch(batch_size*strategy.num_replicas_in_sync)

    # define model
    with strategy.scope():
        model = tf.keras.applications.ResNet50(weights=None)
        loss = tf.keras.losses.SparseCategoricalCrossentropy()
        optimizer = tf.optimizers.SGD(0.01)
        model.compile(optimizer=optimizer, loss=loss)

    # fit
    callbacks = []
    verbose = 2 if os.environ['PMI_RANK'] == '0' else 0
    model.fit(dataset,
              callbacks=callbacks,
              epochs=epochs,
              verbose=verbose)

if __name__ == "__main__":
    main()
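How the worker processes on the two nodes find each other is left to the run-multinode.sh wrapper in the job script above; the training script itself only reads PMI_RANK to silence logging on all but the first rank. Outside the wrapper, MultiWorkerMirroredStrategy resolves the cluster from the TF_CONFIG environment variable. The sketch below only illustrates that mechanism; the host names, port, and file name are made up, and each node would run the same script with its own task index.

Code Block
languagepy
titletf-config-sketch.py
linenumberstrue
collapsetrue
import json
import os

# made-up two-node cluster; every node lists the same cluster spec
os.environ["TF_CONFIG"] = json.dumps({
    "cluster": {
        "worker": ["node-a.example.org:12345", "node-b.example.org:12345"]
    },
    # the second node would set "index": 1
    "task": {"type": "worker", "index": 0},
})

import tensorflow as tf

# the constructor reads TF_CONFIG and waits until the other listed workers are reachable
strategy = tf.distribute.MultiWorkerMirroredStrategy()
print("replicas in sync:", strategy.num_replicas_in_sync)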

Padobran

Below are examples of a synthetic benchmark application that measures performance on the ResNet50 model.

The example is:

  • singlenode.* - scripts for running on a single node

Single node

Code Block
languagebash
titlesinglenode.sh
linenumberstrue
collapsetrue
#PBS -q cpu
#PBS -l ncpus=32
#PBS -l mem=50GB

# load the module
module load scientific/tensorflow/2.12.0

# set the number of CPU cores
export OMP_NUM_THREADS=${NCPUS}
export TF_NUM_INTEROP_THREADS=${NCPUS}
export TF_NUM_INTRAOP_THREADS=${NCPUS}

# move to the directory and run the script
cd ${PBS_O_WORKDIR}
python singlenode.py
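The thread counts that singlenode.sh exports through the environment can also be set from inside the Python script with TensorFlow's threading API, as long as this happens before any TensorFlow operation runs. This is only an alternative sketch; reading NCPUS mirrors the PBS resource request above.

Code Block
languagepy
titleset-threads.py
linenumberstrue
collapsetrue
import os
import tensorflow as tf

# number of cores granted by PBS (falls back to 1 outside a job)
ncpus = int(os.environ.get("NCPUS", "1"))

# must be called before TensorFlow executes any operation
tf.config.threading.set_intra_op_parallelism_threads(ncpus)  # threads used inside a single op
tf.config.threading.set_inter_op_parallelism_threads(ncpus)  # independent ops run in parallel

print(tf.config.threading.get_intra_op_parallelism_threads(),
      tf.config.threading.get_inter_op_parallelism_threads())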
Code Block
languagepy
titlesinglenode.py
linenumberstrue
collapsetrue
import sys
import time
import argparse
import numpy as np
import tensorflow as tf

def main():

    # vars
    batch_size = 16
    samples = 16*10
    epochs = 3

    # dataset
    data = np.random.uniform(size=[samples, 224, 224, 3])
    target = np.random.uniform(size=[samples, 1], low=0, high=999).astype("int64")
    dataset = tf.data.Dataset.from_tensor_slices((data, target))
    dataset = dataset.batch(batch_size)

    # define model
    model = tf.keras.applications.ResNet50(weights=None)
    loss = tf.keras.losses.SparseCategoricalCrossentropy()
    optimizer = tf.optimizers.SGD(0.01)
    model.compile(optimizer=optimizer, loss=loss)

    # fit
    callbacks = []
    model.fit(dataset,
              callbacks=callbacks,
              epochs=epochs,
              verbose=1)

if __name__ == "__main__":
    main()
