flowchart TD A[start] --> B(hello) B --> C[end]
2025-06-18
Press Space or click the green arrow icons to navigate the slides ->
This was awesome, thanks for sharing.
Source: sparecores.com
>>> from rich import print as pp
>>> from sc_crawler.tables import Server
>>> from sqlmodel import create_engine, Session, select
>>> engine = create_engine("sqlite:///sc-data-all.db")
>>> session = Session(engine)
>>> server = session.exec(select(Server).where(Server.server_id == 'g4dn.xlarge')).one()
>>> pp(server)
Server(
server_id='g4dn.xlarge',
vendor_id='aws',
display_name='g4dn.xlarge',
api_reference='g4dn.xlarge',
name='g4dn.xlarge',
family='g4dn',
description='Graphics intensive [Instance store volumes] [Network and EBS optimized] Gen4 xlarge',
status=<Status.ACTIVE: 'active'>,
observed_at=datetime.datetime(2024, 6, 6, 10, 18, 4, 127254),
hypervisor='nitro',
vcpus=4,
cpu_cores=2,
cpu_allocation=<CpuAllocation.DEDICATED: 'Dedicated'>,
cpu_manufacturer='Intel',
cpu_family='Xeon',
cpu_model='8259CL',
cpu_architecture=<CpuArchitecture.X86_64: 'x86_64'>,
cpu_speed=3.5,
cpu_l1_cache=None,
cpu_l2_cache=None,
cpu_l3_cache=None,
cpu_flags=[],
memory_amount=16384,
memory_generation=<DdrGeneration.DDR4: 'DDR4'>,
memory_speed=3200,
memory_ecc=None,
gpu_count=1,
gpu_memory_min=16384,
gpu_memory_total=16384,
gpu_manufacturer='Nvidia',
gpu_family='Turing',
gpu_model='Tesla T4',
gpus=[
{
'manufacturer': 'Nvidia',
'family': 'Turing',
'model': 'Tesla T4',
'memory': 15360,
'firmware_version': '535.171.04',
'bios_version': '90.04.96.00.A0',
'graphics_clock': 1590,
'sm_clock': 1590,
'mem_clock': 5001,
'video_clock': 1470
}
],
storage_size=125,
storage_type=<StorageType.NVME_SSD: 'nvme ssd'>,
storages=[{'size': 125, 'storage_type': 'nvme ssd'}],
network_speed=5.0,
inbound_traffic=0.0,
outbound_traffic=0.0,
ipv4=0,
)
Workflow orchestration for real-life ML, AI, and DS projects.
from metaflow import FlowSpec, step
class HelloFlow(FlowSpec):
@step
def start(self):
print("HelloFlow is starting.")
self.next(self.hello)
@step
def hello(self):
print("Metaflow says: Hi!")
self.next(self.end)
@step
def end(self):
print("HelloFlow is all done.")
if __name__ == "__main__":
HelloFlow()
flowchart TD A[start] --> B(hello) B --> C[end]
from metaflow import FlowSpec, step
class HelloFlow(FlowSpec):
@step
def start(self):
print("HelloFlow is starting.")
self.next(self.hello)
@step
def hello(self):
print("Metaflow says: Hi!")
self.next(self.end)
@step
def end(self):
print("HelloFlow is all done.")
if __name__ == "__main__":
HelloFlow()
flowchart TD A[start] -. dice <br> rolls .-> B(roll) B -. dice <br> rolls <br> dices ..-> C[end]
from random import sample, randint
from metaflow import FlowSpec, step
class DiceRollFlow(FlowSpec):
# D3, D4, D6, D8, D10, D12, ...
DICES = [3, 4, 6, 8, 10, 12, 100]
@step
def start(self):
self.dice = sample(self.DICES, 1)[0]
self.rolls = sample(range(3), 1)[0] + 1
self.next(self.roll)
@step
def roll(self):
self.dices = [randint(1, self.dice) for _ in range(self.rolls)]
self.next(self.end)
@step
def end(self):
print(f"Rolled {self.rolls} times with a {self.dice} sided dice: {self.dices}")
if __name__ == "__main__":
DiceRollFlow()
flowchart TD A[start] -. dice <br> rolls .-> B(roll) B -. dice <br> rolls <br> dices ..-> C[end]
from random import sample, randint
from metaflow import FlowSpec, step
class DiceRollFlow(FlowSpec):
# D3, D4, D6, D8, D10, D12, ...
DICES = [3, 4, 6, 8, 10, 12, 100]
@step
def start(self):
self.dice = sample(self.DICES, 1)[0]
self.rolls = sample(range(3), 1)[0] + 1
self.next(self.roll)
@step
def roll(self):
self.dices = [randint(1, self.dice) for _ in range(self.rolls)]
self.next(self.end)
@step
def end(self):
print(f"Rolled {self.rolls} times with a {self.dice} sided dice: {self.dices}")
if __name__ == "__main__":
DiceRollFlow()
>>> from metaflow import Flow
>>> print(run := Flow("DiceRollFlow").latest_successful_run)
Run('DiceRollFlow/1749762602890590')
>>> run.created_at
datetime.datetime(2025, 6, 12, 23, 10, 2, 891000)
>>> run.data
<MetaflowData: dices, rolls, name, dice>
>>> run.data.dices
[5, 2, 2]
>>> list(run.steps())
[Step('DiceRollFlow/1749762602890590/end'), Step('DiceRollFlow/1749762602890590/roll'), Step('DiceRollFlow/1749762602890590/start')]
>>> step = next(run.steps())
>>> step.task.metadata
[Metadata(name='user', value='daroczig', created_at=1749762604068, type='user', task=Task('DiceRollFlow/1749762602890590/end/3')), ...
>>> step.task.stdout
'Rolled 3 time(s) with a 10 sided dice: [5, 2, 2]\n'
>>> step.task.artifacts.dices.data
[5, 2, 2]
flowchart TD A[start] --> B1(fitA) A[start] --> B2(fitB) B1 --> C[eval] B2 --> C[eval] C --> D[end]
class ModelingFlow(FlowSpec):
@step
def start(self):
self.data = load_data()
self.next(self.fitA, self.fitB)
@step
def fitA(self):
self.model = fit(self.data, model='A')
self.next(self.eval)
@step
def fitB(self):
self.model = fit(self.data, model='B')
self.next(self.eval)
@step
def eval(self, inputs):
self.best = max((i.model.score, i.model)
for i in inputs)[1]
self.next(self.end)
@step
def end(self):
print('done!')
flowchart TD A[start] -- data --> B1(fitA) A[start] -- data --> B2(fitB) B1 -. data<br>model .-> C[eval] B2 -. data<br>model .-> C[eval] C -- data<br>best --> D[end]
class ModelingFlow(FlowSpec):
@step
def start(self):
self.data = load_data()
self.next(self.fitA, self.fitB)
@step
def fitA(self):
self.model = fit(self.data, model='A')
self.next(self.eval)
@step
def fitB(self):
self.model = fit(self.data, model='B')
self.next(self.eval)
@step
def eval(self, inputs):
self.best = max((i.model.score, i.model)
for i in inputs)[1]
self.next(self.end)
@step
def end(self):
print('done!')
flowchart TD style B2 fill:#eab308 A[start] --> B1(fitA) A[start] --> B2(fitB) B1 --> C[eval] B2 --> C[eval] C --> D[end]
class ModelingFlow(FlowSpec):
@step
def start(self):
self.data = load_data()
self.next(self.fitA, self.fitB)
@step
def fitA(self):
self.model = fit(self.data, model='A')
self.next(self.eval)
@pypi(python='3.10.11', packages={'scikit-learn': '1.7'})
@step
def fitB(self):
self.model = fit(self.data, model='B')
self.next(self.eval)
@step
def eval(self, inputs):
self.best = max((i.model.score, i.model)
for i in inputs)[1]
self.next(self.end)
@step
def end(self):
print('done!')
flowchart TD subgraph AWS Batch B1(fitA) B2(fitB) end A[start] .-> B1 A[start] .-> B2 B1 .-> C[eval] B2 .-> C[eval] C --> D[end]
class ModelingFlow(FlowSpec):
@step
def start(self):
self.data = load_data()
self.next(self.fitA, self.fitB)
@batch(cpu=32, memory=64)
@step
def fitA(self):
self.model = fit(self.data, model='A')
self.next(self.eval)
@batch(cpu=4, memory=16, gpu=1)
@step
def fitB(self):
self.model = fit(self.data, model='B')
self.next(self.eval)
@step
def eval(self, inputs):
self.best = max((i.model.score, i.model)
for i in inputs)[1]
self.next(self.end)
@step
def end(self):
print('done!')
psutil
on other OS.
Install from PyPI:
>>> from time import sleep
>>> from resource_tracker import ResourceTracker
>>> tracker = ResourceTracker()
>>> tracker.pid_tracker
TinyDataFrame with 0 rows and 0 columns. First row as a dict: {}
>>> sleep(1)
>>> tracker.pid_tracker
TinyDataFrame with 1 rows and 12 columns. First row as a dict: {'timestamp': 1750021603.0219927, 'pid': 265803.0, 'children': 3.0, 'utime': 0.05999999999999997, 'stime': 0.1, 'cpu_usage': 0.16, 'memory': 62343.0, 'read_bytes': 0.0, 'write_bytes': 0.0, 'gpu_usage': 0.0, 'gpu_vram': 0.0, 'gpu_utilized': 0.0}
>>> big_array = bytearray(500 * 1024 * 1024) # 500 MiB
>>> total = 0
>>> for i in range(int(1e7)):
... total += i**3
>>> tracker.pid_tracker.tail(1)
{'timestamp': 1750022019.3893294, 'pid': 265803.0, 'children': 3.0, 'utime': 1.0299999999999994, 'stime': 0.06999999999999984, 'cpu_usage': 1.0999, 'memory': 573832.0, 'read_bytes': 0.0, 'write_bytes': 0.0, 'gpu_usage': 0.0, 'gpu_vram': 0.0, 'gpu_utilized': 0.0}
>>> tracker.system_tracker.tail(1)
{'timestamp': 1750022019.3464136, 'processes': 778.0, 'utime': 2.139999999999418, 'stime': 2.60999999998603, 'cpu_usage': 4.7495, 'memory_free': 18372680.0, 'memory_used': 37015416.0, 'memory_buffers': 3612.0, 'memory_cached': 10155400.0, 'memory_active': 29048656.0, 'memory_inactive': 130048.0, 'disk_read_bytes': 95047680.0, 'disk_write_bytes': 7380992.0, 'disk_space_total_gb': 3699.99, 'disk_space_used_gb': 2373.84, 'disk_space_free_gb': 1326.15, 'net_recv_bytes': 8960.0, 'net_sent_bytes': 4732.0, 'gpu_usage': 0.3, 'gpu_vram': 791.0, 'gpu_utilized': 1.0}
>>> tracker.system_tracker
TinyDataFrame with 4 rows and 21 columns. First row as a dict: {'timestamp': 1750022016.3461385, 'processes': 778.0, 'utime': 0.6399999999994179, 'stime': 1.6000000000058208, 'cpu_usage': 2.2398, 'memory_free': 18415448.0, 'memory_used': 36966424.0, 'memory_buffers': 3612.0, 'memory_cached': 10161624.0, 'memory_active': 28951856.0, 'memory_inactive': 130048.0, 'disk_read_bytes': 126976.0, 'disk_write_bytes': 827392.0, 'disk_space_total_gb': 3699.99, 'disk_space_used_gb': 2373.84, 'disk_space_free_gb': 1326.15, 'net_recv_bytes': 9896.0, 'net_sent_bytes': 9101.0, 'gpu_usage': 0.31, 'gpu_vram': 799.0, 'gpu_utilized': 1.0}
>>> del big_array
>>> tracker.stop()
>>> print(tracker.pid_tracker)
TinyDataFrame with 7 rows and 12 columns:
timestamp | pid | children | utime | stime | cpu_usage | memory | read_bytes | write_bytes | gpu_usage | gpu_vram | gpu_utilized
-------------------+----------+----------+-------+-------+-----------+----------+------------+-------------+-----------+----------+-------------
1750022710.052515 | 326380.0 | 3.0 | 0.040 | 0.069 | 0.11 | 62017.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0
1750022711.0526254 | 326380.0 | 3.0 | 0.139 | 0.099 | 0.24 | 573959.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0
1750022712.05273 | 326380.0 | 3.0 | 1.020 | 0.072 | 1.0899 | 573913.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0
1750022713.0528054 | 326380.0 | 3.0 | 1.029 | 0.090 | 1.1199 | 573968.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0
1750022714.052872 | 326380.0 | 3.0 | 1.010 | 0.099 | 1.1099 | 573923.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0
1750022715.0529463 | 326380.0 | 3.0 | 0.049 | 0.079 | 0.13 | 71775.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0
1750022716.0530148 | 326380.0 | 3.0 | 0.060 | 0.099 | 0.16 | 61954.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0
PidTracker
and SystemTracker
directly (blocking)Learn more by reading the documentation at
📖 sparecores.github.io/resource-tracker
from time import sleep
from metaflow import FlowSpec, step, track_resources
class MinimalFlow(FlowSpec):
@step
def start(self):
self.next(self.do_heavy_computation)
@track_resources
@step
def do_heavy_computation(self):
big_array = bytearray(500 * 1024 * 1024) # 500 MiB
total = 0
for i in range(int(1e7)):
total += i**3 # heavy calcs
del big_array
sleep(1) # do nothing for bit after releasing memory
self.next(self.end)
@step
def end(self):
pass
if __name__ == "__main__":
MinimalFlow()
Source: Outerbounds sandbox
@resources
tuning
@bra-fsn
@palabola
@daroczig
@bra-fsn
Infrastructure and Python veteran.
@palabola
Guardian of the front-end and Node.js tools.
@daroczig
Hack of all trades, master of NaN
.
Slides: sparecores.com/talks