Bitorch Engine
Contents:
Installation
Build options
Full Documentation
Bitorch Engine
Index
Index
_ | A | B | C | D | E | F | G | H | I | K | L | M | N | O | P | Q | R | S | T | U | V | W | X | Y | Z
_
__getattr__() (bitorch_engine.utils.safe_import.ExtensionModulePlaceholder method)
__init__() (bitorch_engine.layers.qconv.binary.cpp.layer.BinaryConv2dCPP method)
(bitorch_engine.layers.qconv.binary.cutlass.layer.BinaryConv2dCutlass method)
(bitorch_engine.layers.qconv.binary.layer.BinaryConv2dBase method)
(bitorch_engine.layers.qconv.nbit.cutlass.layer.Q4Conv2dCutlass method)
(bitorch_engine.layers.qconv.nbit.layer.nBitConv2dBase method)
(bitorch_engine.layers.qembedding.binary.layer.BinaryEmbeddingBag method)
(bitorch_engine.layers.qembedding.binary.layer.BinaryEmbeddingCuda method)
(bitorch_engine.layers.qlinear.binary.cpp.layer.BinaryLinearCPP method)
(bitorch_engine.layers.qlinear.binary.cuda.layer.BinaryLinearCuda method)
(bitorch_engine.layers.qlinear.binary.cutlass.layer.BinaryLinearCutlass method)
(bitorch_engine.layers.qlinear.binary.cutlass.layer.BinaryMatMul method)
(bitorch_engine.layers.qlinear.binary.layer.BinaryLinearBase method)
(bitorch_engine.layers.qlinear.layer.QLinearInf method)
(bitorch_engine.layers.qlinear.nbit.cuda.mbwq_layer.MBWQLinearCuda method)
(bitorch_engine.layers.qlinear.nbit.cuda.mpq_layer.MPQLinearCuda method)
(bitorch_engine.layers.qlinear.nbit.cutlass.q4_layer.Q4LinearCutlass method)
(bitorch_engine.layers.qlinear.nbit.cutlass.q4_layer.Q4MatMul method)
(bitorch_engine.layers.qlinear.nbit.cutlass.q8_layer.Q8LinearCutlass method)
(bitorch_engine.layers.qlinear.nbit.layer.MPQLinearBase method)
(bitorch_engine.layers.qlinear.nbit.layer.MPQWeightParameter method)
(bitorch_engine.layers.qlinear.nbit.layer.nBitLinearBase method)
(bitorch_engine.layers.qlinear.nbit.mps.mpq_layer.MPQLinearMlx method)
(bitorch_engine.layers.qmha.binary.layer.BMHA method)
(bitorch_engine.layers.qmha.binary.layer.LearnableBias method)
(bitorch_engine.optim.diode_beta.DiodeMix method), [1]
(bitorch_engine.optim.galore_projector.GaLoreProjector method)
(bitorch_engine.utils.safe_import.ExtensionModulePlaceholder method), [1]
__setattr__() (bitorch_engine.utils.safe_import.ExtensionModulePlaceholder method)
_check_forward() (bitorch_engine.layers.qlinear.binary.layer.BinaryLinearBase method)
(bitorch_engine.layers.qlinear.nbit.cutlass.q4_layer.Q4LinearCutlass method)
(bitorch_engine.layers.qlinear.nbit.cutlass.q8_layer.Q8LinearCutlass method)
_name (bitorch_engine.utils.safe_import.ExtensionModulePlaceholder attribute)
A
a_bit (bitorch_engine.layers.qlinear.nbit.cuda.mpq_layer.MPQLinearCuda attribute)
(bitorch_engine.layers.qlinear.nbit.layer.MPQLinearBase attribute)
(bitorch_engine.layers.qlinear.nbit.layer.nBitLinearBase attribute)
(bitorch_engine.layers.qlinear.nbit.mps.mpq_layer.MPQLinearMlx attribute)
active_indices (bitorch_engine.layers.qembedding.binary.layer.BinaryEmbeddingParameter attribute)
ADAPTIVE (bitorch_engine.layers.qlinear.binary.cuda.bmm.BMM attribute)
ARCH_CPU (class in bitorch_engine.utils.arch_helper)
asym (bitorch_engine.layers.qlinear.nbit.layer.MPQLinearBase attribute)
(bitorch_engine.layers.qlinear.nbit.layer.MPQWeightParameter attribute)
B
backward (bitorch_engine.layers.qconv.nbit.cutlass.layer.Q4Conv2dCutlassForward attribute)
backward() (bitorch_engine.layers.qconv.binary.cutlass.layer.BinaryConv2dForward static method)
(bitorch_engine.layers.qconv.nbit.cutlass.layer.Q4Conv2dCutlassForward static method)
(bitorch_engine.layers.qembedding.binary.layer.BinaryEmbeddingBagForward static method)
(bitorch_engine.layers.qembedding.binary.layer.BinaryEmbeddingForward static method)
(bitorch_engine.layers.qlinear.binary.cuda.layer.BinaryLinearForward static method)
(bitorch_engine.layers.qlinear.binary.cutlass.layer.BinaryLinearForward static method)
(bitorch_engine.layers.qlinear.binary.cutlass.layer.BinaryMatMulFunction static method)
(bitorch_engine.layers.qlinear.nbit.cuda.mbwq_layer.MBWQLinearCudaFunction static method)
(bitorch_engine.layers.qlinear.nbit.cuda.mpq_layer.MPQLinearCudaFunction static method)
(bitorch_engine.layers.qlinear.nbit.cutlass.q4_layer.Q4LinearFunction static method)
(bitorch_engine.layers.qlinear.nbit.cutlass.q4_layer.Q4MatMulFunction static method)
(bitorch_engine.layers.qlinear.nbit.cutlass.q8_layer.Q8LinearFunction static method)
(bitorch_engine.layers.qlinear.nbit.mps.mpq_layer.MPQLinearMlxFunction static method)
betas (bitorch_engine.optim.diode_beta.DiodeMix attribute)
bias (bitorch_engine.layers.qmha.binary.layer.LearnableBias attribute)
bias_a (bitorch_engine.layers.qconv.binary.cutlass.layer.BinaryConv2dCutlass attribute)
(bitorch_engine.layers.qconv.nbit.cutlass.layer.Q4Conv2dCutlass attribute)
(bitorch_engine.layers.qlinear.binary.cuda.layer.BinaryLinearCuda attribute)
(bitorch_engine.layers.qlinear.binary.cutlass.layer.BinaryLinearCutlass attribute)
(bitorch_engine.layers.qlinear.nbit.cutlass.q4_layer.Q4LinearCutlass attribute)
(bitorch_engine.layers.qlinear.nbit.cutlass.q8_layer.Q8LinearCutlass attribute)
binary_matmul_forward_post_processing() (in module bitorch_engine.utils.model_helper)
BinaryConv2dBase (class in bitorch_engine.layers.qconv.binary.layer)
BinaryConv2dCPP (class in bitorch_engine.layers.qconv.binary.cpp.layer)
BinaryConv2dCutlass (class in bitorch_engine.layers.qconv.binary.cutlass.layer)
BinaryConv2dForward (class in bitorch_engine.layers.qconv.binary.cpp.layer)
(class in bitorch_engine.layers.qconv.binary.cutlass.layer)
BinaryConvParameter (class in bitorch_engine.layers.qconv.binary.layer)
BinaryEmbeddingBag (class in bitorch_engine.layers.qembedding.binary.layer)
BinaryEmbeddingBagForward (class in bitorch_engine.layers.qembedding.binary.layer)
BinaryEmbeddingCuda (class in bitorch_engine.layers.qembedding.binary.layer)
BinaryEmbeddingForward (class in bitorch_engine.layers.qembedding.binary.layer)
BinaryEmbeddingParameter (class in bitorch_engine.layers.qembedding.binary.layer)
BinaryLinearBase (class in bitorch_engine.layers.qlinear.binary.layer)
BinaryLinearCPP (class in bitorch_engine.layers.qlinear.binary.cpp.layer)
BinaryLinearCuda (class in bitorch_engine.layers.qlinear.binary.cuda.layer)
BinaryLinearCutlass (class in bitorch_engine.layers.qlinear.binary.cutlass.layer)
BinaryLinearForward (class in bitorch_engine.layers.qlinear.binary.cpp.layer)
(class in bitorch_engine.layers.qlinear.binary.cuda.layer)
(class in bitorch_engine.layers.qlinear.binary.cutlass.layer)
BinaryLinearImplementationMixin (class in bitorch_engine.layers.qlinear.binary.binary_implementation)
BinaryLinearParameter (class in bitorch_engine.layers.qlinear.binary.layer)
BinaryMatMul (class in bitorch_engine.layers.qlinear.binary.cutlass.layer)
BinaryMatMulFunction (class in bitorch_engine.layers.qlinear.binary.cutlass.layer)
bit_set() (in module bitorch_engine.utils.quant_operators)
bitorch_engine
module
bitorch_engine.functions
module
bitorch_engine.functions.cuda
module
bitorch_engine.functions.cuda.extension
module
bitorch_engine.functions.cuda.functions
module
bitorch_engine.layers
module
bitorch_engine.layers.qconv
module
bitorch_engine.layers.qconv.binary
module
bitorch_engine.layers.qconv.binary.cpp
module
bitorch_engine.layers.qconv.binary.cpp.extension
module
bitorch_engine.layers.qconv.binary.cpp.layer
module
bitorch_engine.layers.qconv.binary.cutlass
module
bitorch_engine.layers.qconv.binary.cutlass.extension
module
bitorch_engine.layers.qconv.binary.cutlass.layer
module
bitorch_engine.layers.qconv.binary.layer
module
bitorch_engine.layers.qconv.nbit
module
bitorch_engine.layers.qconv.nbit.cutlass
module
bitorch_engine.layers.qconv.nbit.cutlass.extension
module
bitorch_engine.layers.qconv.nbit.cutlass.layer
module
bitorch_engine.layers.qconv.nbit.layer
module
bitorch_engine.layers.qembedding
module
bitorch_engine.layers.qembedding.binary
module
bitorch_engine.layers.qembedding.binary.layer
module
bitorch_engine.layers.qlinear
module
bitorch_engine.layers.qlinear.binary
module
bitorch_engine.layers.qlinear.binary.binary_implementation
module
bitorch_engine.layers.qlinear.binary.cpp
module
bitorch_engine.layers.qlinear.binary.cpp.extension
module
bitorch_engine.layers.qlinear.binary.cpp.layer
module
bitorch_engine.layers.qlinear.binary.cuda
module
bitorch_engine.layers.qlinear.binary.cuda.bmm
module
bitorch_engine.layers.qlinear.binary.cuda.extension
module
bitorch_engine.layers.qlinear.binary.cuda.layer
module
bitorch_engine.layers.qlinear.binary.cutlass
module
bitorch_engine.layers.qlinear.binary.cutlass.extension
module
bitorch_engine.layers.qlinear.binary.cutlass.layer
module
bitorch_engine.layers.qlinear.binary.layer
module
bitorch_engine.layers.qlinear.layer
module
bitorch_engine.layers.qlinear.nbit
module
bitorch_engine.layers.qlinear.nbit.cuda
module
bitorch_engine.layers.qlinear.nbit.cuda.extension
module
bitorch_engine.layers.qlinear.nbit.cuda.mbwq_layer
module
bitorch_engine.layers.qlinear.nbit.cuda.mpq_layer
module
bitorch_engine.layers.qlinear.nbit.cuda.utils
module
bitorch_engine.layers.qlinear.nbit.cutlass
module
bitorch_engine.layers.qlinear.nbit.cutlass.extension
module
bitorch_engine.layers.qlinear.nbit.cutlass.q4_layer
module
bitorch_engine.layers.qlinear.nbit.cutlass.q8_layer
module
bitorch_engine.layers.qlinear.nbit.layer
module
bitorch_engine.layers.qlinear.nbit.mps
module
bitorch_engine.layers.qlinear.nbit.mps.extension
module
bitorch_engine.layers.qlinear.nbit.mps.mpq_layer
module
bitorch_engine.layers.qlinear.qlinear_implementation
module
bitorch_engine.layers.qmha
module
bitorch_engine.layers.qmha.binary
module
bitorch_engine.layers.qmha.binary.layer
module
bitorch_engine.optim
module
bitorch_engine.optim.diode_beta
module
bitorch_engine.optim.galore_projector
module
bitorch_engine.utils
module
bitorch_engine.utils.arch_helper
module
bitorch_engine.utils.convert
module
bitorch_engine.utils.cpp_extension
module
bitorch_engine.utils.cuda_extension
module
bitorch_engine.utils.cutlass_path
module
bitorch_engine.utils.mlx_extension
module
bitorch_engine.utils.mlx_path
module
bitorch_engine.utils.model_helper
module
bitorch_engine.utils.quant_operators
module
bitorch_engine.utils.safe_import
module
bits_binary_word (bitorch_engine.layers.qconv.binary.cpp.layer.BinaryConv2dCPP attribute)
(bitorch_engine.layers.qconv.binary.cutlass.layer.BinaryConv2dCutlass attribute)
(bitorch_engine.layers.qlinear.binary.cuda.layer.BinaryLinearCuda attribute)
(bitorch_engine.layers.qlinear.binary.cutlass.layer.BinaryLinearCutlass attribute)
(bitorch_engine.layers.qlinear.binary.layer.BinaryLinearBase attribute)
BMHA (class in bitorch_engine.layers.qmha.binary.layer)
BMM (class in bitorch_engine.layers.qlinear.binary.cuda.bmm)
bmm_type (bitorch_engine.layers.qlinear.binary.cuda.layer.BinaryLinearCuda attribute)
(bitorch_engine.layers.qlinear.binary.cuda.layer.BinaryLinearForward attribute)
BSTC32 (bitorch_engine.layers.qlinear.binary.cuda.bmm.BMM attribute)
BTC32 (bitorch_engine.layers.qlinear.binary.cuda.bmm.BMM attribute)
C
can_clone() (bitorch_engine.layers.qlinear.binary.binary_implementation.BinaryLinearImplementationMixin class method)
(bitorch_engine.layers.qlinear.binary.binary_implementation.BinaryLinearImplementationMixin method)
(bitorch_engine.layers.qlinear.qlinear_implementation.QLinearImplementationMixin class method)
check_cpu_instruction_support() (in module bitorch_engine.utils.arch_helper)
check_parameters() (bitorch_engine.layers.qlinear.nbit.cuda.mbwq_layer.MBWQLinearCuda method), [1]
(bitorch_engine.layers.qlinear.nbit.cuda.mpq_layer.MPQLinearCuda method), [1]
(bitorch_engine.layers.qlinear.nbit.layer.MPQLinearBase method), [1]
(bitorch_engine.layers.qlinear.nbit.mps.mpq_layer.MPQLinearMlx method), [1]
check_path() (in module bitorch_engine.utils.cutlass_path)
check_pytorch_version() (in module bitorch_engine.optim.diode_beta)
collect_layers() (in module bitorch_engine.utils.convert)
correct_bias (bitorch_engine.optim.diode_beta.DiodeMix attribute)
create_clone_from() (bitorch_engine.layers.qlinear.binary.cpp.layer.BinaryLinearCPP class method)
(bitorch_engine.layers.qlinear.binary.cuda.layer.BinaryLinearCuda class method)
(bitorch_engine.layers.qlinear.layer.QLinearInf class method)
ctx (bitorch_engine.layers.qlinear.binary.cuda.layer.BinaryLinearForward attribute)
D
device (bitorch_engine.layers.qlinear.binary.layer.BinaryLinearBase attribute)
(bitorch_engine.layers.qlinear.nbit.cutlass.q4_layer.Q4MatMul attribute)
(bitorch_engine.layers.qlinear.nbit.layer.nBitLinearBase attribute)
device_id (bitorch_engine.layers.qlinear.binary.cuda.layer.BinaryLinearCuda property)
DiodeMix (class in bitorch_engine.optim.diode_beta)
disable_bias (bitorch_engine.layers.qlinear.nbit.layer.MPQLinearBase attribute)
dq_group_size (bitorch_engine.layers.qlinear.nbit.layer.MPQLinearBase attribute)
dq_mode (bitorch_engine.layers.qlinear.nbit.layer.MPQLinearBase attribute)
dropout (bitorch_engine.layers.qmha.binary.layer.BMHA attribute)
dtype (bitorch_engine.layers.qlinear.binary.cutlass.layer.BinaryMatMul attribute)
(bitorch_engine.layers.qlinear.binary.layer.BinaryLinearBase attribute)
(bitorch_engine.layers.qlinear.nbit.cutlass.q4_layer.Q4MatMul attribute)
(bitorch_engine.layers.qlinear.nbit.layer.MPQLinearBase attribute)
(bitorch_engine.layers.qlinear.nbit.layer.nBitLinearBase attribute)
(bitorch_engine.layers.qmha.binary.layer.BMHA attribute)
(bitorch_engine.optim.diode_beta.DiodeMix attribute)
E
eps (bitorch_engine.layers.qconv.nbit.cutlass.layer.Q4Conv2dCutlass attribute)
(bitorch_engine.layers.qlinear.nbit.cutlass.q4_layer.Q4LinearCutlass attribute)
(bitorch_engine.layers.qlinear.nbit.cutlass.q4_layer.Q4MatMul attribute)
(bitorch_engine.layers.qlinear.nbit.cutlass.q8_layer.Q8LinearCutlass attribute)
(bitorch_engine.optim.diode_beta.DiodeMix attribute)
exl2fp_weight() (bitorch_engine.layers.qlinear.nbit.cuda.mbwq_layer.MBWQLinearCuda method)
(bitorch_engine.layers.qlinear.nbit.cuda.mbwq_layer.MBWQLinearCuda static method)
ExtensionModulePlaceholder (class in bitorch_engine.utils.safe_import)
F
find_cutlass() (in module bitorch_engine.utils.cutlass_path)
flatten_x() (in module bitorch_engine.utils.model_helper)
forward (bitorch_engine.layers.qconv.nbit.cutlass.layer.Q4Conv2dCutlassForward attribute)
forward() (bitorch_engine.layers.qconv.binary.cpp.layer.BinaryConv2dCPP method)
(bitorch_engine.layers.qconv.binary.cpp.layer.BinaryConv2dForward static method)
(bitorch_engine.layers.qconv.binary.cutlass.layer.BinaryConv2dCutlass method)
(bitorch_engine.layers.qconv.binary.cutlass.layer.BinaryConv2dForward static method)
(bitorch_engine.layers.qconv.nbit.cutlass.layer.Q4Conv2dCutlass method)
(bitorch_engine.layers.qconv.nbit.cutlass.layer.Q4Conv2dCutlassForward static method)
(bitorch_engine.layers.qembedding.binary.layer.BinaryEmbeddingBag method)
(bitorch_engine.layers.qembedding.binary.layer.BinaryEmbeddingBagForward static method)
(bitorch_engine.layers.qembedding.binary.layer.BinaryEmbeddingCuda method)
(bitorch_engine.layers.qembedding.binary.layer.BinaryEmbeddingForward static method)
(bitorch_engine.layers.qlinear.binary.cpp.layer.BinaryLinearCPP method)
(bitorch_engine.layers.qlinear.binary.cpp.layer.BinaryLinearForward static method)
(bitorch_engine.layers.qlinear.binary.cuda.layer.BinaryLinearCuda method)
(bitorch_engine.layers.qlinear.binary.cuda.layer.BinaryLinearForward static method)
(bitorch_engine.layers.qlinear.binary.cutlass.layer.BinaryLinearCutlass method), [1]
(bitorch_engine.layers.qlinear.binary.cutlass.layer.BinaryLinearForward static method)
(bitorch_engine.layers.qlinear.binary.cutlass.layer.BinaryMatMul method)
(bitorch_engine.layers.qlinear.binary.cutlass.layer.BinaryMatMulFunction static method)
(bitorch_engine.layers.qlinear.layer.QLinearInf method)
(bitorch_engine.layers.qlinear.nbit.cuda.mbwq_layer.MBWQLinearCuda method), [1]
(bitorch_engine.layers.qlinear.nbit.cuda.mbwq_layer.MBWQLinearCudaFunction static method)
(bitorch_engine.layers.qlinear.nbit.cuda.mpq_layer.MPQLinearCuda method), [1]
(bitorch_engine.layers.qlinear.nbit.cuda.mpq_layer.MPQLinearCudaFunction static method)
(bitorch_engine.layers.qlinear.nbit.cutlass.q4_layer.Q4LinearCutlass method), [1]
(bitorch_engine.layers.qlinear.nbit.cutlass.q4_layer.Q4LinearFunction static method)
(bitorch_engine.layers.qlinear.nbit.cutlass.q4_layer.Q4MatMul method)
(bitorch_engine.layers.qlinear.nbit.cutlass.q4_layer.Q4MatMulFunction static method)
(bitorch_engine.layers.qlinear.nbit.cutlass.q8_layer.Q8LinearCutlass method), [1]
(bitorch_engine.layers.qlinear.nbit.cutlass.q8_layer.Q8LinearFunction static method)
(bitorch_engine.layers.qlinear.nbit.mps.mpq_layer.MPQLinearMlx method), [1]
(bitorch_engine.layers.qlinear.nbit.mps.mpq_layer.MPQLinearMlxFunction static method)
(bitorch_engine.layers.qmha.binary.layer.BMHA method)
(bitorch_engine.layers.qmha.binary.layer.LearnableBias method)
fp32toint4() (in module bitorch_engine.functions.cuda.functions)
G
g_idx (bitorch_engine.layers.qlinear.nbit.layer.MPQWeightParameter attribute)
GaLoreProjector (class in bitorch_engine.optim.galore_projector)
gcc_version() (in module bitorch_engine.utils.cuda_extension)
gemm_kernel_id (bitorch_engine.layers.qlinear.binary.cutlass.layer.BinaryLinearCutlass attribute)
generate_quantized_weight() (bitorch_engine.layers.qconv.binary.cpp.layer.BinaryConv2dCPP method)
(bitorch_engine.layers.qconv.binary.cutlass.layer.BinaryConv2dCutlass method)
(bitorch_engine.layers.qconv.binary.layer.BinaryConv2dBase method)
(bitorch_engine.layers.qconv.nbit.cutlass.layer.Q4Conv2dCutlass method)
(bitorch_engine.layers.qconv.nbit.layer.nBitConv2dBase method)
(bitorch_engine.layers.qlinear.binary.cpp.layer.BinaryLinearCPP method)
(bitorch_engine.layers.qlinear.binary.cuda.layer.BinaryLinearCuda method)
(bitorch_engine.layers.qlinear.binary.cutlass.layer.BinaryLinearCutlass method), [1]
(bitorch_engine.layers.qlinear.binary.layer.BinaryLinearBase method), [1]
(bitorch_engine.layers.qlinear.layer.QLinearInf method)
(bitorch_engine.layers.qlinear.nbit.cutlass.q4_layer.Q4LinearCutlass method), [1]
(bitorch_engine.layers.qlinear.nbit.cutlass.q8_layer.Q8LinearCutlass method), [1]
(bitorch_engine.layers.qlinear.nbit.layer.MPQLinearBase method), [1]
(bitorch_engine.layers.qlinear.nbit.layer.nBitLinearBase method)
get_arm_model() (bitorch_engine.utils.arch_helper.linux_arch_ident static method)
get_best_binary_implementation() (in module bitorch_engine.layers.qlinear.binary)
get_binary_col() (in module bitorch_engine.utils.quant_operators)
get_binary_row() (in module bitorch_engine.utils.quant_operators)
get_cpp_extension() (in module bitorch_engine.utils.cpp_extension)
get_cuda_arch() (in module bitorch_engine.utils.cuda_extension)
get_cuda_extension() (in module bitorch_engine.utils.cuda_extension)
get_cutlass_include_path() (in module bitorch_engine.utils.cutlass_path)
get_ext() (in module bitorch_engine.functions.cuda.extension)
(in module bitorch_engine.layers.qconv.binary.cpp.extension)
(in module bitorch_engine.layers.qconv.binary.cutlass.extension)
(in module bitorch_engine.layers.qconv.nbit.cutlass.extension)
(in module bitorch_engine.layers.qlinear.binary.cpp.extension)
(in module bitorch_engine.layers.qlinear.binary.cuda.extension)
(in module bitorch_engine.layers.qlinear.binary.cutlass.extension)
(in module bitorch_engine.layers.qlinear.nbit.cuda.extension)
(in module bitorch_engine.layers.qlinear.nbit.cutlass.extension)
(in module bitorch_engine.layers.qlinear.nbit.mps.extension)
get_kwargs() (in module bitorch_engine.utils.cpp_extension)
(in module bitorch_engine.utils.cuda_extension)
get_mlx_extension() (in module bitorch_engine.utils.mlx_extension)
get_mlx_include_path() (in module bitorch_engine.utils.mlx_path)
get_mlx_lib_path() (in module bitorch_engine.utils.mlx_path)
get_mpq_config() (in module bitorch_engine.utils.convert)
gptq_style_unpacking() (in module bitorch_engine.utils.quant_operators)
gptq_style_zeros_packing() (in module bitorch_engine.utils.quant_operators)
group_size (bitorch_engine.layers.qlinear.nbit.layer.MPQLinearBase attribute)
(bitorch_engine.layers.qlinear.nbit.layer.MPQWeightParameter attribute)
H
head_dim (bitorch_engine.layers.qmha.binary.layer.BMHA attribute)
hidden_dim (bitorch_engine.layers.qmha.binary.layer.BMHA attribute)
I
import_extension() (in module bitorch_engine.utils.safe_import)
in_channels (bitorch_engine.layers.qlinear.nbit.layer.MPQLinearBase attribute)
(bitorch_engine.layers.qlinear.nbit.layer.nBitLinearBase attribute)
init_gba() (bitorch_engine.layers.qlinear.nbit.layer.MPQLinearBase method), [1]
init_gptq() (bitorch_engine.layers.qlinear.nbit.layer.MPQLinearBase method), [1]
init_weight() (bitorch_engine.layers.qembedding.binary.layer.BinaryEmbeddingCuda method)
(in module bitorch_engine.utils.model_helper)
initialize() (bitorch_engine.layers.qlinear.nbit.layer.MPQLinearBase method), [1]
input (bitorch_engine.layers.qlinear.binary.cuda.layer.BinaryLinearForward attribute)
input_dim (bitorch_engine.layers.qmha.binary.layer.BMHA attribute)
input_features (bitorch_engine.layers.qlinear.binary.layer.BinaryLinearBase attribute)
is_arm() (bitorch_engine.utils.arch_helper.linux_arch_ident static method)
is_cutlass_available() (in module bitorch_engine.utils.cutlass_path)
is_mlx_available() (in module bitorch_engine.utils.mlx_path)
is_train (bitorch_engine.layers.qlinear.binary.cuda.layer.BinaryLinearForward attribute)
K
k_linear (bitorch_engine.layers.qmha.binary.layer.BMHA attribute)
L
layer_type (bitorch_engine.layers.qlinear.nbit.layer.MPQWeightParameter attribute)
LearnableBias (class in bitorch_engine.layers.qmha.binary.layer)
linux_arch_ident (class in bitorch_engine.utils.arch_helper)
load_checkpoint() (in module bitorch_engine.utils.model_helper)
load_state_dict() (bitorch_engine.layers.qlinear.nbit.cuda.mbwq_layer.MBWQLinearCuda method), [1]
lr (bitorch_engine.optim.diode_beta.DiodeMix attribute)
M
make_group_map() (in module bitorch_engine.layers.qlinear.nbit.cuda.utils)
MBWQLinearCuda (class in bitorch_engine.layers.qlinear.nbit.cuda.mbwq_layer)
MBWQLinearCudaFunction (class in bitorch_engine.layers.qlinear.nbit.cuda.mbwq_layer)
module
bitorch_engine
bitorch_engine.functions
bitorch_engine.functions.cuda
bitorch_engine.functions.cuda.extension
bitorch_engine.functions.cuda.functions
bitorch_engine.layers
bitorch_engine.layers.qconv
bitorch_engine.layers.qconv.binary
bitorch_engine.layers.qconv.binary.cpp
bitorch_engine.layers.qconv.binary.cpp.extension
bitorch_engine.layers.qconv.binary.cpp.layer
bitorch_engine.layers.qconv.binary.cutlass
bitorch_engine.layers.qconv.binary.cutlass.extension
bitorch_engine.layers.qconv.binary.cutlass.layer
bitorch_engine.layers.qconv.binary.layer
bitorch_engine.layers.qconv.nbit
bitorch_engine.layers.qconv.nbit.cutlass
bitorch_engine.layers.qconv.nbit.cutlass.extension
bitorch_engine.layers.qconv.nbit.cutlass.layer
bitorch_engine.layers.qconv.nbit.layer
bitorch_engine.layers.qembedding
bitorch_engine.layers.qembedding.binary
bitorch_engine.layers.qembedding.binary.layer
bitorch_engine.layers.qlinear
bitorch_engine.layers.qlinear.binary
bitorch_engine.layers.qlinear.binary.binary_implementation
bitorch_engine.layers.qlinear.binary.cpp
bitorch_engine.layers.qlinear.binary.cpp.extension
bitorch_engine.layers.qlinear.binary.cpp.layer
bitorch_engine.layers.qlinear.binary.cuda
bitorch_engine.layers.qlinear.binary.cuda.bmm
bitorch_engine.layers.qlinear.binary.cuda.extension
bitorch_engine.layers.qlinear.binary.cuda.layer
bitorch_engine.layers.qlinear.binary.cutlass
bitorch_engine.layers.qlinear.binary.cutlass.extension
bitorch_engine.layers.qlinear.binary.cutlass.layer
bitorch_engine.layers.qlinear.binary.layer
bitorch_engine.layers.qlinear.layer
bitorch_engine.layers.qlinear.nbit
bitorch_engine.layers.qlinear.nbit.cuda
bitorch_engine.layers.qlinear.nbit.cuda.extension
bitorch_engine.layers.qlinear.nbit.cuda.mbwq_layer
bitorch_engine.layers.qlinear.nbit.cuda.mpq_layer
bitorch_engine.layers.qlinear.nbit.cuda.utils
bitorch_engine.layers.qlinear.nbit.cutlass
bitorch_engine.layers.qlinear.nbit.cutlass.extension
bitorch_engine.layers.qlinear.nbit.cutlass.q4_layer
bitorch_engine.layers.qlinear.nbit.cutlass.q8_layer
bitorch_engine.layers.qlinear.nbit.layer
bitorch_engine.layers.qlinear.nbit.mps
bitorch_engine.layers.qlinear.nbit.mps.extension
bitorch_engine.layers.qlinear.nbit.mps.mpq_layer
bitorch_engine.layers.qlinear.qlinear_implementation
bitorch_engine.layers.qmha
bitorch_engine.layers.qmha.binary
bitorch_engine.layers.qmha.binary.layer
bitorch_engine.optim
bitorch_engine.optim.diode_beta
bitorch_engine.optim.galore_projector
bitorch_engine.utils
bitorch_engine.utils.arch_helper
bitorch_engine.utils.convert
bitorch_engine.utils.cpp_extension
bitorch_engine.utils.cuda_extension
bitorch_engine.utils.cutlass_path
bitorch_engine.utils.mlx_extension
bitorch_engine.utils.mlx_path
bitorch_engine.utils.model_helper
bitorch_engine.utils.quant_operators
bitorch_engine.utils.safe_import
MPQLinearBase (class in bitorch_engine.layers.qlinear.nbit.layer)
MPQLinearCuda (class in bitorch_engine.layers.qlinear.nbit.cuda.mpq_layer)
MPQLinearCudaFunction (class in bitorch_engine.layers.qlinear.nbit.cuda.mpq_layer)
MPQLinearMlx (class in bitorch_engine.layers.qlinear.nbit.mps.mpq_layer)
MPQLinearMlxFunction (class in bitorch_engine.layers.qlinear.nbit.mps.mpq_layer)
MPQWeightParameter (class in bitorch_engine.layers.qlinear.nbit.layer)
N
nBitConv2dBase (class in bitorch_engine.layers.qconv.nbit.layer)
nBitConvParameter (class in bitorch_engine.layers.qconv.nbit.layer)
nBitLinearBase (class in bitorch_engine.layers.qlinear.nbit.layer)
nBitLinearParameter (class in bitorch_engine.layers.qlinear.nbit.layer)
num_heads (bitorch_engine.layers.qmha.binary.layer.BMHA attribute)
nv_tensor_quant() (in module bitorch_engine.utils.quant_operators)
O
opt_weight (bitorch_engine.layers.qconv.binary.layer.BinaryConv2dBase property)
(bitorch_engine.layers.qconv.nbit.layer.nBitConv2dBase property)
(bitorch_engine.layers.qlinear.binary.layer.BinaryLinearBase property)
(bitorch_engine.layers.qlinear.layer.QLinearInf property)
(bitorch_engine.layers.qlinear.nbit.layer.nBitLinearBase property)
opt_weight() (bitorch_engine.layers.qlinear.binary.layer.BinaryLinearBase method)
out (bitorch_engine.layers.qmha.binary.layer.BMHA attribute)
out_channels (bitorch_engine.layers.qlinear.nbit.layer.MPQLinearBase attribute)
(bitorch_engine.layers.qlinear.nbit.layer.nBitLinearBase attribute)
output_features (bitorch_engine.layers.qlinear.binary.layer.BinaryLinearBase attribute)
P
pack_bie_layers() (in module bitorch_engine.utils.model_helper)
pack_fp_weight() (in module bitorch_engine.layers.qlinear.nbit.cuda.utils)
pad_embedding_dim() (in module bitorch_engine.utils.model_helper)
pad_last_2_dims_to_multiple_of_128() (in module bitorch_engine.utils.model_helper)
params (bitorch_engine.optim.diode_beta.DiodeMix attribute)
prepare_bie_layers() (in module bitorch_engine.utils.model_helper)
prepare_params() (bitorch_engine.layers.qconv.binary.cpp.layer.BinaryConv2dCPP method)
(bitorch_engine.layers.qconv.binary.cutlass.layer.BinaryConv2dCutlass method)
(bitorch_engine.layers.qconv.binary.layer.BinaryConv2dBase method)
(bitorch_engine.layers.qconv.nbit.cutlass.layer.Q4Conv2dCutlass method)
(bitorch_engine.layers.qconv.nbit.layer.nBitConv2dBase method)
(bitorch_engine.layers.qembedding.binary.layer.BinaryEmbeddingCuda method)
(bitorch_engine.layers.qlinear.binary.cpp.layer.BinaryLinearCPP method)
(bitorch_engine.layers.qlinear.binary.cuda.layer.BinaryLinearCuda method)
(bitorch_engine.layers.qlinear.binary.cutlass.layer.BinaryLinearCutlass method), [1]
(bitorch_engine.layers.qlinear.binary.layer.BinaryLinearBase method)
(bitorch_engine.layers.qlinear.layer.QLinearInf method)
(bitorch_engine.layers.qlinear.nbit.cuda.mbwq_layer.MBWQLinearCuda method), [1]
(bitorch_engine.layers.qlinear.nbit.cuda.mpq_layer.MPQLinearCuda method), [1]
(bitorch_engine.layers.qlinear.nbit.cutlass.q4_layer.Q4LinearCutlass method), [1]
(bitorch_engine.layers.qlinear.nbit.cutlass.q8_layer.Q8LinearCutlass method), [1]
(bitorch_engine.layers.qlinear.nbit.layer.MPQLinearBase method), [1]
(bitorch_engine.layers.qlinear.nbit.layer.nBitLinearBase method)
(bitorch_engine.layers.qlinear.nbit.mps.mpq_layer.MPQLinearMlx method), [1]
privileged_grad (bitorch_engine.layers.qlinear.nbit.layer.MPQWeightParameter attribute)
Q
q42fp_weight() (bitorch_engine.layers.qlinear.nbit.cuda.mbwq_layer.MBWQLinearCuda method)
(bitorch_engine.layers.qlinear.nbit.cuda.mbwq_layer.MBWQLinearCuda static method)
q4_pack_tensor() (in module bitorch_engine.functions.cuda.functions)
q4_quantization() (in module bitorch_engine.utils.quant_operators)
q4_unpack_and_scaling_tensor() (in module bitorch_engine.functions.cuda.functions)
q4_unpack_tensor() (in module bitorch_engine.functions.cuda.functions)
Q4Conv2dCutlass (class in bitorch_engine.layers.qconv.nbit.cutlass.layer)
Q4Conv2dCutlassForward (class in bitorch_engine.layers.qconv.nbit.cutlass.layer)
Q4LinearCutlass (class in bitorch_engine.layers.qlinear.nbit.cutlass.q4_layer)
Q4LinearFunction (class in bitorch_engine.layers.qlinear.nbit.cutlass.q4_layer)
Q4MatMul (class in bitorch_engine.layers.qlinear.nbit.cutlass.q4_layer)
Q4MatMulFunction (class in bitorch_engine.layers.qlinear.nbit.cutlass.q4_layer)
q8_quantization() (in module bitorch_engine.utils.quant_operators)
Q8LinearCutlass (class in bitorch_engine.layers.qlinear.nbit.cutlass.q8_layer)
Q8LinearFunction (class in bitorch_engine.layers.qlinear.nbit.cutlass.q8_layer)
q_group_map (bitorch_engine.layers.qlinear.nbit.layer.MPQWeightParameter attribute)
q_linear (bitorch_engine.layers.qmha.binary.layer.BMHA attribute)
q_perm (bitorch_engine.layers.qlinear.nbit.layer.MPQWeightParameter attribute)
QLinearImplementationMixin (class in bitorch_engine.layers.qlinear.qlinear_implementation)
QLinearInf (class in bitorch_engine.layers.qlinear.layer)
quantize_linear_with_binary_linear_cuda() (in module bitorch_engine.utils.convert)
quantize_linear_with_mpq_linear_cuda() (in module bitorch_engine.utils.convert)
quantize_linear_with_q4_linear_cutlass() (in module bitorch_engine.utils.convert)
qweight (bitorch_engine.layers.qembedding.binary.layer.BinaryEmbeddingCuda attribute)
(bitorch_engine.layers.qlinear.binary.layer.BinaryLinearBase attribute)
(bitorch_engine.layers.qlinear.nbit.cuda.mbwq_layer.MBWQLinearCuda attribute)
(bitorch_engine.layers.qlinear.nbit.cuda.mpq_layer.MPQLinearCuda attribute)
(bitorch_engine.layers.qlinear.nbit.mps.mpq_layer.MPQLinearMlx attribute)
qweight_update_fn() (in module bitorch_engine.utils.model_helper)
R
replace_layers() (in module bitorch_engine.utils.convert)
reset_parameters() (bitorch_engine.layers.qconv.binary.layer.BinaryConv2dBase method)
(bitorch_engine.layers.qconv.nbit.layer.nBitConv2dBase method)
(bitorch_engine.layers.qembedding.binary.layer.BinaryEmbeddingBag method)
(bitorch_engine.layers.qembedding.binary.layer.BinaryEmbeddingCuda method)
(bitorch_engine.layers.qlinear.binary.layer.BinaryLinearBase method), [1]
(bitorch_engine.layers.qlinear.nbit.layer.nBitLinearBase method)
rows (bitorch_engine.layers.qlinear.nbit.cuda.mbwq_layer.MBWQLinearCuda attribute)
(bitorch_engine.layers.qlinear.nbit.layer.MPQWeightParameter attribute)
S
save_checkpoint() (in module bitorch_engine.utils.model_helper)
scale_a (bitorch_engine.layers.qconv.binary.cutlass.layer.BinaryConv2dCutlass attribute)
(bitorch_engine.layers.qconv.nbit.cutlass.layer.Q4Conv2dCutlass attribute)
(bitorch_engine.layers.qlinear.binary.cuda.layer.BinaryLinearCuda attribute)
(bitorch_engine.layers.qlinear.binary.cuda.layer.BinaryLinearForward attribute)
(bitorch_engine.layers.qlinear.binary.cutlass.layer.BinaryLinearCutlass attribute)
(bitorch_engine.layers.qlinear.nbit.cutlass.q4_layer.Q4LinearCutlass attribute)
(bitorch_engine.layers.qlinear.nbit.cutlass.q8_layer.Q8LinearCutlass attribute)
scale_w (bitorch_engine.layers.qconv.binary.cutlass.layer.BinaryConv2dCutlass attribute)
(bitorch_engine.layers.qconv.nbit.cutlass.layer.Q4Conv2dCutlass attribute)
(bitorch_engine.layers.qembedding.binary.layer.BinaryEmbeddingCuda attribute)
(bitorch_engine.layers.qlinear.binary.cuda.layer.BinaryLinearCuda attribute)
(bitorch_engine.layers.qlinear.binary.cuda.layer.BinaryLinearForward attribute)
(bitorch_engine.layers.qlinear.binary.cutlass.layer.BinaryLinearCutlass attribute)
(bitorch_engine.layers.qlinear.nbit.cutlass.q4_layer.Q4LinearCutlass attribute)
(bitorch_engine.layers.qlinear.nbit.cutlass.q8_layer.Q8LinearCutlass attribute)
scales (bitorch_engine.layers.qlinear.nbit.cuda.mbwq_layer.MBWQLinearCuda attribute)
(bitorch_engine.layers.qlinear.nbit.cuda.mpq_layer.MPQLinearCuda attribute)
(bitorch_engine.layers.qlinear.nbit.mps.mpq_layer.MPQLinearMlx attribute)
select_gemm_kernel() (bitorch_engine.layers.qlinear.binary.cutlass.layer.BinaryLinearCutlass method), [1]
set_activation() (bitorch_engine.layers.qconv.binary.cutlass.layer.BinaryConv2dCutlass method)
(bitorch_engine.layers.qconv.nbit.cutlass.layer.Q4Conv2dCutlass method)
(bitorch_engine.layers.qlinear.binary.cuda.layer.BinaryLinearCuda method)
(bitorch_engine.layers.qlinear.binary.cutlass.layer.BinaryLinearCutlass method), [1]
(bitorch_engine.layers.qlinear.nbit.cutlass.q4_layer.Q4LinearCutlass method), [1]
(bitorch_engine.layers.qlinear.nbit.cutlass.q8_layer.Q8LinearCutlass method), [1]
set_activation_scale() (bitorch_engine.layers.qlinear.binary.cutlass.layer.BinaryMatMul method)
(bitorch_engine.layers.qlinear.nbit.cutlass.q4_layer.Q4MatMul method)
set_bits_binary_word() (bitorch_engine.layers.qconv.binary.layer.BinaryConv2dBase method)
(bitorch_engine.layers.qlinear.binary.layer.BinaryLinearBase method), [1]
set_quantized_weight_data() (bitorch_engine.layers.qconv.binary.layer.BinaryConv2dBase method)
(bitorch_engine.layers.qconv.nbit.layer.nBitConv2dBase method)
(bitorch_engine.layers.qlinear.binary.layer.BinaryLinearBase method), [1]
(bitorch_engine.layers.qlinear.layer.QLinearInf method)
(bitorch_engine.layers.qlinear.nbit.layer.nBitLinearBase method)
set_qweight_data() (bitorch_engine.layers.qlinear.nbit.layer.MPQLinearBase method), [1]
set_scales() (bitorch_engine.layers.qlinear.nbit.cuda.mbwq_layer.MBWQLinearCuda method), [1]
set_weight_data() (bitorch_engine.layers.qconv.binary.cutlass.layer.BinaryConv2dCutlass method)
(bitorch_engine.layers.qconv.binary.layer.BinaryConv2dBase method)
(bitorch_engine.layers.qconv.nbit.layer.nBitConv2dBase method)
(bitorch_engine.layers.qlinear.binary.cuda.layer.BinaryLinearCuda method)
(bitorch_engine.layers.qlinear.binary.cutlass.layer.BinaryLinearCutlass method), [1]
(bitorch_engine.layers.qlinear.binary.layer.BinaryLinearBase method), [1]
(bitorch_engine.layers.qlinear.layer.QLinearInf method)
(bitorch_engine.layers.qlinear.nbit.layer.nBitLinearBase method)
set_zeros() (bitorch_engine.layers.qlinear.nbit.cuda.mbwq_layer.MBWQLinearCuda method), [1]
step() (bitorch_engine.optim.diode_beta.DiodeMix method), [1]
symmetric (bitorch_engine.layers.qlinear.binary.layer.BinaryLinearBase attribute)
T
tensor_to_packed_uint8() (in module bitorch_engine.functions.cuda.functions)
U
unflatten_x() (in module bitorch_engine.utils.model_helper)
unpack_qweight() (in module bitorch_engine.layers.qlinear.nbit.cuda.utils)
unpack_uint8_tensor() (in module bitorch_engine.functions.cuda.functions)
update() (bitorch_engine.layers.qconv.binary.layer.BinaryConvParameter static method)
(bitorch_engine.layers.qconv.nbit.layer.nBitConvParameter static method)
(bitorch_engine.layers.qembedding.binary.layer.BinaryEmbeddingParameter static method)
(bitorch_engine.layers.qlinear.binary.layer.BinaryLinearParameter static method)
(bitorch_engine.layers.qlinear.nbit.layer.MPQWeightParameter static method)
(bitorch_engine.layers.qlinear.nbit.layer.nBitLinearParameter static method)
update_zeros() (in module bitorch_engine.utils.model_helper)
use_gba_quant (bitorch_engine.layers.qlinear.nbit.layer.MPQLinearBase attribute)
use_mbw (bitorch_engine.layers.qlinear.nbit.cuda.mbwq_layer.MBWQLinearCuda attribute)
V
v_linear (bitorch_engine.layers.qmha.binary.layer.BMHA attribute)
W
w_bit (bitorch_engine.layers.qlinear.nbit.cuda.mpq_layer.MPQLinearCuda attribute)
(bitorch_engine.layers.qlinear.nbit.layer.MPQLinearBase attribute)
(bitorch_engine.layers.qlinear.nbit.layer.MPQWeightParameter attribute)
(bitorch_engine.layers.qlinear.nbit.layer.nBitLinearBase attribute)
(bitorch_engine.layers.qlinear.nbit.mps.mpq_layer.MPQLinearMlx attribute)
w_pack() (bitorch_engine.layers.qlinear.binary.cuda.layer.BinaryLinearCuda static method)
weight (bitorch_engine.layers.qembedding.binary.layer.BinaryEmbeddingCuda attribute)
(bitorch_engine.layers.qlinear.binary.cuda.layer.BinaryLinearForward attribute)
(bitorch_engine.layers.qlinear.binary.layer.BinaryLinearBase attribute)
(bitorch_engine.layers.qlinear.layer.QLinearInf property)
weight_decay (bitorch_engine.optim.diode_beta.DiodeMix attribute)
X
x_clip (bitorch_engine.layers.qlinear.binary.cutlass.layer.BinaryMatMul attribute)
(bitorch_engine.layers.qlinear.nbit.cutlass.q4_layer.Q4MatMul attribute)
Y
y_clip (bitorch_engine.layers.qlinear.binary.cutlass.layer.BinaryMatMul attribute)
(bitorch_engine.layers.qlinear.nbit.cutlass.q4_layer.Q4MatMul attribute)
Z
zeros (bitorch_engine.layers.qlinear.nbit.cuda.mbwq_layer.MBWQLinearCuda attribute)
(bitorch_engine.layers.qlinear.nbit.cuda.mpq_layer.MPQLinearCuda attribute)
(bitorch_engine.layers.qlinear.nbit.mps.mpq_layer.MPQLinearMlx attribute)