Summary of this patch (leading text before the first "diff --git" header):

  * python/paddle/incubate/distributed/models/moe/__init__.py
      Re-exports GShardGate, BaseGate, SwitchGate and NaiveGate from .gate,
      MoELayer from .moe_layer, and ClipGradForMOEByGlobalNorm from
      .grad_clip, and binds ClipGradByGlobalNorm as a second name for
      ClipGradForMOEByGlobalNorm.

  * python/setup.py.in
      Adds paddle.distributed.models, paddle.distributed.models.moe,
      paddle.incubate.distributed, paddle.incubate.distributed.models,
      paddle.incubate.distributed.models.moe and
      paddle.incubate.distributed.models.moe.gate to the packages list.

NOTE(review): the line breaks and the leading whitespace of the setup.py.in
context lines were lost in the copy this was restored from; the indentation
below is assumed to align with the opening quote of "packages=['paddle',".
Confirm against the target file before applying.

diff --git a/python/paddle/incubate/distributed/models/moe/__init__.py b/python/paddle/incubate/distributed/models/moe/__init__.py
index e1663029ef1f8..fd06b4b8e5287 100644
--- a/python/paddle/incubate/distributed/models/moe/__init__.py
+++ b/python/paddle/incubate/distributed/models/moe/__init__.py
@@ -11,3 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+from .gate import GShardGate, BaseGate, SwitchGate, NaiveGate
+from .moe_layer import MoELayer
+from .grad_clip import ClipGradForMOEByGlobalNorm
+ClipGradByGlobalNorm = ClipGradForMOEByGlobalNorm
diff --git a/python/setup.py.in b/python/setup.py.in
index 0f231e34168d9..4cf8bc3fc6a2e 100755
--- a/python/setup.py.in
+++ b/python/setup.py.in
@@ -312,6 +312,8 @@ packages=['paddle',
           'paddle.distributed.auto_parallel.tuner',
           'paddle.distributed.auto_parallel.cost',
           'paddle.distributed.passes',
+          'paddle.distributed.models',
+          'paddle.distributed.models.moe',
           'paddle.framework',
           'paddle.jit',
           'paddle.jit.dy2static',
@@ -366,6 +368,10 @@ packages=['paddle',
           'paddle.incubate.nn.functional',
           'paddle.incubate.nn.layer',
           'paddle.incubate.optimizer.functional',
+          'paddle.incubate.distributed',
+          'paddle.incubate.distributed.models',
+          'paddle.incubate.distributed.models.moe',
+          'paddle.incubate.distributed.models.moe.gate',
           'paddle.io',
           'paddle.optimizer',
           'paddle.nn',